Skip to content

Commit fbec8e2

Browse files
coeuvrecopybara-github
authored andcommitted
Reduce flakiness on Windows for BwoB tests
`BuildWithoutTheBytesIntegrationTest` has been flaky on Windows. Example error leads to file system errors. ``` 1) incrementalBuild_treeArtifacts_correctlyProducesNewTree(com.google.devtools.build.lib.remote.BuildWithoutTheBytesIntegrationTest) java.io.IOException: C:/b/iahsqj7l/execroot/io_bazel/_tmp/b474bcb9956744b234827924ec01952d/remote.pid_file (Permission denied) at com.google.devtools.build.lib.windows.WindowsFileSystem.delete(WindowsFileSystem.java:60) at com.google.devtools.build.lib.vfs.Path.delete(Path.java:587) at com.google.devtools.build.lib.buildtool.util.BuildIntegrationTestCase.bestEffortDeleteTreesBelow(BuildIntegrationTestCase.java:387) at com.google.devtools.build.lib.buildtool.util.BuildIntegrationTestCase.cleanUp(BuildIntegrationTestCase.java:364) ... 26 trimmed Caused by: java.nio.file.AccessDeniedException: C:/b/iahsqj7l/execroot/io_bazel/_tmp/b474bcb9956744b234827924ec01952d/remote.pid_file at com.google.devtools.build.lib.windows.WindowsFileOperations.deletePath(WindowsFileOperations.java:279) at com.google.devtools.build.lib.windows.WindowsFileSystem.delete(WindowsFileSystem.java:56) ... 30 more 2) downloadToplevel_treeArtifacts(com.google.devtools.build.lib.remote.BuildWithoutTheBytesIntegrationTest) java.io.IOException: C:/b/iahsqj7l/execroot/io_bazel/_tmp/b474bcb9956744b234827924ec01952d/remote.work_path already exists at com.google.devtools.build.lib.remote.util.IntegrationTestUtils.ensureMkdir(IntegrationTestUtils.java:116) at com.google.devtools.build.lib.remote.util.IntegrationTestUtils.startWorker(IntegrationTestUtils.java:87) at com.google.devtools.build.lib.remote.util.IntegrationTestUtils.startWorker(IntegrationTestUtils.java:77) at com.google.devtools.build.lib.remote.BuildWithoutTheBytesIntegrationTest.setupOptions(BuildWithoutTheBytesIntegrationTest.java:49) at com.google.devtools.build.lib.buildtool.util.BuildIntegrationTestCase.createRuntimeWrapper(BuildIntegrationTestCase.java:317) at com.google.devtools.build.lib.buildtool.util.BuildIntegrationTestCase.createFilesAndMocks(BuildIntegrationTestCase.java:234) ``` This PR fixes 1) by not using pid but checking port status directly to wait for the remote worker to start up and 2) by cleaning up resources used by remote worker after each test case. Closes bazelbuild#17479. PiperOrigin-RevId: 509549826 Change-Id: Icef2be46d5f2a6025cad0cefcf766ae16b4552d8
1 parent fb056bc commit fbec8e2

File tree

2 files changed

+45
-26
lines changed

2 files changed

+45
-26
lines changed

src/test/java/com/google/devtools/build/lib/remote/BUILD

+2
Original file line numberDiff line numberDiff line change
@@ -149,6 +149,7 @@ java_test(
149149
size = "large",
150150
srcs = ["BuildWithoutTheBytesIntegrationTest.java"],
151151
shard_count = 5,
152+
tags = ["requires-network"],
152153
runtime_deps = [
153154
"//third_party/grpc-java:grpc-jar",
154155
],
@@ -173,6 +174,7 @@ java_test(
173174
java_test(
174175
name = "DiskCacheIntegrationTest",
175176
srcs = ["DiskCacheIntegrationTest.java"],
177+
tags = ["requires-network"],
176178
runtime_deps = [
177179
"//third_party/grpc-java:grpc-jar",
178180
],

src/test/java/com/google/devtools/build/lib/remote/util/IntegrationTestUtils.java

+43-26
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,14 @@
2525
import java.io.File;
2626
import java.io.IOException;
2727
import java.net.DatagramSocket;
28+
import java.net.InetSocketAddress;
2829
import java.net.ServerSocket;
2930
import java.net.SocketException;
31+
import java.nio.channels.SocketChannel;
32+
import java.nio.file.Files;
33+
import java.nio.file.Path;
34+
import java.nio.file.Paths;
35+
import java.util.Comparator;
3036
import java.util.Random;
3137

3238
/** Integration test utilities. */
@@ -73,6 +79,30 @@ public static int pickUnusedRandomPort() throws IOException, InterruptedExceptio
7379
throw new IOException("Failed to find available port");
7480
}
7581

82+
private static void waitForPortOpen(Subprocess process, int port)
83+
throws IOException, InterruptedException {
84+
var addr = new InetSocketAddress("localhost", port);
85+
var timeout = new IOException("Timed out when waiting for port to open");
86+
for (var i = 0; i < 20; ++i) {
87+
if (!process.isAlive()) {
88+
var message = new String(process.getErrorStream().readAllBytes(), UTF_8);
89+
throw new IOException("Failed to start worker: " + message);
90+
}
91+
92+
try {
93+
try (var socketChannel = SocketChannel.open()) {
94+
socketChannel.configureBlocking(/* block= */ true);
95+
socketChannel.connect(addr);
96+
}
97+
return;
98+
} catch (IOException e) {
99+
timeout.addSuppressed(e);
100+
Thread.sleep(1000);
101+
}
102+
}
103+
throw timeout;
104+
}
105+
76106
public static WorkerInstance startWorker() throws IOException, InterruptedException {
77107
return startWorker(/* useHttp= */ false);
78108
}
@@ -82,7 +112,6 @@ public static WorkerInstance startWorker(boolean useHttp)
82112
PathFragment testTmpDir = PathFragment.create(tmpDirFile().getAbsolutePath());
83113
PathFragment workPath = testTmpDir.getRelative("remote.work_path");
84114
PathFragment casPath = testTmpDir.getRelative("remote.cas_path");
85-
PathFragment pidPath = testTmpDir.getRelative("remote.pid_file");
86115
int workerPort = pickUnusedRandomPort();
87116
ensureMkdir(workPath);
88117
ensureMkdir(casPath);
@@ -94,20 +123,10 @@ public static WorkerInstance startWorker(boolean useHttp)
94123
workerPath,
95124
"--work_path=" + workPath.getSafePathString(),
96125
"--cas_path=" + casPath.getSafePathString(),
97-
(useHttp ? "--http_listen_port=" : "--listen_port=") + workerPort,
98-
"--pid_file=" + pidPath))
126+
(useHttp ? "--http_listen_port=" : "--listen_port=") + workerPort))
99127
.start();
100-
101-
File pidFile = new File(pidPath.getSafePathString());
102-
while (!pidFile.exists()) {
103-
if (!workerProcess.isAlive()) {
104-
String message = new String(workerProcess.getErrorStream().readAllBytes(), UTF_8);
105-
throw new IOException("Failed to start worker: " + message);
106-
}
107-
Thread.sleep(1);
108-
}
109-
110-
return new WorkerInstance(workerProcess, workerPort, workPath, casPath, pidPath);
128+
waitForPortOpen(workerProcess, workerPort);
129+
return new WorkerInstance(workerProcess, workerPort, workPath, casPath);
111130
}
112131

113132
private static void ensureMkdir(PathFragment path) throws IOException {
@@ -125,23 +144,25 @@ public static class WorkerInstance {
125144
private final int port;
126145
private final PathFragment workPath;
127146
private final PathFragment casPath;
128-
private final PathFragment pidPath;
129147

130148
private WorkerInstance(
131-
Subprocess process,
132-
int port,
133-
PathFragment workPath,
134-
PathFragment casPath,
135-
PathFragment pidPath) {
149+
Subprocess process, int port, PathFragment workPath, PathFragment casPath) {
136150
this.process = process;
137151
this.port = port;
138152
this.workPath = workPath;
139153
this.casPath = casPath;
140-
this.pidPath = pidPath;
141154
}
142155

143-
public void stop() {
156+
public void stop() throws IOException {
144157
process.destroyAndWait();
158+
deleteDir(workPath);
159+
deleteDir(casPath);
160+
}
161+
162+
private static void deleteDir(PathFragment path) throws IOException {
163+
try (var stream = Files.walk(Paths.get(path.getSafePathString()))) {
164+
stream.sorted(Comparator.reverseOrder()).map(Path::toFile).forEach(File::delete);
165+
}
145166
}
146167

147168
public int getPort() {
@@ -155,9 +176,5 @@ public PathFragment getWorkPath() {
155176
public PathFragment getCasPath() {
156177
return casPath;
157178
}
158-
159-
public PathFragment getPidPath() {
160-
return pidPath;
161-
}
162179
}
163180
}

0 commit comments

Comments
 (0)