Skip to content

Commit 50274a9

Browse files
[5.x] Remote: Add support for compression on gRPC cache (bazelbuild#14277)
* Add patch files for zstd-jni Partial commit for third_party/*, see bazelbuild#14203. Closes bazelbuild#14203 Signed-off-by: Yun Peng <[email protected]> * Remote: Add support for compression on gRPC cache Add support for compressed transfers from/to gRPC remote caches with flag --experimental_remote_cache_compression. Fixes bazelbuild#13344. Closes bazelbuild#14041. PiperOrigin-RevId: 409328001 Co-authored-by: Alessandro Patti <[email protected]>
1 parent 5c865ad commit 50274a9

25 files changed

+1093
-86
lines changed

BUILD

+1
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,7 @@ pkg_tar(
8181
"@com_google_protobuf//:protobuf_java",
8282
"@com_google_protobuf//:protobuf_java_util",
8383
"@com_google_protobuf//:protobuf_javalite",
84+
"@zstd-jni//:zstd-jni",
8485
],
8586
package_dir = "derived/jars",
8687
strip_prefix = "external",

WORKSPACE

+8
Original file line numberDiff line numberDiff line change
@@ -301,6 +301,14 @@ dist_http_archive(
301301
patch_cmds_win = EXPORT_WORKSPACE_IN_BUILD_FILE_WIN,
302302
)
303303

304+
dist_http_archive(
305+
name = "zstd-jni",
306+
patch_cmds = EXPORT_WORKSPACE_IN_BUILD_BAZEL_FILE,
307+
patch_cmds_win = EXPORT_WORKSPACE_IN_BUILD_BAZEL_FILE_WIN,
308+
build_file = "//third_party:zstd-jni/zstd-jni.BUILD",
309+
strip_prefix = "zstd-jni-1.5.0-4"
310+
)
311+
304312
http_archive(
305313
name = "org_snakeyaml",
306314
build_file_content = """

distdir_deps.bzl

+15
Original file line numberDiff line numberDiff line change
@@ -172,6 +172,21 @@ DIST_DEPS = {
172172
"test_WORKSPACE_files",
173173
],
174174
},
175+
"zstd-jni": {
176+
"archive": "v1.5.0-4.zip",
177+
"patch_args": ["-p1"],
178+
"patches": [
179+
"//third_party:zstd-jni/Native.java.patch",
180+
],
181+
"sha256": "d320d59b89a163c5efccbe4915ae6a49883ce653cdc670643dfa21c6063108e4",
182+
"urls": [
183+
"https://mirror.bazel.build/github.com/luben/zstd-jni/archive/v1.5.0-4.zip",
184+
"https://github.com/luben/zstd-jni/archive/v1.5.0-4.zip",
185+
],
186+
"used_in": [
187+
"additional_distfiles",
188+
],
189+
},
175190
###################################################
176191
#
177192
# Build time dependencies for testing and packaging

src/main/java/com/google/devtools/build/lib/remote/BUILD

+3
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ filegroup(
1616
"//src/main/java/com/google/devtools/build/lib/remote/merkletree:srcs",
1717
"//src/main/java/com/google/devtools/build/lib/remote/options:srcs",
1818
"//src/main/java/com/google/devtools/build/lib/remote/util:srcs",
19+
"//src/main/java/com/google/devtools/build/lib/remote/zstd:srcs",
1920
],
2021
visibility = ["//src:__subpackages__"],
2122
)
@@ -81,6 +82,7 @@ java_library(
8182
"//src/main/java/com/google/devtools/build/lib/remote/merkletree",
8283
"//src/main/java/com/google/devtools/build/lib/remote/options",
8384
"//src/main/java/com/google/devtools/build/lib/remote/util",
85+
"//src/main/java/com/google/devtools/build/lib/remote/zstd",
8486
"//src/main/java/com/google/devtools/build/lib/sandbox",
8587
"//src/main/java/com/google/devtools/build/lib/skyframe:mutable_supplier",
8688
"//src/main/java/com/google/devtools/build/lib/skyframe:tree_artifact_value",
@@ -94,6 +96,7 @@ java_library(
9496
"//src/main/java/com/google/devtools/build/lib/vfs:pathfragment",
9597
"//src/main/java/com/google/devtools/common/options",
9698
"//src/main/protobuf:failure_details_java_proto",
99+
"//third_party:apache_commons_compress",
97100
"//third_party:auth",
98101
"//third_party:caffeine",
99102
"//third_party:flogger",

src/main/java/com/google/devtools/build/lib/remote/ByteStreamUploader.java

+35-23
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515

1616
import static com.google.common.base.Preconditions.checkArgument;
1717
import static com.google.common.base.Preconditions.checkState;
18+
import static com.google.common.util.concurrent.Futures.immediateVoidFuture;
1819
import static java.lang.String.format;
1920
import static java.util.Collections.singletonMap;
2021
import static java.util.concurrent.TimeUnit.SECONDS;
@@ -298,9 +299,11 @@ boolean uploadsInProgress() {
298299
}
299300
}
300301

301-
private static String buildUploadResourceName(String instanceName, UUID uuid, Digest digest) {
302-
String resourceName =
303-
format("uploads/%s/blobs/%s/%d", uuid, digest.getHash(), digest.getSizeBytes());
302+
private static String buildUploadResourceName(
303+
String instanceName, UUID uuid, Digest digest, boolean compressed) {
304+
String template =
305+
compressed ? "uploads/%s/compressed-blobs/zstd/%s/%d" : "uploads/%s/blobs/%s/%d";
306+
String resourceName = format(template, uuid, digest.getHash(), digest.getSizeBytes());
304307
if (!Strings.isNullOrEmpty(instanceName)) {
305308
resourceName = instanceName + "/" + resourceName;
306309
}
@@ -325,7 +328,8 @@ private ListenableFuture<Void> startAsyncUpload(
325328
}
326329

327330
UUID uploadId = UUID.randomUUID();
328-
String resourceName = buildUploadResourceName(instanceName, uploadId, digest);
331+
String resourceName =
332+
buildUploadResourceName(instanceName, uploadId, digest, chunker.isCompressed());
329333
AsyncUpload newUpload =
330334
new AsyncUpload(
331335
context,
@@ -405,7 +409,20 @@ ListenableFuture<Void> start() {
405409
() ->
406410
retrier.executeAsync(
407411
() -> {
408-
if (committedOffset.get() < chunker.getSize()) {
412+
if (chunker.getSize() == 0) {
413+
return immediateVoidFuture();
414+
}
415+
try {
416+
chunker.seek(committedOffset.get());
417+
} catch (IOException e) {
418+
try {
419+
chunker.reset();
420+
} catch (IOException resetException) {
421+
e.addSuppressed(resetException);
422+
}
423+
return Futures.immediateFailedFuture(e);
424+
}
425+
if (chunker.hasNext()) {
409426
return callAndQueryOnFailure(committedOffset, progressiveBackoff);
410427
}
411428
return Futures.immediateFuture(null);
@@ -416,13 +433,19 @@ ListenableFuture<Void> start() {
416433
return Futures.transformAsync(
417434
callFuture,
418435
(result) -> {
419-
long committedSize = committedOffset.get();
420-
long expected = chunker.getSize();
421-
if (committedSize != expected) {
422-
String message =
423-
format(
424-
"write incomplete: committed_size %d for %d total", committedSize, expected);
425-
return Futures.immediateFailedFuture(new IOException(message));
436+
if (!chunker.hasNext()) {
437+
// Only check for matching committed size if we have completed the upload.
438+
// If another client did, they might have used a different compression
439+
// level/algorithm, so we cannot know the expected committed offset
440+
long committedSize = committedOffset.get();
441+
long expected = chunker.getOffset();
442+
if (!chunker.hasNext() && committedSize != expected) {
443+
String message =
444+
format(
445+
"write incomplete: committed_size %d for %d total",
446+
committedSize, expected);
447+
return Futures.immediateFailedFuture(new IOException(message));
448+
}
426449
}
427450
return Futures.immediateFuture(null);
428451
},
@@ -517,17 +540,6 @@ private ListenableFuture<Void> call(AtomicLong committedOffset) {
517540
.withDeadlineAfter(callTimeoutSecs, SECONDS);
518541
call = channel.newCall(ByteStreamGrpc.getWriteMethod(), callOptions);
519542

520-
try {
521-
chunker.seek(committedOffset.get());
522-
} catch (IOException e) {
523-
try {
524-
chunker.reset();
525-
} catch (IOException resetException) {
526-
e.addSuppressed(resetException);
527-
}
528-
return Futures.immediateFailedFuture(e);
529-
}
530-
531543
SettableFuture<Void> uploadResult = SettableFuture.create();
532544
ClientCall.Listener<WriteResponse> callListener =
533545
new ClientCall.Listener<WriteResponse>() {

src/main/java/com/google/devtools/build/lib/remote/Chunker.java

+77-30
Original file line numberDiff line numberDiff line change
@@ -16,19 +16,21 @@
1616

1717
import static com.google.common.base.Preconditions.checkNotNull;
1818
import static com.google.common.base.Preconditions.checkState;
19+
import static java.lang.Math.min;
1920

2021
import com.google.common.annotations.VisibleForTesting;
2122
import com.google.common.base.Throwables;
2223
import com.google.common.io.ByteStreams;
2324
import com.google.devtools.build.lib.actions.ActionInput;
2425
import com.google.devtools.build.lib.actions.ActionInputHelper;
2526
import com.google.devtools.build.lib.actions.cache.VirtualActionInput;
27+
import com.google.devtools.build.lib.remote.zstd.ZstdCompressingInputStream;
2628
import com.google.devtools.build.lib.vfs.Path;
2729
import com.google.protobuf.ByteString;
2830
import java.io.ByteArrayInputStream;
29-
import java.io.EOFException;
3031
import java.io.IOException;
3132
import java.io.InputStream;
33+
import java.io.PushbackInputStream;
3234
import java.util.NoSuchElementException;
3335
import java.util.Objects;
3436
import java.util.function.Supplier;
@@ -55,6 +57,10 @@ static int getDefaultChunkSize() {
5557
return defaultChunkSize;
5658
}
5759

60+
public boolean isCompressed() {
61+
return compressed;
62+
}
63+
5864
/** A piece of a byte[] blob. */
5965
public static final class Chunk {
6066

@@ -98,19 +104,22 @@ public int hashCode() {
98104
private final int chunkSize;
99105
private final Chunk emptyChunk;
100106

101-
private InputStream data;
107+
private ChunkerInputStream data;
102108
private long offset;
103109
private byte[] chunkCache;
104110

111+
private final boolean compressed;
112+
105113
// Set to true on the first call to next(). This is so that the Chunker can open its data source
106114
// lazily on the first call to next(), as opposed to opening it in the constructor or on reset().
107115
private boolean initialized;
108116

109-
Chunker(Supplier<InputStream> dataSupplier, long size, int chunkSize) {
117+
Chunker(Supplier<InputStream> dataSupplier, long size, int chunkSize, boolean compressed) {
110118
this.dataSupplier = checkNotNull(dataSupplier);
111119
this.size = size;
112120
this.chunkSize = chunkSize;
113121
this.emptyChunk = new Chunk(ByteString.EMPTY, 0);
122+
this.compressed = compressed;
114123
}
115124

116125
public long getOffset() {
@@ -127,13 +136,9 @@ public long getSize() {
127136
* <p>Closes any open resources (file handles, ...).
128137
*/
129138
public void reset() throws IOException {
130-
if (data != null) {
131-
data.close();
132-
}
133-
data = null;
139+
close();
134140
offset = 0;
135141
initialized = false;
136-
chunkCache = null;
137142
}
138143

139144
/**
@@ -148,6 +153,9 @@ public void seek(long toOffset) throws IOException {
148153
maybeInitialize();
149154
ByteStreams.skipFully(data, toOffset - offset);
150155
offset = toOffset;
156+
if (data.finished()) {
157+
close();
158+
}
151159
}
152160

153161
/**
@@ -157,6 +165,27 @@ public boolean hasNext() {
157165
return data != null || !initialized;
158166
}
159167

168+
/** Closes the input stream and reset chunk cache */
169+
private void close() throws IOException {
170+
if (data != null) {
171+
data.close();
172+
data = null;
173+
}
174+
chunkCache = null;
175+
}
176+
177+
/** Attempts reading at most a full chunk and stores it in the chunkCache buffer */
178+
private int read() throws IOException {
179+
int count = 0;
180+
while (count < chunkCache.length) {
181+
int c = data.read(chunkCache, count, chunkCache.length - count);
182+
if (c < 0) {
183+
break;
184+
}
185+
count += c;
186+
}
187+
return count;
188+
}
160189
/**
161190
* Returns the next {@link Chunk} or throws a {@link NoSuchElementException} if no data is left.
162191
*
@@ -178,46 +207,40 @@ public Chunk next() throws IOException {
178207
return emptyChunk;
179208
}
180209

181-
// The cast to int is safe, because the return value is capped at chunkSize.
182-
int bytesToRead = (int) Math.min(bytesLeft(), chunkSize);
183-
if (bytesToRead == 0) {
210+
if (data.finished()) {
184211
chunkCache = null;
185212
data = null;
186213
throw new NoSuchElementException();
187214
}
188215

189216
if (chunkCache == null) {
217+
// If the output is compressed we can't know how many bytes there are yet to read,
218+
// so we allocate the whole chunkSize, otherwise we try to compute the smallest possible value
219+
// The cast to int is safe, because the return value is capped at chunkSize.
220+
int cacheSize = compressed ? chunkSize : (int) min(getSize() - getOffset(), chunkSize);
190221
// Lazily allocate it in order to save memory on small data.
191222
// 1) bytesToRead < chunkSize: There will only ever be one next() call.
192223
// 2) bytesToRead == chunkSize: chunkCache will be set to its biggest possible value.
193224
// 3) bytestoRead > chunkSize: Not possible, due to Math.min above.
194-
chunkCache = new byte[bytesToRead];
225+
chunkCache = new byte[cacheSize];
195226
}
196227

197228
long offsetBefore = offset;
198-
try {
199-
ByteStreams.readFully(data, chunkCache, 0, bytesToRead);
200-
} catch (EOFException e) {
201-
throw new IllegalStateException("Reached EOF, but expected "
202-
+ bytesToRead + " bytes.", e);
203-
}
204-
offset += bytesToRead;
205229

206-
ByteString blob = ByteString.copyFrom(chunkCache, 0, bytesToRead);
230+
int bytesRead = read();
207231

208-
if (bytesLeft() == 0) {
209-
data.close();
210-
data = null;
211-
chunkCache = null;
232+
ByteString blob = ByteString.copyFrom(chunkCache, 0, bytesRead);
233+
234+
// This has to happen after actualSize has been updated
235+
// or the guard in getActualSize won't work.
236+
offset += bytesRead;
237+
if (data.finished()) {
238+
close();
212239
}
213240

214241
return new Chunk(blob, offsetBefore);
215242
}
216243

217-
public long bytesLeft() {
218-
return getSize() - getOffset();
219-
}
220-
221244
private void maybeInitialize() throws IOException {
222245
if (initialized) {
223246
return;
@@ -226,7 +249,10 @@ private void maybeInitialize() throws IOException {
226249
checkState(offset == 0);
227250
checkState(chunkCache == null);
228251
try {
229-
data = dataSupplier.get();
252+
data =
253+
compressed
254+
? new ChunkerInputStream(new ZstdCompressingInputStream(dataSupplier.get()))
255+
: new ChunkerInputStream(dataSupplier.get());
230256
} catch (RuntimeException e) {
231257
Throwables.propagateIfPossible(e.getCause(), IOException.class);
232258
throw e;
@@ -242,6 +268,7 @@ public static Builder builder() {
242268
public static class Builder {
243269
private int chunkSize = getDefaultChunkSize();
244270
private long size;
271+
private boolean compressed;
245272
private Supplier<InputStream> inputStream;
246273

247274
public Builder setInput(byte[] data) {
@@ -251,6 +278,11 @@ public Builder setInput(byte[] data) {
251278
return this;
252279
}
253280

281+
public Builder setCompressed(boolean compressed) {
282+
this.compressed = compressed;
283+
return this;
284+
}
285+
254286
public Builder setInput(long size, InputStream in) {
255287
checkState(inputStream == null);
256288
checkNotNull(in);
@@ -305,7 +337,22 @@ public Builder setChunkSize(int chunkSize) {
305337

306338
public Chunker build() {
307339
checkNotNull(inputStream);
308-
return new Chunker(inputStream, size, chunkSize);
340+
return new Chunker(inputStream, size, chunkSize, compressed);
341+
}
342+
}
343+
344+
static class ChunkerInputStream extends PushbackInputStream {
345+
ChunkerInputStream(InputStream in) {
346+
super(in);
347+
}
348+
349+
public boolean finished() throws IOException {
350+
int c = super.read();
351+
if (c == -1) {
352+
return true;
353+
}
354+
super.unread(c);
355+
return false;
309356
}
310357
}
311358
}

0 commit comments

Comments
 (0)