Skip to content

Commit 755cf95

Browse files
ShreeM01 and joeljeske authored
[6.2.0] Allow remote retry max delay to be user configurable (#18061)
* Allow remote retry max delay to be user configurable. This introduces a new option, `--remote_retry_max_delay`, that can be used to change the existing maximum exponential backoff interval used when retrying remote requests. Before this change, there was a hardcoded value controlling this maximum exponential backoff interval, set to `5s`. Rationale: `remote_retries` is useful in masking over temporary disruptions to a remote cluster. If a cluster experiences temporary downtime, it is useful to allow Bazel clients to wait for a period of time for the cluster to recover before bailing and giving up. If users cannot configure the maximum exponential backoff delay, they must set a large number for `remote_retries`, with each retry eventually waiting for up to 5s. This allows the Bazel client to wait for a reasonable amount of time for the cluster to recover. The problem here is that under certain cluster failure modes, requests may not be handled and fail quickly; rather, they may wait until `remote_timeout` before failing. A large `remote_timeout` combined with a large `remote_retries` could lead to waiting for a very long time before finally bailing on a given action. If a user can bump the `remote_retry_max_delay`, they can control the retry waiting semantics to suit their own needs. Closes #16058. PiperOrigin-RevId: 523680725 Change-Id: I21daba78b91d3157362ca85bb7b1cbbef8a94bb3 * Replace RemoteDurationConverter with RemoteTimeoutConverter --------- Co-authored-by: Joel Jeske <[email protected]>
1 parent 1940dfb commit 755cf95

File tree

2 files changed

+17
-3
lines changed

2 files changed

+17
-3
lines changed

src/main/java/com/google/devtools/build/lib/remote/RemoteRetrier.java

+5-3
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@
1414

1515
package com.google.devtools.build.lib.remote;
1616

17+
import static java.lang.Math.max;
18+
1719
import com.google.common.annotations.VisibleForTesting;
1820
import com.google.common.base.Preconditions;
1921
import com.google.common.base.Throwables;
@@ -158,16 +160,16 @@ public static class ExponentialBackoff implements Backoff {
158160
Preconditions.checkArgument(jitter >= 0 && jitter <= 1, "jitter must be in the range (0, 1)");
159161
Preconditions.checkArgument(maxAttempts >= 0, "maxAttempts must be >= 0");
160162
nextDelayMillis = initial.toMillis();
161-
maxMillis = max.toMillis();
163+
maxMillis = max(max.toMillis(), nextDelayMillis);
162164
this.multiplier = multiplier;
163165
this.jitter = jitter;
164166
this.maxAttempts = maxAttempts;
165167
}
166168

167169
public ExponentialBackoff(RemoteOptions options) {
168170
this(
169-
/* initial = */ Duration.ofMillis(100),
170-
/* max = */ Duration.ofSeconds(5),
171+
/* initial= */ Duration.ofMillis(100),
172+
/* max= */ options.remoteRetryMaxDelay,
171173
/* multiplier= */ 2,
172174
/* jitter= */ 0.1,
173175
options.remoteMaxRetryAttempts);

src/main/java/com/google/devtools/build/lib/remote/options/RemoteOptions.java

+12
Original file line numberDiff line numberDiff line change
@@ -362,6 +362,18 @@ public RemoteBuildEventUploadModeConverter() {
362362
+ "If set to 0, retries are disabled.")
363363
public int remoteMaxRetryAttempts;
364364

365+
@Option(
366+
name = "remote_retry_max_delay",
367+
defaultValue = "5s",
368+
documentationCategory = OptionDocumentationCategory.REMOTE,
369+
effectTags = {OptionEffectTag.UNKNOWN},
370+
converter = RemoteTimeoutConverter.class,
371+
help =
372+
"The maximum backoff delay between remote retry attempts. Following units can be used:"
373+
+ " Days (d), hours (h), minutes (m), seconds (s), and milliseconds (ms). If"
374+
+ " the unit is omitted, the value is interpreted as seconds.")
375+
public Duration remoteRetryMaxDelay;
376+
365377
@Option(
366378
name = "disk_cache",
367379
defaultValue = "null",

0 commit comments

Comments
 (0)