Skip to content

Commit 8b97bdf

Browse files
authoredJun 5, 2023
fix(cli): deployment gets stuck deploying stacks with shared assets (#25846)
In #25536 we introduced the new work graph orchestration for the CLI which had a bug when the same asset was shared by multiple stacks. #25719 was an attempt at fixing this bug, and while it fixed it for some cases it didn't fix all of them. The issue is that for cosmetic reasons, asset publishing steps inherit the dependencies of the stacks they are publishing for (so that the printed output of asset publishing doesn't interfere with the in-place updated progress bar of stack deployment and make it lose track of the terminal state). There were two bugs: * These extra dependencies could cause cycles (#25719 addressed direct cycles, where `Publish <--> Stack`, but not indirect cycles, where `Publish -> StackA -> StackB -> Publish`). * Publishing nodes would overwrite other nodes with the same ID, and the dependency set they would end up with would be somewhat nondeterministic, making this problem hard to isolate. Fixes #25806. ---- *By submitting this pull request, I confirm that my contribution is made under the terms of the Apache-2.0 license*
1 parent 164a6bc commit 8b97bdf

File tree

4 files changed

+165
-90
lines changed

4 files changed

+165
-90
lines changed
 

‎packages/aws-cdk/lib/cdk-toolkit.ts

+1-1
Original file line numberDiff line numberDiff line change
@@ -359,7 +359,7 @@ export class CdkToolkit {
359359
const graphConcurrency: Concurrency = {
360360
'stack': concurrency,
361361
'asset-build': 1, // This will be CPU-bound/memory bound, mostly matters for Docker builds
362-
'asset-publish': options.assetParallelism ? 8 : 1, // This will be I/O-bound, 8 in parallel seems reasonable
362+
'asset-publish': (options.assetParallelism ?? true) ? 8 : 1, // This will be I/O-bound, 8 in parallel seems reasonable
363363
};
364364

365365
await workGraph.doParallel(graphConcurrency, {

‎packages/aws-cdk/lib/util/work-graph-builder.ts

+44-35
Original file line numberDiff line numberDiff line change
@@ -50,8 +50,8 @@ export class WorkGraphBuilder {
5050
id: buildId,
5151
dependencies: new Set([
5252
...this.getDepIds(assetArtifact.dependencies),
53-
// If we disable prebuild, then assets inherit dependencies from their parent stack
54-
...!this.prebuildAssets ? this.getDepIds(parentStack.dependencies) : [],
53+
// If we disable prebuild, then assets inherit (stack) dependencies from their parent stack
54+
...!this.prebuildAssets ? this.getDepIds(onlyStacks(parentStack.dependencies)) : [],
5555
]),
5656
parentStack,
5757
assetManifestArtifact: assetArtifact,
@@ -66,27 +66,35 @@ export class WorkGraphBuilder {
6666

6767
// Always add the publish
6868
const publishNodeId = `${this.idPrefix}${asset.id}-publish`;
69-
this.graph.addNodes({
70-
type: 'asset-publish',
71-
id: publishNodeId,
72-
dependencies: new Set([
73-
buildId,
74-
// The asset publish step also depends on the stacks that the parent depends on.
75-
// This is purely cosmetic: if we don't do this, the progress printing of asset publishing
76-
// is going to interfere with the progress bar of the stack deployment. We could remove this
77-
// for overall faster deployments if we ever have a better method of progress displaying.
78-
// Note: this may introduce a cycle if one of the parent's dependencies is another stack that
79-
// depends on this asset. To workaround this we remove these cycles once all nodes have
80-
// been added to the graph.
81-
...this.getDepIds(parentStack.dependencies.filter(cxapi.CloudFormationStackArtifact.isCloudFormationStackArtifact)),
82-
]),
83-
parentStack,
84-
assetManifestArtifact: assetArtifact,
85-
assetManifest,
86-
asset,
87-
deploymentState: DeploymentState.PENDING,
88-
priority: WorkGraphBuilder.PRIORITIES['asset-publish'],
89-
});
69+
70+
const publishNode = this.graph.tryGetNode(publishNodeId);
71+
if (!publishNode) {
72+
this.graph.addNodes({
73+
type: 'asset-publish',
74+
id: publishNodeId,
75+
dependencies: new Set([
76+
buildId,
77+
]),
78+
parentStack,
79+
assetManifestArtifact: assetArtifact,
80+
assetManifest,
81+
asset,
82+
deploymentState: DeploymentState.PENDING,
83+
priority: WorkGraphBuilder.PRIORITIES['asset-publish'],
84+
});
85+
}
86+
87+
for (const inheritedDep of this.getDepIds(onlyStacks(parentStack.dependencies))) {
88+
// The asset publish step also depends on the stacks that the parent depends on.
89+
// This is purely cosmetic: if we don't do this, the progress printing of asset publishing
90+
// is going to interfere with the progress bar of the stack deployment. We could remove this
91+
// for overall faster deployments if we ever have a better method of progress displaying.
92+
// Note: this may introduce a cycle if one of the parent's dependencies is another stack that
93+
// depends on this asset. To workaround this we remove these cycles once all nodes have
94+
// been added to the graph.
95+
this.graph.addDependency(publishNodeId, inheritedDep);
96+
}
97+
9098
// This will work whether the stack node has been added yet or not
9199
this.graph.addDependency(`${this.idPrefix}${parentStack.id}`, publishNodeId);
92100
}
@@ -137,20 +145,17 @@ export class WorkGraphBuilder {
137145
return ids;
138146
}
139147

148+
/**
149+
* We may have accidentally introduced cycles in an attempt to make the messages printed to the
150+
* console not interfere with each other too much. Remove them again.
151+
*/
140152
private removeStackPublishCycles() {
141-
const stacks = this.graph.nodesOfType('stack');
142-
for (const stack of stacks) {
143-
for (const dep of stack.dependencies) {
144-
const node = this.graph.nodes[dep];
145-
146-
if (!node || node.type !== 'asset-publish' || !node.dependencies.has(stack.id)) {
147-
continue;
153+
const publishSteps = this.graph.nodesOfType('asset-publish');
154+
for (const publishStep of publishSteps) {
155+
for (const dep of publishStep.dependencies) {
156+
if (this.graph.reachable(dep, publishStep.id)) {
157+
publishStep.dependencies.delete(dep);
148158
}
149-
150-
// Delete the dependency from the asset-publish onto the stack.
151-
// The publish -> stack dependencies are purely cosmetic to prevent publish output
152-
// from interfering with the progress bar of the stack deployment.
153-
node.dependencies.delete(stack.id);
154159
}
155160
}
156161
}
@@ -166,4 +171,8 @@ function stacksFromAssets(artifacts: cxapi.CloudArtifact[]) {
166171
}
167172

168173
return ret;
174+
}
175+
176+
function onlyStacks(artifacts: cxapi.CloudArtifact[]) {
177+
return artifacts.filter(cxapi.CloudFormationStackArtifact.isCloudFormationStackArtifact);
169178
}

‎packages/aws-cdk/lib/util/work-graph.ts

+73-27
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,10 @@ export class WorkGraph {
1414

1515
public addNodes(...nodes: WorkNode[]) {
1616
for (const node of nodes) {
17+
if (this.nodes[node.id]) {
18+
throw new Error(`Duplicate use of node id: ${node.id}`);
19+
}
20+
1721
const ld = this.lazyDependencies.get(node.id);
1822
if (ld) {
1923
for (const x of ld) {
@@ -72,6 +76,10 @@ export class WorkGraph {
7276
lazyDeps.push(toId);
7377
}
7478

79+
public tryGetNode(id: string): WorkNode | undefined {
80+
return this.nodes[id];
81+
}
82+
7583
public node(id: string) {
7684
const ret = this.nodes[id];
7785
if (!ret) {
@@ -198,9 +206,24 @@ export class WorkGraph {
198206
}
199207

200208
public toString() {
201-
return Object.entries(this.nodes).map(([id, node]) =>
202-
`${id} := ${node.deploymentState} ${node.type} ${node.dependencies.size > 0 ? `(${Array.from(node.dependencies)})` : ''}`.trim(),
203-
).join(', ');
209+
return [
210+
'digraph D {',
211+
...Object.entries(this.nodes).flatMap(([id, node]) => renderNode(id, node)),
212+
'}',
213+
].join('\n');
214+
215+
function renderNode(id: string, node: WorkNode): string[] {
216+
const ret = [];
217+
if (node.deploymentState === DeploymentState.COMPLETED) {
218+
ret.push(` "${id}" [style=filled,fillcolor=yellow];`);
219+
} else {
220+
ret.push(` "${id}";`);
221+
}
222+
for (const dep of node.dependencies) {
223+
ret.push(` "${id}" -> "${dep}";`);
224+
}
225+
return ret;
226+
}
204227
}
205228

206229
/**
@@ -244,35 +267,28 @@ export class WorkGraph {
244267
}
245268

246269
private updateReadyPool() {
247-
let activeCount = 0;
248-
let pendingCount = 0;
249-
for (const node of Object.values(this.nodes)) {
250-
switch (node.deploymentState) {
251-
case DeploymentState.DEPLOYING:
252-
activeCount += 1;
253-
break;
254-
case DeploymentState.PENDING:
255-
pendingCount += 1;
256-
if (Array.from(node.dependencies).every((id) => this.node(id).deploymentState === DeploymentState.COMPLETED)) {
257-
node.deploymentState = DeploymentState.QUEUED;
258-
this.readyPool.push(node);
259-
}
260-
break;
261-
}
262-
}
270+
const activeCount = Object.values(this.nodes).filter((x) => x.deploymentState === DeploymentState.DEPLOYING).length;
271+
const pendingCount = Object.values(this.nodes).filter((x) => x.deploymentState === DeploymentState.PENDING).length;
263272

264-
for (let i = 0; i < this.readyPool.length; i++) {
265-
const node = this.readyPool[i];
266-
if (node.deploymentState !== DeploymentState.QUEUED) {
267-
this.readyPool.splice(i, 1);
268-
}
273+
const newlyReady = Object.values(this.nodes).filter((x) =>
274+
x.deploymentState === DeploymentState.PENDING &&
275+
Array.from(x.dependencies).every((id) => this.node(id).deploymentState === DeploymentState.COMPLETED));
276+
277+
// Add newly available nodes to the ready pool
278+
for (const node of newlyReady) {
279+
node.deploymentState = DeploymentState.QUEUED;
280+
this.readyPool.push(node);
269281
}
270282

283+
// Remove nodes from the ready pool that have already started deploying
284+
retainOnly(this.readyPool, (node) => node.deploymentState === DeploymentState.QUEUED);
285+
271286
// Sort by reverse priority
272287
this.readyPool.sort((a, b) => (b.priority ?? 0) - (a.priority ?? 0));
273288

274289
if (this.readyPool.length === 0 && activeCount === 0 && pendingCount > 0) {
275-
throw new Error(`Unable to make progress anymore, dependency cycle between remaining artifacts: ${this.findCycle().join(' -> ')}`);
290+
const cycle = this.findCycle() ?? ['No cycle found!'];
291+
throw new Error(`Unable to make progress anymore, dependency cycle between remaining artifacts: ${cycle.join(' -> ')}`);
276292
}
277293
}
278294

@@ -289,14 +305,14 @@ export class WorkGraph {
289305
*
290306
* Not the fastest, but effective and should be rare
291307
*/
292-
private findCycle(): string[] {
308+
public findCycle(): string[] | undefined {
293309
const seen = new Set<string>();
294310
const self = this;
295311
for (const nodeId of Object.keys(this.nodes)) {
296312
const cycle = recurse(nodeId, [nodeId]);
297313
if (cycle) { return cycle; }
298314
}
299-
return ['No cycle found!'];
315+
return undefined;
300316

301317
function recurse(nodeId: string, path: string[]): string[] | undefined {
302318
if (seen.has(nodeId)) {
@@ -319,6 +335,32 @@ export class WorkGraph {
319335
}
320336
}
321337
}
338+
339+
/**
340+
* Whether the `end` node is reachable from the `start` node, following the dependency arrows
341+
*/
342+
public reachable(start: string, end: string): boolean {
343+
const seen = new Set<string>();
344+
const self = this;
345+
return recurse(start);
346+
347+
function recurse(current: string) {
348+
if (seen.has(current)) {
349+
return false;
350+
}
351+
seen.add(current);
352+
353+
if (current === end) {
354+
return true;
355+
}
356+
for (const dep of self.nodes[current].dependencies) {
357+
if (recurse(dep)) {
358+
return true;
359+
}
360+
}
361+
return false;
362+
}
363+
}
322364
}
323365

324366
export interface WorkGraphActions {
@@ -334,3 +376,7 @@ function sum(xs: number[]) {
334376
}
335377
return ret;
336378
}
379+
380+
function retainOnly<A>(xs: A[], pred: (x: A) => boolean) {
381+
xs.splice(0, xs.length, ...xs.filter(pred));
382+
}

‎packages/aws-cdk/test/work-graph-builder.test.ts

+47-27
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,7 @@ test('dependencies on unselected artifacts are silently ignored', async () => {
109109
}));
110110
});
111111

112-
test('assets with shared contents between dependant stacks', async () => {
112+
describe('tests that use assets', () => {
113113
const files = {
114114
// Referencing an existing file on disk is important here.
115115
// It means these two assets will have the same AssetManifest
@@ -121,36 +121,56 @@ test('assets with shared contents between dependant stacks', async () => {
121121
},
122122
},
123123
};
124+
const environment = 'aws://11111/us-east-1';
124125

125-
addStack(rootBuilder, 'StackA', {
126-
environment: 'aws://11111/us-east-1',
127-
dependencies: ['StackA.assets'],
128-
});
129-
addAssets(rootBuilder, 'StackA.assets', { files });
126+
test('assets with shared contents between dependant stacks', async () => {
127+
addStack(rootBuilder, 'StackA', {
128+
environment: 'aws://11111/us-east-1',
129+
dependencies: ['StackA.assets'],
130+
});
131+
addAssets(rootBuilder, 'StackA.assets', { files });
130132

131-
addStack(rootBuilder, 'StackB', {
132-
environment: 'aws://11111/us-east-1',
133-
dependencies: ['StackB.assets', 'StackA'],
133+
addStack(rootBuilder, 'StackB', {
134+
environment: 'aws://11111/us-east-1',
135+
dependencies: ['StackB.assets', 'StackA'],
136+
});
137+
addAssets(rootBuilder, 'StackB.assets', { files });
138+
139+
const assembly = rootBuilder.buildAssembly();
140+
141+
const traversal: string[] = [];
142+
const graph = new WorkGraphBuilder(true).build(assembly.artifacts);
143+
await graph.doParallel(1, {
144+
deployStack: async (node) => { traversal.push(node.id); },
145+
buildAsset: async (node) => { traversal.push(node.id); },
146+
publishAsset: async (node) => { traversal.push(node.id); },
147+
});
148+
149+
expect(traversal).toHaveLength(4); // 1 asset build, 1 asset publish, 2 stacks
150+
expect(traversal).toEqual([
151+
'work-graph-builder.test.js:D1-build',
152+
'work-graph-builder.test.js:D1-publish',
153+
'StackA',
154+
'StackB',
155+
]);
134156
});
135-
addAssets(rootBuilder, 'StackB.assets', { files });
136157

137-
const assembly = rootBuilder.buildAssembly();
158+
test('a more complex way to make a cycle', async () => {
159+
// A -> B -> C | A and C share an asset. The asset will have a dependency on B, that is not a *direct* reverse dependency, and will cause a cycle.
160+
addStack(rootBuilder, 'StackA', { environment, dependencies: ['StackA.assets', 'StackB'] });
161+
addAssets(rootBuilder, 'StackA.assets', { files });
138162

139-
const traversal: string[] = [];
140-
const graph = new WorkGraphBuilder(true).build(assembly.artifacts);
141-
await graph.doParallel(1, {
142-
deployStack: async (node) => { traversal.push(node.id); },
143-
buildAsset: async (node) => { traversal.push(node.id); },
144-
publishAsset: async (node) => { traversal.push(node.id); },
145-
});
146-
147-
expect(traversal).toHaveLength(4); // 1 asset build, 1 asset publish, 2 stacks
148-
expect(traversal).toEqual([
149-
'work-graph-builder.test.js:D1-build',
150-
'work-graph-builder.test.js:D1-publish',
151-
'StackA',
152-
'StackB',
153-
]);
163+
addStack(rootBuilder, 'StackB', { environment, dependencies: ['StackC'] });
164+
165+
addStack(rootBuilder, 'StackC', { environment, dependencies: ['StackC.assets'] });
166+
addAssets(rootBuilder, 'StackC.assets', { files });
167+
168+
const assembly = rootBuilder.buildAssembly();
169+
const graph = new WorkGraphBuilder(true).build(assembly.artifacts);
170+
171+
// THEN
172+
expect(graph.findCycle()).toBeUndefined();
173+
});
154174
});
155175

156176
/**
@@ -230,4 +250,4 @@ function assertableNode<A extends WorkNode>(x: A) {
230250
...x,
231251
dependencies: Array.from(x.dependencies),
232252
};
233-
}
253+
}

0 commit comments

Comments
 (0)
Please sign in to comment.