Skip to content

Commit 5e7cc16

Browse files
Fix parallel pip cache downloads causing crash (#12364)
Co-authored-by: Itamar Turner-Trauring <[email protected]>
1 parent 8a0f77c commit 5e7cc16

File tree

3 files changed

+36
-4
lines changed

3 files changed

+36
-4
lines changed

news/12361.bugfix.rst

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Fix bug where installing the same package at the same time with multiple pip processes could fail.

src/pip/_internal/network/cache.py

+24-4
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,18 @@ class SafeFileCache(SeparateBodyBaseCache):
3333
"""
3434
A file based cache which is safe to use even when the target directory may
3535
not be accessible or writable.
36+
37+
There is a race condition when two processes try to write and/or read the
38+
same entry at the same time, since each entry consists of two separate
39+
files (https://github.com/psf/cachecontrol/issues/324). We therefore have
40+
additional logic that makes sure that both files to be present before
41+
returning an entry; this fixes the read side of the race condition.
42+
43+
For the write side, we assume that the server will only ever return the
44+
same data for the same URL, which ought to be the case for files pip is
45+
downloading. PyPI does not have a mechanism to swap out a wheel for
46+
another wheel, for example. If this assumption is not true, the
47+
CacheControl issue will need to be fixed.
3648
"""
3749

3850
def __init__(self, directory: str) -> None:
@@ -49,9 +61,13 @@ def _get_cache_path(self, name: str) -> str:
4961
return os.path.join(self.directory, *parts)
5062

5163
def get(self, key: str) -> Optional[bytes]:
52-
path = self._get_cache_path(key)
64+
# The cache entry is only valid if both metadata and body exist.
65+
metadata_path = self._get_cache_path(key)
66+
body_path = metadata_path + ".body"
67+
if not (os.path.exists(metadata_path) and os.path.exists(body_path)):
68+
return None
5369
with suppressed_cache_errors():
54-
with open(path, "rb") as f:
70+
with open(metadata_path, "rb") as f:
5571
return f.read()
5672

5773
def _write(self, path: str, data: bytes) -> None:
@@ -77,9 +93,13 @@ def delete(self, key: str) -> None:
7793
os.remove(path + ".body")
7894

7995
def get_body(self, key: str) -> Optional[BinaryIO]:
80-
path = self._get_cache_path(key) + ".body"
96+
# The cache entry is only valid if both metadata and body exist.
97+
metadata_path = self._get_cache_path(key)
98+
body_path = metadata_path + ".body"
99+
if not (os.path.exists(metadata_path) and os.path.exists(body_path)):
100+
return None
81101
with suppressed_cache_errors():
82-
return open(path, "rb")
102+
return open(body_path, "rb")
83103

84104
def set_body(self, key: str, body: bytes) -> None:
85105
path = self._get_cache_path(key) + ".body"

tests/unit/test_network_cache.py

+11
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,11 @@ def test_cache_roundtrip(self, cache_tmpdir: Path) -> None:
2727
cache = SafeFileCache(os.fspath(cache_tmpdir))
2828
assert cache.get("test key") is None
2929
cache.set("test key", b"a test string")
30+
# Body hasn't been stored yet, so the entry isn't valid yet
31+
assert cache.get("test key") is None
32+
33+
# With a body, the cache entry is valid:
34+
cache.set_body("test key", b"body")
3035
assert cache.get("test key") == b"a test string"
3136
cache.delete("test key")
3237
assert cache.get("test key") is None
@@ -35,6 +40,12 @@ def test_cache_roundtrip_body(self, cache_tmpdir: Path) -> None:
3540
cache = SafeFileCache(os.fspath(cache_tmpdir))
3641
assert cache.get_body("test key") is None
3742
cache.set_body("test key", b"a test string")
43+
# Metadata isn't available, so the entry isn't valid yet (this
44+
# shouldn't happen, but just in case)
45+
assert cache.get_body("test key") is None
46+
47+
# With metadata, the cache entry is valid:
48+
cache.set("test key", b"metadata")
3849
body = cache.get_body("test key")
3950
assert body is not None
4051
with body:

0 commit comments

Comments
 (0)