Skip to content

Commit 41772d8

Browse files
committed
Merge pull request pypa#12799 from encukou/pypagh-12781-tar-hardlink
untar_file: remove common leading directory before unpacking
1 parent a432c7f commit 41772d8

File tree

4 files changed

+183
-2
lines changed

4 files changed

+183
-2
lines changed

news/12781.bugfix.rst

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Fix finding hardlink targets in tar files with an ignored top-level directory.

src/pip/_internal/utils/unpacking.py

+12-2
Original file line numberDiff line numberDiff line change
@@ -190,9 +190,19 @@ def untar_file(filename: str, location: str) -> None:
190190
else:
191191
default_mode_plus_executable = _get_default_mode_plus_executable()
192192

193+
if leading:
194+
# Strip the leading directory from all files in the archive,
195+
# including hardlink targets (which are relative to the
196+
# unpack location).
197+
for member in tar.getmembers():
198+
name_lead, name_rest = split_leading_dir(member.name)
199+
member.name = name_rest
200+
if member.islnk():
201+
lnk_lead, lnk_rest = split_leading_dir(member.linkname)
202+
if lnk_lead == name_lead:
203+
member.linkname = lnk_rest
204+
193205
def pip_filter(member: tarfile.TarInfo, path: str) -> tarfile.TarInfo:
194-
if leading:
195-
member.name = split_leading_dir(member.name)[1]
196206
orig_mode = member.mode
197207
try:
198208
try:

tests/functional/test_install.py

+129
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,11 @@
11
import hashlib
2+
import io
23
import os
34
import re
45
import ssl
56
import sys
67
import sysconfig
8+
import tarfile
79
import textwrap
810
from os.path import curdir, join, pardir
911
from pathlib import Path
@@ -2590,3 +2592,130 @@ def test_install_pip_prints_req_chain_pypi(script: PipTestEnvironment) -> None:
25902592
f"Collecting python-openid "
25912593
f"(from Paste[openid]==1.7.5.1->-r {req_path} (line 1))" in result.stdout
25922594
)
2595+
2596+
2597+
@pytest.mark.parametrize("common_prefix", ("", "linktest-1.0/"))
2598+
def test_install_sdist_links(script: PipTestEnvironment, common_prefix: str) -> None:
2599+
"""
2600+
Test installing an sdist with hard and symbolic links.
2601+
"""
2602+
2603+
# Build and unpack an sdist that contains data files:
2604+
# - root.dat
2605+
# - sub/inner.dat
2606+
# and links (symbolic and hard) to both of those, both in the top-level
2607+
# and 'sub/' directories. That's 8 links total.
2608+
2609+
# We build the sdist from in-memory data, since the filesystem
2610+
# might not support both kinds of links.
2611+
2612+
sdist_path = script.scratch_path.joinpath("linktest-1.0.tar.gz")
2613+
2614+
def add_file(tar: tarfile.TarFile, name: str, content: str) -> None:
2615+
info = tarfile.TarInfo(common_prefix + name)
2616+
content_bytes = content.encode("utf-8")
2617+
info.size = len(content_bytes)
2618+
tar.addfile(info, io.BytesIO(content_bytes))
2619+
2620+
def add_link(tar: tarfile.TarFile, name: str, linktype: str, target: str) -> None:
2621+
info = tarfile.TarInfo(common_prefix + name)
2622+
info.type = {"sym": tarfile.SYMTYPE, "hard": tarfile.LNKTYPE}[linktype]
2623+
info.linkname = target
2624+
tar.addfile(info)
2625+
2626+
with tarfile.open(sdist_path, "w:gz") as sdist_tar:
2627+
add_file(
2628+
sdist_tar,
2629+
"PKG-INFO",
2630+
textwrap.dedent(
2631+
"""
2632+
Metadata-Version: 2.1
2633+
Name: linktest
2634+
Version: 1.0
2635+
"""
2636+
),
2637+
)
2638+
2639+
add_file(sdist_tar, "src/linktest/__init__.py", "")
2640+
add_file(sdist_tar, "src/linktest/root.dat", "Data")
2641+
add_file(sdist_tar, "src/linktest/sub/__init__.py", "")
2642+
add_file(sdist_tar, "src/linktest/sub/inner.dat", "Data")
2643+
linknames = []
2644+
2645+
# Windows requires native path separators in symlink targets.
2646+
# (see https://github.com/python/cpython/issues/57911)
2647+
# (This is not needed for hardlinks, nor for the workaround tarfile
2648+
# uses if symlinking is disabled.)
2649+
SEP = os.path.sep
2650+
2651+
pkg_root = f"{common_prefix}src/linktest"
2652+
for prefix, target_tag, linktype, target in [
2653+
("", "root", "sym", "root.dat"),
2654+
("", "root", "hard", f"{pkg_root}/root.dat"),
2655+
("", "inner", "sym", f"sub{SEP}inner.dat"),
2656+
("", "inner", "hard", f"{pkg_root}/sub/inner.dat"),
2657+
("sub/", "root", "sym", f"..{SEP}root.dat"),
2658+
("sub/", "root", "hard", f"{pkg_root}/root.dat"),
2659+
("sub/", "inner", "sym", "inner.dat"),
2660+
("sub/", "inner", "hard", f"{pkg_root}/sub/inner.dat"),
2661+
]:
2662+
name = f"{prefix}link.{target_tag}.{linktype}.dat"
2663+
add_link(sdist_tar, "src/linktest/" + name, linktype, target)
2664+
linknames.append(name)
2665+
2666+
add_file(
2667+
sdist_tar,
2668+
"pyproject.toml",
2669+
textwrap.dedent(
2670+
"""
2671+
[build-system]
2672+
requires = ["setuptools"]
2673+
build-backend = "setuptools.build_meta"
2674+
[project]
2675+
name = "linktest"
2676+
version = "1.0"
2677+
[tool.setuptools]
2678+
include-package-data = true
2679+
[tool.setuptools.packages.find]
2680+
where = ["src"]
2681+
[tool.setuptools.package-data]
2682+
"*" = ["*.dat"]
2683+
"""
2684+
),
2685+
)
2686+
2687+
add_file(
2688+
sdist_tar,
2689+
"src/linktest/__main__.py",
2690+
textwrap.dedent(
2691+
f"""
2692+
from pathlib import Path
2693+
linknames = {linknames!r}
2694+
2695+
# we could use importlib.resources here once
2696+
# it has stable convenient API across supported versions
2697+
res_path = Path(__file__).parent
2698+
2699+
for name in linknames:
2700+
data_text = res_path.joinpath(name).read_text()
2701+
assert data_text == "Data"
2702+
print(str(len(linknames)) + ' files checked')
2703+
"""
2704+
),
2705+
)
2706+
2707+
# Show sdist content, for debugging the test
2708+
result = script.run("python", "-m", "tarfile", "-vl", str(sdist_path))
2709+
print(result)
2710+
2711+
# Install the package
2712+
result = script.pip("install", str(sdist_path))
2713+
print(result)
2714+
2715+
# Show installed content, for debugging the test
2716+
result = script.pip("show", "-f", "linktest")
2717+
print(result)
2718+
2719+
# Run the internal test
2720+
result = script.run("python", "-m", "linktest")
2721+
assert result.stdout.strip() == "8 files checked"

tests/unit/test_utils_unpacking.py

+41
Original file line numberDiff line numberDiff line change
@@ -197,6 +197,47 @@ def test_unpack_tar_filter(self) -> None:
197197

198198
assert "is outside the destination" in str(e.value)
199199

200+
@pytest.mark.parametrize(
201+
("input_prefix", "unpack_prefix"),
202+
[
203+
("", ""),
204+
("dir/", ""), # pip ignores a common leading directory
205+
("dir/sub/", "sub/"), # pip ignores *one* common leading directory
206+
],
207+
)
208+
def test_unpack_tar_links(self, input_prefix: str, unpack_prefix: str) -> None:
209+
"""
210+
Test unpacking a *.tar file containing hard & soft links
211+
"""
212+
test_tar = os.path.join(self.tempdir, "test_tar_links.tar")
213+
content = b"file content"
214+
with tarfile.open(test_tar, "w") as mytar:
215+
file_tarinfo = tarfile.TarInfo(input_prefix + "regular_file.txt")
216+
file_tarinfo.size = len(content)
217+
mytar.addfile(file_tarinfo, io.BytesIO(content))
218+
219+
hardlink_tarinfo = tarfile.TarInfo(input_prefix + "hardlink.txt")
220+
hardlink_tarinfo.type = tarfile.LNKTYPE
221+
hardlink_tarinfo.linkname = input_prefix + "regular_file.txt"
222+
mytar.addfile(hardlink_tarinfo)
223+
224+
symlink_tarinfo = tarfile.TarInfo(input_prefix + "symlink.txt")
225+
symlink_tarinfo.type = tarfile.SYMTYPE
226+
symlink_tarinfo.linkname = "regular_file.txt"
227+
mytar.addfile(symlink_tarinfo)
228+
229+
untar_file(test_tar, self.tempdir)
230+
231+
unpack_dir = os.path.join(self.tempdir, unpack_prefix)
232+
with open(os.path.join(unpack_dir, "regular_file.txt"), "rb") as f:
233+
assert f.read() == content
234+
235+
with open(os.path.join(unpack_dir, "hardlink.txt"), "rb") as f:
236+
assert f.read() == content
237+
238+
with open(os.path.join(unpack_dir, "symlink.txt"), "rb") as f:
239+
assert f.read() == content
240+
200241

201242
def test_unpack_tar_unicode(tmpdir: Path) -> None:
202243
test_tar = tmpdir / "test.tar"

0 commit comments

Comments
 (0)