(original) (raw)

diff -r 031fc0231f3d Doc/library/tarfile.rst --- a/Doc/library/tarfile.rst Thu Jan 15 22:53:21 2015 +0100 +++ b/Doc/library/tarfile.rst Wed Jan 21 09:34:05 2015 +0100 @@ -346,7 +346,7 @@ available. -.. method:: TarFile.extractall(path=".", members=None) +.. method:: TarFile.extractall(path=".", members=None, numeric_owner=False) Extract all members from the archive to the current working directory or directory *path*. If optional *members* is given, it must be a subset of the @@ -356,6 +356,9 @@ reset each time a file is created in it. And, if a directory's permissions do not allow writing, extracting files to it will fail. + If *numeric_owner* is true, only the (uid, gid) numbers are used to + set the owner/group for the extracted files. + .. warning:: Never extract archives from untrusted sources without prior inspection. @@ -364,13 +367,14 @@ dots ``".."``. -.. method:: TarFile.extract(member, path="", set_attrs=True) +.. method:: TarFile.extract(member, path="", set_attrs=True, numeric_owner=False) Extract a member from the archive to the current working directory, using its full name. Its file information is extracted as accurately as possible. *member* may be a filename or a :class:`TarInfo` object. You can specify a different directory using *path*. File attributes (owner, mtime, mode) are set unless - *set_attrs* is false. + *set_attrs* is false. If *numeric_owner* is true, only the (uid, gid) + numbers are used to set the owner/group for the extracted files. .. note:: diff -r 031fc0231f3d Lib/tarfile.py --- a/Lib/tarfile.py Thu Jan 15 22:53:21 2015 +0100 +++ b/Lib/tarfile.py Wed Jan 21 09:34:05 2015 +0100 @@ -1961,12 +1961,13 @@ self.members.append(tarinfo) - def extractall(self, path=".", members=None): + def extractall(self, path=".", members=None, numeric_owner=False): """Extract all members from the archive to the current working directory and set owner, modification time and permissions on directories afterwards. 
`path' specifies a different directory to extract to. `members' is optional and must be a subset of the - list returned by getmembers(). + list returned by getmembers(). If `numeric_owner' is True, only + the numeric uid/gid are used to set the owner, not the names. """ directories = [] @@ -1980,7 +1981,8 @@ tarinfo = copy.copy(tarinfo) tarinfo.mode = 0o700 # Do not set_attrs directories, as we will do that further down - self.extract(tarinfo, path, set_attrs=not tarinfo.isdir()) + self.extract(tarinfo, path, set_attrs=not tarinfo.isdir(), + numeric_owner=numeric_owner) # Reverse sort directories. directories.sort(key=lambda a: a.name) @@ -1999,12 +2001,14 @@ else: self._dbg(1, "tarfile: %s" % e) - def extract(self, member, path="", set_attrs=True): + def extract(self, member, path="", set_attrs=True, numeric_owner=False): """Extract a member from the archive to the current working directory, using its full name. Its file information is extracted as accurately as possible. `member' may be a filename or a TarInfo object. You can specify a different directory using `path'. File attributes (owner, - mtime, mode) are set unless `set_attrs' is False. + mtime, mode) are set unless `set_attrs' is False. If `numeric_owner' + is True, only the numeric uid/gid are used to set the owner, not + the user/group names. """ self._check("r") @@ -2019,7 +2023,8 @@ try: self._extract_member(tarinfo, os.path.join(path, tarinfo.name), - set_attrs=set_attrs) + set_attrs=set_attrs, + numeric_owner=numeric_owner) except OSError as e: if self.errorlevel > 0: raise @@ -2065,7 +2070,8 @@ # blkdev, etc.), return None instead of a file object. return None - def _extract_member(self, tarinfo, targetpath, set_attrs=True): + def _extract_member(self, tarinfo, targetpath, set_attrs=True, + numeric_owner=False): """Extract the TarInfo object tarinfo to a physical file called targetpath. 
""" @@ -2103,7 +2109,7 @@ self.makefile(tarinfo, targetpath) if set_attrs: - self.chown(tarinfo, targetpath) + self.chown(tarinfo, targetpath, numeric_owner) if not tarinfo.issym(): self.chmod(tarinfo, targetpath) self.utime(tarinfo, targetpath) @@ -2192,19 +2198,23 @@ except KeyError: raise ExtractError("unable to resolve link inside archive") - def chown(self, tarinfo, targetpath): + def chown(self, tarinfo, targetpath, numeric_owner=False): """Set owner of targetpath according to tarinfo. """ if pwd and hasattr(os, "geteuid") and os.geteuid() == 0: # We have to be root to do so. - try: - g = grp.getgrnam(tarinfo.gname)[2] - except KeyError: + if numeric_owner: g = tarinfo.gid - try: - u = pwd.getpwnam(tarinfo.uname)[2] - except KeyError: u = tarinfo.uid + else: + try: + g = grp.getgrnam(tarinfo.gname)[2] + except KeyError: + g = tarinfo.gid + try: + u = pwd.getpwnam(tarinfo.uname)[2] + except KeyError: + u = tarinfo.uid try: if tarinfo.issym() and hasattr(os, "lchown"): os.lchown(targetpath, u, g) diff -r 031fc0231f3d Lib/test/test_tarfile.py --- a/Lib/test/test_tarfile.py Thu Jan 15 22:53:21 2015 +0100 +++ b/Lib/test/test_tarfile.py Wed Jan 21 09:34:05 2015 +0100 @@ -4,6 +4,7 @@ from hashlib import md5 import unittest +import unittest.mock import tarfile from test import support, script_helper @@ -2167,6 +2168,65 @@ self._test_partial_input("r:bz2") +def root_is_uid_gid_0(): + try: + import pwd, grp + except ImportError: + return False + if pwd.getpwuid(0)[0] != "root": + return False + if grp.getgrgid(0)[0] != "root": + return False + return True + + +@unittest.skipUnless(root_is_uid_gid_0(), "uid=0,gid=0 must be named 'root'") +class NumericOwnerTest(unittest.TestCase): + + def _make_test_archive(self, member_name): + fobj = io.BytesIO(b"content") + with tarfile.open(tmpname, "w") as w: + t = tarfile.TarInfo(member_name) + t.uid = t.gid = 99 + t.uname = "root" + t.gname = "root" + w.addfile(t, fobj) + return tmpname + + @unittest.mock.patch("os.chown") + 
@unittest.mock.patch("os.geteuid") + def test_extract_with_numeric_owner(self, mock_geteuid, mock_chown): + mock_geteuid.return_value = 0 + fname = "numeric-owner-testfile" + testtar = self._make_test_archive(fname) + with tarfile.open(testtar) as r: + r.extract(fname, TEMPDIR, numeric_owner=True) + targetpath = os.path.join(TEMPDIR, fname) + mock_chown.assert_called_with(targetpath, 99, 99) + + @unittest.mock.patch("os.chown") + @unittest.mock.patch("os.geteuid") + def test_extractall_with_numeric_owner(self, mock_geteuid, mock_chown): + mock_geteuid.return_value = 0 + fname = "numeric-owner-testfile" + testtar = self._make_test_archive(fname) + with tarfile.open(testtar) as r: + r.extractall(TEMPDIR, numeric_owner=True) + targetpath = os.path.join(TEMPDIR, fname) + mock_chown.assert_called_with(targetpath, 99, 99) + + @unittest.mock.patch("os.chown") + @unittest.mock.patch("os.geteuid") + def test_extract_without_numeric_owner(self, mock_geteuid, mock_chown): + mock_geteuid.return_value = 0 + fname = "numeric-owner-testfile" + testtar = self._make_test_archive(fname) + with tarfile.open(testtar) as r: + r.extract(fname, TEMPDIR, numeric_owner=False) + targetpath = os.path.join(TEMPDIR, fname) + mock_chown.assert_called_with(targetpath, 0, 0) + + def setUpModule(): support.unlink(TEMPDIR) os.makedirs(TEMPDIR)