From 9199e4f668e59447e1ace76a5c512e42e0207038 Mon Sep 17 00:00:00 2001 From: Shai Avraham Date: Mon, 19 Aug 2019 11:55:55 +0300 Subject: [PATCH] Improve performance of scandir by not converting it to list When the result of the scandir function is used in the pathlib module, it is first converted into a list and then iterated over in a for loop. The conversion to a list is unnecessary, since the list is used only for the iteration in the loop, so it wastes time and memory. I got rid of the conversions to list and used the scandir iterator directly in the loop. In addition, I wrapped each use of scandir in a with statement so that its resources are closed properly. --- Lib/pathlib.py | 54 +++++++++++++++++++++++++------------------------- 1 file changed, 27 insertions(+), 27 deletions(-) diff --git a/Lib/pathlib.py b/Lib/pathlib.py index 80923c768268fc..b50d1d0920be43 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -519,21 +519,21 @@ def __init__(self, pat, child_parts): def _select_from(self, parent_path, is_dir, exists, scandir): try: cf = parent_path._flavour.casefold - entries = list(scandir(parent_path)) - for entry in entries: - entry_is_dir = False - try: - entry_is_dir = entry.is_dir() - except OSError as e: - if not _ignore_error(e): - raise - if not self.dironly or entry_is_dir: - name = entry.name - casefolded = cf(name) - if self.pat.match(casefolded): - path = parent_path._make_child_relpath(name) - for p in self.successor._select_from(path, is_dir, exists, scandir): - yield p + with scandir(parent_path) as entries: + for entry in entries: + entry_is_dir = False + try: + entry_is_dir = entry.is_dir() + except OSError as e: + if not _ignore_error(e): + raise + if not self.dironly or entry_is_dir: + name = entry.name + casefolded = cf(name) + if self.pat.match(casefolded): + path = parent_path._make_child_relpath(name) + for p in self.successor._select_from(path, is_dir, exists, scandir): + yield p except PermissionError: return @@ -547,18 +547,18 @@ def __init__(self, pat, 
child_parts): def _iterate_directories(self, parent_path, is_dir, scandir): yield parent_path try: - entries = list(scandir(parent_path)) - for entry in entries: - entry_is_dir = False - try: - entry_is_dir = entry.is_dir() - except OSError as e: - if not _ignore_error(e): - raise - if entry_is_dir and not entry.is_symlink(): - path = parent_path._make_child_relpath(entry.name) - for p in self._iterate_directories(path, is_dir, scandir): - yield p + with scandir(parent_path) as entries: + for entry in entries: + entry_is_dir = False + try: + entry_is_dir = entry.is_dir() + except OSError as e: + if not _ignore_error(e): + raise + if entry_is_dir and not entry.is_symlink(): + path = parent_path._make_child_relpath(entry.name) + for p in self._iterate_directories(path, is_dir, scandir): + yield p except PermissionError: return