glob.py 5.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176
  1. """
  2. Filename globbing utility. Mostly a copy of `glob` from Python 3.5.
  3. Changes include:
  4. * `yield from` and PEP3102 `*` removed.
  5. * `bytes` changed to `six.binary_type`.
  6. * Hidden files are not ignored.
  7. """
  8. import os
  9. import re
  10. import fnmatch
  11. from setuptools.extern.six import binary_type
  12. __all__ = ["glob", "iglob", "escape"]
  13. def glob(pathname, recursive=False):
  14. """Return a list of paths matching a pathname pattern.
  15. The pattern may contain simple shell-style wildcards a la
  16. fnmatch. However, unlike fnmatch, filenames starting with a
  17. dot are special cases that are not matched by '*' and '?'
  18. patterns.
  19. If recursive is true, the pattern '**' will match any files and
  20. zero or more directories and subdirectories.
  21. """
  22. return list(iglob(pathname, recursive=recursive))
  23. def iglob(pathname, recursive=False):
  24. """Return an iterator which yields the paths matching a pathname pattern.
  25. The pattern may contain simple shell-style wildcards a la
  26. fnmatch. However, unlike fnmatch, filenames starting with a
  27. dot are special cases that are not matched by '*' and '?'
  28. patterns.
  29. If recursive is true, the pattern '**' will match any files and
  30. zero or more directories and subdirectories.
  31. """
  32. it = _iglob(pathname, recursive)
  33. if recursive and _isrecursive(pathname):
  34. s = next(it) # skip empty string
  35. assert not s
  36. return it
  37. def _iglob(pathname, recursive):
  38. dirname, basename = os.path.split(pathname)
  39. if not has_magic(pathname):
  40. if basename:
  41. if os.path.lexists(pathname):
  42. yield pathname
  43. else:
  44. # Patterns ending with a slash should match only directories
  45. if os.path.isdir(dirname):
  46. yield pathname
  47. return
  48. if not dirname:
  49. if recursive and _isrecursive(basename):
  50. for x in glob2(dirname, basename):
  51. yield x
  52. else:
  53. for x in glob1(dirname, basename):
  54. yield x
  55. return
  56. # `os.path.split()` returns the argument itself as a dirname if it is a
  57. # drive or UNC path. Prevent an infinite recursion if a drive or UNC path
  58. # contains magic characters (i.e. r'\\?\C:').
  59. if dirname != pathname and has_magic(dirname):
  60. dirs = _iglob(dirname, recursive)
  61. else:
  62. dirs = [dirname]
  63. if has_magic(basename):
  64. if recursive and _isrecursive(basename):
  65. glob_in_dir = glob2
  66. else:
  67. glob_in_dir = glob1
  68. else:
  69. glob_in_dir = glob0
  70. for dirname in dirs:
  71. for name in glob_in_dir(dirname, basename):
  72. yield os.path.join(dirname, name)
  73. # These 2 helper functions non-recursively glob inside a literal directory.
  74. # They return a list of basenames. `glob1` accepts a pattern while `glob0`
  75. # takes a literal basename (so it only has to check for its existence).
  76. def glob1(dirname, pattern):
  77. if not dirname:
  78. if isinstance(pattern, binary_type):
  79. dirname = os.curdir.encode('ASCII')
  80. else:
  81. dirname = os.curdir
  82. try:
  83. names = os.listdir(dirname)
  84. except OSError:
  85. return []
  86. return fnmatch.filter(names, pattern)
  87. def glob0(dirname, basename):
  88. if not basename:
  89. # `os.path.split()` returns an empty basename for paths ending with a
  90. # directory separator. 'q*x/' should match only directories.
  91. if os.path.isdir(dirname):
  92. return [basename]
  93. else:
  94. if os.path.lexists(os.path.join(dirname, basename)):
  95. return [basename]
  96. return []
  97. # This helper function recursively yields relative pathnames inside a literal
  98. # directory.
  99. def glob2(dirname, pattern):
  100. assert _isrecursive(pattern)
  101. yield pattern[:0]
  102. for x in _rlistdir(dirname):
  103. yield x
  104. # Recursively yields relative pathnames inside a literal directory.
  105. def _rlistdir(dirname):
  106. if not dirname:
  107. if isinstance(dirname, binary_type):
  108. dirname = binary_type(os.curdir, 'ASCII')
  109. else:
  110. dirname = os.curdir
  111. try:
  112. names = os.listdir(dirname)
  113. except os.error:
  114. return
  115. for x in names:
  116. yield x
  117. path = os.path.join(dirname, x) if dirname else x
  118. for y in _rlistdir(path):
  119. yield os.path.join(x, y)
  120. magic_check = re.compile('([*?[])')
  121. magic_check_bytes = re.compile(b'([*?[])')
  122. def has_magic(s):
  123. if isinstance(s, binary_type):
  124. match = magic_check_bytes.search(s)
  125. else:
  126. match = magic_check.search(s)
  127. return match is not None
  128. def _isrecursive(pattern):
  129. if isinstance(pattern, binary_type):
  130. return pattern == b'**'
  131. else:
  132. return pattern == '**'
  133. def escape(pathname):
  134. """Escape all special characters.
  135. """
  136. # Escaping is done by wrapping any of "*?[" between square brackets.
  137. # Metacharacters do not work in the drive part and shouldn't be escaped.
  138. drive, pathname = os.path.splitdrive(pathname)
  139. if isinstance(pathname, binary_type):
  140. pathname = magic_check_bytes.sub(br'[\1]', pathname)
  141. else:
  142. pathname = magic_check.sub(r'[\1]', pathname)
  143. return drive + pathname