Skip to content

[Python] Use raw string literals for regexes #120401

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Dec 18, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 19 additions & 19 deletions llvm/utils/extract_symbols.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,12 +53,12 @@ def nm_get_symbols(tool, lib):
# The -P flag displays the size field for symbols only when applicable,
# so the last field is optional. There's no space after the value field,
but \s+ also matches a newline, so \s+\S* will match the optional size field.
match = re.match("^(\S+)\s+[BDGRSTuVW]\s+\S+\s+\S*$", line)
match = re.match(r"^(\S+)\s+[BDGRSTuVW]\s+\S+\s+\S*$", line)
if match:
yield (match.group(1), True)
# Look for undefined symbols, which have type U and may or may not
# (depending on which nm is being used) have value and size.
match = re.match("^(\S+)\s+U\s+(\S+\s+\S*)?$", line)
match = re.match(r"^(\S+)\s+U\s+(\S+\s+\S*)?$", line)
if match:
yield (match.group(1), False)
process.wait()
Expand All @@ -71,7 +71,7 @@ def readobj_is_32bit_windows(tool, lib):
[tool, "--file-header", lib], universal_newlines=True
)
for line in output.splitlines():
match = re.match("Format: (\S+)", line)
match = re.match(r"Format: (\S+)", line)
if match:
return match.group(1) == "COFF-i386"
return False
Expand All @@ -85,7 +85,7 @@ def should_keep_microsoft_symbol(symbol, calling_convention_decoration):
if not "?" in symbol:
if calling_convention_decoration:
# Remove calling convention decoration from names
match = re.match("[_@]([^@]+)", symbol)
match = re.match(r"[_@]([^@]+)", symbol)
if match:
symbol = match.group(1)
# Discard floating point/SIMD constants.
Expand All @@ -100,10 +100,10 @@ def should_keep_microsoft_symbol(symbol, calling_convention_decoration):
# An anonymous namespace is mangled as ?A(maybe hex number)@. Any symbol
# that mentions an anonymous namespace can be discarded, as the anonymous
# namespace doesn't exist outside of that translation unit.
elif re.search("\?A(0x\w+)?@", symbol):
elif re.search(r"\?A(0x\w+)?@", symbol):
return None
# Skip X86GenMnemonicTables functions, they are not exposed from llvm/include/.
elif re.match("\?is[A-Z0-9]*@X86@llvm", symbol):
elif re.match(r"\?is[A-Z0-9]*@X86@llvm", symbol):
return None
# Keep mangled llvm:: and clang:: function symbols. How we detect these is a
# bit of a mess and imprecise, but that avoids having to completely demangle
Expand All @@ -123,7 +123,7 @@ def should_keep_microsoft_symbol(symbol, calling_convention_decoration):
# ::= .+@ (list of types)
# ::= .*Z (list of types, varargs)
# <throw-spec> ::= exceptions are not allowed
elif re.search("(llvm|clang)@@[A-Z][A-Z0-9_]*[A-JQ].+(X|.+@|.*Z)$", symbol):
elif re.search(r"(llvm|clang)@@[A-Z][A-Z0-9_]*[A-JQ].+(X|.+@|.*Z)$", symbol):
return symbol
return None

Expand All @@ -140,7 +140,7 @@ def should_keep_itanium_symbol(symbol, calling_convention_decoration):
if not symbol.startswith("_") and not symbol.startswith("."):
return symbol
# Discard manglings that aren't nested names
match = re.match("\.?_Z(T[VTIS])?(N.+)", symbol)
match = re.match(r"\.?_Z(T[VTIS])?(N.+)", symbol)
if not match:
return None
# Demangle the name. If the name is too complex then we don't need to keep
Expand Down Expand Up @@ -169,19 +169,19 @@ class TooComplexName(Exception):
# (name, rest of string) pair.
def parse_itanium_name(arg):
# Check for a normal name
match = re.match("(\d+)(.+)", arg)
match = re.match(r"(\d+)(.+)", arg)
if match:
n = int(match.group(1))
name = match.group(1) + match.group(2)[:n]
rest = match.group(2)[n:]
return name, rest
# Check for constructor/destructor names
match = re.match("([CD][123])(.+)", arg)
match = re.match(r"([CD][123])(.+)", arg)
if match:
return match.group(1), match.group(2)
# Assume that a sequence of characters that doesn't end a nesting is an
# operator (this is very imprecise, but appears to be good enough)
match = re.match("([^E]+)(.+)", arg)
match = re.match(r"([^E]+)(.+)", arg)
if match:
return match.group(1), match.group(2)
# Anything else: we can't handle it
Expand All @@ -196,13 +196,13 @@ def skip_itanium_template(arg):
tmp = arg[1:]
while tmp:
# Check for names
match = re.match("(\d+)(.+)", tmp)
match = re.match(r"(\d+)(.+)", tmp)
if match:
n = int(match.group(1))
tmp = match.group(2)[n:]
continue
# Check for substitutions
match = re.match("S[A-Z0-9]*_(.+)", tmp)
match = re.match(r"S[A-Z0-9]*_(.+)", tmp)
if match:
tmp = match.group(1)
# Start of a template
Expand Down Expand Up @@ -231,14 +231,14 @@ def parse_itanium_nested_name(arg):
ret = []

# Skip past the N, and possibly a substitution
match = re.match("NS[A-Z0-9]*_(.+)", arg)
match = re.match(r"NS[A-Z0-9]*_(.+)", arg)
if match:
tmp = match.group(1)
else:
tmp = arg[1:]

# Skip past CV-qualifiers and ref qualifiers
match = re.match("[rVKRO]*(.+)", tmp)
match = re.match(r"[rVKRO]*(.+)", tmp)
if match:
tmp = match.group(1)

Expand Down Expand Up @@ -280,19 +280,19 @@ def parse_microsoft_mangling(arg):
if arg.startswith("@"):
return components
# Check for a simple name
match = re.match("(\w+)@(.+)", arg)
match = re.match(r"(\w+)@(.+)", arg)
if match:
components.append((match.group(1), False))
arg = match.group(2)
continue
# Check for a special function name
match = re.match("(\?_?\w)(.+)", arg)
match = re.match(r"(\?_?\w)(.+)", arg)
if match:
components.append((match.group(1), False))
arg = match.group(2)
continue
# Check for a template name
match = re.match("\?\$(\w+)@[^@]+@(.+)", arg)
match = re.match(r"\?\$(\w+)@[^@]+@(.+)", arg)
if match:
components.append((match.group(1), True))
arg = match.group(2)
Expand Down Expand Up @@ -323,7 +323,7 @@ def get_template_name(sym, mangling):
if mangling == "microsoft":
names = parse_microsoft_mangling(sym)
else:
match = re.match("\.?_Z(T[VTIS])?(N.+)", sym)
match = re.match(r"\.?_Z(T[VTIS])?(N.+)", sym)
if match:
names, _ = parse_itanium_nested_name(match.group(2))
else:
Expand Down
Loading