366 lines
8.6 KiB
Python
Raw Normal View History

2025-02-27 17:59:18 +08:00
"""
Process raw qstr file and output qstr data with length, hash and data bytes.
This script works with Python 2.6, 2.7, 3.3 and 3.4.
"""
from __future__ import print_function
import re
import sys
# Python 2/3 compatibility:
# - iterating through bytes is different
# - codepoint2name lives in a different module
import platform
if platform.python_version_tuple()[0] == "2":
bytes_cons = lambda val, enc=None: bytearray(val)
from htmlentitydefs import codepoint2name
elif platform.python_version_tuple()[0] == "3":
bytes_cons = bytes
from html.entities import codepoint2name
# end compatibility code
codepoint2name[ord("-")] = "hyphen"
# add some custom names to map characters that aren't in HTML
codepoint2name[ord(" ")] = "space"
codepoint2name[ord("'")] = "squot"
codepoint2name[ord(",")] = "comma"
codepoint2name[ord(".")] = "dot"
codepoint2name[ord(":")] = "colon"
codepoint2name[ord(";")] = "semicolon"
codepoint2name[ord("/")] = "slash"
codepoint2name[ord("%")] = "percent"
codepoint2name[ord("#")] = "hash"
codepoint2name[ord("(")] = "paren_open"
codepoint2name[ord(")")] = "paren_close"
codepoint2name[ord("[")] = "bracket_open"
codepoint2name[ord("]")] = "bracket_close"
codepoint2name[ord("{")] = "brace_open"
codepoint2name[ord("}")] = "brace_close"
codepoint2name[ord("*")] = "star"
codepoint2name[ord("!")] = "bang"
codepoint2name[ord("\\")] = "backslash"
codepoint2name[ord("+")] = "plus"
codepoint2name[ord("$")] = "dollar"
codepoint2name[ord("=")] = "equals"
codepoint2name[ord("?")] = "question"
codepoint2name[ord("@")] = "at_sign"
codepoint2name[ord("^")] = "caret"
codepoint2name[ord("|")] = "pipe"
codepoint2name[ord("~")] = "tilde"
# static qstrs, should be sorted
static_qstr_list = [
"",
"__dir__", # Put __dir__ after empty qstr for builtin dir() to work
"\n",
" ",
"*",
"/",
"<module>",
"_",
"__call__",
"__class__",
"__delitem__",
"__enter__",
"__exit__",
"__getattr__",
"__getitem__",
"__hash__",
"__init__",
"__int__",
"__iter__",
"__len__",
"__main__",
"__module__",
"__name__",
"__new__",
"__next__",
"__qualname__",
"__repr__",
"__setitem__",
"__str__",
"ArithmeticError",
"AssertionError",
"AttributeError",
"BaseException",
"EOFError",
"Ellipsis",
"Exception",
"GeneratorExit",
"ImportError",
"IndentationError",
"IndexError",
"KeyError",
"KeyboardInterrupt",
"LookupError",
"MemoryError",
"NameError",
"NoneType",
"NotImplementedError",
"OSError",
"OverflowError",
"RuntimeError",
"StopIteration",
"SyntaxError",
"SystemExit",
"TypeError",
"ValueError",
"ZeroDivisionError",
"abs",
"all",
"any",
"append",
"args",
"bool",
"builtins",
"bytearray",
"bytecode",
"bytes",
"callable",
"chr",
"classmethod",
"clear",
"close",
"const",
"copy",
"count",
"dict",
"dir",
"divmod",
"end",
"endswith",
"eval",
"exec",
"extend",
"find",
"format",
"from_bytes",
"get",
"getattr",
"globals",
"hasattr",
"hash",
"id",
"index",
"insert",
"int",
"isalpha",
"isdigit",
"isinstance",
"islower",
"isspace",
"issubclass",
"isupper",
"items",
"iter",
"join",
"key",
"keys",
"len",
"list",
"little",
"locals",
"lower",
"lstrip",
"main",
"map",
"micropython",
"next",
"object",
"open",
"ord",
"pop",
"popitem",
"pow",
"print",
"range",
"read",
"readinto",
"readline",
"remove",
"replace",
"repr",
"reverse",
"rfind",
"rindex",
"round",
"rsplit",
"rstrip",
"self",
"send",
"sep",
"set",
"setattr",
"setdefault",
"sort",
"sorted",
"split",
"start",
"startswith",
"staticmethod",
"step",
"stop",
"str",
"strip",
"sum",
"super",
"throw",
"to_bytes",
"tuple",
"type",
"update",
"upper",
"utf-8",
"value",
"values",
"write",
"zip",
]
# this must match the equivalent function in qstr.c
def compute_hash(qstr, bytes_hash):
hash = 5381
for b in qstr:
hash = (hash * 33) ^ b
# Make sure that valid hash is never zero, zero means "hash not computed"
return (hash & ((1 << (8 * bytes_hash)) - 1)) or 1
def qstr_escape(qst):
def esc_char(m):
c = ord(m.group(0))
try:
name = codepoint2name[c]
except KeyError:
name = "0x%02x" % c
return "_" + name + "_"
return re.sub(r"[^A-Za-z0-9_]", esc_char, qst)
def parse_input_headers(infiles):
qcfgs = {}
qstrs = {}
# add static qstrs
for qstr in static_qstr_list:
# work out the corresponding qstr name
ident = qstr_escape(qstr)
# don't add duplicates
assert ident not in qstrs
# add the qstr to the list, with order number to retain original order in file
order = len(qstrs) - 300000
qstrs[ident] = (order, ident, qstr)
# read the qstrs in from the input files
for infile in infiles:
with open(infile, "rt") as f:
for line in f:
line = line.strip()
# is this a config line?
match = re.match(r"^QCFG\((.+), (.+)\)", line)
if match:
value = match.group(2)
if value[0] == "(" and value[-1] == ")":
# strip parenthesis from config value
value = value[1:-1]
qcfgs[match.group(1)] = value
continue
# is this a QSTR line?
match = re.match(r"^Q\((.*)\)$", line)
if not match:
continue
# get the qstr value
qstr = match.group(1)
# special cases to specify control characters
if qstr == "\\n":
qstr = "\n"
elif qstr == "\\r\\n":
qstr = "\r\n"
# work out the corresponding qstr name
ident = qstr_escape(qstr)
# don't add duplicates
if ident in qstrs:
continue
# add the qstr to the list, with order number to retain original order in file
order = len(qstrs)
# but put special method names like __add__ at the top of list, so
# that their id's fit into a byte
if ident == "":
# Sort empty qstr above all still
order = -200000
elif ident == "__dir__":
# Put __dir__ after empty qstr for builtin dir() to work
order = -190000
elif ident.startswith("__"):
order -= 100000
qstrs[ident] = (order, ident, qstr)
if not qcfgs:
sys.stderr.write("ERROR: Empty preprocessor output - check for errors above\n")
sys.exit(1)
return qcfgs, qstrs
def escape_bytes(qstr, qbytes):
if all(32 <= ord(c) <= 126 and c != "\\" and c != '"' for c in qstr):
# qstr is all printable ASCII so render it as-is (for easier debugging)
return qstr
else:
# qstr contains non-printable codes so render entire thing as hex pairs
return "".join(("\\x%02x" % b) for b in qbytes)
def make_bytes(cfg_bytes_len, cfg_bytes_hash, qstr):
qbytes = bytes_cons(qstr, "utf8")
qlen = len(qbytes)
qhash = compute_hash(qbytes, cfg_bytes_hash)
if qlen >= (1 << (8 * cfg_bytes_len)):
print("qstr is too long:", qstr)
assert False
qdata = escape_bytes(qstr, qbytes)
return '%d, %d, "%s"' % (qhash, qlen, qdata)
def print_qstr_data(qcfgs, qstrs):
# get config variables
cfg_bytes_len = int(qcfgs["BYTES_IN_LEN"])
cfg_bytes_hash = int(qcfgs["BYTES_IN_HASH"])
# print out the starter of the generated C header file
print("// This file was automatically generated by makeqstrdata.py")
print("")
# add NULL qstr with no hash or data
print('QDEF(MP_QSTRnull, 0, 0, "")')
# go through each qstr and print it out
for order, ident, qstr in sorted(qstrs.values(), key=lambda x: x[0]):
qbytes = make_bytes(cfg_bytes_len, cfg_bytes_hash, qstr)
print("QDEF(MP_QSTR_%s, %s)" % (ident, qbytes))
def do_work(infiles):
qcfgs, qstrs = parse_input_headers(infiles)
print_qstr_data(qcfgs, qstrs)
if __name__ == "__main__":
do_work(sys.argv[1:])