parser.py 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374
  1. import string, re, sys, datetime
  2. from .core import TomlError
  3. if sys.version_info[0] == 2:
  4. _chr = unichr
  5. else:
  6. _chr = chr
  7. def load(fin, translate=lambda t, x, v: v):
  8. return loads(fin.read(), translate=translate, filename=getattr(fin, 'name', repr(fin)))
  9. def loads(s, filename='<string>', translate=lambda t, x, v: v):
  10. if isinstance(s, bytes):
  11. s = s.decode('utf-8')
  12. s = s.replace('\r\n', '\n')
  13. root = {}
  14. tables = {}
  15. scope = root
  16. src = _Source(s, filename=filename)
  17. ast = _p_toml(src)
  18. def error(msg):
  19. raise TomlError(msg, pos[0], pos[1], filename)
  20. def process_value(v):
  21. kind, text, value, pos = v
  22. if kind == 'str' and value.startswith('\n'):
  23. value = value[1:]
  24. if kind == 'array':
  25. if value and any(k != value[0][0] for k, t, v, p in value[1:]):
  26. error('array-type-mismatch')
  27. value = [process_value(item) for item in value]
  28. elif kind == 'table':
  29. value = dict([(k, process_value(value[k])) for k in value])
  30. return translate(kind, text, value)
  31. for kind, value, pos in ast:
  32. if kind == 'kv':
  33. k, v = value
  34. if k in scope:
  35. error('duplicate_keys. Key "{0}" was used more than once.'.format(k))
  36. scope[k] = process_value(v)
  37. else:
  38. is_table_array = (kind == 'table_array')
  39. cur = tables
  40. for name in value[:-1]:
  41. if isinstance(cur.get(name), list):
  42. d, cur = cur[name][-1]
  43. else:
  44. d, cur = cur.setdefault(name, (None, {}))
  45. scope = {}
  46. name = value[-1]
  47. if name not in cur:
  48. if is_table_array:
  49. cur[name] = [(scope, {})]
  50. else:
  51. cur[name] = (scope, {})
  52. elif isinstance(cur[name], list):
  53. if not is_table_array:
  54. error('table_type_mismatch')
  55. cur[name].append((scope, {}))
  56. else:
  57. if is_table_array:
  58. error('table_type_mismatch')
  59. old_scope, next_table = cur[name]
  60. if old_scope is not None:
  61. error('duplicate_tables')
  62. cur[name] = (scope, next_table)
  63. def merge_tables(scope, tables):
  64. if scope is None:
  65. scope = {}
  66. for k in tables:
  67. if k in scope:
  68. error('key_table_conflict')
  69. v = tables[k]
  70. if isinstance(v, list):
  71. scope[k] = [merge_tables(sc, tbl) for sc, tbl in v]
  72. else:
  73. scope[k] = merge_tables(v[0], v[1])
  74. return scope
  75. return merge_tables(root, tables)
  76. class _Source:
  77. def __init__(self, s, filename=None):
  78. self.s = s
  79. self._pos = (1, 1)
  80. self._last = None
  81. self._filename = filename
  82. self.backtrack_stack = []
  83. def last(self):
  84. return self._last
  85. def pos(self):
  86. return self._pos
  87. def fail(self):
  88. return self._expect(None)
  89. def consume_dot(self):
  90. if self.s:
  91. self._last = self.s[0]
  92. self.s = self[1:]
  93. self._advance(self._last)
  94. return self._last
  95. return None
  96. def expect_dot(self):
  97. return self._expect(self.consume_dot())
  98. def consume_eof(self):
  99. if not self.s:
  100. self._last = ''
  101. return True
  102. return False
  103. def expect_eof(self):
  104. return self._expect(self.consume_eof())
  105. def consume(self, s):
  106. if self.s.startswith(s):
  107. self.s = self.s[len(s):]
  108. self._last = s
  109. self._advance(s)
  110. return True
  111. return False
  112. def expect(self, s):
  113. return self._expect(self.consume(s))
  114. def consume_re(self, re):
  115. m = re.match(self.s)
  116. if m:
  117. self.s = self.s[len(m.group(0)):]
  118. self._last = m
  119. self._advance(m.group(0))
  120. return m
  121. return None
  122. def expect_re(self, re):
  123. return self._expect(self.consume_re(re))
  124. def __enter__(self):
  125. self.backtrack_stack.append((self.s, self._pos))
  126. def __exit__(self, type, value, traceback):
  127. if type is None:
  128. self.backtrack_stack.pop()
  129. else:
  130. self.s, self._pos = self.backtrack_stack.pop()
  131. return type == TomlError
  132. def commit(self):
  133. self.backtrack_stack[-1] = (self.s, self._pos)
  134. def _expect(self, r):
  135. if not r:
  136. raise TomlError('msg', self._pos[0], self._pos[1], self._filename)
  137. return r
  138. def _advance(self, s):
  139. suffix_pos = s.rfind('\n')
  140. if suffix_pos == -1:
  141. self._pos = (self._pos[0], self._pos[1] + len(s))
  142. else:
  143. self._pos = (self._pos[0] + s.count('\n'), len(s) - suffix_pos)
  144. _ews_re = re.compile(r'(?:[ \t]|#[^\n]*\n|#[^\n]*\Z|\n)*')
  145. def _p_ews(s):
  146. s.expect_re(_ews_re)
  147. _ws_re = re.compile(r'[ \t]*')
  148. def _p_ws(s):
  149. s.expect_re(_ws_re)
  150. _escapes = { 'b': '\b', 'n': '\n', 'r': '\r', 't': '\t', '"': '"', '\'': '\'',
  151. '\\': '\\', '/': '/', 'f': '\f' }
  152. _basicstr_re = re.compile(r'[^"\\\000-\037]*')
  153. _short_uni_re = re.compile(r'u([0-9a-fA-F]{4})')
  154. _long_uni_re = re.compile(r'U([0-9a-fA-F]{8})')
  155. _escapes_re = re.compile('[bnrt"\'\\\\/f]')
  156. _newline_esc_re = re.compile('\n[ \t\n]*')
  157. def _p_basicstr_content(s, content=_basicstr_re):
  158. res = []
  159. while True:
  160. res.append(s.expect_re(content).group(0))
  161. if not s.consume('\\'):
  162. break
  163. if s.consume_re(_newline_esc_re):
  164. pass
  165. elif s.consume_re(_short_uni_re) or s.consume_re(_long_uni_re):
  166. res.append(_chr(int(s.last().group(1), 16)))
  167. else:
  168. s.expect_re(_escapes_re)
  169. res.append(_escapes[s.last().group(0)])
  170. return ''.join(res)
  171. _key_re = re.compile(r'[0-9a-zA-Z-_]+')
  172. def _p_key(s):
  173. with s:
  174. s.expect('"')
  175. r = _p_basicstr_content(s, _basicstr_re)
  176. s.expect('"')
  177. return r
  178. if s.consume('\''):
  179. if s.consume('\'\''):
  180. r = s.expect_re(_litstr_ml_re).group(0)
  181. s.expect('\'\'\'')
  182. else:
  183. r = s.expect_re(_litstr_re).group(0)
  184. s.expect('\'')
  185. return r
  186. return s.expect_re(_key_re).group(0)
  187. _float_re = re.compile(r'[+-]?(?:0|[1-9](?:_?\d)*)(?:\.\d(?:_?\d)*)?(?:[eE][+-]?(?:\d(?:_?\d)*))?')
  188. _datetime_re = re.compile(r'(\d{4})-(\d{2})-(\d{2})T(\d{2}):(\d{2}):(\d{2})(\.\d+)?(?:Z|([+-]\d{2}):(\d{2}))')
  189. _basicstr_ml_re = re.compile(r'(?:(?:|"|"")[^"\\\000-\011\013-\037])*')
  190. _litstr_re = re.compile(r"[^'\000-\037]*")
  191. _litstr_ml_re = re.compile(r"(?:(?:|'|'')(?:[^'\000-\011\013-\037]))*")
  192. def _p_value(s):
  193. pos = s.pos()
  194. if s.consume('true'):
  195. return 'bool', s.last(), True, pos
  196. if s.consume('false'):
  197. return 'bool', s.last(), False, pos
  198. if s.consume('"'):
  199. if s.consume('""'):
  200. r = _p_basicstr_content(s, _basicstr_ml_re)
  201. s.expect('"""')
  202. else:
  203. r = _p_basicstr_content(s, _basicstr_re)
  204. s.expect('"')
  205. return 'str', r, r, pos
  206. if s.consume('\''):
  207. if s.consume('\'\''):
  208. r = s.expect_re(_litstr_ml_re).group(0)
  209. s.expect('\'\'\'')
  210. else:
  211. r = s.expect_re(_litstr_re).group(0)
  212. s.expect('\'')
  213. return 'str', r, r, pos
  214. if s.consume_re(_datetime_re):
  215. m = s.last()
  216. s0 = m.group(0)
  217. r = map(int, m.groups()[:6])
  218. if m.group(7):
  219. micro = float(m.group(7))
  220. else:
  221. micro = 0
  222. if m.group(8):
  223. g = int(m.group(8), 10) * 60 + int(m.group(9), 10)
  224. tz = _TimeZone(datetime.timedelta(0, g * 60))
  225. else:
  226. tz = _TimeZone(datetime.timedelta(0, 0))
  227. y, m, d, H, M, S = r
  228. dt = datetime.datetime(y, m, d, H, M, S, int(micro * 1000000), tz)
  229. return 'datetime', s0, dt, pos
  230. if s.consume_re(_float_re):
  231. m = s.last().group(0)
  232. r = m.replace('_','')
  233. if '.' in m or 'e' in m or 'E' in m:
  234. return 'float', m, float(r), pos
  235. else:
  236. return 'int', m, int(r, 10), pos
  237. if s.consume('['):
  238. items = []
  239. with s:
  240. while True:
  241. _p_ews(s)
  242. items.append(_p_value(s))
  243. s.commit()
  244. _p_ews(s)
  245. s.expect(',')
  246. s.commit()
  247. _p_ews(s)
  248. s.expect(']')
  249. return 'array', None, items, pos
  250. if s.consume('{'):
  251. _p_ws(s)
  252. items = {}
  253. if not s.consume('}'):
  254. k = _p_key(s)
  255. _p_ws(s)
  256. s.expect('=')
  257. _p_ws(s)
  258. items[k] = _p_value(s)
  259. _p_ws(s)
  260. while s.consume(','):
  261. _p_ws(s)
  262. k = _p_key(s)
  263. _p_ws(s)
  264. s.expect('=')
  265. _p_ws(s)
  266. items[k] = _p_value(s)
  267. _p_ws(s)
  268. s.expect('}')
  269. return 'table', None, items, pos
  270. s.fail()
  271. def _p_stmt(s):
  272. pos = s.pos()
  273. if s.consume( '['):
  274. is_array = s.consume('[')
  275. _p_ws(s)
  276. keys = [_p_key(s)]
  277. _p_ws(s)
  278. while s.consume('.'):
  279. _p_ws(s)
  280. keys.append(_p_key(s))
  281. _p_ws(s)
  282. s.expect(']')
  283. if is_array:
  284. s.expect(']')
  285. return 'table_array' if is_array else 'table', keys, pos
  286. key = _p_key(s)
  287. _p_ws(s)
  288. s.expect('=')
  289. _p_ws(s)
  290. value = _p_value(s)
  291. return 'kv', (key, value), pos
  292. _stmtsep_re = re.compile(r'(?:[ \t]*(?:#[^\n]*)?\n)+[ \t]*')
  293. def _p_toml(s):
  294. stmts = []
  295. _p_ews(s)
  296. with s:
  297. stmts.append(_p_stmt(s))
  298. while True:
  299. s.commit()
  300. s.expect_re(_stmtsep_re)
  301. stmts.append(_p_stmt(s))
  302. _p_ews(s)
  303. s.expect_eof()
  304. return stmts
  305. class _TimeZone(datetime.tzinfo):
  306. def __init__(self, offset):
  307. self._offset = offset
  308. def utcoffset(self, dt):
  309. return self._offset
  310. def dst(self, dt):
  311. return None
  312. def tzname(self, dt):
  313. m = self._offset.total_seconds() // 60
  314. if m < 0:
  315. res = '-'
  316. m = -m
  317. else:
  318. res = '+'
  319. h = m // 60
  320. m = m - h * 60
  321. return '{}{:.02}{:.02}'.format(res, h, m)