2 Pure-python parsing backend.
4 from __future__ import unicode_literals
5 from decimal import Decimal
7 from codecs import unicode_escape_decode
9 from ijson import common
10 from ijson.compat import chr
# Matches a single non-whitespace character; the lexer searches for it from
# its current position to find where the next lexeme begins.
NONWS = re.compile(r'\S')
# Matches the first character that is not a lowercase letter, a digit, '.',
# '+' or '-'; the lexer uses the match position as the end of a bare lexeme.
LEXTERM = re.compile(r'[^a-z0-9\.+-]')
class UnexpectedSymbol(common.JSONError):
    '''
    Error reporting an unexpected symbol and the position it was found at.
    '''
    def __init__(self, symbol, reader):
        # Only the first character of the offending symbol is reported.
        culprit = symbol[0]
        # The reader position is taken to be just past the symbol, so step
        # back by its length to point at where the symbol starts.
        offset = reader.pos - len(symbol)
        message = 'Unexpected symbol "%s" at %d' % (culprit, offset)
        super(UnexpectedSymbol, self).__init__(message)
24 JSON lexer. Supports iterator interface.
26 def __init__(self, f):
36 match = NONWS.search(self.buffer, self.pos)
38 self.pos = match.start()
39 char = self.buffer[self.pos]
40 if 'a' <= char <= 'z' or '0' <= char <= '9' or char == '-':
43 return self.stringlexem()
47 self.buffer = self.f.read(BUFSIZE).decode('utf-8')
49 if not len(self.buffer):
56 match = LEXTERM.search(self.buffer, current)
58 current = match.start()
61 current = len(self.buffer)
62 self.buffer += self.f.read(BUFSIZE).decode('utf-8')
63 if len(self.buffer) == current:
65 result = self.buffer[self.pos:current]
67 if self.pos > BUFSIZE:
68 self.buffer = self.buffer[self.pos:]
72 def stringlexem(self):
76 end = self.buffer.index('"', start)
78 while self.buffer[escpos] == '\\':
80 if (end - escpos) % 2 == 0:
83 result = self.buffer[self.pos:end + 1]
87 old_len = len(self.buffer)
88 self.buffer += self.f.read(BUFSIZE).decode('utf-8')
89 if len(self.buffer) == old_len:
90 raise common.IncompleteJSONError()
95 pos = s.find('\\', start)
113 yield chr(int(s[pos + 1:pos + 5], 16))
119 def parse_value(lexer, symbol=None):
125 elif symbol == 'true':
126 yield ('boolean', True)
127 elif symbol == 'false':
128 yield ('boolean', False)
130 for event in parse_array(lexer):
133 for event in parse_object(lexer):
135 elif symbol[0] == '"':
136 yield ('string', ''.join(unescape(symbol[1:-1])))
139 number = Decimal(symbol) if '.' in symbol else int(symbol)
140 yield ('number', number)
142 raise UnexpectedSymbol(symbol, lexer)
143 except StopIteration:
144 raise common.IncompleteJSONError()
146 def parse_array(lexer):
147 yield ('start_array', None)
151 for event in parse_value(lexer, symbol):
157 raise UnexpectedSymbol(symbol, lexer)
159 yield ('end_array', None)
161 def parse_object(lexer):
162 yield ('start_map', None)
167 raise UnexpectedSymbol(symbol, lexer)
168 yield ('map_key', symbol[1:-1])
171 raise UnexpectedSymbol(symbol, lexer)
172 for event in parse_value(lexer):
178 raise UnexpectedSymbol(symbol, lexer)
180 yield ('end_map', None)
182 def basic_parse(file):
184 Iterator yielding unprefixed events.
188 - file: a readable file-like object with JSON input
190 lexer = iter(Lexer(file))
191 for value in parse_value(lexer):
195 except StopIteration:
198 raise common.JSONError('Additional data')
202 Backend-specific wrapper for ijson.common.parse.
204 return common.parse(basic_parse(file))
def items(file, prefix):
    '''
    Backend-specific wrapper for ijson.common.items.

    Passes the result of this module's parse(file) to common.items along
    with prefix and returns whatever common.items returns.
    '''
    events = parse(file)
    return common.items(events, prefix)