1 # YPL parser 1.5 |
1 # YPL parser 1.6 |
|
2 # adapted for Python 3.x |
2 |
3 |
3 # written by VB. |
4 # written by VB. |
4 |
5 |
5 import re |
6 import re |
6 import sys, codecs |
7 import sys, codecs |
7 import exceptions |
8 |
8 |
class keyword(str):
    """Marker subclass of ``str`` with no behavior of its own.

    The parser dispatches on the exact ``type()`` of a grammar element, so an
    instance of this class is distinguishable from a plain ``str`` literal.
    NOTE(review): presumably matched as a whole-word keyword; the branch that
    handles it is not visible in this part of the file.
    """
class code(str):
    """Marker subclass of ``str`` with no behavior of its own.

    Exists only so that a value can be told apart from a plain ``str`` by its
    exact type. NOTE(review): how the parser treats it is not visible in this
    part of the file.
    """
|
class ignore(object):
    """Grammar element holding a compiled regular expression.

    NOTE(review): the name suggests text matching the expression is consumed
    and discarded; the consuming code is not visible in this part of the file.
    """

    def __init__(self, regex_text, *args):
        """Compile ``regex_text``; extra positional ``args`` (e.g. flags) are
        forwarded unchanged to ``re.compile``."""
        self.regex = re.compile(regex_text, *args)
14 |
14 |
15 class _and(object): |
15 class _and(object): |
16 def __init__(self, something): |
16 def __init__(self, something): |
17 self.obj = something |
17 self.obj = something |
18 |
18 |
class _not(_and):
    """Same wrapper as ``_and`` (stores the element in ``self.obj``), kept as a
    distinct type so the two can be told apart.

    NOTE(review): presumably the PEG not-predicate; confirm against the
    parser's handling, which is not visible here.
    """
20 |
20 |
class Name(str):
    """A ``str`` that additionally carries ``line`` and ``file`` attributes.

    NOTE(review): presumably the parser fills these in with the source
    position of the match; the assigning code is not visible here.
    """

    def __init__(self, *args):
        """Initialise position info to its defaults.

        ``*args`` is accepted only because the string value itself has already
        been consumed by ``str.__new__``; it is not used here.
        """
        self.line = 0
        self.file = ""
25 |
25 |
class Symbol(list):
    """Two-element list ``[name, what]`` that also exposes both parts as
    attributes (``__name__`` and ``what``)."""

    def __init__(self, name, what):
        """Store ``name`` and ``what`` both as attributes and as list items."""
        self.__name__ = name
        self.append(name)
        self.what = what
        self.append(what)

    def __call__(self):
        """Return the payload (``what``)."""
        return self.what

    def __str__(self):
        """Return ``Symbol(<repr of name>, <repr of what>)``."""
        return 'Symbol(' + repr(self.__name__) + ', ' + repr(self.what) + ')'

    def __repr__(self):
        """Same text as ``str(self)``."""
        return str(self)
38 |
38 |
# Pre-compiled helper patterns: one run of word characters, and everything up
# to the end of the current line ('.' does not match a newline by default).
word_regex = re.compile(r"\w+")
rest_regex = re.compile(r".*")

# When true (and running with __debug__), the parser writes a trace of the
# grammar rules it is testing to stderr.
print_trace = False
43 |
43 |
def u(text):
    """Coerce ``text`` to a plain ``str``.

    * An exception is replaced by its first argument, or by ``""`` when it was
      raised without arguments (e.g. a bare ``SyntaxError()``).
    * An exact ``str`` is returned unchanged.
    * ``bytes`` are decoded with the encoding of ``sys.stdin`` when that is
      known, otherwise as UTF-8.
    * Anything else goes through ``str()``.
    """
    if isinstance(text, BaseException):
        # Guard against empty args: indexing args[0] would raise IndexError.
        text = text.args[0] if text.args else ""
    if type(text) is str:
        return text
    if isinstance(text, bytes):
        # sys.stdin may be None or lack an encoding (e.g. detached processes).
        encoding = getattr(sys.stdin, "encoding", None)
        if encoding:
            return codecs.decode(text, encoding)
        else:
            return codecs.decode(text, "utf-8")
    return str(text)
55 |
55 |
56 def skip(skipper, text, skipWS, skipComments): |
56 def skip(skipper, text, skipWS, skipComments): |
57 if skipWS: |
57 if skipWS: |
58 t = text.lstrip() |
58 t = text.lstrip() |
59 else: |
59 else: |
144 if callable(pattern): |
144 if callable(pattern): |
145 if __debug__: |
145 if __debug__: |
146 if print_trace: |
146 if print_trace: |
147 try: |
147 try: |
148 if pattern.__name__ != "comment": |
148 if pattern.__name__ != "comment": |
149 sys.stderr.write(u"testing with " + pattern.__name__ + u": " + textline[:40] + u"\n") |
149 sys.stderr.write("testing with " + pattern.__name__ + ": " + textline[:40] + "\n") |
150 except: pass |
150 except: pass |
151 |
151 |
152 if pattern.__name__[0] != "_": |
152 if pattern.__name__[0] != "_": |
153 name = Name(pattern.__name__) |
153 name = Name(pattern.__name__) |
154 |
154 |
158 |
158 |
159 text = skip(self.skipper, textline, skipWS, skipComments) |
159 text = skip(self.skipper, textline, skipWS, skipComments) |
160 |
160 |
161 pattern_type = type(pattern) |
161 pattern_type = type(pattern) |
162 |
162 |
163 if pattern_type is str or pattern_type is unicode: |
163 if pattern_type is str: |
164 if text[:len(pattern)] == pattern: |
164 if text[:len(pattern)] == pattern: |
165 text = skip(self.skipper, text[len(pattern):], skipWS, skipComments) |
165 text = skip(self.skipper, text[len(pattern):], skipWS, skipComments) |
166 return R(None, text) |
166 return R(None, text) |
167 else: |
167 else: |
168 syntaxError() |
168 syntaxError() |
249 return R(result, text) |
249 return R(result, text) |
250 else: |
250 else: |
251 syntaxError() |
251 syntaxError() |
252 |
252 |
253 else: |
253 else: |
254 raise SyntaxError(u"illegal type in grammar: " + u(pattern_type)) |
254 raise SyntaxError("illegal type in grammar: " + u(pattern_type)) |
255 |
255 |
def lineNo(self):
    """Return ``"<field 1>:<field 2>"`` of a line record near the current
    parse position, or ``""`` when no position can be determined.

    Reads ``self.lines`` (a sequence of records whose item 0 is compared with
    the parse offset), ``self.textlen`` and ``self.restlen``. The offset is
    ``textlen - restlen``; a ``restlen`` of -1 yields ``""``.
    NOTE(review): records look like (offset, file name, line number) and are
    assumed sorted by offset -- confirm against the code that fills
    ``self.lines``.
    """
    if not self.lines:
        return ""
    if self.restlen == -1:
        return ""
    parsed = self.textlen - self.restlen

    def describe(index):
        # Best effort: a missing record or a formatting problem yields "".
        try:
            record = self.lines[index]
            return u(record[1]) + ":" + u(record[2])
        except Exception:
            return ""

    left, right = 0, len(self.lines)

    # Binary search for the record pair that brackets ``parsed``.
    while True:
        mid = (right + left) // 2
        if self.lines[mid][0] <= parsed:
            try:
                bracketed = self.lines[mid + 1][0] >= parsed
            except Exception:
                # No following record to compare with (mid is the last one).
                return describe(mid + 1)
            if bracketed:
                return describe(mid + 1)
            left = mid + 1
        else:
            right = mid - 1
        if left > right:
            return ""
283 |
283 |
284 # plain module API |
284 # plain module API |
285 |
285 |
286 def parseLine(textline, pattern, resultSoFar = [], skipWS = True, skipComments = None, packrat = False): |
286 def parseLine(textline, pattern, resultSoFar = [], skipWS = True, skipComments = None, packrat = False): |
287 p = parser(p=packrat) |
287 p = parser(p=packrat) |
306 lines, lineNo = [], 0 |
306 lines, lineNo = [], 0 |
307 |
307 |
308 while callable(language): |
308 while callable(language): |
309 language = language() |
309 language = language() |
310 |
310 |
311 orig, ld = u"", 0 |
311 orig, ld = "", 0 |
312 for line in lineSource: |
312 for line in lineSource: |
313 if lineSource.isfirstline(): |
313 if lineSource.isfirstline(): |
314 ld = 1 |
314 ld = 1 |
315 else: |
315 else: |
316 ld += 1 |
316 ld += 1 |
329 text = skip(p.skipper, orig, skipWS, skipComments) |
329 text = skip(p.skipper, orig, skipWS, skipComments) |
330 result, text = p.parseLine(text, language, [], skipWS, skipComments) |
330 result, text = p.parseLine(text, language, [], skipWS, skipComments) |
331 if text: |
331 if text: |
332 raise SyntaxError() |
332 raise SyntaxError() |
333 |
333 |
334 except SyntaxError, msg: |
334 except SyntaxError as msg: |
335 parsed = textlen - p.restlen |
335 parsed = textlen - p.restlen |
336 textlen = 0 |
336 textlen = 0 |
337 nn, lineNo, file = 0, 0, u"" |
337 nn, lineNo, file = 0, 0, "" |
338 for n, ld, l in lines: |
338 for n, ld, l in lines: |
339 if n >= parsed: |
339 if n >= parsed: |
340 break |
340 break |
341 else: |
341 else: |
342 lineNo = l |
342 lineNo = l |