|
# YPL parser 1.5

# written by VB.
|
4 |
|
import codecs
import exceptions
import re
import sys
|
8 |
|
9 class keyword(unicode): pass |
|
10 class code(unicode): pass |
|
class ignore(object):
    """Regex terminal whose matched text is consumed but not kept.

    parser.parseLine() advances past the match and adds nothing to the
    result for it.
    """

    def __init__(self, regex_text, *args):
        # Any extra positional arguments (e.g. flags) go straight to
        # re.compile().
        compiled = re.compile(regex_text, *args)
        self.regex = compiled
|
14 |
|
15 class _and(object): |
|
16 def __init__(self, something): |
|
17 self.obj = something |
|
18 |
|
class _not(_and):
    """Negative lookahead: the wrapped pattern must NOT match, nothing is consumed."""
|
20 |
|
21 class Name(unicode): |
|
22 def __init__(self, *args): |
|
23 self.line = 0 |
|
24 self.file = u"" |
|
25 |
|
class Symbol(list):
    """AST node: the two-element list ``[name, what]``.

    The pair is also reachable as ``symbol.__name__`` and ``symbol.what``;
    calling the symbol returns ``what``.
    """

    def __init__(self, name, what):
        self.__name__ = name
        self.what = what
        # Same list content as appending name and then what.
        self.extend([name, what])

    def __call__(self):
        return self.what

    def __unicode__(self):
        parts = (repr(self.__name__), repr(self.what))
        return u'Symbol(' + parts[0] + ', ' + parts[1] + u')'

    def __repr__(self):
        return unicode(self)
|
38 |
|
39 word_regex = re.compile(ur"\w+") |
|
40 rest_regex = re.compile(ur".*") |
|
41 |
|
42 print_trace = False |
|
43 |
|
44 def u(text): |
|
45 if isinstance(text, exceptions.BaseException): |
|
46 text = text.args[0] |
|
47 if type(text) is unicode: |
|
48 return text |
|
49 if isinstance(text, str): |
|
50 if sys.stdin.encoding: |
|
51 return codecs.decode(text, sys.stdin.encoding) |
|
52 else: |
|
53 return codecs.decode(text, "utf-8") |
|
54 return unicode(text) |
|
55 |
|
def skip(skipper, text, skipWS, skipComments):
    """Strip leading whitespace and comments from *text*.

    skipper:      object whose parseLine(text, pattern, resultSoFar, skipWS,
                  skipComments) method is used to match one comment
    text:         text to strip
    skipWS:       flag if leading whitespace should be removed
    skipComments: grammar element matching a comment; if false, only
                  whitespace is handled

    returns: the rest of the text
    """
    t = text.lstrip() if skipWS else text
    if not skipComments:
        return t

    while True:
        try:
            _, rest = skipper.parseLine(t, skipComments, [], skipWS, None)
        except SyntaxError:
            # No further comment at this position.
            break
        if skipWS:
            rest = rest.lstrip()
        if len(rest) >= len(t):
            # The comment pattern matched without consuming anything;
            # trying again would never terminate.
            break
        t = rest
    return t
|
69 |
|
class parser(object):
    """Backtracking matcher that interprets a pyPEG language description.

    Attributes:
        restlen:  shortest remaining-text length seen after any successful
                  match, or -1 as long as nothing has matched
        skipper:  parser handed to skip() for matching comments; a skipper
                  is its own skipper
        lines:    list of (offset, file, line) tuples read by lineNo(), or
                  None if no position information is available
        textlen:  length of the complete input text
        memory:   packrat cache, keyed by (len(textline), id(pattern))
        packrat:  true if results are memoized
    """

    def __init__(self, another=False, p=False):
        """another: true if this parser is itself a skipper
        p:       true to enable packrat memoization
        """
        self.restlen = -1
        if another:
            self.skipper = self
        else:
            self.skipper = parser(True, p)
            self.skipper.packrat = p
        self.lines = None
        self.textlen = 0
        self.memory = {}
        self.packrat = p

    def parseLine(self, textline, pattern, resultSoFar=None, skipWS=True, skipComments=None):
        """Match the start of *textline* against *pattern*.

        textline:     text to parse
        pattern:      pyPEG language description
        resultSoFar:  parsing result so far; matches are appended to this
                      list (default: a new blank list per call)
        skipWS:       flag if whitespace should be skipped (default: True)
        skipComments: Python function returning pyPEG for matching comments

        Grammar elements handled:
            callable        called to obtain its pattern; unless its name
                            starts with "_", the match is wrapped in a
                            Symbol carrying that name
            str / unicode   literal prefix
            keyword         literal that must equal the leading \\w+ run
            _not / _and     negative / positive lookahead
            regex / ignore  regular expression; ignore drops the match
            tuple           sequence; an int sets the repetition of the next
                            element: n > 0 exactly n times, 0 optional,
                            -2 one or more, any other negative zero or more
            list            ordered choice

        returns: pyAST, textrest

        raises: SyntaxError(reason) if textline is detected not being in
                    language described by pattern
                SyntaxError(reason) if pattern is an illegal language
                    description
        """
        if resultSoFar is None:
            # A fresh list per call: R() appends to it, so a shared default
            # list would leak results from one call into the next.
            resultSoFar = []

        name = None
        _textline = textline
        _pattern = pattern

        def R(result, text):
            # Record a successful match and build the return value.
            if __debug__ and print_trace:
                try:
                    if _pattern.__name__ != "comment":
                        sys.stderr.write(u"match: " + _pattern.__name__ + u"\n")
                except Exception:
                    # Tracing is best effort (e.g. patterns without a name).
                    pass

            if self.restlen == -1:
                self.restlen = len(text)
            else:
                self.restlen = min(self.restlen, len(text))

            res = resultSoFar
            if name:
                name.line = self.lineNo()
                res.append(Symbol(name, result if result else []))
            elif result:
                if type(result) is list:
                    res.extend(result)
                else:
                    res.append(result)

            if self.packrat:
                # NOTE(review): the cached tuple holds the caller's
                # accumulator list itself, not a copy -- confirm that this
                # is intended before relying on packrat results.
                self.memory[(len(_textline), id(_pattern))] = (res, text)
            return res, text

        def syntaxError():
            # Record the failure (packrat only) and signal "no match".
            if self.packrat:
                self.memory[(len(_textline), id(_pattern))] = False
            raise SyntaxError()

        if self.packrat:
            # Only cached successes are reused. A cached failure (False)
            # falls through and the pattern is simply tried again; this
            # keeps the long-standing behaviour of the lookup.
            cached = self.memory.get((len(textline), id(pattern)))
            if cached:
                return cached

        if callable(pattern):
            if __debug__ and print_trace:
                try:
                    if pattern.__name__ != "comment":
                        sys.stderr.write(u"testing with " + pattern.__name__ + u": " + textline[:40] + u"\n")
                except Exception:
                    pass

            if pattern.__name__[0] != "_":
                name = Name(pattern.__name__)

            pattern = pattern()
            if callable(pattern):
                # A rule that returns another rule: treat it as a
                # one-element sequence so the inner rule is parsed normally.
                pattern = (pattern,)

        text = skip(self.skipper, textline, skipWS, skipComments)

        pattern_type = type(pattern)

        if pattern_type is str or pattern_type is unicode:
            if text[:len(pattern)] != pattern:
                syntaxError()
            text = skip(self.skipper, text[len(pattern):], skipWS, skipComments)
            return R(None, text)

        elif pattern_type is keyword:
            m = word_regex.match(text)
            if not m or m.group(0) != pattern:
                syntaxError()
            text = skip(self.skipper, text[len(pattern):], skipWS, skipComments)
            return R(None, text)

        elif pattern_type is _not:
            try:
                self.parseLine(text, pattern.obj, [], skipWS, skipComments)
            except SyntaxError:
                # Only a parse failure means "lookahead did not match";
                # other exceptions (e.g. KeyboardInterrupt) must propagate.
                return resultSoFar, textline
            syntaxError()

        elif pattern_type is _and:
            # Raises SyntaxError if the lookahead fails; consumes nothing.
            self.parseLine(text, pattern.obj, [], skipWS, skipComments)
            return resultSoFar, textline

        elif pattern_type is type(word_regex) or pattern_type is ignore:
            regex = pattern.regex if pattern_type is ignore else pattern
            m = regex.match(text)
            if not m:
                syntaxError()
            text = skip(self.skipper, text[len(m.group(0)):], skipWS, skipComments)
            if pattern_type is ignore:
                return R(None, text)
            return R(m.group(0), text)

        elif pattern_type is tuple:
            result = []
            n = 1
            for p in pattern:
                if type(p) is int:
                    # Repetition count for the next element.
                    n = p
                    continue

                if n > 0:
                    for _ in range(n):
                        result, text = self.parseLine(text, p, result, skipWS, skipComments)
                elif n == 0:
                    # Optional element; not even tried on exhausted input.
                    if text != "":
                        try:
                            result, text = self.parseLine(text, p, result, skipWS, skipComments)
                        except SyntaxError:
                            pass
                else:
                    found = False
                    while True:
                        try:
                            newResult, newText = self.parseLine(text, p, result, skipWS, skipComments)
                        except SyntaxError:
                            break
                        progressed = len(newText) < len(text)
                        result, text, found = newResult, newText, True
                        if not progressed:
                            # A zero-width match would repeat forever.
                            break
                    if n == -2 and not found:
                        syntaxError()
                n = 1
            return R(result, text)

        elif pattern_type is list:
            result = []
            for p in pattern:
                try:
                    result, text = self.parseLine(text, p, result, skipWS, skipComments)
                except SyntaxError:
                    continue
                # First alternative that matches wins.
                return R(result, text)
            syntaxError()

        else:
            raise SyntaxError(u"illegal type in grammar: " + u(pattern_type))

    def lineNo(self):
        """Return u"file:line" for the furthest position matched so far.

        The position is textlen - restlen; it is looked up in self.lines,
        whose entries are (offset of line start, file, zero-based line).

        returns: unicode; u"" if there is no line table, nothing has matched
                 yet, the position lies before the first entry, or the entry
                 cannot be formatted
        """
        if not self.lines or self.restlen == -1:
            return u""
        parsed = self.textlen - self.restlen
        if self.lines[0][0] > parsed:
            return u""

        # Binary search for the last line that starts at or before `parsed`.
        low, high = 0, len(self.lines) - 1
        while low < high:
            mid = (low + high + 1) // 2
            if self.lines[mid][0] <= parsed:
                low = mid
            else:
                high = mid - 1

        entry = self.lines[low]
        try:
            # Use the entry of the line itself (stored number is zero-based)
            # so that the last line and file boundaries resolve correctly.
            return u(entry[1]) + u":" + u(entry[2] + 1)
        except (UnicodeError, TypeError, LookupError):
            return u""
|
283 |
|
# plain module API

def parseLine(textline, pattern, resultSoFar=None, skipWS=True, skipComments=None, packrat=False):
    """Parse *textline* with a fresh parser.

    textline:     text to parse
    pattern:      pyPEG language description
    resultSoFar:  parsing result so far; matches are appended to this list
                  (default: a new blank list per call)
    skipWS:       flag if whitespace should be skipped (default: True)
    skipComments: Python function returning pyPEG for matching comments
    packrat:      use memoization

    returns: pyAST, textrest

    raises: SyntaxError(reason) if textline is not in the language described
            by pattern, or if pattern is an illegal language description
    """
    if resultSoFar is None:
        # The parser appends to this list; a shared default list would
        # accumulate the results of every earlier call.
        resultSoFar = []
    p = parser(p=packrat)
    text = skip(p.skipper, textline, skipWS, skipComments)
    return p.parseLine(text, pattern, resultSoFar, skipWS, skipComments)
|
291 |
|
def parse(language, lineSource, skipWS=True, skipComments=None, packrat=False, lineCount=True):
    """Parse all lines of *lineSource* against *language*.

    language:     pyPEG language description
    lineSource:   a fileinput.FileInput object; it is iterated and its
                  filename() and lineno() methods are called per line
    skipWS:       flag if whitespace should be skipped (default: True)
    skipComments: Python function which returns pyPEG for matching comments
    packrat:      use memoization
    lineCount:    add line number information to AST

    returns: pyAST

    raises: SyntaxError(reason), if a parsed line is not in language
            SyntaxError(reason), if the language description is illegal

    The reason names the line on which the unparsed remainder of the input
    starts, as "syntax error in <file>:<line>: <content of that line>".
    """
    while callable(language):
        language = language()

    # Read the whole input, remembering for every line the offset at which
    # it starts, its file and its zero-based line number.
    lines, chunks, offset = [], [], 0
    for line in lineSource:
        lines.append((offset, lineSource.filename(), lineSource.lineno() - 1))
        decoded = u(line)
        chunks.append(decoded)
        offset += len(decoded)
    orig = u"".join(chunks)
    textlen = offset

    p = parser(p=packrat)
    p.textlen = textlen
    p.lines = lines if lineCount else None

    text = skip(p.skipper, orig, skipWS, skipComments)
    startRest = len(text)

    try:
        result, text = p.parseLine(text, language, [], skipWS, skipComments)
        if text:
            raise SyntaxError()

    except SyntaxError:
        # restlen is still -1 if nothing matched at all; the error is then
        # located where parsing started, not past the end of the input.
        restlen = p.restlen
        if restlen == -1:
            restlen = startRest
        parsed = textlen - restlen

        if lines:
            # Last line starting at or before the failure position.
            index = 0
            for i, entry in enumerate(lines):
                if entry[0] > parsed:
                    break
                index = i
            start, fileName, lineNo = lines[index]
            if index + 1 < len(lines):
                end = lines[index + 1][0]
            else:
                end = textlen
            lineNo += 1
            lineCont = orig[start:end].rstrip(u"\r\n")
        else:
            # Empty input: there is no line to quote.
            fileName, lineNo, lineCont = u"", 1, u""

        raise SyntaxError(u"syntax error in " + u(fileName) + u":" + u(lineNo) + u": " + lineCont)

    return result