31
|
1 |
# YPL parser 1.6
|
|
2 |
# adapted for Python 3.x
|
0
|
3 |
|
|
4 |
# written by VB.
|
|
5 |
|
|
6 |
import re
|
|
7 |
import sys, codecs
|
31
|
8 |
|
|
9 |
class keyword(str):
    """String subclass marking a grammar literal that must match a whole word.

    The parser dispatches on the exact pattern type: a ``keyword`` is
    compared against the complete run of word characters at the current
    position, whereas a plain ``str`` is compared as a prefix.
    """
|
|
10 |
class code(str):
    """String subclass used purely as a marker type; it adds no behavior."""
|
0
|
11 |
class ignore(object):
    """Grammar element: a regular expression whose match is consumed silently.

    The parser advances past the matched text but adds nothing to the
    result for it.
    """

    def __init__(self, regex_text, *args):
        """Compile ``regex_text``; extra positional ``args`` go to ``re.compile``."""
        compiled = re.compile(regex_text, *args)
        self.regex = compiled
|
|
14 |
|
|
15 |
class _and(object):
|
|
16 |
def __init__(self, something):
|
|
17 |
self.obj = something
|
|
18 |
|
|
19 |
class _not(_and):
    """Grammar element: negative lookahead.

    Stores its pattern in ``obj`` exactly like ``_and``; the parser succeeds
    on it only when that pattern does NOT match, and consumes no input.
    """
|
|
20 |
|
31
|
21 |
class Name(str):
    """Symbol name (a ``str``) that also carries source-location attributes."""

    def __init__(self, *args):
        """Start with placeholder location data; the parser fills in ``line``."""
        self.line, self.file = 0, ""
|
0
|
25 |
|
|
26 |
class Symbol(list):
    """AST node: the two-element list ``[name, what]`` with attribute access.

    ``name`` is also available as ``__name__`` and the payload as ``what``.
    """

    def __init__(self, name, what):
        """Record both fields as attributes and as the list's two items."""
        self.__name__ = name
        self.what = what
        self.extend((name, what))

    def __call__(self):
        """Return the payload (``what``)."""
        payload = self.what
        return payload

    def __str__(self):
        """Render as ``Symbol(<repr of name>, <repr of what>)``."""
        return 'Symbol(%r, %r)' % (self.__name__, self.what)

    def __repr__(self):
        """Same text as ``str(self)``."""
        return str(self)
|
|
38 |
|
|
39 |
# Matches a run of word characters; parser.parseLine uses it to read the whole
# word at the current position when matching a `keyword` pattern.
word_regex = re.compile(r"\w+")
|
|
40 |
# Matches everything up to (not including) the next newline, possibly nothing.
rest_regex = re.compile(r".*")
|
0
|
41 |
|
|
42 |
# When true (and Python is not run with -O), parser.parseLine writes
# "testing with ..." / "match: ..." trace lines to sys.stderr.
print_trace = False
|
|
43 |
|
|
44 |
def u(text):
    """Convert ``text`` to a ``str``.

    * An exception is replaced by its first argument, or ``""`` when it was
      raised without arguments (this module raises bare ``SyntaxError()``).
    * ``bytes`` are decoded with the encoding of ``sys.stdin`` when it is
      known, otherwise as UTF-8.
    * Anything else goes through ``str()``.
    """
    if isinstance(text, BaseException):
        # args may be empty; indexing it unconditionally raised IndexError.
        text = text.args[0] if text.args else ""
    if type(text) is str:
        return text
    if isinstance(text, bytes):
        # sys.stdin can be None (e.g. detached processes), so do not assume
        # the attribute exists.
        encoding = getattr(sys.stdin, "encoding", None) or "utf-8"
        return codecs.decode(text, encoding)
    return str(text)
|
0
|
55 |
|
|
56 |
def skip(skipper, text, skipWS, skipComments):
    """Return ``text`` with leading whitespace and/or comments removed.

    skipper:      object whose ``parseLine`` is used to match comments
    text:         the text to trim
    skipWS:       if true, strip leading whitespace (also after each comment)
    skipComments: pattern describing a comment, or a false value to skip none

    Comments are removed repeatedly until ``skipper.parseLine`` raises
    ``SyntaxError`` (no further comment at the current position).
    """
    t = text.lstrip() if skipWS else text
    if not skipComments:
        return t

    try:
        while True:
            _ignored, rest = skipper.parseLine(t, skipComments, [], skipWS, None)
            if skipWS:
                rest = rest.lstrip()
            # A comment pattern that matches without consuming anything
            # would otherwise keep this loop spinning forever.
            stalled = len(rest) >= len(t)
            t = rest
            if stalled:
                break
    except SyntaxError:
        # Expected way out: nothing more to skip.  Only SyntaxError is
        # caught so that KeyboardInterrupt and genuine programming errors
        # are no longer silently discarded.
        pass
    return t
|
|
69 |
|
|
70 |
class parser(object):
    """Matcher for pyPEG-style grammar descriptions.

    Attributes:
        restlen:  smallest length of remaining text seen after any successful
                  match, or -1 before the first match
        skipper:  parser used for skipping comments (a separate instance,
                  or ``self`` when created with ``another`` true)
        lines:    optional list of ``(offset, file name, line number)``
                  tuples used by ``lineNo``; ``None`` disables line info
        textlen:  total length of the text being parsed
        memory:   memoization table used when ``packrat`` is true
        packrat:  whether results are memoized
    """

    def __init__(self, another = False, p = False):
        """Create a parser.

        another: true when this instance is itself a comment skipper, in
                 which case it serves as its own skipper
        p:       enable packrat memoization
        """
        self.restlen = -1
        if another:
            self.skipper = self
        else:
            self.skipper = parser(True, p)
            self.skipper.packrat = p
        self.lines = None
        self.textlen = 0
        self.memory = {}
        self.packrat = p

    def parseLine(self, textline, pattern, resultSoFar = None, skipWS = True, skipComments = None):
        """Match ``pattern`` at the start of ``textline``.

        textline:     text to parse
        pattern:      pyPEG language description
        resultSoFar:  parsing result so far; matches are appended to this
                      list (default: a fresh empty list per call)
        skipWS:       flag if whitespace should be skipped (default: True)
        skipComments: Python function returning pyPEG for matching comments

        returns:      pyAST, textrest

        raises:       SyntaxError if textline is detected not being in the
                          language described by pattern
                      SyntaxError(reason) if pattern is an illegal language
                          description
        """
        # The default used to be a shared ``[]``; since matches are appended
        # to it, results leaked from one call into the next.
        if resultSoFar is None:
            resultSoFar = []

        name = None
        _textline = textline
        _pattern = pattern

        def R(result, text):
            # Success path: record progress, append the match to
            # resultSoFar, memoize if requested.
            if __debug__:
                if print_trace:
                    try:
                        if _pattern.__name__ != "comment":
                            sys.stderr.write("match: " + _pattern.__name__ + "\n")
                    except Exception:
                        pass    # tracing is best effort (pattern may have no __name__)

            if self.restlen == -1:
                self.restlen = len(text)
            else:
                self.restlen = min(self.restlen, len(text))
            res = resultSoFar
            if name and result:
                name.line = self.lineNo()
                res.append(Symbol(name, result))
            elif name:
                name.line = self.lineNo()
                res.append(Symbol(name, []))
            elif result:
                if type(result) is list:
                    res.extend(result)
                else:
                    res.extend([result])
            if self.packrat:
                self.memory[(len(_textline), id(_pattern))] = (res, text)
            return res, text

        def syntaxError():
            # Failure path: memoize the failure if requested, then raise.
            if self.packrat:
                self.memory[(len(_textline), id(_pattern))] = False
            raise SyntaxError()

        if self.packrat:
            # NOTE(review): only memoized successes are reused.  The original
            # code raised SyntaxError for a memoized failure but swallowed it
            # right away, so failures were always re-parsed; that behavior is
            # kept because the keys are based on id() and need review before
            # failures can be trusted.
            cached = self.memory.get((len(textline), id(pattern)))
            if cached:
                return cached

        if callable(pattern):
            if __debug__:
                if print_trace:
                    try:
                        if pattern.__name__ != "comment":
                            sys.stderr.write("testing with " + pattern.__name__ + ": " + textline[:40] + "\n")
                    except Exception:
                        pass    # tracing is best effort

            # Functions whose name starts with "_" do not create a Symbol.
            if pattern.__name__[0] != "_":
                name = Name(pattern.__name__)

            pattern = pattern()
            if callable(pattern):
                pattern = (pattern,)

        text = skip(self.skipper, textline, skipWS, skipComments)

        # Dispatch is on the exact type so that `keyword` (a str subclass)
        # is not handled by the plain-str branch.
        pattern_type = type(pattern)

        if pattern_type is str:
            if text[:len(pattern)] == pattern:
                text = skip(self.skipper, text[len(pattern):], skipWS, skipComments)
                return R(None, text)
            else:
                syntaxError()

        elif pattern_type is keyword:
            m = word_regex.match(text)
            if m:
                if m.group(0) == pattern:
                    text = skip(self.skipper, text[len(pattern):], skipWS, skipComments)
                    return R(None, text)
                else:
                    syntaxError()
            else:
                syntaxError()

        elif pattern_type is _not:
            # Negative lookahead: succeed, consuming nothing, only if the
            # wrapped pattern fails.  Only SyntaxError counts as "fails";
            # a bare except used to turn any error into a success.
            try:
                self.parseLine(text, pattern.obj, [], skipWS, skipComments)
            except SyntaxError:
                return resultSoFar, textline
            syntaxError()

        elif pattern_type is _and:
            # Positive lookahead: must match (else SyntaxError propagates),
            # but consumes nothing.
            self.parseLine(text, pattern.obj, [], skipWS, skipComments)
            return resultSoFar, textline

        elif pattern_type is type(word_regex) or pattern_type is ignore:
            if pattern_type is ignore:
                pattern = pattern.regex
            m = pattern.match(text)
            if m:
                text = skip(self.skipper, text[len(m.group(0)):], skipWS, skipComments)
                if pattern_type is ignore:
                    return R(None, text)
                else:
                    return R(m.group(0), text)
            else:
                syntaxError()

        elif pattern_type is tuple:
            # Sequence.  An int item sets the cardinality of the NEXT item:
            # n > 0 exactly n times, 0 optional, negative repeated (-2
            # requires at least one match); it resets to 1 afterwards.
            result = []
            n = 1
            for p in pattern:
                if type(p) is int:
                    n = p
                else:
                    if n > 0:
                        for i in range(n):
                            result, text = self.parseLine(text, p, result, skipWS, skipComments)
                    elif n == 0:
                        if text == "":
                            pass
                        else:
                            try:
                                newResult, newText = self.parseLine(text, p, result, skipWS, skipComments)
                                result, text = newResult, newText
                            except SyntaxError:
                                pass
                    elif n < 0:
                        found = False
                        while True:
                            try:
                                newResult, newText = self.parseLine(text, p, result, skipWS, skipComments)
                                result, text, found = newResult, newText, True
                            except SyntaxError:
                                break
                        if n == -2 and not(found):
                            syntaxError()
                    n = 1
            return R(result, text)

        elif pattern_type is list:
            # Ordered choice: the first alternative that matches wins.
            result = []
            found = False
            for p in pattern:
                try:
                    result, text = self.parseLine(text, p, result, skipWS, skipComments)
                    found = True
                except SyntaxError:
                    pass
                if found:
                    break
            if found:
                return R(result, text)
            else:
                syntaxError()

        else:
            raise SyntaxError("illegal type in grammar: " + u(pattern_type))

    def lineNo(self):
        """Return ``"<file>:<line>"`` for the furthest position matched so far.

        Returns ``""`` when no line table is set, nothing has matched yet,
        or the position cannot be resolved from the table.
        """
        if not(self.lines): return ""
        if self.restlen == -1: return ""
        parsed = self.textlen - self.restlen

        left, right = 0, len(self.lines)

        # Binary search for an entry starting at or before `parsed` whose
        # successor starts at or after it; the successor's file name and
        # stored line number are what gets reported.
        while True:
            mid = (right + left) // 2
            if self.lines[mid][0] <= parsed:
                if mid + 1 >= len(self.lines):
                    # No successor entry to report from.
                    return ""
                following = self.lines[mid + 1]
                if following[0] >= parsed:
                    try:
                        return u(following[1]) + ":" + u(following[2])
                    except Exception:
                        return ""
                else:
                    left = mid + 1
            else:
                right = mid - 1
            if left > right:
                return ""
|
0
|
283 |
|
|
284 |
# plain module API
|
|
285 |
|
|
286 |
def parseLine(textline, pattern, resultSoFar = None, skipWS = True, skipComments = None, packrat = False):
    """Parse ``textline`` against ``pattern`` with a freshly created parser.

    textline:     text to parse
    pattern:      pyPEG language description
    resultSoFar:  list the matches are appended to (default: a new empty
                  list for every call)
    skipWS:       flag if whitespace should be skipped (default: True)
    skipComments: Python function returning pyPEG for matching comments
    packrat:      use memoization

    returns:      pyAST, textrest

    raises:       SyntaxError if textline is not in the language described
                  by pattern, or if pattern is an illegal description
    """
    # A shared ``[]`` default accumulated results across unrelated calls.
    if resultSoFar is None:
        resultSoFar = []
    engine = parser(p=packrat)
    trimmed = skip(engine.skipper, textline, skipWS, skipComments)
    return engine.parseLine(trimmed, pattern, resultSoFar, skipWS, skipComments)
|
|
291 |
|
|
292 |
# parse():
|
|
293 |
# language: pyPEG language description
|
|
294 |
# lineSource: a fileinput.FileInput object
|
|
295 |
# skipWS: Flag if whitespace should be skipped (default: True)
|
|
296 |
# skipComments: Python function which returns pyPEG for matching comments
|
|
297 |
# packrat: use memoization
|
|
298 |
# lineCount: add line number information to AST
|
|
299 |
#
|
|
300 |
# returns: pyAST
|
|
301 |
#
|
|
302 |
# raises: SyntaxError(reason), if a parsed line is not in language
|
|
303 |
# SyntaxError(reason), if the language description is illegal
|
|
304 |
|
|
305 |
def parse(language, lineSource, skipWS = True, skipComments = None, packrat = False, lineCount = True):
    """Parse all lines of ``lineSource`` against ``language``.

    language:     pyPEG language description (callables are unwrapped by
                  calling them until a non-callable is obtained)
    lineSource:   a fileinput.FileInput object (iterable of lines that
                  provides ``filename()`` and ``lineno()``)
    skipWS:       flag if whitespace should be skipped (default: True)
    skipComments: Python function which returns pyPEG for matching comments
    packrat:      use memoization
    lineCount:    add line number information to AST

    returns:      pyAST

    raises:       SyntaxError("syntax error in <file>:<line>: <text>") if
                  the input is not in the language, is not consumed
                  completely, or the language description is illegal
    """
    while callable(language):
        language = language()

    # lines[i] = (offset of line i in the joined text, file name,
    #             zero-based line number); pieces[i] is the text of line i.
    lines, pieces = [], []
    offset = 0
    for line in lineSource:
        lines.append((offset, lineSource.filename(), lineSource.lineno() - 1))
        piece = u(line)
        pieces.append(piece)
        offset += len(piece)
    orig = "".join(pieces)      # joined once instead of quadratic `+=`
    textlen = len(orig)

    p = parser(p=packrat)
    p.textlen = textlen
    p.lines = lines if lineCount else None

    try:
        text = skip(p.skipper, orig, skipWS, skipComments)
        result, text = p.parseLine(text, language, [], skipWS, skipComments)
        if text:
            raise SyntaxError()

    except SyntaxError:
        # Offset of the furthest successful match.  With restlen still -1
        # this lies past the end, which selects the last line.
        parsed = textlen - p.restlen

        # Index of the last line that starts strictly before `parsed`.
        index = -1
        for start, _file, _number in lines:
            if start >= parsed:
                break
            index += 1

        if lines:
            # A failure at offset 0 used to give index -1, i.e. the text of
            # the LAST line and an empty file name; report the first line.
            index = max(index, 0)
            _start, file, lineNo = lines[index]
            lineNo += 1
            # Use the line as delivered by lineSource: str.splitlines()
            # also splits on form feeds etc. and could shift the index.
            lineCont = pieces[index].rstrip("\r\n")
        else:
            # Empty input used to crash with IndexError here.
            file, lineNo, lineCont = "", 1, ""

        raise SyntaxError("syntax error in " + u(file) + ":" + u(lineNo) + ": " + lineCont)

    return result
|