0
|
1 |
# YPL parser 1.5
|
|
2 |
|
|
3 |
# written by VB.
|
|
4 |
|
|
5 |
import re
|
|
6 |
import sys, codecs
|
25
|
7 |
|
|
8 |
class keyword(str):
    """Grammar element that must match one complete word of the input.

    The parser compares the next run of word characters in the text with
    the value of this string.
    """
|
|
9 |
class code(str):
    """Plain ``str`` subclass used as a distinct marker type.

    NOTE(review): not referenced in the visible part of this file; its
    consumers presumably live elsewhere.
    """
|
0
|
10 |
class ignore(object):
    """Regular expression whose match is consumed but not kept in the result.

    regex_text: source of the regular expression
    *args:      further arguments handed to ``re.compile`` (e.g. flags)
    """

    def __init__(self, regex_text, *args):
        compiled = re.compile(regex_text, *args)
        self.regex = compiled
|
|
13 |
|
|
14 |
class _and(object):
|
|
15 |
def __init__(self, something):
|
|
16 |
self.obj = something
|
|
17 |
|
|
18 |
class _not(_and):
    """Negative look-ahead wrapper: parsing succeeds, without consuming any
    text, only if the wrapped pattern does NOT match.
    """
|
|
19 |
|
25
|
20 |
class Name(str):
    """Rule name (a ``str``) that additionally carries position attributes.

    line: position information, 0 until the parser fills it in
    file: file name, initially the empty string
    """

    def __init__(self, *args):
        # The string value itself is established by str.__new__; only the
        # extra attributes are set up here.
        self.file = ""
        self.line = 0
|
0
|
24 |
|
|
25 |
class Symbol(list):
    """AST node: a two-element list ``[name, what]``.

    The same two values are also available as ``__name__`` and ``what``;
    calling the symbol returns ``what``.
    """

    def __init__(self, name, what):
        self.__name__ = name
        self.what = what
        # Keep the list contents in step with the attributes.
        self.extend((name, what))

    def __call__(self):
        return self.what

    def __str__(self):
        pieces = ('Symbol(', repr(self.__name__), ', ', repr(self.what), ')')
        return "".join(pieces)

    def __repr__(self):
        return str(self)
|
|
37 |
|
|
38 |
# Matches one run of word characters at the current position.
word_regex = re.compile(r"\w+")
# Matches everything up to the end of the current line (possibly nothing).
rest_regex = re.compile(r".*")

# When true (and assertions are enabled), the parser writes trace lines
# to stderr while matching.
print_trace = False
|
|
42 |
|
|
43 |
def skip(skipper, text, skipWS, skipComments):
    """Strip leading whitespace and/or comments from *text*.

    skipper:      object whose ``parseLine`` method is used to match comments
    text:         text to clean up
    skipWS:       if true, leading whitespace is stripped
    skipComments: comment pattern handed to ``skipper.parseLine``; a false
                  value disables comment skipping

    returns:      the remaining text

    Only ``SyntaxError`` (meaning "no comment here") ends the comment loop
    silently; any other exception raised by the skipper propagates.
    """
    rest = text.lstrip() if skipWS else text
    if not skipComments:
        return rest

    while True:
        try:
            _, after = skipper.parseLine(rest, skipComments, [], skipWS, None)
        except SyntaxError:
            # No (further) comment at this position.
            break
        if skipWS:
            after = after.lstrip()
        # A comment pattern that matches without consuming anything would
        # otherwise keep this loop spinning forever.
        progressed = len(after) < len(rest)
        rest = after
        if not progressed:
            break
    return rest
|
|
56 |
|
|
57 |
class parser(object):
    """Recursive-descent PEG parser driven by a pyPEG language description.

    Attributes:
      restlen:  smallest length of unparsed text seen after any successful
                match so far; -1 while nothing has matched yet
      skipper:  parser used for matching comments (a separate instance, or
                this instance itself when ``another`` is true)
      lines:    list of ``(offset, filename, line_index)`` tuples, or None
                when no line information is available
      textlen:  length of the complete input text
      memory:   packrat memo table keyed by ``(len(text), id(pattern))``
      packrat:  flag enabling the memo table
    """

    def __init__(self, another = False, p = False):
        """another: true when this instance is itself a comment skipper
        p:       enable packrat memoization
        """
        self.restlen = -1
        if not(another):
            self.skipper = parser(True, p)
            self.skipper.packrat = p
        else:
            self.skipper = self
        self.lines = None
        self.textlen = 0
        self.memory = {}
        self.packrat = p

    def _trace(self, prefix, pattern, suffix = ""):
        """Write one trace line to stderr for a named pattern other than "comment"."""
        pattern_name = getattr(pattern, "__name__", None)
        if isinstance(pattern_name, str) and pattern_name != "comment":
            sys.stderr.write(prefix + pattern_name + suffix + "\n")

    def parseLine(self, textline, pattern, resultSoFar = None, skipWS = True, skipComments = None):
        """Match *pattern* at the start of *textline*.

        textline:     text to parse
        pattern:      pyPEG language description; one of
                        - str: literal text
                        - keyword: literal that must equal the next whole word
                        - compiled regex: match is added to the result
                        - ignore: regex whose match is dropped
                        - _and / _not: look-ahead, consumes nothing
                        - tuple: sequence; an int item sets the repetition
                          of the next item (n > 0: exactly n times,
                          0: optional, -2: one or more, other negative
                          values: zero or more)
                        - list: ordered choice, first matching item wins
                        - callable: rule; called to obtain its pattern. If
                          its name does not start with "_" the match is
                          wrapped in a Symbol carrying that name.
        resultSoFar:  parsing result so far; it is extended in place
                      (default: a fresh empty list)
        skipWS:       flag if whitespace should be skipped (default: True)
        skipComments: Python function returning pyPEG for matching comments

        returns:      pyAST, textrest

        raises:       SyntaxError(reason) if textline is detected not being
                      in the language described by pattern
                      SyntaxError(reason) if pattern is an illegal language
                      description
        """
        # A fresh list per call: R() appends to it, so a shared default list
        # would leak results from one call into the next.
        if resultSoFar is None:
            resultSoFar = []
        name = None
        _textline = textline
        _pattern = pattern

        def R(result, text):
            # Record a successful match and build the return value.
            if __debug__:
                if print_trace:
                    self._trace("match: ", _pattern)

            if self.restlen == -1:
                self.restlen = len(text)
            else:
                self.restlen = min(self.restlen, len(text))
            res = resultSoFar
            if name and result:
                name.line = self.lineNo()
                res.append(Symbol(name, result))
            elif name:
                name.line = self.lineNo()
                res.append(Symbol(name, []))
            elif result:
                # Exact type test on purpose: a Symbol is a list subclass
                # and has to be appended as one item, not spliced in.
                if type(result) is list:
                    res.extend(result)
                else:
                    res.extend([result])
            if self.packrat:
                self.memory[(len(_textline), id(_pattern))] = (res, text)
            return res, text

        def syntaxError():
            # Record the failure in the memo table and signal "no match".
            if self.packrat:
                self.memory[(len(_textline), id(_pattern))] = False
            raise SyntaxError()

        if self.packrat:
            # NOTE(review): a memoized failure (False) is not re-raised here.
            # The previous code raised SyntaxError inside a bare except that
            # swallowed it again, so failures have always been re-parsed;
            # that effective behaviour is kept.
            cached = self.memory.get((len(textline), id(pattern)))
            if cached:
                return cached

        if callable(pattern):
            if __debug__:
                if print_trace:
                    self._trace("testing with ", pattern, ": " + textline[:40])

            if pattern.__name__[0] != "_":
                name = Name(pattern.__name__)

            pattern = pattern()
            if callable(pattern):
                pattern = (pattern,)

        text = skip(self.skipper, textline, skipWS, skipComments)

        # Exact type comparisons below are intentional: keyword is a str
        # subclass and must not be handled by the plain str branch.
        pattern_type = type(pattern)

        if pattern_type is str:
            if text[:len(pattern)] == pattern:
                text = skip(self.skipper, text[len(pattern):], skipWS, skipComments)
                return R(None, text)
            else:
                syntaxError()

        elif pattern_type is keyword:
            m = word_regex.match(text)
            if m:
                if m.group(0) == pattern:
                    text = skip(self.skipper, text[len(pattern):], skipWS, skipComments)
                    return R(None, text)
                else:
                    syntaxError()
            else:
                syntaxError()

        elif pattern_type is _not:
            try:
                self.parseLine(text, pattern.obj, [], skipWS, skipComments)
            except SyntaxError:
                # Wrapped pattern does not match: the predicate succeeds
                # without consuming anything.
                return resultSoFar, textline
            syntaxError()

        elif pattern_type is _and:
            # Raises SyntaxError if the wrapped pattern does not match.
            self.parseLine(text, pattern.obj, [], skipWS, skipComments)
            return resultSoFar, textline

        elif pattern_type is type(word_regex) or pattern_type is ignore:
            if pattern_type is ignore:
                pattern = pattern.regex
            m = pattern.match(text)
            if m:
                text = skip(self.skipper, text[len(m.group(0)):], skipWS, skipComments)
                if pattern_type is ignore:
                    return R(None, text)
                else:
                    return R(m.group(0), text)
            else:
                syntaxError()

        elif pattern_type is tuple:
            result = []
            n = 1
            for p in pattern:
                if type(p) is int:
                    # Repetition count for the next item.
                    n = p
                else:
                    if n > 0:
                        for i in range(n):
                            result, text = self.parseLine(text, p, result, skipWS, skipComments)
                    elif n == 0:
                        if text == "":
                            pass
                        else:
                            try:
                                newResult, newText = self.parseLine(text, p, result, skipWS, skipComments)
                                result, text = newResult, newText
                            except SyntaxError:
                                pass
                    elif n < 0:
                        found = False
                        while True:
                            try:
                                newResult, newText = self.parseLine(text, p, result, skipWS, skipComments)
                                result, text, found = newResult, newText, True
                            except SyntaxError:
                                break
                        if n == -2 and not(found):
                            syntaxError()
                    n = 1
            return R(result, text)

        elif pattern_type is list:
            result = []
            found = False
            for p in pattern:
                try:
                    result, text = self.parseLine(text, p, result, skipWS, skipComments)
                    found = True
                except SyntaxError:
                    pass
                if found:
                    break
            if found:
                return R(result, text)
            else:
                syntaxError()

        else:
            # pattern_type is a type object, so it has to be converted
            # before it can be concatenated to the message.
            raise SyntaxError("illegal type in grammar: " + str(pattern_type))

    def lineNo(self):
        """Return ``"<filename>:<line>"`` for the furthest position parsed.

        The value is taken from the ``lines`` entry following the one that
        contains the position. Returns "" when no line information is
        available, when nothing has been matched yet, or when the position
        lies in the last line (there is no following entry).
        """
        if not(self.lines): return ""
        if self.restlen == -1: return ""
        parsed = self.textlen - self.restlen

        left, right = 0, len(self.lines)

        # Binary search over the line start offsets.
        while True:
            mid = int((right + left) / 2)
            if self.lines[mid][0] <= parsed:
                try:
                    following = self.lines[mid + 1]
                except IndexError:
                    return ""
                if following[0] >= parsed:
                    # The line number is an int; convert before joining.
                    return str(following[1]) + ":" + str(following[2])
                left = mid + 1
            else:
                right = mid - 1
            if left > right:
                return ""
|
0
|
270 |
|
|
271 |
# plain module API
|
|
272 |
|
|
273 |
def parseLine(textline, pattern, resultSoFar = None, skipWS = True, skipComments = None, packrat = False):
    """Parse *textline* with a freshly created parser.

    textline:     text to parse
    pattern:      pyPEG language description
    resultSoFar:  parsing result so far; extended in place
                  (default: a fresh empty list)
    skipWS:       flag if whitespace should be skipped (default: True)
    skipComments: Python function returning pyPEG for matching comments
    packrat:      use memoization

    returns:      pyAST, textrest

    raises:       SyntaxError if textline is not in the language described
                  by pattern
    """
    # The parser appends to this list, so a shared default list would make
    # results pile up across calls.
    if resultSoFar is None:
        resultSoFar = []
    p = parser(p=packrat)
    text = skip(p.skipper, textline, skipWS, skipComments)
    ast, text = p.parseLine(text, pattern, resultSoFar, skipWS, skipComments)
    return ast, text
|
|
278 |
|
|
279 |
# parse():
|
|
280 |
# language: pyPEG language description
|
|
281 |
# lineSource: a fileinput.FileInput object
|
|
282 |
# skipWS: Flag if whitespace should be skipped (default: True)
|
|
283 |
# skipComments: Python function which returns pyPEG for matching comments
|
|
284 |
# packrat: use memoization
|
|
285 |
# lineCount: add line number information to AST
|
|
286 |
#
|
|
287 |
# returns: pyAST
|
|
288 |
#
|
|
289 |
# raises: SyntaxError(reason), if a parsed line is not in language
|
|
290 |
# SyntaxError(reason), if the language description is illegal
|
|
291 |
|
|
292 |
def parse(language, lineSource, skipWS = True, skipComments = None, packrat = False, lineCount = True):
    """Parse everything *lineSource* yields against *language*.

    language:     pyPEG language description; while it is callable it is
                  called to obtain the actual pattern
    lineSource:   iterable of lines offering ``filename()`` and ``lineno()``
                  (a fileinput.FileInput object)
    skipWS:       flag if whitespace should be skipped (default: True)
    skipComments: Python function which returns pyPEG for matching comments
    packrat:      use memoization
    lineCount:    add line number information to AST

    returns:      pyAST

    raises:       SyntaxError(reason) naming file, line number and line
                  content, if the input is not in the language or is not
                  consumed completely
    """
    while callable(language):
        language = language()

    # One entry per input line: (offset of the line start, file name,
    # zero-based line index). The raw lines are kept for error messages.
    lines = []
    chunks = []
    offset = 0
    for line in lineSource:
        lines.append((offset, lineSource.filename(), lineSource.lineno() - 1))
        chunks.append(line)
        offset += len(line)
    orig = "".join(chunks)
    textlen = len(orig)

    p = parser(p=packrat)
    p.textlen = textlen
    p.lines = lines if lineCount else None

    try:
        text = skip(p.skipper, orig, skipWS, skipComments)
        result, text = p.parseLine(text, language, [], skipWS, skipComments)
        if text:
            raise SyntaxError()

    except SyntaxError:
        # restlen is -1 when nothing matched at all; treat that as an error
        # at the very start instead of using the sentinel in arithmetic.
        restlen = textlen if p.restlen == -1 else p.restlen
        parsed = textlen - restlen

        # Index of the last line that starts before the error position.
        index = -1
        for i, entry in enumerate(lines):
            if entry[0] >= parsed:
                break
            index = i
        if index < 0 and lines:
            # Error at offset 0: report the first line.
            index = 0

        if index >= 0:
            file = lines[index][1]
            lineNo = lines[index][2] + 1
            lineCont = chunks[index].rstrip("\r\n")
        else:
            # Empty input: there is no line to quote.
            file, lineNo, lineCont = "", 1, ""

        # lineNo is an int and has to be converted for the message.
        raise SyntaxError("syntax error in " + str(file) + ":" + str(lineNo) + ": " + lineCont)

    return result
|