|
1 /* |
|
2 * (c) 2003 Mario de Sousa |
|
3 * |
|
4 * Offered to the public under the terms of the GNU General Public License |
|
5 * as published by the Free Software Foundation; either version 2 of the |
|
6 * License, or (at your option) any later version. |
|
7 * |
|
8 * This program is distributed in the hope that it will be useful, but |
|
9 * WITHOUT ANY WARRANTY; without even the implied warranty of |
|
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General |
|
11 * Public License for more details. |
|
12 * |
|
13 * This code is made available on the understanding that it will not be |
|
14 * used in safety-critical situations without a full and competent review. |
|
15 */ |
|
16 |
|
17 /* |
|
18 * An IEC 61131-3 IL and ST compiler. |
|
19 * |
|
20 * Based on the |
|
21 * FINAL DRAFT - IEC 61131-3, 2nd Ed. (2001-12-10) |
|
22 * |
|
23 */ |
|
24 |
|
25 /* |
|
26 * Stage 1 |
|
27 * ======= |
|
28 * |
|
29 * This file contains the lexical tokens definitions, from which |
|
30 * the flex utility will generate a lexical parser function. |
|
31 */ |
|
32 |
|
33 |
|
34 |
|
35 |
|
36 /*****************************/ |
|
37 /* Lexical Parser Options... */ |
|
38 /*****************************/ |
|
39 |
|
40 /* The lexical analyser will never work in interactive mode, |
|
41 * i.e., it will only process programs saved to files, and never |
|
42 * programs being written inter-actively by the user. |
|
43 * This option saves the resulting parser from calling the |
|
44 * isatty() function, that seems to be generating some compile |
|
45 * errors under some (older?) versions of flex. |
|
46 */ |
|
47 %option never-interactive |
|
48 |
|
49 /* Have the lexical analyser use a 'char *yytext' instead of an |
|
50 * array of char 'char yytext[??]' to store the lexical token. |
|
51 */ |
|
52 %pointer |
|
53 |
|
54 |
|
55 /* Have the lexical analyser ignore the case of letters. |
|
56 * This will occur for all the tokens and keywords, but |
|
57 * the resulting text handed up to the syntax parser |
|
58 * will not be changed, and keep the original case |
|
59 * of the letters in the input file. |
|
60 */ |
|
61 %option case-insensitive |
|
62 |
|
63 /* Have the generated lexical analyser keep track of the |
|
64 * line number it is currently analysing. |
|
65 * This is used to pass up to the syntax parser |
|
66 * the number of the line on which the current |
|
67 * token was found. It will enable the syntax parser |
|
68 * to generate more informative error messages... |
|
69 */ |
|
70 %option yylineno |
|
71 |
|
72 /* required for the use of the yy_pop_state() and |
|
73 * yy_push_state() functions |
|
74 */ |
|
75 %option stack |
|
76 |
|
77 /* The '%option stack' also requests the inclusion of |
|
78 * the yy_top_state(), however this function is not |
|
79 * currently being used. This means that the compiler |
|
80 * is complaining about the existence of this function. |
|
81 * The following option removes the yy_top_state() |
|
82 * function from the resulting c code, so the compiler |
|
83 * no longer complains. |
|
84 */ |
|
85 %option noyy_top_state |
|
86 |
|
87 /**************************************************/ |
|
88 /* External Variable and Function declarations... */ |
|
89 /**************************************************/ |
|
90 |
|
91 |
|
92 %{ |
|
93 /* Define TEST_MAIN to include a main() function. |
|
94 * Useful for testing the parser generated by flex. |
|
95 */ |
|
96 /* |
|
97 #define TEST_MAIN |
|
98 */ |
|
99 /* If lexical parser is compiled by itself, we need to define the following |
|
100 * constant to some string. Under normal circumstances LIBDIRECTORY is set |
|
101 * in the syntax parser header file... |
|
102 */ |
|
103 #ifdef TEST_MAIN |
|
104 #define LIBDIRECTORY "just_testing" |
|
105 #endif |
|
106 |
|
107 |
|
108 |
|
109 /* Required for strdup() */ |
|
110 #include <string.h> |
|
111 |
|
112 /* Required only for the declaration of abstract syntax classes |
|
113 * (class symbol_c; class token_c; class list_c;) |
|
114 * These will not be used in flex, but the token type union defined |
|
115 * in iec.hh contains pointers to these classes, so we must include |
|
116 * it here. |
|
117 */ |
|
118 #include "../absyntax/absyntax.hh" |
|
119 |
|
120 /* generated by bison. |
|
121 * Contains the definition of the token constants, and the |
|
122 * token value type YYSTYPE (in our case, a 'const char *') |
|
123 */ |
|
124 #include "iec.y.hh" |
|
125 |
|
126 /* Variable defined by the bison parser, |
|
127 * where the value of the tokens will be stored |
|
128 */ |
|
129 extern YYSTYPE yylval; |
|
130 |
|
131 /* The name of the file currently being parsed... |
|
132 * This variable is declared and read from the code generated by bison! |
|
133 * Note that flex accesses and updates this global variable |
|
134 * appropriately whenever it comes across an {#include "<filename>"} |
|
135 * directive... |
|
136 */ |
|
137 extern const char *current_filename; |
|
138 |
|
139 /* We will not be using unput() in our flex code... */ |
|
140 #define YY_NO_UNPUT |
|
141 |
|
142 /* Variable defined by the bison parser. |
|
143 * It must be initialised with the location |
|
144 * of the token being parsed. |
|
145 * This is only needed if we want to keep |
|
146 * track of the locations, in order to give |
|
147 * more meaningful error messages! |
|
148 */ |
|
149 extern YYLTYPE yylloc; |
|
150 |
|
151 /* Macro that is executed for every action. |
|
152 * We use it to pass the location of the token |
|
153 * back to the bison parser... |
|
154 */ |
|
155 #define YY_USER_ACTION { \ |
|
156 yylloc.first_line = yylloc.last_line = yylineno; \ |
|
157 yylloc.first_column = yylloc.last_column = 0; \ |
|
158 } |
|
159 |
|
160 |
|
161 /* Since this lexical parser we defined only works in ASCII based |
|
162 * systems, we might as well make sure it is being compiled on |
|
163 * one... |
|
164 * Lets check a few random characters... |
|
165 */ |
|
166 #if (('a' != 0x61) || ('A' != 0x41) || ('z' != 0x7A) || ('Z' != 0x5A) || \ |
|
167 ('0' != 0x30) || ('9' != 0x39) || ('(' != 0x28) || ('[' != 0x5B)) |
|
168 #error This lexical analyser is not portable to a non ASCII based system. |
|
169 #endif |
|
170 |
|
171 |
|
172 /* Function only called from within flex, but defined |
|
173 * in iec.y! |
|
174 * We declare it here... |
|
175 * |
|
176 * Search for a symbol in either of the two symbol tables |
|
177 * and return the token id of the first symbol found. |
|
178 * Searches first in the variables, and only if not found |
|
179 * does it continue searching in the library elements |
|
180 */ |
|
181 //token_id_t get_identifier_token(const char *identifier_str); |
|
182 int get_identifier_token(const char *identifier_str); |
|
183 %} |
|
184 |
|
185 |
|
186 /***************************************************/ |
|
187 /* Forward Declaration of functions defined later. */ |
|
188 /***************************************************/ |
|
189 |
|
190 %{ |
|
191 /* return all the text in the current token back to the input stream. */ |
|
192 void unput_text(unsigned int n); |
|
193 %} |
|
194 |
|
195 |
|
196 |
|
197 /****************************/ |
|
198 /* Lexical Parser States... */ |
|
199 /****************************/ |
|
200 |
|
201 /* NOTE: Our parser can parse st or il code, intermixed |
|
202 * within the same file. |
|
203 * With IL we come across the issue of the EOL (end of line) token. |
|
204 * ST, and the declaration parts of IL do not use this token! |
|
205 * If the lexical analyser were to issue this token during ST |
|
206 * language parsing, or during the declaration of data types, |
|
207 * function headers, etc. in IL, the syntax parser would crash. |
|
208 * |
|
209 * We can solve this issue using one of three methods: |
|
210 * (1) Augment all the syntax that does not accept the EOL |
|
211 * token to simply ignore it. This makes the syntax |
|
212 * definition (in iec.y) very cluttered! |
|
213 * (2) Let the lexical parser figure out which language |
|
214 * it is parsing, and decide whether or not to issue |
|
215 * the EOL token. This requires the lexical parser |
|
216 * to have knowledge of the syntax!, making for a poor |
|
217 * overall organisation of the code. It would also make it |
|
218 * very difficult to understand the lexical parser as it |
|
219 * would use several states, and a state machine to transition |
|
220 * between the states. The state transitions would be |
|
221 * intermingled with the lexical parser definition! |
|
222 * (3) Use a mixture of (1) and (2). The lexical analyser |
|
223 * merely distinguishes between function headers and function |
|
224 * bodies, but no longer makes a distinction between il and |
|
225 * st language bodies. When parsing a body, it will return |
|
226 * the EOL token. In other states '\n' will be ignored as |
|
227 * whitespace. |
|
228 * The ST language syntax has been augmented in the syntax |
|
229 * parser configuration to ignore any EOL tokens that it may |
|
230 * come across! |
|
231 * This option has both drawbacks of option (1) and (2), but |
|
232 * much less intensely. |
|
233 * The syntax that gets cluttered is limited to the ST statements |
|
234 * (which is rather limited, compared to the function headers and |
|
235 * data type declarations, etc...), while the state machine in |
|
236 * the lexical parser becomes very simple. All state transitions |
|
237 * can be handled within the lexical parser by itself, and can be |
|
238 * easily identified. Thus knowledge of the syntax required by |
|
239 * the lexical parser is very limited! |
|
240 * |
|
241 * Amazingly enough, I (Mario) got to implement option (3) |
|
242 * at first, requiring two basic states, decl and body. |
|
243 * The lexical parser will enter the body state when |
|
244 * it is parsing the body of a function/program/function block. The |
|
245 * state transition is done when we find an END_VAR that is not followed |
|
246 * by a VAR! This is the syntax knowledge that gets included in the |
|
247 * lexical analyser with this option! |
|
248 * Unfortunately, getting the st syntax parser to ignore EOL anywhere |
|
249 * where they might appear leads to conflicts. This is due to the fact |
|
250 * that the syntax parser uses the single look-ahead token to remove |
|
251 * possible conflicts. When we insert a possible EOL, the single |
|
252 * look ahead token becomes the EOL, which means the potential conflicts |
|
253 * could no longer be resolved. |
|
254 * Removing these conflicts would make the st syntax parser very convoluted, |
|
255 * and adding the extraneous EOL would make it very cluttered. |
|
256 * This option was therefore dropped in favour of another! |
|
257 * |
|
258 * I ended up implementing (2). Unfortunately the lexical analyser can |
|
259 * not easily distinguish between il and st code, since function |
|
260 * calls in il are very similar to function block calls in st. |
|
261 * We therefore use an extra 'body' state. When the lexical parser |
|
262 * finds that last END_VAR, it enters the body state. This state |
|
263 * must figure out what language is being parsed from the first few |
|
264 * tokens, and switch to the correct state (st or il) according to the |
|
265 * language. This means that we insert quite a bit of knowledge of the |
|
266 * syntax of the languages into the lexical parser. This is ugly, but it |
|
267 * works, and at least it is possible to keep all the state changes together |
|
268 * to make it easier to remove them later on if need be. |
|
269 * The body state returns any matched text back to the buffer with unput(), |
|
270 * to be later matched correctly by the appropriate language parser (st or il). |
|
271 * The state machine has 6 possible states (INITIAL, config, decl, body, st, il) |
|
272 * Possible state changes are: |
|
273 * INITIAL -> decl (when a FUNCTION, FUNCTION_BLOCK, or PROGRAM is found, |
|
274 * and followed by a VAR declaration) |
|
275 * INITIAL -> body (when a FUNCTION, FUNCTION_BLOCK, or PROGRAM is found, |
|
276 * and _not_ followed by a VAR declaration) |
|
277 * INITIAL -> config (when a CONFIGURATION is found) |
|
278 * decl -> body (when the last END_VAR is found, i.e. the function body starts) |
|
279 * body -> st (when it figures out it is parsing st language) |
|
280 * body -> il (when it figures out it is parsing il language) |
|
281 * decl -> INITIAL (when a END_FUNCTION, END_FUNCTION_BLOCK, or END_PROGRAM is found) |
|
282 * st -> INITIAL (when a END_FUNCTION, END_FUNCTION_BLOCK, or END_PROGRAM is found) |
|
283 * il -> INITIAL (when a END_FUNCTION, END_FUNCTION_BLOCK, or END_PROGRAM is found) |
|
284 * config -> INITIAL (when a END_CONFIGURATION is found) |
|
285 */ |
|
286 /* we are parsing a configuration. */ |
|
287 %s config |
|
288 |
|
289 /* we are parsing a function, program or function block declaration */ |
|
290 %s decl |
|
291 |
|
292 /* we will be parsing a function body. Whether il/st is remains unknown */ |
|
293 %x body |
|
294 |
|
295 /* we are parsing il code -> flex must return the EOL tokens! */ |
|
296 %s il |
|
297 |
|
298 /* we are parsing st code -> flex must not return the EOL tokens! */ |
|
299 %s st |
|
300 |
|
301 |
|
302 |
|
303 |
|
304 /*******************/ |
|
305 /* File #include's */ |
|
306 /*******************/ |
|
307 |
|
308 /* We extend the IEC 61131-3 standard syntax to allow inclusion |
|
309 * of other files, using the IEC 61131-3 pragma directive... |
|
310 * The accepted syntax is: |
|
311 * {#include "<filename>"} |
|
312 */ |
|
313 |
|
314 /* the "include" states are used for picking up the name of an include file */ |
|
315 %x include_beg |
|
316 %x include_filename |
|
317 %x include_end |
|
318 |
|
319 |
|
320 file_include_pragma_filename [^\"]* |
|
321 file_include_pragma_beg "{#include"{st_whitespace_only}\" |
|
322 file_include_pragma_end \"{st_whitespace_only}"}" |
|
323 file_include_pragma {file_include_pragma_beg}{file_include_pragma_filename}{file_include_pragma_end} |
|
324 |
|
325 |
|
326 %{ |
|
327 #define MAX_INCLUDE_DEPTH 16 |
|
328 |
|
329 typedef struct { |
|
330 YY_BUFFER_STATE buffer_state; |
|
331 int lineno; |
|
332 const char *filename; |
|
333 } include_stack_t; |
|
334 |
|
335 include_stack_t include_stack[MAX_INCLUDE_DEPTH]; |
|
336 int include_stack_ptr = 0; |
|
337 |
|
338 const char *INCLUDE_DIRECTORIES[] = { |
|
339 "", |
|
340 "lib/", |
|
341 "/lib/", |
|
342 "/usr/lib/", |
|
343 "/usr/lib/iec/", |
|
344 LIBDIRECTORY "/", |
|
345 NULL /* must end with NULL!! */ |
|
346 }; |
|
347 |
|
348 |
|
/*
 * Join two strings together into a newly allocated buffer.
 *
 * a, b : NUL-terminated strings to concatenate (a first, then b).
 *
 * Returns a string allocated with malloc(3) holding a followed by b;
 * the caller must release it with free(3).
 * Returns NULL if either argument is NULL, if the combined length
 * cannot be represented in a size_t, or if malloc(3) fails.
 */
static char *strdup2(const char *a, const char *b) {
  size_t len_a, len_b;
  char *res;

  if (a == NULL || b == NULL)
    return NULL;

  len_a = strlen(a);
  len_b = strlen(b);
  /* reject lengths whose sum (plus the terminator) would wrap around */
  if (len_b >= (size_t)-1 - len_a)
    return NULL;

  /* the cast keeps this valid if the scanner is compiled as C++
   * (this file includes C++ headers)
   */
  res = (char *)malloc(len_a + len_b + 1);
  if (!res)
    return NULL;

  memcpy(res, a, len_a);
  memcpy(res + len_a, b, len_b + 1);  /* the + 1 copies the terminator of b */
  return res;
}
|
359 %} |
|
360 |
|
361 |
|
362 |
|
363 /*****************************/ |
|
364 /* Preliminary constructs... */ |
|
365 /*****************************/ |
|
366 |
|
367 |
|
368 /* A pragma... */ |
|
369 |
|
370 pragma "{"[^}]*"}" |
|
371 |
|
372 /* NOTE: this seemingly unnecessary complex definition is required |
|
373 * to be able to eat up comments such as: |
|
374 * '(* Testing... ! ***** ******)' |
|
375 * without using the trailing context command in flex (/{context}) |
|
376 * since {comment} itself will later be used with |
|
377 * trailing context ({comment}/{context}) |
|
378 */ |
|
379 not_asterisk [^*] |
|
380 not_close_parenthesis_nor_asterisk [^*)] |
|
381 asterisk "*" |
|
382 comment_text {not_asterisk}|(({asterisk}+){not_close_parenthesis_nor_asterisk}) |
|
383 |
|
384 comment "(*"({comment_text}*)({asterisk}+)")" |
|
385 |
|
386 |
|
387 /* |
|
388 3.1 Whitespace |
|
389 (NOTE: Whitespace IS clearly defined, to include newline!!! See section 2.1.4!!!) |
|
390 No definition of whitespace is given, in other words, the characters that may be used to separate language tokens are not precisely defined. One may nevertheless make an intelligent guess of using the space (' '), and other characters also commonly considered whitespace in other programming languages (horizontal tab, vertical tab, form feed, etc.). |
|
391 The main question is whether the newline character should be considered whitespace. IL language statements use an EOL token (End Of Line) to distinguish between some language constructs. The EOL token itself is openly defined as "normally consist[ing] of the 'paragraph separator' ", leaving the final choice open to each implementation. If we choose the newline character to represent the EOL token, it may then not be considered whitespace. |
|
392 On the other hand, some examples that come in a non-normative annex of the specification allow function declarations to span multiple lines, which means that the newline character is being considered as whitespace. |
|
397 Our implementation works around this issue by including the new line character in the whitespace while parsing function declarations and the ST language, and parsing it as the EOL token only while parsing IL language statements. This requires the use of a state machine in the lexical parser that needs at least some knowledge of the syntax itself. |
|
398 */ |
|
399 /* NOTE: Our definition of whitespace will only work in ASCII! |
|
400 * |
|
401 * Since the IL language needs to know the location of newline |
|
402 * (token EOL -> '\n' ), we need one definition of whitespace |
|
403 * for each language... |
|
404 */ |
|
405 /* |
|
406 * NOTE: we cannot use |
|
407 * st_whitespace [:space:]* |
|
408 * since we use {st_whitespace} as trailing context. In our case |
|
409 * this would not constitute "dangerous trailing context", but the |
|
410 * lexical generator (i.e. flex) does not know this (since it does |
|
411 * not know which characters belong to the set [:space:]), and will |
|
412 * generate a "dangerous trailing context" warning! |
|
413 * We use this alternative just to stop the flex utility from |
|
414 * generating the invalid (in this case) warning... |
|
415 */ |
|
416 |
|
417 st_whitespace_only [ \f\n\r\t\v]* |
|
418 il_whitespace_only [ \f\r\t\v]* |
|
419 |
|
420 st_whitespace_text {st_whitespace_only}|{comment}|{pragma} |
|
421 il_whitespace_text {il_whitespace_only}|{comment}|{pragma} |
|
422 |
|
423 st_whitespace {st_whitespace_text}* |
|
424 il_whitespace {il_whitespace_text}* |
|
425 |
|
426 st_whitespace_text_no_pragma {st_whitespace_only}|{comment} |
|
427 il_whitespace_text_no_pragma {il_whitespace_only}|{comment} |
|
428 |
|
429 st_whitespace_no_pragma {st_whitespace_text_no_pragma}* |
|
430 il_whitespace_no_pragma {il_whitespace_text_no_pragma}* |
|
431 |
|
432 qualified_identifier {identifier}(\.{identifier})? |
|
433 |
|
434 |
|
435 |
|
436 /*****************************************/ |
|
437 /* B.1.1 Letters, digits and identifiers */ |
|
438 /*****************************************/ |
|
439 /* NOTE: The following definitions only work if the host computer |
|
440 * is using the ASCII maping. For e.g., with EBCDIC [A-Z] |
|
441 * contains non-alphabetic characters! |
|
442 * The correct way of doing it would be to use |
|
443 * the [:upper:] etc... definitions. |
|
444 * |
|
445 * Unfortunately, further on we need all printable |
|
446 * characters (i.e. [:print:]), but excluding '$'. |
|
447 * Flex does not allow sets to be composed by excluding |
|
448 * elements. Sets may only be constructed by adding new |
|
449 * elements, which means that we have to revert to |
|
450 * [\x20\x21\x23\x25\x26\x28-\x7E] for the definition |
|
451 * of the printable characters with the required exceptions. |
|
452 * The above also implies the use of ASCII, but now we have |
|
453 * no way to work around it! |
|
454 * |
|
455 * The conclusion is that our parser is limited to ASCII |
|
456 * based host computers!! |
|
457 */ |
|
458 letter [A-Za-z] |
|
459 digit [0-9] |
|
460 octal_digit [0-7] |
|
461 hex_digit {digit}|[A-F] |
|
462 identifier ({letter}|(_({letter}|{digit})))((_?({letter}|{digit}))*) |
|
463 |
|
464 |
|
465 /*******************/ |
|
466 /* B.1.2 Constants */ |
|
467 /*******************/ |
|
468 |
|
469 /******************************/ |
|
470 /* B.1.2.1 Numeric literals */ |
|
471 /******************************/ |
|
472 integer {digit}((_?{digit})*) |
|
473 binary_integer 2#{bit}((_?{bit})*) |
|
474 bit [0-1] |
|
475 octal_integer 8#{octal_digit}((_?{octal_digit})*) |
|
476 hex_integer 16#{hex_digit}((_?{hex_digit})*) |
|
477 exponent [Ee]([+-]?){integer} |
|
478 /* The correct definition for real would be: |
|
479 * real {integer}\.{integer}({exponent}?) |
|
480 * |
|
481 * Unfortunately, the spec also defines fixed_point (B 1.2.3.1) as: |
|
482 * fixed_point {integer}\.{integer} |
|
483 * |
|
484 * This means that {integer}\.{integer} could be interpreted |
|
485 * as either a fixed_point or a real. |
|
486 * I have opted to interpret {integer}\.{integer} as a fixed_point. |
|
487 * In order to do this, the definition of real has been changed to: |
|
488 * real {integer}\.{integer}{exponent} |
|
489 * |
|
490 * This means that the syntax parser now needs to define a real to be |
|
491 * either a real_token or a fixed_point_token! |
|
492 */ |
|
493 real {integer}\.{integer}{exponent} |
|
494 |
|
495 |
|
496 /*******************************/ |
|
497 /* B.1.2.2 Character Strings */ |
|
498 /*******************************/ |
|
499 /* |
|
500 common_character_representation := |
|
501 <any printable character except '$', '"' or "'"> |
|
502 |'$$' |
|
503 |'$L'|'$N'|'$P'|'$R'|'$T' |
|
504 |'$l'|'$n'|'$p'|'$r'|'$t' |
|
505 |
|
506 NOTE: $ = 0x24 |
|
507 " = 0x22 |
|
508 ' = 0x27 |
|
509 |
|
510 printable chars in ASCII: 0x20-0x7E |
|
511 */ |
|
512 |
|
513 esc_char_u $L|$N|$P|$R|$T |
|
514 esc_char_l $l|$n|$p|$r|$t |
|
515 esc_char $$|{esc_char_u}|{esc_char_l} |
|
516 double_byte_char (${hex_digit}{hex_digit}{hex_digit}{hex_digit}) |
|
517 single_byte_char (${hex_digit}{hex_digit}) |
|
518 |
|
519 /* WARNING: |
|
520 * This definition is only valid in ASCII... |
|
521 * |
|
522 * Flex includes the function print_char() that defines |
|
523 * all printable characters portably (i.e. whatever character |
|
524 * encoding is currently being used , ASCII, EBCDIC, etc...) |
|
525 * Unfortunately, we cannot generate the definition of |
|
526 * common_character_representation portably, since flex |
|
527 * does not allow definition of sets by subtracting |
|
528 * elements in one set from another set. |
|
529 * This means we must build up the defintion of |
|
530 * common_character_representation using only set addition, |
|
531 * which leaves us with the only choice of defining the |
|
532 * characters non-portably... |
|
533 */ |
|
534 common_character_representation [\x20\x21\x23\x25\x26\x28-\x7E]|{esc_char} |
|
535 double_byte_character_representation $\"|'|{double_byte_char}|{common_character_representation} |
|
536 single_byte_character_representation $'|\"|{single_byte_char}|{common_character_representation} |
|
537 |
|
538 |
|
539 double_byte_character_string \"({double_byte_character_representation}*)\" |
|
540 single_byte_character_string '({single_byte_character_representation}*)' |
|
541 |
|
542 |
|
543 /************************/ |
|
544 /* B 1.2.3.1 - Duration */ |
|
545 /************************/ |
|
546 fixed_point {integer}\.{integer} |
|
547 |
|
548 fixed_point_d {fixed_point}d |
|
549 integer_d {integer}d |
|
550 |
|
551 fixed_point_h {fixed_point}h |
|
552 integer_h {integer}h |
|
553 |
|
554 fixed_point_m {fixed_point}m |
|
555 integer_m {integer}m |
|
556 |
|
557 fixed_point_s {fixed_point}s |
|
558 integer_s {integer}s |
|
559 |
|
560 fixed_point_ms {fixed_point}ms |
|
561 integer_ms {integer}ms |
|
562 |
|
563 |
|
564 /********************************************/ |
|
565 /* B.1.4.1 Directly Represented Variables */ |
|
566 /********************************************/ |
|
567 /* The correct definition, if the standard were to be followed... */ |
|
568 /* |
|
569 location_prefix [IQM] |
|
570 size_prefix [XBWDL] |
|
571 direct_variable %{location_prefix}({size_prefix}?){integer}((.{integer})*) |
|
572 */ |
|
573 |
|
574 /* For the MatPLC, we will accept %<identifier> |
|
575 * as a direct variable, this being mapped onto the MatPLC point |
|
576 * named <identifier> |
|
577 */ |
|
578 /* TODO: we should not restrict it to only the accepted syntax |
|
579 * of <identifier> as specified by the standard. MatPLC point names |
|
580 * have a more permissive syntax. |
|
581 * |
|
582 * e.g. "P__234" |
|
583 * Is a valid MatPLC point name, but not a valid <identifier> !! |
|
584 * The same happens with names such as "333", "349+23", etc... |
|
585 * How can we handle these more expressive names in our case? |
|
586 * Remember that some direct variable may remain anonymous, with |
|
587 * declarations such as: |
|
588 * VAR |
|
589 * AT %I3 : BYTE := 255; |
|
590 * END_VAR |
|
591 * in which case we are currently using "I3" as the variable |
|
592 * name. For the other names, this would create havoc!!! |
|
593 */ |
|
594 direct_variable %{identifier} |
|
595 |
|
596 /******************************************/ |
|
597 /* B 1.4.3 - Declaration & Initialisation */ |
|
598 /******************************************/ |
|
599 incompl_location %[IQM]\* |
|
600 |
|
601 |
|
602 |
|
603 |
|
604 %% |
|
605 /* fprintf(stderr, "flex: state %d\n", YY_START); */ |
|
606 |
|
607 /*****************************************************/ |
|
608 /*****************************************************/ |
|
609 /*****************************************************/ |
|
610 /***** *****/ |
|
611 /***** *****/ |
|
612 /***** F I R S T T H I N G S F I R S T *****/ |
|
613 /***** *****/ |
|
614 /***** *****/ |
|
615 /*****************************************************/ |
|
616 /*****************************************************/ |
|
617 /*****************************************************/ |
|
618 |
|
619 /*********************************/ |
|
620 /* Handle the pragmas! */ |
|
621 /*********************************/ |
|
622 |
|
623 /* We start off by searching for the pragmas we handle in the lexical parser. */ |
|
624 <INITIAL>{file_include_pragma} unput_text(0); yy_push_state(include_beg); |
|
625 |
|
626 /* Any other pragma we find, we just pass it up to the syntax parser... */ |
|
627 /* Note that the <body> state is exclusive, so we have to include it here too. */ |
|
{pragma}	|
<body>{pragma}	{/* Return the pragma text without the enclosing curly braces.
		  * The pattern always matches at least the two braces,
		  * so yyleng >= 2 here.
		  * The <body> state is exclusive, hence the second pattern
		  * sharing this action.
		  */
		 yytext[yyleng-1] = '\0';	/* overwrite only the closing brace */
		 yylval.ID=strdup(yytext+1);	/* skip the opening brace */
		 return pragma_token;
		}
|
638 |
|
639 |
|
640 /*********************************/ |
|
641 /* Handle the file includes! */ |
|
642 /*********************************/ |
|
643 <include_beg>{file_include_pragma_beg} BEGIN(include_filename); |
|
644 |
|
<include_filename>{file_include_pragma_filename}	{
			  /* got the include file name */
			  int i;

			  if (include_stack_ptr >= MAX_INCLUDE_DEPTH) {
			    fprintf(stderr, "Includes nested too deeply\n");
			    exit( 1 );
			  }

			  /* remember where to resume once the included file has been read */
			  (include_stack[include_stack_ptr]).buffer_state = YY_CURRENT_BUFFER;
			  (include_stack[include_stack_ptr]).lineno = yylineno;
			  (include_stack[include_stack_ptr]).filename = current_filename;
			  include_stack_ptr++;
			  yylineno = 1;
			  /* private copy of the name; released again by the EOF rule */
			  current_filename = strdup(yytext);
			  if (current_filename == NULL) {
			    fprintf(stderr, "Out of memory!\n");
			    exit( 1 );
			  }

			  /* try each include directory in turn, stopping at the first
			   * one in which the file can be opened
			   */
			  for (i = 0, yyin = NULL; (INCLUDE_DIRECTORIES[i] != NULL) && (yyin == NULL); i++) {
			    char *full_name = strdup2(INCLUDE_DIRECTORIES[i], yytext);
			    if (full_name == NULL) {
			      fprintf(stderr, "Out of memory!\n");
			      exit( 1 );
			    }
			    yyin = fopen(full_name, "r");
			    free(full_name);
			  }

			  if (!yyin) {
			    fprintf(stderr, "Error opening included file %s\n", yytext);
			    exit( 1 );
			  }

			  /* switch input buffer to new file... */
			  yy_switch_to_buffer(yy_create_buffer(yyin, YY_BUF_SIZE));
			  /* switch to whatever state was active before the include file */
			  yy_pop_state();
			  /* now process the new file... */
			}
|
682 |
|
683 |
|
<<EOF>>			{
			  if (include_stack_ptr <= 0) {
			    /* End of the outermost file: nothing left to resume.
			     * The stack pointer stays at 0, so it can never be used
			     * as a negative index into include_stack.
			     */
			    yyterminate();
			  } else {
			    /* yyin still refers to the included file that just ended */
			    FILE *finished_file = yyin;

			    include_stack_ptr--;
			    yy_delete_buffer(YY_CURRENT_BUFFER);
			    yy_switch_to_buffer((include_stack[include_stack_ptr]).buffer_state);
			    /* yy_delete_buffer() does not close the stream, so close it here */
			    fclose(finished_file);
			    yylineno = include_stack[include_stack_ptr].lineno;
			    /* Removing constness of char *. This is safe actually,
			     * since the only real const char * that is stored on the stack is
			     * the first one (i.e. the one that gets stored in include_stack[0]),
			     * which is never freed!
			     */
			    free((char *)current_filename);
			    current_filename = include_stack[include_stack_ptr].filename;
			    /* the rest of the include pragma is still waiting in the outer file */
			    yy_push_state(include_end);
			  }
			}
|
701 |
|
702 <include_end>{file_include_pragma_end} yy_pop_state(); |
|
703 |
|
704 |
|
705 /*********************************/ |
|
706 /* Handle all the state changes! */ |
|
707 /*********************************/ |
|
708 |
|
709 /* INITIAL -> decl */ |
|
710 <INITIAL>{ |
|
711 /* NOTE: how about functions that do not declare variables, and go directly to the body??? |
|
712 * - According to Section 2.5.1.3 (Function Declaration), item 2 in the list, a FUNCTION |
|
713 * must have at least one input argument, so a correct declaration will have at least |
|
714 * one VAR_INPUT ... VAR_END construct! |
|
715 * - According to Section 2.5.2.2 (Function Block Declaration), a FUNCTION_BLOCK |
|
716 * must have at least one input argument, so a correct declaration will have at least |
|
717 * one VAR_INPUT ... VAR_END construct! |
|
718 * - According to Section 2.5.3 (Programs), a PROGRAM must have at least one input |
|
719 * argument, so a correct declaration will have at least one VAR_INPUT ... VAR_END |
|
720 * construct! |
|
721 * |
|
722 * All the above means that we needn't worry about PROGRAMs, FUNCTIONs or |
|
723 * FUNCTION_BLOCKs that do not have at least one END_VAR before the body. |
|
724 * If the code has an error, and no END_VAR before the body, we will simply |
|
725 * continue in the <decl> state, until the end of the FUNCTION, FUNCTION_BLOCK |
|
726 * or PROGRAM. |
|
727 */ |
|
728 FUNCTION BEGIN(decl); return FUNCTION; |
|
729 FUNCTION_BLOCK BEGIN(decl); return FUNCTION_BLOCK; |
|
730 PROGRAM BEGIN(decl); return PROGRAM; |
|
731 CONFIGURATION BEGIN(config); return CONFIGURATION; |
|
732 } |
|
733 |
|
734 /* INITIAL -> body */ |
|
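735 /* required if the function, program, etc.. has no VAR block! NOTE: as written, the rules below can never match, because the identical patterns in the preceding <INITIAL> block take precedence. */ |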
|
736 <INITIAL>{ |
|
737 FUNCTION BEGIN(body); return FUNCTION; |
|
738 FUNCTION_BLOCK BEGIN(body); return FUNCTION_BLOCK; |
|
739 PROGRAM BEGIN(body); return PROGRAM; |
|
740 } |
|
741 |
|
	/* decl -> body */
	/* Both rules match more text than the END_VAR keyword itself. In each
	 * case unput_text(strlen("END_VAR")) pushes everything after the keyword
	 * back onto the input, so only END_VAR itself is consumed.
	 */
<decl>{
	/* END_VAR, then {st_whitespace}, then the letters VAR: another VAR...
	 * block follows, so return END_VAR and stay in the <decl> state.
	 * (This rule matches the longer text, so flex prefers it whenever it applies.)
	 */
END_VAR{st_whitespace}VAR		unput_text(strlen("END_VAR")); return END_VAR;
	/* Any other END_VAR followed by {st_whitespace}: switch to the <body> state. */
END_VAR{st_whitespace}			unput_text(strlen("END_VAR")); BEGIN(body); return END_VAR;
}
|
747 |
|
	/* body -> (il | st) */
	/* None of the rules in this block returns a token. Each one only looks
	 * at the text that starts the body in order to choose between the <st>
	 * and <il> states, and pushes the whole match back onto the input
	 * (unput_text(0)) so that it is scanned again in the chosen state.
	 */
<body>{
	/* An assignment or an array subscript can only start an ST statement. */
{qualified_identifier}{st_whitespace}":="	unput_text(0); BEGIN(st);
{qualified_identifier}"["			unput_text(0); BEGIN(st);

	/* Keywords that start an ST statement. */
RETURN						unput_text(0); BEGIN(st);
IF						unput_text(0); BEGIN(st);
CASE						unput_text(0); BEGIN(st);
FOR						unput_text(0); BEGIN(st);
WHILE						unput_text(0); BEGIN(st);
REPEAT						unput_text(0); BEGIN(st);
EXIT						unput_text(0); BEGIN(st);


	/* Any other identifier: <st> if it names a previously declared function
	 * block, <il> otherwise.
	 */
{identifier}	{int token = get_identifier_token(yytext);
		 if (token == prev_declared_fb_name_token) {
		   /* the code has a call to a function block */
		   BEGIN(st);
		 } else {
		   BEGIN(il);
		 }
		 unput_text(0);
		}
	/* Any other single character: treat the body as IL. */
.		unput_text(0); BEGIN(il);

}	/* end of body lexical parser */
|
774 |
|
775 /* (decl | body | il | st) -> INITIAL */ |
|
776 END_FUNCTION BEGIN(INITIAL); return END_FUNCTION; |
|
777 END_FUNCTION_BLOCK BEGIN(INITIAL); return END_FUNCTION_BLOCK; |
|
778 END_PROGRAM BEGIN(INITIAL); return END_PROGRAM; |
|
779 |
|
780 /* config -> INITIAL */ |
|
781 END_CONFIGURATION BEGIN(INITIAL); return END_CONFIGURATION; |
|
782 |
|
783 |
|
784 |
|
	/************************************/
	/* Next is to remove all whitespace */
	/************************************/
|
788 /* NOTE: pragmas are handled right at the beginning... */ |
|
789 |
|
790 <INITIAL,config,decl,st,body>{st_whitespace_no_pragma} /* Eat any whitespace */ |
|
791 <il,body>{il_whitespace_no_pragma} /* Eat any whitespace */ |
|
792 |
|
793 |
|
794 /*****************************************/ |
|
795 /* B.1.1 Letters, digits and identifiers */ |
|
796 /*****************************************/ |
|
	/* NOTE: 'R1', 'IN', etc... are IL operators, and therefore tokens
	 *       On the other hand, the spec does not define them as keywords,
	 *       which means they may be re-used for variable names, etc...!
	 *       The syntax parser already caters for the possibility of these
	 *       tokens being used for variable names in their declarations.
	 *       When they are declared, they will be added to the variable symbol table!
	 *       Further appearances of these tokens must no longer be parsed
	 *       as R1_tokens etc..., but rather as variable_name_tokens!
	 *
	 *       That is why the first thing we do with identifiers, even before
	 *       checking whether they may be a 'keyword', is to check whether
	 *       they have been previously declared as a variable name,
	 *
	 *       This rule only returns a token for names already declared as a
	 *       variable or as a function block; yylval.ID then carries a
	 *       strdup()'ed copy of the text, which is not freed here.
	 *       For every other identifier the rule REJECTs, which makes flex
	 *       fall back to the next best matching rule (keywords, the final
	 *       {identifier} rule, ...).
	 *
	 * TODO: how about function names?
	 */
{identifier} 	{int token = get_identifier_token(yytext);
		 if ((token == prev_declared_variable_name_token) ||
		     (token == prev_declared_fb_name_token)) {
		 /*
		 if (token != identifier_token)
		 */
		 /* NOTE: if we use the above line, then 'MOD' et al must be removed
		  *       from the library_symbol_table as a default function name!
		  */
		   yylval.ID=strdup(yytext);
		   return token;
		 }
		 /* otherwise, leave it for the other lexical parser rules... */
		 REJECT;
		}
|
827 |
|
828 |
|
829 /******************************************************/ |
|
830 /******************************************************/ |
|
831 /******************************************************/ |
|
832 /***** *****/ |
|
833 /***** *****/ |
|
834 /***** N O W D O T H E K E Y W O R D S *****/ |
|
835 /***** *****/ |
|
836 /***** *****/ |
|
837 /******************************************************/ |
|
838 /******************************************************/ |
|
839 /******************************************************/ |
|
840 |
|
841 |
|
842 EN return EN; |
|
843 ENO return ENO; |
|
844 |
|
845 |
|
846 /******************************/ |
|
847 /* B 1.2.1 - Numeric Literals */ |
|
848 /******************************/ |
|
849 TRUE return TRUE; |
|
850 BOOL#1 return TRUE; |
|
851 FALSE return FALSE; |
|
852 BOOL#0 return FALSE; |
|
853 |
|
854 |
|
855 /************************/ |
|
856 /* B 1.2.3.1 - Duration */ |
|
857 /************************/ |
|
858 t# return T_SHARP; |
|
859 T# return T_SHARP; |
|
860 TIME return TIME; |
|
861 |
|
862 |
|
863 /************************************/ |
|
864 /* B 1.2.3.2 - Time of day and Date */ |
|
865 /************************************/ |
|
866 TIME_OF_DAY return TIME_OF_DAY; |
|
867 TOD return TIME_OF_DAY; |
|
868 DATE return DATE; |
|
869 d# return D_SHARP; |
|
870 D# return D_SHARP; |
|
871 DATE_AND_TIME return DATE_AND_TIME; |
|
872 DT return DATE_AND_TIME; |
|
873 |
|
874 |
|
875 /***********************************/ |
|
876 /* B 1.3.1 - Elementary Data Types */ |
|
877 /***********************************/ |
|
878 BYTE return BYTE; |
|
879 WORD return WORD; |
|
880 DWORD return DWORD; |
|
881 LWORD return LWORD; |
|
882 |
|
883 |
|
884 /********************************/ |
|
885 /* B 1.3.2 - Generic data types */ |
|
886 /********************************/ |
|
887 /* Strangely, the following symbols do not seem to be required! */ |
|
888 /* But we include them so they become reserved words, and do not |
|
889 * get passed up to bison as an identifier... |
|
890 */ |
|
891 ANY return ANY; |
|
892 ANY_DERIVED return ANY_DERIVED; |
|
893 ANY_ELEMENTARY return ANY_ELEMENTARY; |
|
894 ANY_MAGNITUDE return ANY_MAGNITUDE; |
|
895 ANY_NUM return ANY_NUM; |
|
896 ANY_REAL return ANY_REAL; |
|
897 ANY_INT return ANY_INT; |
|
898 ANY_BIT return ANY_BIT; |
|
899 ANY_STRING return ANY_STRING; |
|
900 ANY_DATE return ANY_DATE; |
|
901 |
|
902 |
|
903 /********************************/ |
|
904 /* B 1.3.3 - Derived data types */ |
|
905 /********************************/ |
|
906 ":=" return ASSIGN; |
|
907 ".." return DOTDOT; |
|
908 TYPE return TYPE; |
|
909 END_TYPE return END_TYPE; |
|
910 ARRAY return ARRAY; |
|
911 OF return OF; |
|
912 STRUCT return STRUCT; |
|
913 END_STRUCT return END_STRUCT; |
|
914 |
|
915 |
|
	/*********************/
	/* B 1.4 - Variables */
	/*********************/
REAL		return REAL;
LREAL		return LREAL;

SINT		return SINT;
INT		return INT;
DINT		return DINT;
LINT		return LINT;

USINT		return USINT;
UINT		return UINT;
UDINT		return UDINT;
ULINT		return ULINT;


WSTRING		return WSTRING;
STRING		return STRING;
BOOL		return BOOL;

	/* NOTE: TIME, DATE, DT, TOD, DATE_AND_TIME and TIME_OF_DAY already have
	 *       rules in the B 1.2.3.1 and B 1.2.3.2 sections above. When several
	 *       rules match the same text flex uses the one listed first, so
	 *       the six rules below are never selected; they are listed here
	 *       only to follow the layout of the specification.
	 *       In particular, DT and TOD are returned to the parser as
	 *       DATE_AND_TIME and TIME_OF_DAY (by the earlier rules), and not
	 *       as the DT and TOD tokens named below.
	 */
TIME		return TIME;
DATE		return DATE;
DT		return DT;
TOD		return TOD;
DATE_AND_TIME	return DATE_AND_TIME;
TIME_OF_DAY	return TIME_OF_DAY;
|
943 |
|
944 |
|
945 /******************************************/ |
|
946 /* B 1.4.3 - Declaration & Initialisation */ |
|
947 /******************************************/ |
|
948 VAR_INPUT return VAR_INPUT; |
|
949 VAR_OUTPUT return VAR_OUTPUT; |
|
950 VAR_IN_OUT return VAR_IN_OUT; |
|
951 VAR_EXTERNAL return VAR_EXTERNAL; |
|
952 VAR_GLOBAL return VAR_GLOBAL; |
|
953 END_VAR return END_VAR; |
|
954 RETAIN return RETAIN; |
|
955 NON_RETAIN return NON_RETAIN; |
|
956 R_EDGE return R_EDGE; |
|
957 F_EDGE return F_EDGE; |
|
958 AT return AT; |
|
959 |
|
960 |
|
961 /***********************/ |
|
962 /* B 1.5.1 - Functions */ |
|
963 /***********************/ |
|
964 FUNCTION return FUNCTION; |
|
965 END_FUNCTION return END_FUNCTION; |
|
966 VAR return VAR; |
|
967 CONSTANT return CONSTANT; |
|
968 |
|
969 |
|
970 /*****************************/ |
|
971 /* B 1.5.2 - Function Blocks */ |
|
972 /*****************************/ |
|
973 FUNCTION_BLOCK return FUNCTION_BLOCK; |
|
974 END_FUNCTION_BLOCK return END_FUNCTION_BLOCK; |
|
975 VAR_TEMP return VAR_TEMP; |
|
976 VAR return VAR; |
|
977 NON_RETAIN return NON_RETAIN; |
|
978 END_VAR return END_VAR; |
|
979 |
|
980 |
|
981 /**********************/ |
|
982 /* B 1.5.3 - Programs */ |
|
983 /**********************/ |
|
984 PROGRAM return PROGRAM; |
|
985 END_PROGRAM return END_PROGRAM; |
|
986 |
|
987 |
|
988 /********************************************/ |
|
989 /* B 1.6 Sequential Function Chart elements */ |
|
990 /********************************************/ |
|
991 /* NOTE: the following identifiers/tokens clash with the R and S IL operators, as well |
|
	 *       as other identifiers that may be used as variable names inside IL and ST programs.
|
993 * They will have to be handled when we include parsing of SFC... For now, simply |
|
994 * ignore them! |
|
995 */ |
|
996 /* |
|
997 ACTION return ACTION; |
|
998 END_ACTION return END_ACTION; |
|
999 |
|
1000 TRANSITION return TRANSITION; |
|
1001 END_TRANSITION return END_TRANSITION; |
|
1002 FROM return FROM; |
|
1003 TO return TO; |
|
1004 PRIORITY return PRIORITY; |
|
1005 |
|
1006 INITIAL_STEP return INITIAL_STEP; |
|
1007 STEP return STEP; |
|
1008 END_STEP return END_STEP; |
|
1009 |
|
1010 L return L; |
|
1011 D return D; |
|
1012 SD return SD; |
|
1013 DS return DS; |
|
1014 SL return SL; |
|
1015 |
|
1016 N return N; |
|
1017 P return P; |
|
1018 |
|
1019 R return R; |
|
1020 S return S; |
|
1021 */ |
|
1022 |
|
1023 |
|
1024 /********************************/ |
|
1025 /* B 1.7 Configuration elements */ |
|
1026 /********************************/ |
|
1027 CONFIGURATION return CONFIGURATION; |
|
1028 END_CONFIGURATION return END_CONFIGURATION; |
|
1029 TASK return TASK; |
|
1030 RESOURCE return RESOURCE; |
|
1031 ON return ON; |
|
1032 END_RESOURCE return END_RESOURCE; |
|
1033 VAR_CONFIG return VAR_CONFIG; |
|
1034 VAR_ACCESS return VAR_ACCESS; |
|
1035 END_VAR return END_VAR; |
|
1036 WITH return WITH; |
|
1037 PROGRAM return PROGRAM; |
|
1038 RETAIN return RETAIN; |
|
1039 NON_RETAIN return NON_RETAIN; |
|
1040 PRIORITY return PRIORITY; |
|
1041 SINGLE return SINGLE; |
|
1042 INTERVAL return INTERVAL; |
|
1043 READ_WRITE return READ_WRITE; |
|
1044 READ_ONLY return READ_ONLY; |
|
1045 |
|
1046 |
|
1047 /***********************************/ |
|
1048 /* B 2.1 Instructions and Operands */ |
|
1049 /***********************************/ |
|
1050 <il>\n return EOL; |
|
1051 |
|
1052 |
|
1053 /*******************/ |
|
1054 /* B 2.2 Operators */ |
|
1055 /*******************/ |
|
1056 /* NOTE: we can't have flex return the same token for |
|
1057 * ANDN and &N, neither for AND and &, since |
|
1058 * AND and ANDN are considered valid variable |
|
1059 * function or functionblock type names! |
|
1060 * This means that the parser may decide that the |
|
1061 * AND or ANDN strings found in the source code |
|
1062 * are being used as variable names |
|
1063 * and not as operators, and will therefore transform |
|
	 *       these tokens into identifier tokens!
|
1065 * We can't have the parser thinking that the source |
|
1066 * code contained the string AND (which may be interpreted |
|
	 *       as a variable name) when in reality the source code
|
1068 * merely contained the character &, so we use two |
|
1069 * different tokens for & and AND (and similarly |
|
1070 * ANDN and &N)! |
|
1071 */ |
|
1072 LD return LD; |
|
1073 LDN return LDN; |
|
1074 ST return ST; |
|
1075 STN return STN; |
|
1076 NOT return NOT; |
|
1077 S return S; |
|
1078 R return R; |
|
1079 S1 return S1; |
|
1080 R1 return R1; |
|
1081 CLK return CLK; |
|
1082 CU return CU; |
|
1083 CD return CD; |
|
1084 PV return PV; |
|
1085 IN return IN; |
|
1086 PT return PT; |
|
1087 AND return AND; |
|
1088 & return AND2; |
|
1089 OR return OR; |
|
1090 XOR return XOR; |
|
1091 ANDN return ANDN; |
|
1092 &N return ANDN2; |
|
1093 ORN return ORN; |
|
1094 XORN return XORN; |
|
1095 ADD return ADD; |
|
1096 SUB return SUB; |
|
1097 MUL return MUL; |
|
1098 DIV return DIV; |
|
1099 MOD return MOD; |
|
1100 GT return GT; |
|
1101 GE return GE; |
|
1102 EQ return EQ; |
|
1103 LT return LT; |
|
1104 LE return LE; |
|
1105 NE return NE; |
|
1106 CAL return CAL; |
|
1107 CALC return CALC; |
|
1108 CALCN return CALCN; |
|
1109 RET return RET; |
|
1110 RETC return RETC; |
|
1111 RETCN return RETCN; |
|
1112 JMP return JMP; |
|
1113 JMPC return JMPC; |
|
1114 JMPCN return JMPCN; |
|
1115 |
|
1116 |
|
1117 /***********************/ |
|
1118 /* B 3.1 - Expressions */ |
|
1119 /***********************/ |
|
1120 "**" return OPER_EXP; |
|
1121 "<>" return OPER_NE; |
|
1122 ">=" return OPER_GE; |
|
1123 "<=" return OPER_LE; |
|
1124 AND return AND; |
|
1125 XOR return XOR; |
|
1126 OR return OR; |
|
1127 NOT return NOT; |
|
1128 MOD return MOD; |
|
1129 |
|
1130 |
|
1131 /*****************************************/ |
|
1132 /* B 3.2.2 Subprogram Control Statements */ |
|
1133 /*****************************************/ |
|
1134 := return ASSIGN; |
|
1135 => return SENDTO; |
|
1136 RETURN return RETURN; |
|
1137 |
|
1138 |
|
1139 /********************************/ |
|
1140 /* B 3.2.3 Selection Statements */ |
|
1141 /********************************/ |
|
1142 IF return IF; |
|
1143 THEN return THEN; |
|
1144 ELSIF return ELSIF; |
|
1145 ELSE return ELSE; |
|
1146 END_IF return END_IF; |
|
1147 |
|
1148 CASE return CASE; |
|
1149 OF return OF; |
|
1150 ELSE return ELSE; |
|
1151 END_CASE return END_CASE; |
|
1152 |
|
1153 |
|
1154 /********************************/ |
|
1155 /* B 3.2.4 Iteration Statements */ |
|
1156 /********************************/ |
|
1157 FOR return FOR; |
|
1158 TO return TO; |
|
1159 BY return BY; |
|
1160 DO return DO; |
|
1161 END_FOR return END_FOR; |
|
1162 |
|
1163 WHILE return WHILE; |
|
1164 DO return DO; |
|
1165 END_WHILE return END_WHILE; |
|
1166 |
|
1167 REPEAT return REPEAT; |
|
1168 UNTIL return UNTIL; |
|
1169 END_REPEAT return END_REPEAT; |
|
1170 |
|
1171 EXIT return EXIT; |
|
1172 |
|
1173 |
|
1174 |
|
1175 |
|
1176 |
|
1177 /********************************************************/ |
|
1178 /********************************************************/ |
|
1179 /********************************************************/ |
|
1180 /***** *****/ |
|
1181 /***** *****/ |
|
1182 /***** N O W W O R K W I T H V A L U E S *****/ |
|
1183 /***** *****/ |
|
1184 /***** *****/ |
|
1185 /********************************************************/ |
|
1186 /********************************************************/ |
|
1187 /********************************************************/ |
|
1188 |
|
1189 |
|
1190 /********************************************/ |
|
1191 /* B.1.4.1 Directly Represented Variables */ |
|
1192 /********************************************/ |
|
1193 {direct_variable} {yylval.ID=strdup(yytext); return direct_variable_token;} |
|
1194 |
|
1195 |
|
1196 /******************************************/ |
|
1197 /* B 1.4.3 - Declaration & Initialisation */ |
|
1198 /******************************************/ |
|
1199 {incompl_location} {yylval.ID=strdup(yytext); return incompl_location_token;} |
|
1200 |
|
1201 |
|
1202 /************************/ |
|
1203 /* B 1.2.3.1 - Duration */ |
|
1204 /************************/ |
|
1205 {fixed_point} {yylval.ID=strdup(yytext); return fixed_point_token;} |
|
1206 |
|
1207 {fixed_point_d} {yylval.ID=strdup(yytext); yylval.ID[yyleng-1] = '\0'; return fixed_point_d_token;} |
|
1208 {integer_d} {yylval.ID=strdup(yytext); yylval.ID[yyleng-1] = '\0'; return integer_d_token;} |
|
1209 |
|
1210 {fixed_point_h} {yylval.ID=strdup(yytext); yylval.ID[yyleng-1] = '\0'; return fixed_point_h_token;} |
|
1211 {integer_h} {yylval.ID=strdup(yytext); yylval.ID[yyleng-1] = '\0'; return integer_h_token;} |
|
1212 |
|
1213 {fixed_point_m} {yylval.ID=strdup(yytext); yylval.ID[yyleng-1] = '\0'; return fixed_point_m_token;} |
|
1214 {integer_m} {yylval.ID=strdup(yytext); yylval.ID[yyleng-1] = '\0'; return integer_m_token;} |
|
1215 |
|
1216 {fixed_point_s} {yylval.ID=strdup(yytext); yylval.ID[yyleng-1] = '\0'; return fixed_point_s_token;} |
|
1217 {integer_s} {yylval.ID=strdup(yytext); yylval.ID[yyleng-1] = '\0'; return integer_s_token;} |
|
1218 |
|
1219 {fixed_point_ms} {yylval.ID=strdup(yytext); yylval.ID[yyleng-2] = '\0'; return fixed_point_ms_token;} |
|
1220 {integer_ms} {yylval.ID=strdup(yytext); yylval.ID[yyleng-2] = '\0'; return integer_ms_token;} |
|
1221 |
|
1222 |
|
1223 /*******************************/ |
|
1224 /* B.1.2.2 Character Strings */ |
|
1225 /*******************************/ |
|
1226 {double_byte_character_string} {yylval.ID=strdup(yytext); return double_byte_character_string_token;} |
|
1227 {single_byte_character_string} {yylval.ID=strdup(yytext); return single_byte_character_string_token;} |
|
1228 |
|
1229 |
|
1230 /******************************/ |
|
1231 /* B.1.2.1 Numeric literals */ |
|
1232 /******************************/ |
|
1233 {integer} {yylval.ID=strdup(yytext); return integer_token;} |
|
1234 {real} {yylval.ID=strdup(yytext); return real_token;} |
|
1235 {binary_integer} {yylval.ID=strdup(yytext); return binary_integer_token;} |
|
1236 {octal_integer} {yylval.ID=strdup(yytext); return octal_integer_token;} |
|
1237 {hex_integer} {yylval.ID=strdup(yytext); return hex_integer_token;} |
|
1238 |
|
1239 |
|
1240 /*****************************************/ |
|
1241 /* B.1.1 Letters, digits and identifiers */ |
|
1242 /*****************************************/ |
|
1243 <st>{identifier}/({st_whitespace})"=>" {yylval.ID=strdup(yytext); return sendto_identifier_token;} |
|
1244 <il>{identifier}/({il_whitespace})"=>" {yylval.ID=strdup(yytext); return sendto_identifier_token;} |
|
1245 {identifier} {yylval.ID=strdup(yytext); |
|
1246 /*printf("returning identifier...: %s, %d\n", yytext, get_identifier_token(yytext));*/ |
|
1247 return get_identifier_token(yytext);} |
|
1248 |
|
1249 |
|
1250 |
|
1251 |
|
1252 |
|
1253 |
|
1254 /************************************************/ |
|
1255 /************************************************/ |
|
1256 /************************************************/ |
|
1257 /***** *****/ |
|
1258 /***** *****/ |
|
1259 /***** T H E L E F T O V E R S . . . *****/ |
|
1260 /***** *****/ |
|
1261 /***** *****/ |
|
1262 /************************************************/ |
|
1263 /************************************************/ |
|
1264 /************************************************/ |
|
1265 |
|
1266 /* do the single character tokens... |
|
1267 * |
|
1268 * e.g.: ':' '(' ')' '+' '*' ... |
|
1269 */ |
|
1270 . {return yytext[0];} |
|
1271 |
|
1272 |
|
1273 %% |
|
1274 |
|
1275 |
|
1276 |
|
1277 |
|
1278 |
|
1279 /***********************************/ |
|
1280 /* Utility function definitions... */ |
|
1281 /***********************************/ |
|
1282 |
|
1283 /* print the include file stack to stderr... */ |
|
1284 void print_include_stack(void) { |
|
1285 int i; |
|
1286 |
|
1287 if ((include_stack_ptr - 1) >= 0) |
|
1288 fprintf (stderr, "in file "); |
|
1289 for (i = include_stack_ptr - 1; i >= 0; i--) |
|
1290 fprintf (stderr, "included from file %s:%d\n", include_stack[i].filename, include_stack[i].lineno); |
|
1291 } |
|
1292 |
|
1293 |
|
1294 /* return all the text in the current token back to the input stream, except the first n chars. */ |
|
1295 void unput_text(unsigned int n) { |
|
1296 /* it seems that flex has a bug in that it will not correctly count the line numbers |
|
1297 * if we return newlines back to the input stream. These newlines will be re-counted |
|
1298 * a second time when they are processed again by flex. |
|
1299 * We therefore determine how many newlines are in the text we are returning, |
|
1300 * and decrement the line counter acordingly... |
|
1301 */ |
|
1302 unsigned int i; |
|
1303 |
|
1304 for (i = n; i < strlen(yytext); i++) |
|
1305 if (yytext[i] == '\n') |
|
1306 yylineno--; |
|
1307 |
|
1308 /* now return all the text back to the input stream... */ |
|
1309 yyless(n); |
|
1310 } |
|
1311 |
|
1312 |
|
/* Called by flex when it reaches the end-of-file.
 *
 * Returning 0 would tell flex that a new input file has been opened and
 * that scanning should carry on with it; returning 1 tells it to stop.
 * No further file is ever opened from here, so the answer is always 1.
 */
int yywrap(void)
{
  const int stop_scanning = 1;

  return stop_scanning;
}
|
1332 |
|
1333 |
|
1334 |
|
1335 /*************************************/ |
|
1336 /* Include a main() function to test */ |
|
1337 /* the token parsing by flex.... */ |
|
1338 /*************************************/ |
|
1339 #ifdef TEST_MAIN |
|
1340 |
|
1341 #include "../util/symtable.hh" |
|
1342 |
|
1343 yystype yylval; |
|
1344 YYLTYPE yylloc; |
|
1345 |
|
1346 const char *current_filename; |
|
1347 |
|
/* Stub used by the TEST_MAIN build only: no symbol table is consulted and
 * every identifier is reported as token 0.
 */
int get_identifier_token(const char *identifier_str) {
  (void)identifier_str;
  return 0;
}
|
1349 |
|
1350 |
|
1351 |
|
1352 |
|
1353 |
|
1354 int main(int argc, char **argv) { |
|
1355 |
|
1356 FILE *in_file; |
|
1357 int res; |
|
1358 |
|
1359 if (argc == 1) { |
|
1360 /* Work as an interactive (command line) parser... */ |
|
1361 while((res=yylex())) |
|
1362 fprintf(stderr, "(line %d)token: %d\n", yylineno, res); |
|
1363 } else { |
|
1364 /* Work as non-interactive (file) parser... */ |
|
1365 if((in_file = fopen(argv[1], "r")) == NULL) { |
|
1366 char *errmsg = strdup2("Error opening main file ", argv[1]); |
|
1367 perror(errmsg); |
|
1368 free(errmsg); |
|
1369 return -1; |
|
1370 } |
|
1371 |
|
1372 /* parse the file... */ |
|
1373 yyin = in_file; |
|
1374 current_filename = argv[1]; |
|
1375 while(1) { |
|
1376 res=yylex(); |
|
1377 fprintf(stderr, "(line %d)token: %d (%s)\n", yylineno, res, yylval.ID); |
|
1378 } |
|
1379 } |
|
1380 |
|
1381 return 0; |
|
1382 |
|
1383 } |
|
1384 #endif |