|
1 /* |
|
2 * matiec - a compiler for the programming languages defined in IEC 61131-3 |
|
3 * |
|
4 * Copyright (C) 2003-2011 Mario de Sousa (msousa@fe.up.pt) |
|
5 * |
|
6 * This program is free software: you can redistribute it and/or modify |
|
7 * it under the terms of the GNU General Public License as published by |
|
8 * the Free Software Foundation, either version 3 of the License, or |
|
9 * (at your option) any later version. |
|
10 * |
|
11 * This program is distributed in the hope that it will be useful, |
|
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|
14 * GNU General Public License for more details. |
|
15 * |
|
16 * You should have received a copy of the GNU General Public License |
|
17 * along with this program. If not, see <http://www.gnu.org/licenses/>. |
|
18 * |
|
19 * |
|
20 * This code is made available on the understanding that it will not be |
|
21 * used in safety-critical situations without a full and competent review. |
|
22 */ |
|
23 |
|
24 /* |
|
25 * An IEC 61131-3 compiler. |
|
26 * |
|
27 * Based on the |
|
28 * FINAL DRAFT - IEC 61131-3, 2nd Ed. (2001-12-10) |
|
29 * |
|
30 */ |
|
31 |
|
32 /* |
|
33 * Stage 1 |
|
34 * ======= |
|
35 * |
|
36 * This file contains the lexical tokens definitions, from which |
|
37 * the flex utility will generate a lexical parser function. |
|
38 */ |
|
39 |
|
40 |
|
41 |
|
42 |
|
43 /*****************************/ |
|
44 /* Lexical Parser Options... */ |
|
45 /*****************************/ |
|
46 |
|
47 /* The lexical analyser will never work in interactive mode, |
|
48 * i.e., it will only process programs saved to files, and never |
|
49 * programs being written inter-actively by the user. |
|
50 * This option saves the resulting parser from calling the |
|
51 * isatty() function, that seems to be generating some compile |
|
52 * errors under some (older?) versions of flex. |
|
53 */ |
|
54 %option never-interactive |
|
55 |
|
56 /* Have the lexical analyser use a 'char *yytext' instead of an |
|
57 * array of char 'char yytext[??]' to store the lexical token. |
|
58 */ |
|
59 %pointer |
|
60 |
|
61 |
|
62 /* Have the lexical analyser ignore the case of letters. |
|
63 * This will occur for all the tokens and keywords, but |
|
64 * the resulting text handed up to the syntax parser |
|
65 * will not be changed, and keep the original case |
|
66 * of the letters in the input file. |
|
67 */ |
|
68 %option case-insensitive |
|
69 |
|
70 /* Have the generated lexical analyser keep track of the |
|
71 * line number it is currently analysing. |
|
72 * This is used to pass up to the syntax parser |
|
73 * the number of the line on which the current |
|
74 * token was found. It will enable the syntax parser |
|
75 * to generate more informative error messages... |
|
76 */ |
|
77 %option yylineno |
|
78 |
|
79 /* required for the use of the yy_pop_state() and |
|
80 * yy_push_state() functions |
|
81 */ |
|
82 %option stack |
|
83 |
|
84 /* The '%option stack' also requests the inclusion of |
|
85 * the yy_top_state(), however this function is not |
|
86 * currently being used. This means that the compiler |
|
87 * is complaining about the existence of this function. |
|
88 * The following option removes the yy_top_state() |
|
89 * function from the resulting c code, so the compiler |
|
90 * no longer complains. |
|
91 */ |
|
92 %option noyy_top_state |
|
93 |
|
94 /* We will not be using unput() in our flex code... */ |
|
95 %option nounput |
|
96 |
|
97 /**************************************************/ |
|
98 /* External Variable and Function declarations... */ |
|
99 /**************************************************/ |
|
100 |
|
101 |
|
102 %{ |
|
103 /* Define TEST_MAIN to include a main() function. |
|
104 * Useful for testing the parser generated by flex. |
|
105 */ |
|
106 /* |
|
107 #define TEST_MAIN |
|
108 */ |
|
109 /* If lexical parser is compiled by itself, we need to define the following |
|
110 * constant to some string. Under normal circumstances DEFAULT_LIBDIR is set |
|
111 * in the syntax parser header file... |
|
112 */ |
|
113 #ifdef TEST_MAIN |
|
114 #define DEFAULT_LIBDIR "just_testing" |
|
115 #endif |
|
116 |
|
117 |
|
118 |
|
119 /* Required for strdup() */ |
|
120 #include <string.h> |
|
121 |
|
122 /* Required only for the declaration of abstract syntax classes |
|
123 * (class symbol_c; class token_c; class list_c;) |
|
124 * These will not be used in flex, but the token type union defined |
|
125 * in iec_bison.h contains pointers to these classes, so we must include |
|
126 * it here. |
|
127 */ |
|
128 #include "../absyntax/absyntax.hh" |
|
129 |
|
130 |
|
131 /* iec_bison.h is generated by bison. |
|
132 * Contains the definition of the token constants, and the |
|
133 * token value type YYSTYPE (in our case, a 'const char *') |
|
134 */ |
|
135 #include "iec_bison.h" |
|
136 #include "stage1_2_priv.hh" |
|
137 |
|
138 |
|
139 /* Variable defined by the bison parser, |
|
140 * where the value of the tokens will be stored |
|
141 */ |
|
142 extern YYSTYPE yylval; |
|
143 |
|
144 /* The name of the file currently being parsed... |
|
145 * This variable is declared and read from the code generated by bison! |
|
146 * Note that flex accesses and updates this global variable |
|
147 * appropriately whenever it comes across an {#include "<filename>"} |
|
148 * directive... |
|
149 */ |
|
150 /* |
|
151 NOTE: already defined in iec_bison.h |
|
152 extern const char *current_filename; |
|
153 */ |
|
154 |
|
155 |
|
156 /* We will not be using unput() in our flex code... */ |
|
157 /* NOTE: it seems that this #define is no longer needed, It has been |
|
158 * replaced by %option nounput. |
|
159 * Should we simply delete it? |
|
160 * For now leave it in, in case someone is using an old version of flex. |
|
161 * In any case, the most harm that can result is a warning message |
|
162 * when compiling iec.flex.c: |
|
163 * warning: ‘void yyunput(int, char*)’ defined but not used |
|
164 */ |
|
165 #define YY_NO_UNPUT |
|
166 |
|
167 /* Variable defined by the bison parser. |
|
168 * It must be initialised with the location |
|
169 * of the token being parsed. |
|
170 * This is only needed if we want to keep |
|
171 * track of the locations, in order to give |
|
172 * more meaningful error messages! |
|
173 */ |
|
174 extern YYLTYPE yylloc; |
|
175 /* Have flex read its input through GetNextChar(); a result <= 0 is handed to flex as YY_NULL (flex's end-of-input value). */ |
|
176 #define YY_INPUT(buf,result,max_size) {\ |
|
177 result = GetNextChar(buf, max_size);\ |
|
178 if ( result <= 0 )\ |
|
179 result = YY_NULL;\ |
|
180 } |
|
181 |
|
182 |
|
183 /* A counter to track the order by which each token is processed. |
|
184 * NOTE: This counter is not exactly linear (i.e., it does not get incremented by 1 for each token). |
|
185 * i.e.. it may get incremented by more than one between two consecutive tokens. |
|
186 * This is due to the fact that the counter gets incremented every 'user action' in flex, |
|
187 * however not every user action will result in a token being passed to bison. |
|
188 * Nevertheless this is still OK, as we are only interested in the relative |
|
189 * ordering of tokens... |
|
190 */ |
|
191 static long int current_order = 0; |
|
192 |
|
193 |
|
194 /* Macro that flex executes before the action of every matched rule. |
|
195 * It copies the location of the current token (line, columns, file, order) |
|
196 * into yylloc for bison, then starts the next token at currentChar and increments current_order. |
|
197 */ |
|
198 #define YY_USER_ACTION {\ |
|
199 yylloc.first_line = current_tracking->lineNumber; \ |
|
200 yylloc.first_column = current_tracking->currentTokenStart; \ |
|
201 yylloc.first_file = current_filename; \ |
|
202 yylloc.first_order = current_order; \ |
|
203 yylloc.last_line = current_tracking->lineNumber; \ |
|
204 yylloc.last_column = current_tracking->currentChar - 1; \ |
|
205 yylloc.last_file = current_filename; \ |
|
206 yylloc.last_order = current_order; \ |
|
207 current_tracking->currentTokenStart = current_tracking->currentChar; \ |
|
208 current_order++; \ |
|
209 } |
|
210 |
|
211 |
|
212 /* Since this lexical parser we defined only works in ASCII based |
|
213 * systems, we might as well make sure it is being compiled on |
|
214 * one... |
|
215 * Lets check a few random characters... |
|
216 */ |
|
217 #if (('a' != 0x61) || ('A' != 0x41) || ('z' != 0x7A) || ('Z' != 0x5A) || \ |
|
218 ('0' != 0x30) || ('9' != 0x39) || ('(' != 0x28) || ('[' != 0x5B)) |
|
219 #error This lexical analyser is not portable to a non ASCII based system. |
|
220 #endif |
|
221 |
|
222 |
|
223 /* Function only called from within flex, but defined |
|
224 * in iec.y! |
|
225 * We declare it here... |
|
226 * |
|
227 * Search for a symbol in either of the two symbol tables |
|
228 * and return the token id of the first symbol found. |
|
229 * Searches first in the variables, and only if not found |
|
230 * does it continue searching in the library elements |
|
231 */ |
|
232 //token_id_t get_identifier_token(const char *identifier_str); |
|
233 int get_identifier_token(const char *identifier_str); |
|
234 %} |
|
235 |
|
236 |
|
237 /***************************************************/ |
|
238 /* Forward Declaration of functions defined later. */ |
|
239 /***************************************************/ |
|
240 |
|
241 %{ |
|
242 /* return all the text in the current token back to the input stream. */ |
|
243 void unput_text(unsigned int n); |
|
244 %} |
|
245 |
|
246 |
|
247 |
|
248 /****************************/ |
|
249 /* Lexical Parser States... */ |
|
250 /****************************/ |
|
251 |
|
252 /* NOTE: Our parser can parse st or il code, intermixed |
|
253 * within the same file. |
|
254 * With IL we come across the issue of the EOL (end of line) token. |
|
255 * ST, and the declaration parts of IL do not use this token! |
|
256 * If the lexical analyser were to issue this token during ST |
|
257 * language parsing, or during the declaration of data types, |
|
258 * function headers, etc. in IL, the syntax parser would crash. |
|
259 * |
|
260 * We can solve this issue using one of three methods: |
|
261 * (1) Augment all the syntax that does not accept the EOL |
|
262 * token to simply ignore it. This makes the syntax |
|
263 * definition (in iec.y) very cluttered! |
|
264 * (2) Let the lexical parser figure out which language |
|
265 * it is parsing, and decide whether or not to issue |
|
266 * the EOL token. This requires the lexical parser |
|
267 * to have knowledge of the syntax!, making for a poor |
|
268 * overall organisation of the code. It would also make it |
|
269 * very difficult to understand the lexical parser as it |
|
270 * would use several states, and a state machine to transition |
|
271 * between the states. The state transitions would be |
|
272 * intermingled with the lexical parser definition! |
|
273 * (3) Use a mixture of (1) and (2). The lexical analyser |
|
274 * merely distinguishes between function headers and function |
|
275 * bodies, but no longer makes a distinction between il and |
|
276 * st language bodies. When parsing a body, it will return |
|
277 * the EOL token. In other states '\n' will be ignored as |
|
278 * whitespace. |
|
279 * The ST language syntax has been augmented in the syntax |
|
280 * parser configuration to ignore any EOL tokens that it may |
|
281 * come across! |
|
282 * This option has both drawbacks of option (1) and (2), but |
|
283 * much less intensely. |
|
284 * The syntax that gets cluttered is limited to the ST statements |
|
285 * (which is rather limited, compared to the function headers and |
|
286 * data type declarations, etc...), while the state machine in |
|
287 * the lexical parser becomes very simple. All state transitions |
|
288 * can be handled within the lexical parser by itself, and can be |
|
289 * easily identified. Thus knowledge of the syntax required by |
|
290 * the lexical parser is very limited! |
|
291 * |
|
292 * Amazingly enough, I (Mario) got to implement option (3) |
|
293 * at first, requiring two basic states, decl and body. |
|
294 * The lexical parser will enter the body state when |
|
295 * it is parsing the body of a function/program/function block. The |
|
296 * state transition is done when we find a VAR_END that is not followed |
|
297 * by a VAR! This is the syntax knowledge that gets included in the |
|
298 * lexical analyser with this option! |
|
299 * Unfortunately, getting the st syntax parser to ignore EOL anywhere |
|
300 * where they might appear leads to conflicts. This is due to the fact |
|
301 * that the syntax parser uses the single look-ahead token to remove |
|
302 * possible conflicts. When we insert a possible EOL, the single |
|
303 * look ahead token becomes the EOL, which means the potential conflicts |
|
304 * could no longer be resolved. |
|
305 * Removing these conflicts would make the st syntax parser very convoluted, |
|
306 * and adding the extraneous EOL would make it very cluttered. |
|
307 * This option was therefore dropped in favour of another! |
|
308 * |
|
309 * I ended up implementing (2). Unfortunately the lexical analyser can |
|
310 * not easily distinguish between il and st code, since function |
|
311 * calls in il are very similar to function block calls in st. |
|
312 * We therefore use an extra 'body' state. When the lexical parser |
|
313 * finds that last END_VAR, it enters the body state. This state |
|
314 * must figure out what language is being parsed from the first few |
|
315 * tokens, and switch to the correct state (st, il or sfc) according to the |
|
316 * language. This means that we insert quite a bit of knowledge of the |
|
317 * syntax of the languages into the lexical parser. This is ugly, but it |
|
318 * works, and at least it is possible to keep all the state changes together |
|
319 * to make it easier to remove them later on if need be. |
|
320 * Once the language being parsed has been identified, |
|
321 * the body state returns any matched text back to the buffer with unput(), |
|
322 * to be later matched correctly by the appropriate language parser (st, il or sfc). |
|
323 * |
|
324 * Additionally, in sfc state it may further recursively enter the body state |
|
325 * once again. This is because an sfc body may contain ACTIONS, which are then |
|
326 * written in one of the three languages (ST, IL or SFC), so once again we need |
|
327 * to figure out which language the ACTION in the SFC was written in. We already |
|
328 * have all that done in the body state, so we recursively transition to the body |
|
329 * state once again. |
|
330 * Note that in this case, when coming out of the st/il state (whichever language |
|
331 * the action was written in) the sfc state will become active again. This is done by |
|
332 * pushing and popping the previously active state! |
|
333 * |
|
334 * The sfc_qualifier_state is required because when parsing actions within an |
|
335 * sfc, we will be expecting action qualifiers (N, P, R, S, DS, SD, ...). In order |
|
336 * for bison to work correctly, these qualifiers must be returned as tokens. However, |
|
337 * these tokens are not reserved keywords, which means it should be possible to |
|
338 * define variables/functions/FBs with any of these names (including |
|
339 * S and R which are special because they are also IL operators). So, when we are not |
|
340 * expecting any action qualifiers, flex does not return these tokens, and is free |
|
341 * to interpret them as previously defined variables/functions/... as the case may be. |
|
342 * |
|
343 * The state machine has 10 possible states (INITIAL, config, task_init, decl, body, st, il, sfc, sfc_qualifier, sfc_priority) |
|
344 * Possible state changes are: |
|
345 * INITIAL -> goto(decl_state) |
|
346 * (when a FUNCTION, FUNCTION_BLOCK, or PROGRAM is found, |
|
347 * and followed by a VAR declaration) |
|
348 * INITIAL -> goto(body_state) |
|
349 * (when a FUNCTION, FUNCTION_BLOCK, or PROGRAM is found, |
|
350 * and _not_ followed by a VAR declaration) |
|
351 * (This transition is actually commented out, since the syntax |
|
352 * does not allow the declaration of functions, FBs, or programs |
|
353 * without any VAR declaration!) |
|
354 * INITIAL -> goto(config_state) |
|
355 * (when a CONFIGURATION is found) |
|
356 * decl_state -> push(decl_state); goto(body_state) |
|
357 * (when the last END_VAR is found, i.e. the function body starts) |
|
358 * decl_state -> push(decl_state); goto(sfc_state) |
|
359 * (when it figures out it is parsing sfc language) |
|
360 * body_state -> goto(st_state) |
|
361 * (when it figures out it is parsing st language) |
|
362 * body_state -> goto(il_state) |
|
363 * (when it figures out it is parsing il language) |
|
364 * st_state -> pop() |
|
365 * (when a END_FUNCTION, END_FUNCTION_BLOCK, END_PROGRAM, |
|
366 * END_ACTION or END_TRANSITION is found) |
|
367 * il_state -> pop() |
|
368 * (when a END_FUNCTION, END_FUNCTION_BLOCK, END_PROGRAM, |
|
369 * END_ACTION or END_TRANSITION is found) |
|
370 * decl_state -> goto(INITIAL) |
|
371 * (when a END_FUNCTION, END_FUNCTION_BLOCK, or END_PROGRAM is found) |
|
372 * sfc_state -> goto(INITIAL) |
|
373 * (when a END_FUNCTION, END_FUNCTION_BLOCK, or END_PROGRAM is found) |
|
374 * config_state -> goto(INITIAL) |
|
375 * (when a END_CONFIGURATION is found) |
|
376 * sfc_state -> push(sfc_state); goto(body_state) |
|
377 * (when parsing an action. This transition is requested by bison) |
|
378 * sfc_state -> push(sfc_state); goto(sfc_qualifier_state) |
|
379 * (when expecting an action qualifier. This transition is requested by bison) |
|
380 * sfc_qualifier_state -> pop() |
|
381 * (when no longer expecting an action qualifier. This transition is requested by bison) |
|
382 * config_state -> push(config_state); goto(task_init_state) |
|
383 * (when parsing a task initialisation. This transition is requested by bison) |
|
384 * task_init_state -> pop() |
|
385 * (when no longer parsing task initialisation parameters. This transition is requested by bison) |
|
386 * |
|
387 */ |
|
388 |
|
389 |
|
390 /* we are parsing a configuration. */ |
|
391 %s config_state |
|
392 |
|
393 /* Inside a configuration, we are parsing a task initialisation parameters */ |
|
394 /* This means that PRIORITY, SINGLE and INTERVAL must be handled as |
|
395 * tokens, and not as possible identifiers. Note that the above words |
|
396 * are not keywords. |
|
397 */ |
|
398 %s task_init_state |
|
399 |
|
400 /* we are parsing a function, program or function block declaration */ |
|
401 %s decl_state |
|
402 |
|
403 /* we will be parsing a function body. Whether il/st is remains unknown */ |
|
404 %x body_state |
|
405 |
|
406 /* we are parsing il code -> flex must return the EOL tokens! */ |
|
407 %s il_state |
|
408 |
|
409 /* we are parsing st code -> flex must not return the EOL tokens! */ |
|
410 %s st_state |
|
411 |
|
412 /* we are parsing sfc code -> flex must not return the EOL tokens! */ |
|
413 %s sfc_state |
|
414 |
|
415 /* we are parsing sfc code, and expecting an action qualifier. */ |
|
416 %s sfc_qualifier_state |
|
417 |
|
418 /* we are parsing sfc code, and expecting the priority token. */ |
|
419 %s sfc_priority_state |
|
420 |
|
421 |
|
422 |
|
423 |
|
424 /*******************/ |
|
425 /* File #include's */ |
|
426 /*******************/ |
|
427 |
|
428 /* We extend the IEC 61131-3 standard syntax to allow inclusion |
|
429 * of other files, using the IEC 61131-3 pragma directive... |
|
430 * The accepted syntax is: |
|
431 * {#include "<filename>"} |
|
432 */ |
|
433 |
|
434 /* the "include" states are used for picking up the name of an include file */ |
|
435 %x include_beg |
|
436 %x include_filename |
|
437 %x include_end |
|
438 |
|
439 |
|
440 file_include_pragma_filename [^\"]* |
|
441 file_include_pragma_beg "{#include"{st_whitespace_only}\" |
|
442 file_include_pragma_end \"{st_whitespace_only}"}" |
|
443 file_include_pragma {file_include_pragma_beg}{file_include_pragma_filename}{file_include_pragma_end} |
|
444 |
|
445 |
|
446 %{ |
|
447 #define MAX_INCLUDE_DEPTH 16 /* capacity of the include_stack[] array below */ |
|
448 |
|
449 typedef struct { /* one entry of the include stack */ |
|
450 YY_BUFFER_STATE buffer_state; /* a flex input buffer -- presumably that of the including file; confirm */ |
|
451 tracking_t* env; /* position tracking data -- presumably that of the including file; confirm */ |
|
452 const char *filename; /* a file name -- presumably that of the including file; confirm */ |
|
453 } include_stack_t; |
|
454 |
|
455 tracking_t* current_tracking; /* position tracking data read and updated by YY_USER_ACTION */ |
|
456 include_stack_t include_stack[MAX_INCLUDE_DEPTH]; |
|
457 int include_stack_ptr = 0; /* starts at 0 -- presumably the number of include_stack[] entries in use; confirm */ |
|
458 |
|
459 const char *INCLUDE_DIRECTORIES[] = { /* presumably the directories searched for included files -- confirm */ |
|
460 DEFAULT_LIBDIR, |
|
461 ".", |
|
462 "/lib", |
|
463 "/usr/lib", |
|
464 "/usr/lib/iec", |
|
465 NULL /* must end with NULL!! */ |
|
466 }; |
|
467 |
|
468 %} |
|
469 |
|
470 |
|
471 |
|
472 /*****************************/ |
|
473 /* Preliminary constructs... */ |
|
474 /*****************************/ |
|
475 |
|
476 /* In order to allow the declaration of POU prototypes (Function, FB, Program, ...), |
|
477 * especially the prototypes of Functions and FBs defined in the standard |
|
478 * (i.e. standard functions and FBs), we extend the IEC 61131-3 standard syntax |
|
479 * with two pragmas to indicate that the code is to be parsed (going through the |
|
480 * lexical, syntactical, and semantic analysers), but no code is to be generated. |
|
481 * |
|
482 * The accepted syntax is: |
|
483 * {disable code generation} |
|
484 * ... prototypes ... |
|
485 * {enable code generation} |
|
486 * |
|
487 * When parsing these prototypes the abstract syntax tree will be populated as usual, |
|
488 * allowing the semantic analyser to correctly analyse the semantics of calls to these |
|
489 * functions/FBs. However, stage4 will simply ignore all IEC61131-3 code |
|
490 * between the above two pragmas. |
|
491 */ |
|
492 |
|
493 disable_code_generation_pragma "{disable code generation}" |
|
494 enable_code_generation_pragma "{enable code generation}" |
|
495 |
|
496 |
|
497 /* Any other pragma... */ |
|
498 |
|
499 pragma "{"[^}]*"}" |
|
500 |
|
501 /* NOTE: this seemingly unnecessary complex definition is required |
|
502 * to be able to eat up comments such as: |
|
503 * '(* Testing... ! ***** ******)' |
|
504 * without using the trailing context command in flex (/{context}) |
|
505 * since {comment} itself will later be used with |
|
506 * trailing context ({comment}/{context}) |
|
507 */ |
|
508 not_asterisk [^*] |
|
509 not_close_parenthesis_nor_asterisk [^*)] |
|
510 asterisk "*" |
|
511 comment_text {not_asterisk}|(({asterisk}+){not_close_parenthesis_nor_asterisk}) |
|
512 |
|
513 comment "(*"({comment_text}*)({asterisk}+)")" |
|
514 |
|
515 |
|
516 /* |
|
517 3.1 Whitespace |
|
518 (NOTE: Whitespace IS clearly defined, to include newline!!! See section 2.1.4!!!) |
|
519 No definition of whitespace is given, in other words, the characters that may be used to separate language tokens are not precisely defined. |
|
520 One may nevertheless make an intelligent guess of using the space (' '), and other characters also commonly considered whitespace in other programming languages (horizontal tab, vertical tab, form feed, etc.). |
|
521 The main question is whether the newline character should be considered whitespace. |
|
522 IL language statements use an EOL token (End Of Line) to distinguish between some language constructs. |
|
523 The EOL token itself is openly defined as "normally consist[ing] of the 'paragraph separator' ", leaving the final choice open to each implementation. |
|
524 If we choose the newline character to represent the EOL token, it may then not be considered whitespace. |
|
525 On the other hand, some examples that come in a non-normative annex of the specification allow function declarations to span multiple lines, which means that the newline character is being considered as whitespace. |
|
526 Our implementation works around this issue by including the new line character in the whitespace while parsing function declarations and the ST language, and parsing it as the EOL token only while parsing IL language statements. This requires the use of a state machine in the lexical parser that needs at least some knowledge of the syntax itself. |
|
527 */ |
|
528 /* NOTE: Our definition of whitespace will only work in ASCII! |
|
529 * |
|
530 * Since the IL language needs to know the location of newline |
|
531 * (token EOL -> '\n' ), we need one definition of whitespace |
|
532 * for each language... |
|
533 */ |
|
534 /* |
|
535 * NOTE: we cannot use |
|
536 * st_whitespace [:space:]* |
|
537 * since we use {st_whitespace} as trailing context. In our case |
|
538 * this would not constitute "dangerous trailing context", but the |
|
539 * lexical generator (i.e. flex) does not know this (since it does |
|
540 * not know which characters belong to the set [:space:]), and will |
|
541 * generate a "dangerous trailing context" warning! |
|
542 * We use this alternative just to stop the flex utility from |
|
543 * generating the invalid (in this case) warning... |
|
544 */ |
|
545 |
|
546 st_whitespace_only [ \f\n\r\t\v]* |
|
547 il_whitespace_only [ \f\r\t\v]* |
|
548 |
|
549 st_whitespace_text {st_whitespace_only}|{comment}|{pragma} |
|
550 il_whitespace_text {il_whitespace_only}|{comment}|{pragma} |
|
551 |
|
552 st_whitespace {st_whitespace_text}* |
|
553 il_whitespace {il_whitespace_text}* |
|
554 |
|
555 st_whitespace_text_no_pragma {st_whitespace_only}|{comment} |
|
556 il_whitespace_text_no_pragma {il_whitespace_only}|{comment} |
|
557 |
|
558 st_whitespace_no_pragma {st_whitespace_text_no_pragma}* |
|
559 il_whitespace_no_pragma {il_whitespace_text_no_pragma}* |
|
560 |
|
561 qualified_identifier {identifier}(\.{identifier})* |
|
562 |
|
563 |
|
564 |
|
565 /*****************************************/ |
|
566 /* B.1.1 Letters, digits and identifiers */ |
|
567 /*****************************************/ |
|
568 /* NOTE: The following definitions only work if the host computer |
|
569 * is using the ASCII mapping. For e.g., with EBCDIC [A-Z] |
|
570 * contains non-alphabetic characters! |
|
571 * The correct way of doing it would be to use |
|
572 * the [:upper:] etc... definitions. |
|
573 * |
|
574 * Unfortunately, further on we need all printable |
|
575 * characters (i.e. [:print:]), but excluding '$'. |
|
576 * Flex does not allow sets to be composed by excluding |
|
577 * elements. Sets may only be constructed by adding new |
|
578 * elements, which means that we have to revert to |
|
579 * [\x20\x21\x23\x25\x26\x28-\x7E] for the definition |
|
580 * of the printable characters with the required exceptions. |
|
581 * The above also implies the use of ASCII, but now we have |
|
582 * no way to work around it! |
|
583 * |
|
584 * The conclusion is that our parser is limited to ASCII |
|
585 * based host computers!! |
|
586 */ |
|
587 letter [A-Za-z] |
|
588 digit [0-9] |
|
589 octal_digit [0-7] |
|
590 hex_digit {digit}|[A-F] |
|
591 identifier ({letter}|(_({letter}|{digit})))((_?({letter}|{digit}))*) |
|
592 |
|
593 /*******************/ |
|
594 /* B.1.2 Constants */ |
|
595 /*******************/ |
|
596 |
|
597 /******************************/ |
|
598 /* B.1.2.1 Numeric literals */ |
|
599 /******************************/ |
|
600 integer {digit}((_?{digit})*) |
|
601 binary_integer 2#{bit}((_?{bit})*) |
|
602 bit [0-1] |
|
603 octal_integer 8#{octal_digit}((_?{octal_digit})*) |
|
604 hex_integer 16#{hex_digit}((_?{hex_digit})*) |
|
605 exponent [Ee]([+-]?){integer} |
|
606 /* The correct definition for real would be: |
|
607 * real {integer}\.{integer}({exponent}?) |
|
608 * |
|
609 * Unfortunately, the spec also defines fixed_point (B 1.2.3.1) as: |
|
610 * fixed_point {integer}\.{integer} |
|
611 * |
|
612 * This means that {integer}\.{integer} could be interpreted |
|
613 * as either a fixed_point or a real. |
|
614 * I have opted to interpret {integer}\.{integer} as a fixed_point. |
|
615 * In order to do this, the definition of real has been changed to: |
|
616 * real {integer}\.{integer}{exponent} |
|
617 * |
|
618 * This means that the syntax parser now needs to define a real to be |
|
619 * either a real_token or a fixed_point_token! |
|
620 */ |
|
621 real {integer}\.{integer}{exponent} |
|
622 |
|
623 |
|
624 /*******************************/ |
|
625 /* B.1.2.2 Character Strings */ |
|
626 /*******************************/ |
|
627 /* |
|
628 common_character_representation := |
|
629 <any printable character except '$', '"' or "'"> |
|
630 |'$$' |
|
631 |'$L'|'$N'|'$P'|'$R'|'$T' |
|
632 |'$l'|'$n'|'$p'|'$r'|'$t' |
|
633 |
|
634 NOTE: $ = 0x24 |
|
635 " = 0x22 |
|
636 ' = 0x27 |
|
637 |
|
638 printable chars in ASCII: 0x20-0x7E |
|
639 */ |
|
640 |
|
641 esc_char_u $L|$N|$P|$R|$T |
|
642 esc_char_l $l|$n|$p|$r|$t |
|
643 esc_char $$|{esc_char_u}|{esc_char_l} |
|
644 double_byte_char (${hex_digit}{hex_digit}{hex_digit}{hex_digit}) |
|
645 single_byte_char (${hex_digit}{hex_digit}) |
|
646 |
|
647 /* WARNING: |
|
648 * This definition is only valid in ASCII... |
|
649 * |
|
650 * Flex includes the function print_char() that defines |
|
651 * all printable characters portably (i.e. whatever character |
|
652 * encoding is currently being used , ASCII, EBCDIC, etc...) |
|
653 * Unfortunately, we cannot generate the definition of |
|
654 * common_character_representation portably, since flex |
|
655 * does not allow definition of sets by subtracting |
|
656 * elements in one set from another set. |
|
657 * This means we must build up the definition of |
|
658 * common_character_representation using only set addition, |
|
659 * which leaves us with the only choice of defining the |
|
660 * characters non-portably... |
|
661 */ |
|
662 common_character_representation [\x20\x21\x23\x25\x26\x28-\x7E]|{esc_char} |
|
663 double_byte_character_representation $\"|'|{double_byte_char}|{common_character_representation} |
|
664 single_byte_character_representation $'|\"|{single_byte_char}|{common_character_representation} |
|
665 |
|
666 |
|
667 double_byte_character_string \"({double_byte_character_representation}*)\" |
|
668 single_byte_character_string '({single_byte_character_representation}*)' |
|
669 |
|
670 |
|
671 /************************/ |
|
672 /* B 1.2.3.1 - Duration */ |
|
673 /************************/ |
|
674 fixed_point {integer}\.{integer} |
|
675 |
|
676 fixed_point_d {fixed_point}d |
|
677 integer_d {integer}d |
|
678 |
|
679 fixed_point_h {fixed_point}h |
|
680 integer_h {integer}h |
|
681 |
|
682 fixed_point_m {fixed_point}m |
|
683 integer_m {integer}m |
|
684 |
|
685 fixed_point_s {fixed_point}s |
|
686 integer_s {integer}s |
|
687 |
|
688 fixed_point_ms {fixed_point}ms |
|
689 integer_ms {integer}ms |
|
690 |
|
691 |
|
692 /********************************************/ |
|
693 /* B.1.4.1 Directly Represented Variables */ |
|
694 /********************************************/ |
|
695 /* The correct definition, if the standard were to be followed... */ |
|
696 /* NOTE: the '.' separating the integers must be escaped; an unescaped '.' matches any character except newline. */ |
|
697 location_prefix [IQM] |
|
698 size_prefix [XBWDL] |
|
699 direct_variable_standard %{location_prefix}({size_prefix}?){integer}((\.{integer})*) |
|
700 |
|
701 |
|
702 /* For the MatPLC, we will accept %<identifier> |
|
703 * as a direct variable, this being mapped onto the MatPLC point |
|
704 * named <identifier> |
|
705 */ |
|
706 /* TODO: we should not restrict it to only the accepted syntax |
|
707 * of <identifier> as specified by the standard. MatPLC point names |
|
708 * have a more permissive syntax. |
|
709 * |
|
710 * e.g. "P__234" |
|
711 * Is a valid MatPLC point name, but not a valid <identifier> !! |
|
712 * The same happens with names such as "333", "349+23", etc... |
|
713 * How can we handle these more expressive names in our case? |
|
714 * Remember that some direct variable may remain anonymous, with |
|
715 * declarations such as: |
|
716 * VAR |
|
717 * AT %I3 : BYTE := 255; |
|
718 * END_VAR |
|
719 * in which case we are currently using "%I3" as the variable |
|
720 * name. |
|
721 */ |
|
722 direct_variable_matplc %{identifier} |
|
723 |
|
724 direct_variable {direct_variable_standard}|{direct_variable_matplc} |
|
725 |
|
726 /******************************************/ |
|
727 /* B 1.4.3 - Declaration & Initialisation */ |
|
728 /******************************************/ |
|
729 incompl_location %[IQM]\* |
|
730 |
|
731 |
|
732 |
|
733 |
|
734 %% |
|
735 /* fprintf(stderr, "flex: state %d\n", YY_START); */ |
|
736 |
|
737 /*****************************************************/ |
|
738 /*****************************************************/ |
|
739 /*****************************************************/ |
|
740 /***** *****/ |
|
741 /***** *****/ |
|
742 /***** F I R S T T H I N G S F I R S T *****/ |
|
743 /***** *****/ |
|
744 /***** *****/ |
|
745 /*****************************************************/ |
|
746 /*****************************************************/ |
|
747 /*****************************************************/ |
|
748 |
|
749 /***********************************************************/ |
|
750 /* Handle requests sent by bison for flex to change state. */ |
|
751 /***********************************************************/ |
|
752 if (get_goto_body_state()) { |
|
753 yy_push_state(body_state); |
|
754 rst_goto_body_state(); |
|
755 } |
|
756 |
|
757 if (get_goto_sfc_qualifier_state()) { |
|
758 yy_push_state(sfc_qualifier_state); |
|
759 rst_goto_sfc_qualifier_state(); |
|
760 } |
|
761 |
|
762 if (get_goto_sfc_priority_state()) { |
|
763 yy_push_state(sfc_priority_state); |
|
764 rst_goto_sfc_priority_state(); |
|
765 } |
|
766 |
|
767 if (get_goto_task_init_state()) { |
|
768 yy_push_state(task_init_state); |
|
769 rst_goto_task_init_state(); |
|
770 } |
|
771 |
|
772 if (get_pop_state()) { |
|
773 yy_pop_state(); |
|
774 rst_pop_state(); |
|
775 } |
|
776 |
|
777 /***************************/ |
|
778 /* Handle the pragmas! */ |
|
779 /***************************/ |
|
780 |
|
781 /* We start off by searching for the pragmas we handle in the lexical parser. */ |
|
782 <INITIAL>{file_include_pragma} unput_text(0); yy_push_state(include_beg); |
|
783 |
|
784 /* Pragmas sent to syntax analyser (bison) */ |
|
785 {disable_code_generation_pragma} return disable_code_generation_pragma_token; |
|
786 {enable_code_generation_pragma} return enable_code_generation_pragma_token; |
|
787 <body_state>{disable_code_generation_pragma} return disable_code_generation_pragma_token; |
|
788 <body_state>{enable_code_generation_pragma} return enable_code_generation_pragma_token; |
|
789 |
|
790 /* Any other pragma we find, we just pass it up to the syntax parser... */ |
|
791 /* Note that the <body_state> state is exclusive, so we have to include it here too. */ |
|
792 {pragma} {/* return the pragmma without the enclosing '{' and '}' */ |
|
793 yytext[strlen(yytext)-1] = '\0'; |
|
794 yylval.ID=strdup(yytext+1); |
|
795 return pragma_token; |
|
796 } |
|
797 <body_state>{pragma} {/* return the pragmma without the enclosing '{' and '}' */ |
|
798 yytext[strlen(yytext)-1] = '\0'; |
|
799 yylval.ID=strdup(yytext+1); |
|
800 return pragma_token; |
|
801 } |
|
802 |
|
803 |
|
804 /*********************************/ |
|
805 /* Handle the file includes! */ |
|
806 /*********************************/ |
|
807 <include_beg>{file_include_pragma_beg} BEGIN(include_filename); |
|
808 |
|
<include_filename>{file_include_pragma_filename}	{
			  /* Got the name of the file to include (it is in yytext).
			   *  1) save the current scanning context on the include stack
			   *  2) search the INCLUDE_DIRECTORIES, in order, for the file;
			   *     the first directory in which it can be opened wins
			   *  3) make the included file the current input buffer
			   * Every failure is fatal: a message goes to stderr and we exit(1).
			   */
			  int i;

			  if (include_stack_ptr >= MAX_INCLUDE_DEPTH) {
			    fprintf(stderr, "Includes nested too deeply\n");
			    exit( 1 );
			  }
			  /* The slot is filled in now, but include_stack_ptr is only
			   * incremented once the included file has been opened.
			   */
			  include_stack[include_stack_ptr].buffer_state = YY_CURRENT_BUFFER;
			  include_stack[include_stack_ptr].env = current_tracking;
			  include_stack[include_stack_ptr].filename = current_filename;

			  for (i = 0, yyin = NULL; (INCLUDE_DIRECTORIES[i] != NULL) && (yyin == NULL); i++) {
			    /* strdup3() presumably returns a newly allocated
			     * "<directory>/<file name>" string; it is free()d right after use.
			     */
			    char *full_name = strdup3(INCLUDE_DIRECTORIES[i], "/", yytext);
			    if (full_name == NULL) {
			      fprintf(stderr, "Out of memory!\n");
			      exit( 1 );
			    }
			    yyin = fopen(full_name, "r");
			    free(full_name);
			  }

			  if (!yyin) {
			    fprintf(stderr, "Error opening included file %s\n", yytext);
			    exit( 1 );
			  }

			  /* The copy of the file name is never free()d on purpose: see the
			   * note in the <<EOF>> rule.
			   */
			  current_filename = strdup(yytext);
			  if (current_filename == NULL) {
			    /* same policy as for the strdup3() failure above */
			    fprintf(stderr, "Out of memory!\n");
			    exit( 1 );
			  }
			  current_tracking = GetNewTracking(yyin);
			  include_stack_ptr++;

			  /* switch input buffer to new file... */
			  yy_switch_to_buffer(yy_create_buffer(yyin, YY_BUF_SIZE));
			  /* switch to whatever state was active before the include file */
			  yy_pop_state();
			  /* now process the new file... */
			}
|
846 |
|
847 |
|
<<EOF>>			{ /* End of the current input: either the main file, or an included file.
			   *
			   * NOTE: include_stack_ptr is only decremented when we are NOT at the
			   *       end of the main file. Once the main file has been parsed,
			   *       include_stack_ptr must remain at 0, in case the parser is
			   *       called once again with a new file.
			   *       (In fact, we currently do just that!)
			   */
			  free(current_tracking);
			  if (include_stack_ptr != 0) {
			    /* End of an included file: go back to the file that included it. */
			    --include_stack_ptr;
			    /* NOTE(review): the FILE* of the included file does not seem to be
			     *               closed anywhere in this rule -- confirm whether it
			     *               is closed elsewhere.
			     */
			    yy_delete_buffer(YY_CURRENT_BUFFER);
			    yy_switch_to_buffer((include_stack[include_stack_ptr]).buffer_state);
			    current_tracking = include_stack[include_stack_ptr].env;
			    /* NOTE: The malloc()'d memory holding the name of the included
			     *       file is deliberately __NOT__ free()d here, since pointers
			     *       to this filename will be kept by many objects in the
			     *       abstract syntax tree. They are later used to provide
			     *       correct error messages during semantic analysis (stage 3).
			     *       This is why the following remains disabled:
			     *           free((char *)current_filename);
			     *       (The cast would merely remove the constness of the char *.
			     *        The only real const char * stored on the stack is the
			     *        first one, in include_stack[0], which is never free'd.)
			     */
			    current_filename = include_stack[include_stack_ptr].filename;
			    yy_push_state(include_end);
			  }
			  else {
			    /* End of the main file.
			     *
			     * yyterminate() terminates the scanner and returns a 0 to the
			     * scanner's caller, indicating "all done".
			     *
			     * Our syntax parser (written with bison) has the token
			     * END_OF_INPUT associated to the value 0, so even though
			     * we don't explicitly return the token END_OF_INPUT,
			     * calling yyterminate() is equivalent to doing that.
			     */
			    yyterminate();
			  }
			}
|
889 |
|
890 <include_end>{file_include_pragma_end} yy_pop_state(); |
|
891 |
|
892 |
|
893 /*********************************/ |
|
894 /* Handle all the state changes! */ |
|
895 /*********************************/ |
|
896 |
|
897 /* INITIAL -> decl_state */ |
|
898 <INITIAL>{ |
|
899 /* NOTE: how about functions that do not declare variables, and go directly to the body_state??? |
|
900 * - According to Section 2.5.1.3 (Function Declaration), item 2 in the list, a FUNCTION |
|
901 * must have at least one input argument, so a correct declaration will have at least |
|
902 * one VAR_INPUT ... VAR_END construct! |
|
903 * - According to Section 2.5.2.2 (Function Block Declaration), a FUNCTION_BLOCK |
|
904 * must have at least one input argument, so a correct declaration will have at least |
|
905 * one VAR_INPUT ... VAR_END construct! |
|
906 * - According to Section 2.5.3 (Programs), a PROGRAM must have at least one input |
|
907 * argument, so a correct declaration will have at least one VAR_INPUT ... VAR_END |
|
908 * construct! |
|
909 * |
|
910 * All the above means that we needn't worry about PROGRAMs, FUNCTIONs or |
|
911 * FUNCTION_BLOCKs that do not have at least one VAR_END before the body_state. |
|
912 * If the code has an error, and no VAR_END before the body, we will simply |
|
913 * continue in the <decl_state> state, untill the end of the FUNCTION, FUNCTION_BLOCK |
|
914 * or PROGAM. |
|
915 */ |
|
916 FUNCTION BEGIN(decl_state); return FUNCTION; |
|
917 FUNCTION_BLOCK BEGIN(decl_state); return FUNCTION_BLOCK; |
|
918 PROGRAM BEGIN(decl_state); return PROGRAM; |
|
919 CONFIGURATION BEGIN(config_state); return CONFIGURATION; |
|
920 } |
|
921 |
|
	/* INITIAL -> body_state */
	/* required if the function, program, etc.. has no VAR block! */
	/* We comment it out since the standard does not allow this.  */
	/* NOTE: Even if we were to include the following code, it    */
	/*       would have no effect whatsoever since the above      */
	/*       rules will take precedence!                          */
|
928 /* |
|
929 <INITIAL>{ |
|
930 FUNCTION BEGIN(body_state); return FUNCTION; |
|
931 FUNCTION_BLOCK BEGIN(body_state); return FUNCTION_BLOCK; |
|
932 PROGRAM BEGIN(body_state); return PROGRAM; |
|
933 } |
|
934 */ |
|
935 |
|
936 /* decl_state -> (body_state | sfc_state) */ |
|
937 <decl_state>{ |
|
938 END_VAR{st_whitespace}VAR {unput_text(strlen("END_VAR")); |
|
939 return END_VAR; |
|
940 } |
|
941 END_VAR{st_whitespace}INITIAL_STEP {unput_text(strlen("END_VAR")); |
|
942 yy_push_state(sfc_state); |
|
943 return END_VAR; |
|
944 } |
|
945 END_VAR{st_whitespace} {unput_text(strlen("END_VAR")); |
|
946 cmd_goto_body_state(); |
|
947 return END_VAR; |
|
948 } |
|
949 } |
|
950 |
|
951 /* body_state -> (il_state | st_state) */ |
|
<body_state>{
{st_whitespace_no_pragma}			/* Eat any whitespace */

	/* None of the rules below consumes any input: unput_text(0) is called on
	 * the whole match (presumably pushing it back to be scanned again in the
	 * new state -- confirm against unput_text()). They only decide whether
	 * the body is scanned in the st_state or in the il_state.
	 */

	/* All the following rules select the st_state (they share the action of
	 * the last rule of the chain).
	 * NOTE: ':=' occurs only in transitions, and not Function or FB bodies!
	 */
{qualified_identifier}{st_whitespace}":="	|
{direct_variable_standard}{st_whitespace}":="	|
{qualified_identifier}"["			|
RETURN						|
IF						|
CASE						|
FOR						|
WHILE						|
REPEAT						|
EXIT						|
:=						{ unput_text(0); BEGIN(st_state); }

	/* Hopefully, the above rules (along with the last one),
	 * used to distinguish ST from IL, are
	 * enough to handle all occurrences. However, if
	 * there is some situation where the compiler is getting confused,
	 * we add the following rule to detect 'label:' in IL code. This will
	 * allow the user to insert a label right at the beginning (which
	 * will probably not be used further by his code) simply as a way
	 * to force the compiler to interpret his code as IL code.
	 */
{identifier}{st_whitespace}":"{st_whitespace}	{ unput_text(0); BEGIN(il_state); }

{identifier}	{
			  if (get_identifier_token(yytext) == prev_declared_fb_name_token) {
			    /* the code has a call to a function block */
			    /* NOTE: if we ever decide to allow the user to use IL operator tokens
			     *       (LD, ST, ...) as identifiers for variable names (including
			     *       function block instances), then the above inference/conclusion
			     *       may be incorrect, and this condition may have to be changed!
			     */
			    BEGIN(st_state);
			  } else {
			    BEGIN(il_state);
			  }
			  unput_text(0);
			}

	/* anything else: assume IL code */
.		{ unput_text(0); BEGIN(il_state); }
}	/* end of body_state lexical parser */
|
997 |
|
998 /* (il_state | st_state) -> $previous_state (decl_state or sfc_state) */ |
|
999 <il_state,st_state>{ |
|
1000 END_FUNCTION yy_pop_state(); unput_text(0); |
|
1001 END_FUNCTION_BLOCK yy_pop_state(); unput_text(0); |
|
1002 END_PROGRAM yy_pop_state(); unput_text(0); |
|
1003 END_TRANSITION yy_pop_state(); unput_text(0); |
|
1004 END_ACTION yy_pop_state(); unput_text(0); |
|
1005 } |
|
1006 |
|
1007 /* sfc_state -> INITIAL */ |
|
1008 <sfc_state>{ |
|
1009 END_FUNCTION yy_pop_state(); unput_text(0); |
|
1010 END_FUNCTION_BLOCK yy_pop_state(); unput_text(0); |
|
1011 END_PROGRAM yy_pop_state(); unput_text(0); |
|
1012 } |
|
1013 |
|
1014 /* decl_state -> INITIAL */ |
|
1015 <decl_state>{ |
|
1016 END_FUNCTION BEGIN(INITIAL); return END_FUNCTION; |
|
1017 END_FUNCTION_BLOCK BEGIN(INITIAL); return END_FUNCTION_BLOCK; |
|
1018 END_PROGRAM BEGIN(INITIAL); return END_PROGRAM; |
|
1019 } |
|
1020 /* config -> INITIAL */ |
|
1021 END_CONFIGURATION BEGIN(INITIAL); return END_CONFIGURATION; |
|
1022 |
|
1023 |
|
1024 |
|
	/***************************************/
	/* Next is to remove all whitespace    */
	/***************************************/
|
1028 /* NOTE: pragmas are handled right at the beginning... */ |
|
1029 |
|
1030 <INITIAL,config_state,decl_state,st_state,sfc_state,task_init_state,sfc_qualifier_state>{st_whitespace_no_pragma} /* Eat any whitespace */ |
|
1031 <il_state>{il_whitespace_no_pragma} /* Eat any whitespace */ |
|
1032 |
|
1033 |
|
1034 |
|
1035 /*****************************************/ |
|
1036 /* B.1.1 Letters, digits and identifiers */ |
|
1037 /*****************************************/ |
|
	/* NOTE: 'R1', 'IN', etc... are IL operators, and therefore tokens.
	 *       On the other hand, the spec does not define them as keywords,
	 *       which means they may be re-used for variable names, etc...!
	 *       The syntax parser already caters for the possibility of these
	 *       tokens being used for variable names in their declarations.
	 *       When they are declared, they will be added to the variable symbol table!
	 *       Further appearances of these tokens must no longer be parsed
	 *       as R1_tokens etc..., but rather as variable_name_tokens!
	 *
	 *       That is why the first thing we do with identifiers, even before
	 *       checking whether they may be a 'keyword', is to check whether
	 *       they have been previously declared as a variable name.
	 *
	 *       However, we have a dilemma! Should we here also check for
	 *       prev_declared_derived_function_name_token?
	 *       If we do, then the 'MOD' default library function (defined in
	 *       the standard) will always be returned as a function name, and
	 *       it will therefore not be possible to use it as an operator as
	 *       in the following ST expression 'X := Y MOD Z;' !
	 *       If we don't, then it will not even be possible to use 'MOD'
	 *       as a function as in 'X := MOD(Y, Z);'
	 *       We solve this by NOT testing for function names here, and
	 *       handling this function and keyword clash in bison!
	 */
|
1062 /* |
|
1063 {identifier} {int token = get_identifier_token(yytext); |
|
1064 // fprintf(stderr, "flex: analysing identifier '%s'...", yytext); |
|
1065 if ((token == prev_declared_variable_name_token) || |
|
1066 // (token == prev_declared_derived_function_name_token) || // DO NOT add this condition! |
|
1067 (token == prev_declared_fb_name_token)) { |
|
1068 // if (token != identifier_token) |
|
1069 // * NOTE: if we replace the above uncommented conditions with |
|
1070 * the simple test of (token != identifier_token), then |
|
1071 * 'MOD' et al must be removed from the |
|
1072 * library_symbol_table as a default function name! |
|
1073 * // |
|
1074 yylval.ID=strdup(yytext); |
|
1075 // fprintf(stderr, "returning token %d\n", token); |
|
1076 return token; |
|
1077 } |
|
1078 // otherwise, leave it for the other lexical parser rules... |
|
1079 // fprintf(stderr, "rejecting\n"); |
|
1080 REJECT; |
|
1081 } |
|
1082 */ |
|
1083 |
|
1084 /******************************************************/ |
|
1085 /******************************************************/ |
|
1086 /******************************************************/ |
|
1087 /***** *****/ |
|
1088 /***** *****/ |
|
1089 /***** N O W D O T H E K E Y W O R D S *****/ |
|
1090 /***** *****/ |
|
1091 /***** *****/ |
|
1092 /******************************************************/ |
|
1093 /******************************************************/ |
|
1094 /******************************************************/ |
|
1095 |
|
1096 |
|
1097 EN return EN; /* Keyword */ |
|
1098 ENO return ENO; /* Keyword */ |
|
1099 |
|
1100 |
|
1101 /******************************/ |
|
1102 /* B 1.2.1 - Numeric Literals */ |
|
1103 /******************************/ |
|
1104 TRUE return TRUE; /* Keyword */ |
|
1105 BOOL#1 return boolean_true_literal_token; |
|
1106 BOOL#TRUE return boolean_true_literal_token; |
|
1107 SAFEBOOL#1 {if (get_opt_safe_extensions()) {return safeboolean_true_literal_token;} else{REJECT;}} /* Keyword (Data Type) */ |
|
1108 SAFEBOOL#TRUE {if (get_opt_safe_extensions()) {return safeboolean_true_literal_token;} else{REJECT;}} /* Keyword (Data Type) */ |
|
1109 |
|
1110 FALSE return FALSE; /* Keyword */ |
|
1111 BOOL#0 return boolean_false_literal_token; |
|
1112 BOOL#FALSE return boolean_false_literal_token; |
|
1113 SAFEBOOL#0 {if (get_opt_safe_extensions()) {return safeboolean_false_literal_token;} else{REJECT;}} /* Keyword (Data Type) */ |
|
1114 SAFEBOOL#FALSE {if (get_opt_safe_extensions()) {return safeboolean_false_literal_token;} else{REJECT;}} /* Keyword (Data Type) */ |
|
1115 |
|
1116 |
|
1117 /************************/ |
|
1118 /* B 1.2.3.1 - Duration */ |
|
1119 /************************/ |
|
1120 t# return T_SHARP; /* Delimiter */ |
|
1121 T# return T_SHARP; /* Delimiter */ |
|
1122 TIME return TIME; /* Keyword (Data Type) */ |
|
1123 |
|
1124 |
|
1125 /************************************/ |
|
1126 /* B 1.2.3.2 - Time of day and Date */ |
|
1127 /************************************/ |
|
1128 TIME_OF_DAY return TIME_OF_DAY; /* Keyword (Data Type) */ |
|
1129 TOD return TIME_OF_DAY; /* Keyword (Data Type) */ |
|
1130 DATE return DATE; /* Keyword (Data Type) */ |
|
1131 d# return D_SHARP; /* Delimiter */ |
|
1132 D# return D_SHARP; /* Delimiter */ |
|
1133 DATE_AND_TIME return DATE_AND_TIME; /* Keyword (Data Type) */ |
|
1134 DT return DATE_AND_TIME; /* Keyword (Data Type) */ |
|
1135 |
|
1136 |
|
1137 /***********************************/ |
|
1138 /* B 1.3.1 - Elementary Data Types */ |
|
1139 /***********************************/ |
|
1140 BOOL return BOOL; /* Keyword (Data Type) */ |
|
1141 |
|
1142 BYTE return BYTE; /* Keyword (Data Type) */ |
|
1143 WORD return WORD; /* Keyword (Data Type) */ |
|
1144 DWORD return DWORD; /* Keyword (Data Type) */ |
|
1145 LWORD return LWORD; /* Keyword (Data Type) */ |
|
1146 |
|
1147 SINT return SINT; /* Keyword (Data Type) */ |
|
1148 INT return INT; /* Keyword (Data Type) */ |
|
1149 DINT return DINT; /* Keyword (Data Type) */ |
|
1150 LINT return LINT; /* Keyword (Data Type) */ |
|
1151 |
|
1152 USINT return USINT; /* Keyword (Data Type) */ |
|
1153 UINT return UINT; /* Keyword (Data Type) */ |
|
1154 UDINT return UDINT; /* Keyword (Data Type) */ |
|
1155 ULINT return ULINT; /* Keyword (Data Type) */ |
|
1156 |
|
1157 REAL return REAL; /* Keyword (Data Type) */ |
|
1158 LREAL return LREAL; /* Keyword (Data Type) */ |
|
1159 |
|
1160 WSTRING return WSTRING; /* Keyword (Data Type) */ |
|
1161 STRING return STRING; /* Keyword (Data Type) */ |
|
1162 |
|
1163 TIME return TIME; /* Keyword (Data Type) */ |
|
1164 DATE return DATE; /* Keyword (Data Type) */ |
|
1165 DT return DT; /* Keyword (Data Type) */ |
|
1166 TOD return TOD; /* Keyword (Data Type) */ |
|
1167 DATE_AND_TIME return DATE_AND_TIME; /* Keyword (Data Type) */ |
|
1168 TIME_OF_DAY return TIME_OF_DAY; /* Keyword (Data Type) */ |
|
1169 |
|
1170 /*****************************************************************/ |
|
1171 /* Keywords defined in "Safety Software Technical Specification" */ |
|
1172 /*****************************************************************/ |
|
	/*
	 * NOTE: The following keywords are defined in
	 *       "Safety Software Technical Specification,
	 *        Part 1: Concepts and Function Blocks,
	 *        Version 1.0 – Official Release"
	 *        written by PLCopen - Technical Committee 5
	 *
	 *        We only support these extensions and keywords
	 *        if the appropriate command line option is given.
	 */
|
1183 SAFEBOOL {if (get_opt_safe_extensions()) {return SAFEBOOL;} else {REJECT;}} |
|
1184 |
|
1185 SAFEBYTE {if (get_opt_safe_extensions()) {return SAFEBYTE;} else {REJECT;}} |
|
1186 SAFEWORD {if (get_opt_safe_extensions()) {return SAFEWORD;} else {REJECT;}} |
|
1187 SAFEDWORD {if (get_opt_safe_extensions()) {return SAFEDWORD;} else{REJECT;}} |
|
1188 SAFELWORD {if (get_opt_safe_extensions()) {return SAFELWORD;} else{REJECT;}} |
|
1189 |
|
1190 SAFEREAL {if (get_opt_safe_extensions()) {return SAFESINT;} else{REJECT;}} |
|
1191 SAFELREAL {if (get_opt_safe_extensions()) {return SAFELREAL;} else{REJECT;}} |
|
1192 |
|
1193 SAFESINT {if (get_opt_safe_extensions()) {return SAFESINT;} else{REJECT;}} |
|
1194 SAFEINT {if (get_opt_safe_extensions()) {return SAFEINT;} else{REJECT;}} |
|
1195 SAFEDINT {if (get_opt_safe_extensions()) {return SAFEDINT;} else{REJECT;}} |
|
1196 SAFELINT {if (get_opt_safe_extensions()) {return SAFELINT;} else{REJECT;}} |
|
1197 |
|
1198 SAFEUSINT {if (get_opt_safe_extensions()) {return SAFEUSINT;} else{REJECT;}} |
|
1199 SAFEUINT {if (get_opt_safe_extensions()) {return SAFEUINT;} else{REJECT;}} |
|
1200 SAFEUDINT {if (get_opt_safe_extensions()) {return SAFEUDINT;} else{REJECT;}} |
|
1201 SAFEULINT {if (get_opt_safe_extensions()) {return SAFEULINT;} else{REJECT;}} |
|
1202 |
|
1203 /* SAFESTRING and SAFEWSTRING are not yet supported, i.e. checked correctly, in the semantic analyser (stage 3) */ |
|
1204 /* so it is best not to support them at all... */ |
|
1205 /* |
|
1206 SAFEWSTRING {if (get_opt_safe_extensions()) {return SAFEWSTRING;} else{REJECT;}} |
|
1207 SAFESTRING {if (get_opt_safe_extensions()) {return SAFESTRING;} else{REJECT;}} |
|
1208 */ |
|
1209 |
|
1210 SAFETIME {if (get_opt_safe_extensions()) {return SAFETIME;} else{REJECT;}} |
|
1211 SAFEDATE {if (get_opt_safe_extensions()) {return SAFEDATE;} else{REJECT;}} |
|
1212 SAFEDT {if (get_opt_safe_extensions()) {return SAFEDT;} else{REJECT;}} |
|
1213 SAFETOD {if (get_opt_safe_extensions()) {return SAFETOD;} else{REJECT;}} |
|
1214 SAFEDATE_AND_TIME {if (get_opt_safe_extensions()) {return SAFEDATE_AND_TIME;} else{REJECT;}} |
|
1215 SAFETIME_OF_DAY {if (get_opt_safe_extensions()) {return SAFETIME_OF_DAY;} else{REJECT;}} |
|
1216 |
|
1217 /********************************/ |
|
1218 /* B 1.3.2 - Generic data types */ |
|
1219 /********************************/ |
|
1220 /* Strangely, the following symbols do not seem to be required! */ |
|
1221 /* But we include them so they become reserved words, and do not |
|
1222 * get passed up to bison as an identifier... |
|
1223 */ |
|
1224 ANY return ANY; /* Keyword (Data Type) */ |
|
1225 ANY_DERIVED return ANY_DERIVED; /* Keyword (Data Type) */ |
|
1226 ANY_ELEMENTARY return ANY_ELEMENTARY; /* Keyword (Data Type) */ |
|
1227 ANY_MAGNITUDE return ANY_MAGNITUDE; /* Keyword (Data Type) */ |
|
1228 ANY_NUM return ANY_NUM; /* Keyword (Data Type) */ |
|
1229 ANY_REAL return ANY_REAL; /* Keyword (Data Type) */ |
|
1230 ANY_INT return ANY_INT; /* Keyword (Data Type) */ |
|
1231 ANY_BIT return ANY_BIT; /* Keyword (Data Type) */ |
|
1232 ANY_STRING return ANY_STRING; /* Keyword (Data Type) */ |
|
1233 ANY_DATE return ANY_DATE; /* Keyword (Data Type) */ |
|
1234 |
|
1235 |
|
1236 /********************************/ |
|
1237 /* B 1.3.3 - Derived data types */ |
|
1238 /********************************/ |
|
1239 ":=" return ASSIGN; /* Delimiter */ |
|
1240 ".." return DOTDOT; /* Delimiter */ |
|
1241 TYPE return TYPE; /* Keyword */ |
|
1242 END_TYPE return END_TYPE; /* Keyword */ |
|
1243 ARRAY return ARRAY; /* Keyword */ |
|
1244 OF return OF; /* Keyword */ |
|
1245 STRUCT return STRUCT; /* Keyword */ |
|
1246 END_STRUCT return END_STRUCT; /* Keyword */ |
|
1247 |
|
1248 |
|
1249 /*********************/ |
|
1250 /* B 1.4 - Variables */ |
|
1251 /*********************/ |
|
1252 |
|
1253 /******************************************/ |
|
1254 /* B 1.4.3 - Declaration & Initialisation */ |
|
1255 /******************************************/ |
|
1256 VAR_INPUT return VAR_INPUT; /* Keyword */ |
|
1257 VAR_OUTPUT return VAR_OUTPUT; /* Keyword */ |
|
1258 VAR_IN_OUT return VAR_IN_OUT; /* Keyword */ |
|
1259 VAR_EXTERNAL return VAR_EXTERNAL; /* Keyword */ |
|
1260 VAR_GLOBAL return VAR_GLOBAL; /* Keyword */ |
|
1261 END_VAR return END_VAR; /* Keyword */ |
|
1262 RETAIN return RETAIN; /* Keyword */ |
|
1263 NON_RETAIN return NON_RETAIN; /* Keyword */ |
|
1264 R_EDGE return R_EDGE; /* Keyword */ |
|
1265 F_EDGE return F_EDGE; /* Keyword */ |
|
1266 AT return AT; /* Keyword */ |
|
1267 |
|
1268 |
|
1269 /***********************/ |
|
1270 /* B 1.5.1 - Functions */ |
|
1271 /***********************/ |
|
1272 FUNCTION return FUNCTION; /* Keyword */ |
|
1273 END_FUNCTION return END_FUNCTION; /* Keyword */ |
|
1274 VAR return VAR; /* Keyword */ |
|
1275 CONSTANT return CONSTANT; /* Keyword */ |
|
1276 |
|
1277 |
|
1278 /*****************************/ |
|
1279 /* B 1.5.2 - Function Blocks */ |
|
1280 /*****************************/ |
|
1281 FUNCTION_BLOCK return FUNCTION_BLOCK; /* Keyword */ |
|
1282 END_FUNCTION_BLOCK return END_FUNCTION_BLOCK; /* Keyword */ |
|
1283 VAR_TEMP return VAR_TEMP; /* Keyword */ |
|
1284 VAR return VAR; /* Keyword */ |
|
1285 NON_RETAIN return NON_RETAIN; /* Keyword */ |
|
1286 END_VAR return END_VAR; /* Keyword */ |
|
1287 |
|
1288 |
|
1289 /**********************/ |
|
1290 /* B 1.5.3 - Programs */ |
|
1291 /**********************/ |
|
1292 PROGRAM return PROGRAM; /* Keyword */ |
|
1293 END_PROGRAM return END_PROGRAM; /* Keyword */ |
|
1294 |
|
1295 |
|
1296 /********************************************/ |
|
1297 /* B 1.6 Sequential Function Chart elements */ |
|
1298 /********************************************/ |
|
	/* NOTE: the following identifiers/tokens clash with the R and S IL operators, as well
	 *       as other identifiers that may be used as variable names inside IL and ST programs.
	 *       They will have to be handled when we include parsing of SFC... For now, simply
	 *       ignore them!
	 */
|
1304 |
|
1305 ACTION return ACTION; /* Keyword */ |
|
1306 END_ACTION return END_ACTION; /* Keyword */ |
|
1307 |
|
1308 TRANSITION return TRANSITION; /* Keyword */ |
|
1309 END_TRANSITION return END_TRANSITION; /* Keyword */ |
|
1310 FROM return FROM; /* Keyword */ |
|
1311 TO return TO; /* Keyword */ |
|
1312 |
|
1313 INITIAL_STEP return INITIAL_STEP; /* Keyword */ |
|
1314 STEP return STEP; /* Keyword */ |
|
1315 END_STEP return END_STEP; /* Keyword */ |
|
1316 |
|
	/* PRIORITY is not a keyword, so we only return it when
	 * it is explicitly required and we are not expecting any identifiers
	 * that could also use the same letter sequence (e.g. an identifier: priority)
	 */
|
1321 <sfc_priority_state>PRIORITY return PRIORITY; |
|
1322 |
|
1323 <sfc_qualifier_state>{ |
|
1324 L return L; |
|
1325 D return D; |
|
1326 SD return SD; |
|
1327 DS return DS; |
|
1328 SL return SL; |
|
1329 N return N; |
|
1330 P return P; |
|
1331 R return R; |
|
1332 S return S; |
|
1333 } |
|
1334 |
|
1335 |
|
1336 /********************************/ |
|
1337 /* B 1.7 Configuration elements */ |
|
1338 /********************************/ |
|
1339 CONFIGURATION return CONFIGURATION; /* Keyword */ |
|
1340 END_CONFIGURATION return END_CONFIGURATION; /* Keyword */ |
|
1341 TASK return TASK; /* Keyword */ |
|
1342 RESOURCE return RESOURCE; /* Keyword */ |
|
1343 ON return ON; /* Keyword */ |
|
1344 END_RESOURCE return END_RESOURCE; /* Keyword */ |
|
1345 VAR_CONFIG return VAR_CONFIG; /* Keyword */ |
|
1346 VAR_ACCESS return VAR_ACCESS; /* Keyword */ |
|
1347 END_VAR return END_VAR; /* Keyword */ |
|
1348 WITH return WITH; /* Keyword */ |
|
1349 PROGRAM return PROGRAM; /* Keyword */ |
|
1350 RETAIN return RETAIN; /* Keyword */ |
|
1351 NON_RETAIN return NON_RETAIN; /* Keyword */ |
|
1352 READ_WRITE return READ_WRITE; /* Keyword */ |
|
1353 READ_ONLY return READ_ONLY; /* Keyword */ |
|
1354 |
|
	/* PRIORITY, SINGLE and INTERVAL are not keywords, so we only return them when
	 * they are explicitly required and we are not expecting any identifiers
	 * that could also use the same letter sequence (e.g. an identifier: priority, ...)
	 */
|
1359 <task_init_state>{ |
|
1360 PRIORITY return PRIORITY; |
|
1361 SINGLE return SINGLE; |
|
1362 INTERVAL return INTERVAL; |
|
1363 } |
|
1364 |
|
1365 /***********************************/ |
|
1366 /* B 2.1 Instructions and Operands */ |
|
1367 /***********************************/ |
|
1368 <il_state>\n return EOL; |
|
1369 |
|
1370 |
|
1371 /*******************/ |
|
1372 /* B 2.2 Operators */ |
|
1373 /*******************/ |
|
	/* NOTE: we can't have flex return the same token for
	 *       ANDN and &N, neither for AND and &, since
	 *       AND and ANDN are considered valid variable,
	 *       function or function block type names!
	 *       This means that the parser may decide that the
	 *       AND or ANDN strings found in the source code
	 *       are being used as variable names
	 *       and not as operators, and will therefore transform
	 *       these tokens into identifier tokens!
	 *       We can't have the parser thinking that the source
	 *       code contained the string AND (which may be interpreted
	 *       as a variable name) when in reality the source code
	 *       merely contained the character &, so we use two
	 *       different tokens for & and AND (and similarly
	 *       ANDN and &N)!
	 */
|
1390 /* The following tokens clash with ST expression operators and Standard Functions */ |
|
1391 /* They are also keywords! */ |
|
1392 AND return AND; /* Keyword */ |
|
1393 MOD return MOD; /* Keyword */ |
|
1394 OR return OR; /* Keyword */ |
|
1395 XOR return XOR; /* Keyword */ |
|
1396 NOT return NOT; /* Keyword */ |
|
1397 |
|
1398 /* The following tokens clash with Standard Functions */ |
|
1399 /* They are keywords because they are a function name */ |
|
1400 <il_state>{ |
|
1401 ADD return ADD; /* Keyword (Standard Function) */ |
|
1402 DIV return DIV; /* Keyword (Standard Function) */ |
|
1403 EQ return EQ; /* Keyword (Standard Function) */ |
|
1404 GE return GE; /* Keyword (Standard Function) */ |
|
1405 GT return GT; /* Keyword (Standard Function) */ |
|
1406 LE return LE; /* Keyword (Standard Function) */ |
|
1407 LT return LT; /* Keyword (Standard Function) */ |
|
1408 MUL return MUL; /* Keyword (Standard Function) */ |
|
1409 NE return NE; /* Keyword (Standard Function) */ |
|
1410 SUB return SUB; /* Keyword (Standard Function) */ |
|
1411 } |
|
1412 |
|
1413 /* The following tokens clash with SFC action qualifiers */ |
|
1414 /* They are not keywords! */ |
|
1415 <il_state>{ |
|
1416 S return S; |
|
1417 R return R; |
|
1418 } |
|
1419 |
|
1420 /* The following tokens clash with ST expression operators */ |
|
1421 & return AND2; /* NOT a Delimiter! */ |
|
1422 |
|
1423 /* The following tokens have no clashes */ |
|
1424 /* They are not keywords! */ |
|
1425 <il_state>{ |
|
1426 LD return LD; |
|
1427 LDN return LDN; |
|
1428 ST return ST; |
|
1429 STN return STN; |
|
1430 S1 return S1; |
|
1431 R1 return R1; |
|
1432 CLK return CLK; |
|
1433 CU return CU; |
|
1434 CD return CD; |
|
1435 PV return PV; |
|
1436 IN return IN; |
|
1437 PT return PT; |
|
1438 ANDN return ANDN; |
|
1439 &N return ANDN2; |
|
1440 ORN return ORN; |
|
1441 XORN return XORN; |
|
1442 CAL return CAL; |
|
1443 CALC return CALC; |
|
1444 CALCN return CALCN; |
|
1445 RET return RET; |
|
1446 RETC return RETC; |
|
1447 RETCN return RETCN; |
|
1448 JMP return JMP; |
|
1449 JMPC return JMPC; |
|
1450 JMPCN return JMPCN; |
|
1451 } |
|
1452 |
|
1453 /***********************/ |
|
1454 /* B 3.1 - Expressions */ |
|
1455 /***********************/ |
|
1456 "**" return OPER_EXP; /* NOT a Delimiter! */ |
|
1457 "<>" return OPER_NE; /* NOT a Delimiter! */ |
|
1458 ">=" return OPER_GE; /* NOT a Delimiter! */ |
|
1459 "<=" return OPER_LE; /* NOT a Delimiter! */ |
|
1460 & return AND2; /* NOT a Delimiter! */ |
|
1461 AND return AND; /* Keyword */ |
|
1462 XOR return XOR; /* Keyword */ |
|
1463 OR return OR; /* Keyword */ |
|
1464 NOT return NOT; /* Keyword */ |
|
1465 MOD return MOD; /* Keyword */ |
|
1466 |
|
1467 |
|
1468 /*****************************************/ |
|
1469 /* B 3.2.2 Subprogram Control Statements */ |
|
1470 /*****************************************/ |
|
1471 := return ASSIGN; /* Delimiter */ |
|
1472 => return SENDTO; /* Delimiter */ |
|
1473 RETURN return RETURN; /* Keyword */ |
|
1474 |
|
1475 |
|
1476 /********************************/ |
|
1477 /* B 3.2.3 Selection Statements */ |
|
1478 /********************************/ |
|
1479 IF return IF; /* Keyword */ |
|
1480 THEN return THEN; /* Keyword */ |
|
1481 ELSIF return ELSIF; /* Keyword */ |
|
1482 ELSE return ELSE; /* Keyword */ |
|
1483 END_IF return END_IF; /* Keyword */ |
|
1484 |
|
1485 CASE return CASE; /* Keyword */ |
|
1486 OF return OF; /* Keyword */ |
|
1487 ELSE return ELSE; /* Keyword */ |
|
1488 END_CASE return END_CASE; /* Keyword */ |
|
1489 |
|
1490 |
|
1491 /********************************/ |
|
1492 /* B 3.2.4 Iteration Statements */ |
|
1493 /********************************/ |
|
1494 FOR return FOR; /* Keyword */ |
|
1495 TO return TO; /* Keyword */ |
|
1496 BY return BY; /* Keyword */ |
|
1497 DO return DO; /* Keyword */ |
|
1498 END_FOR return END_FOR; /* Keyword */ |
|
1499 |
|
1500 WHILE return WHILE; /* Keyword */ |
|
1501 DO return DO; /* Keyword */ |
|
1502 END_WHILE return END_WHILE; /* Keyword */ |
|
1503 |
|
1504 REPEAT return REPEAT; /* Keyword */ |
|
1505 UNTIL return UNTIL; /* Keyword */ |
|
1506 END_REPEAT return END_REPEAT; /* Keyword */ |
|
1507 |
|
1508 EXIT return EXIT; /* Keyword */ |
|
1509 |
|
1510 |
|
1511 |
|
1512 |
|
1513 |
|
1514 |
|
1515 /********************************************************/ |
|
1516 /********************************************************/ |
|
1517 /********************************************************/ |
|
1518 /***** *****/ |
|
1519 /***** *****/ |
|
1520 /***** N O W W O R K W I T H V A L U E S *****/ |
|
1521 /***** *****/ |
|
1522 /***** *****/ |
|
1523 /********************************************************/ |
|
1524 /********************************************************/ |
|
1525 /********************************************************/ |
|
1526 |
|
1527 |
|
1528 /********************************************/ |
|
1529 /* B.1.4.1 Directly Represented Variables */ |
|
1530 /********************************************/ |
|
1531 {direct_variable} {yylval.ID=strdup(yytext); return get_direct_variable_token(yytext);} |
|
1532 |
|
1533 |
|
1534 /******************************************/ |
|
1535 /* B 1.4.3 - Declaration & Initialisation */ |
|
1536 /******************************************/ |
|
1537 {incompl_location} {yylval.ID=strdup(yytext); return incompl_location_token;} |
|
1538 |
|
1539 |
|
1540 /************************/ |
|
1541 /* B 1.2.3.1 - Duration */ |
|
1542 /************************/ |
|
1543 {fixed_point} {yylval.ID=strdup(yytext); return fixed_point_token;} |
|
1544 |
|
1545 {fixed_point_d} {yylval.ID=strdup(yytext); yylval.ID[yyleng-1] = '\0'; return fixed_point_d_token;} |
|
1546 {integer_d} {yylval.ID=strdup(yytext); yylval.ID[yyleng-1] = '\0'; return integer_d_token;} |
|
1547 |
|
1548 {fixed_point_h} {yylval.ID=strdup(yytext); yylval.ID[yyleng-1] = '\0'; return fixed_point_h_token;} |
|
1549 {integer_h} {yylval.ID=strdup(yytext); yylval.ID[yyleng-1] = '\0'; return integer_h_token;} |
|
1550 |
|
1551 {fixed_point_m} {yylval.ID=strdup(yytext); yylval.ID[yyleng-1] = '\0'; return fixed_point_m_token;} |
|
1552 {integer_m} {yylval.ID=strdup(yytext); yylval.ID[yyleng-1] = '\0'; return integer_m_token;} |
|
1553 |
|
1554 {fixed_point_s} {yylval.ID=strdup(yytext); yylval.ID[yyleng-1] = '\0'; return fixed_point_s_token;} |
|
1555 {integer_s} {yylval.ID=strdup(yytext); yylval.ID[yyleng-1] = '\0'; return integer_s_token;} |
|
1556 |
|
1557 {fixed_point_ms} {yylval.ID=strdup(yytext); yylval.ID[yyleng-2] = '\0'; return fixed_point_ms_token;} |
|
1558 {integer_ms} {yylval.ID=strdup(yytext); yylval.ID[yyleng-2] = '\0'; return integer_ms_token;} |
|
1559 |
|
1560 |
|
1561 /*******************************/ |
|
1562 /* B.1.2.2 Character Strings */ |
|
1563 /*******************************/ |
|
1564 {double_byte_character_string} {yylval.ID=strdup(yytext); return double_byte_character_string_token;} |
|
1565 {single_byte_character_string} {yylval.ID=strdup(yytext); return single_byte_character_string_token;} |
|
1566 |
|
1567 |
|
1568 /******************************/ |
|
1569 /* B.1.2.1 Numeric literals */ |
|
1570 /******************************/ |
|
1571 {integer} {yylval.ID=strdup(yytext); return integer_token;} |
|
1572 {real} {yylval.ID=strdup(yytext); return real_token;} |
|
1573 {binary_integer} {yylval.ID=strdup(yytext); return binary_integer_token;} |
|
1574 {octal_integer} {yylval.ID=strdup(yytext); return octal_integer_token;} |
|
1575 {hex_integer} {yylval.ID=strdup(yytext); return hex_integer_token;} |
|
1576 |
|
1577 |
|
1578 /*****************************************/ |
|
1579 /* B.1.1 Letters, digits and identifiers */ |
|
1580 /*****************************************/ |
|
1581 <st_state>{identifier}/({st_whitespace})"=>" {yylval.ID=strdup(yytext); return sendto_identifier_token;} |
|
1582 <il_state>{identifier}/({il_whitespace})"=>" {yylval.ID=strdup(yytext); return sendto_identifier_token;} |
|
1583 {identifier} {yylval.ID=strdup(yytext); |
|
1584 // printf("returning identifier...: %s, %d\n", yytext, get_identifier_token(yytext)); |
|
1585 return get_identifier_token(yytext);} |
|
1586 |
|
1587 |
|
1588 |
|
1589 |
|
1590 |
|
1591 |
|
1592 /************************************************/ |
|
1593 /************************************************/ |
|
1594 /************************************************/ |
|
1595 /***** *****/ |
|
1596 /***** *****/ |
|
1597 /***** T H E L E F T O V E R S . . . *****/ |
|
1598 /***** *****/ |
|
1599 /***** *****/ |
|
1600 /************************************************/ |
|
1601 /************************************************/ |
|
1602 /************************************************/ |
|
1603 |
|
1604 /* do the single character tokens... |
|
1605 * |
|
1606 * e.g.: ':' '(' ')' '+' '*' ... |
|
1607 */ |
|
1608 . {return yytext[0];} |
|
1609 |
|
1610 |
|
1611 %% |
|
1612 |
|
1613 |
|
1614 /***********************************/ |
|
1615 /* Utility function definitions... */ |
|
1616 /***********************************/ |
|
1617 |
|
1618 /* print the include file stack to stderr... */ |
|
1619 void print_include_stack(void) { |
|
1620 int i; |
|
1621 |
|
1622 if ((include_stack_ptr - 1) >= 0) |
|
1623 fprintf (stderr, "in file "); |
|
1624 for (i = include_stack_ptr - 1; i >= 0; i--) |
|
1625 fprintf (stderr, "included from file %s:%d\n", include_stack[i].filename, include_stack[i].env->lineNumber); |
|
1626 } |
|
1627 |
|
1628 |
|
/* Give the text of the current token back to the input stream, keeping
 * only its first n characters as the matched token.
 *
 * Note on line counting: returning newlines to the input stream means they
 * will be seen (and counted) a second time when flex scans them again.
 * An earlier version of this function compensated for that by decrementing
 * current_tracking->lineNumber once for every '\n' found in
 * yytext[n .. strlen(yytext)-1] before handing the text back.
 * That compensation is currently disabled; this function no longer touches
 * the line counter.
 */
void unput_text(unsigned int n) {
  /* yyless() keeps the first n characters and re-queues the rest. */
  yyless(n);
}
|
1646 |
|
1647 |
|
/* End-of-file hook, called by flex when the current input is exhausted.
 *
 * Return value:
 *   0 - scanning should carry on; the hook is then expected to have opened
 *       the next input file before returning.
 *   1 - there is nothing more to read; flex stops scanning.
 *
 * No follow-up file is ever opened here, so the answer is always 1.
 */
int yywrap(void)
{
  return 1;  /* Stop scanning at end of input file. */
}
|
1667 |
|
1668 |
|
1669 |
|
1670 /*************************************/ |
|
1671 /* Include a main() function to test */ |
|
1672 /* the token parsing by flex.... */ |
|
1673 /*************************************/ |
|
1674 #ifdef TEST_MAIN |
|
1675 |
|
1676 #include "../util/symtable.hh" |
|
1677 |
|
1678 yystype yylval; |
|
1679 YYLTYPE yylloc; |
|
1680 |
|
1681 const char *current_filename; |
|
1682 |
|
1683 |
|
1684 |
|
1685 int get_identifier_token(const char *identifier_str) {return 0;} |
|
1686 int get_direct_variable_token(const char *direct_variable_str) {return 0;} |
|
1687 |
|
1688 |
|
1689 int main(int argc, char **argv) { |
|
1690 |
|
1691 FILE *in_file; |
|
1692 int res; |
|
1693 |
|
1694 if (argc == 1) { |
|
1695 /* Work as an interactive (command line) parser... */ |
|
1696 while((res=yylex())) |
|
1697 fprintf(stderr, "(line %d)token: %d\n", yylineno, res); |
|
1698 } else { |
|
1699 /* Work as non-interactive (file) parser... */ |
|
1700 if((in_file = fopen(argv[1], "r")) == NULL) { |
|
1701 char *errmsg = strdup2("Error opening main file ", argv[1]); |
|
1702 perror(errmsg); |
|
1703 free(errmsg); |
|
1704 return -1; |
|
1705 } |
|
1706 |
|
1707 /* parse the file... */ |
|
1708 yyin = in_file; |
|
1709 current_filename = argv[1]; |
|
1710 while(1) { |
|
1711 res=yylex(); |
|
1712 fprintf(stderr, "(line %d)token: %d (%s)\n", yylineno, res, yylval.ID); |
|
1713 } |
|
1714 } |
|
1715 |
|
1716 return 0; |
|
1717 |
|
1718 } |
|
1719 #endif |