stage1_2/iec_flex.ll
changeset 354 0f24db96b519
parent 350 2c3c4dc34979
child 415 5c115720149c
equal deleted inserted replaced
353:17bffb57a8c5 354:0f24db96b519
       
     1 /*
       
     2  *  matiec - a compiler for the programming languages defined in IEC 61131-3
       
     3  *
       
     4  *  Copyright (C) 2003-2011  Mario de Sousa (msousa@fe.up.pt)
       
     5  *
       
     6  *  This program is free software: you can redistribute it and/or modify
       
     7  *  it under the terms of the GNU General Public License as published by
       
     8  *  the Free Software Foundation, either version 3 of the License, or
       
     9  *  (at your option) any later version.
       
    10  *
       
    11  *  This program is distributed in the hope that it will be useful,
       
    12  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
       
    13  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
       
    14  *  GNU General Public License for more details.
       
    15  *
       
    16  *  You should have received a copy of the GNU General Public License
       
    17  *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
       
    18  *
       
    19  *
       
    20  * This code is made available on the understanding that it will not be
       
    21  * used in safety-critical situations without a full and competent review.
       
    22  */
       
    23 
       
    24 /*
       
    25  * An IEC 61131-3 compiler.
       
    26  *
       
    27  * Based on the
       
    28  * FINAL DRAFT - IEC 61131-3, 2nd Ed. (2001-12-10)
       
    29  *
       
    30  */
       
    31 
       
    32 /*
       
    33  * Stage 1
       
    34  * =======
       
    35  *
       
    36  * This file contains the lexical tokens definitions, from which
       
    37  * the flex utility will generate a lexical parser function.
       
    38  */
       
    39 
       
    40 
       
    41 
       
    42 
       
    43 /*****************************/
       
    44 /* Lexical Parser Options... */
       
    45 /*****************************/
       
    46 
       
    47 /* The lexical analyser will never work in interactive mode,
       
    48  * i.e., it will only process programs saved to files, and never
       
    49  * programs being written inter-actively by the user.
       
    50  * This option saves the resulting parser from calling the
       
    51  * isatty() function, that seems to be generating some compile
       
    52  * errors under some (older?) versions of flex.
       
    53  */
       
    54 %option never-interactive
       
    55 
       
    56 /* Have the lexical analyser use a 'char *yytext' instead of an
       
    57  * array of char 'char yytext[??]' to store the lexical token.
       
    58  */
       
    59 %pointer
       
    60 
       
    61 
       
    62 /* Have the lexical analyser ignore the case of letters.
       
    63  * This will occur for all the tokens and keywords, but
       
    64  * the resulting text handed up to the syntax parser
       
    65  * will not be changed, and keep the original case
       
    66  * of the letters in the input file.
       
    67  */
       
    68 %option case-insensitive
       
    69 
       
    70 /* Have the generated lexical analyser keep track of the
       
    71  * line number it is currently analysing.
       
    72  * This is used to pass up to the syntax parser
       
    73  * the number of the line on which the current
       
    74  * token was found. It will enable the syntax parser
       
    75  * to generate more informative error messages...
       
    76  */
       
    77 %option yylineno
       
    78 
       
    79 /* required for the use of the yy_pop_state() and
       
    80  * yy_push_state() functions
       
    81  */
       
    82 %option stack
       
    83 
       
    84 /* The '%option stack' also requests the inclusion of 
       
    85  * the yy_top_state(), however this function is not
       
    86  * currently being used. This means that the compiler
       
    87  * is complaining about the existence of this function.
       
    88  * The following option removes the yy_top_state()
       
    89  * function from the resulting c code, so the compiler 
       
    90  * no longer complains.
       
    91  */
       
    92 %option noyy_top_state
       
    93 
       
    94 /* We will not be using unput() in our flex code... */
       
    95 %option nounput
       
    96 
       
    97 /**************************************************/
       
    98 /* External Variable and Function declarations... */
       
    99 /**************************************************/
       
   100 
       
   101 
       
   102 %{
       
   103 /* Define TEST_MAIN to include a main() function.
       
   104  * Useful for testing the parser generated by flex.
       
   105  */
       
   106 /*
       
   107 #define TEST_MAIN
       
   108 */
       
   109 /* If lexical parser is compiled by itself, we need to define the following
       
   110  * constant to some string. Under normal circumstances DEFAULT_LIBDIR is set
       
   111  * in the syntax parser header file...
       
   112  */
       
   113 #ifdef TEST_MAIN
       
   114 #define DEFAULT_LIBDIR "just_testing"
       
   115 #endif
       
   116 
       
   117 
       
   118 
       
   119 /* Required for strdup() */
       
   120 #include <string.h>
       
   121 
       
   122 /* Required only for the declaration of abstract syntax classes
       
   123  * (class symbol_c; class token_c; class list_c;)
       
   124  * These will not be used in flex, but the token type union defined
       
   125  * in iec_bison.h contains pointers to these classes, so we must include
       
   126  * it here.
       
   127  */
       
   128 #include "../absyntax/absyntax.hh"
       
   129 
       
   130 
       
   131 /* iec_bison.h is generated by bison.
       
   132  * Contains the definition of the token constants, and the
       
   133  * token value type YYSTYPE (in our case, a 'const char *')
       
   134  */
       
   135 #include "iec_bison.h"
       
   136 #include "stage1_2_priv.hh"
       
   137 
       
   138 
       
   139 /* Variable defined by the bison parser,
       
   140  * where the value of the tokens will be stored
       
   141  */
       
   142 extern YYSTYPE yylval;
       
   143 
       
   144 /* The name of the file currently being parsed...
       
   145  * This variable is declared and read from the code generated by bison!
       
   146  * Note that flex accesses and updates this global variable
       
   147  * appropriately whenever it comes across an {#include "<filename>"}
       
   148  * directive...
       
   149  */
       
   150 /*
       
   151  NOTE: already defined in iec_bison.h 
       
   152 extern const char *current_filename;
       
   153 */
       
   154 
       
   155 
       
   156 /* We will not be using unput() in our flex code... */
       
   157 /* NOTE: it seems that this #define is no longer needed, It has been 
       
   158  * replaced by %option nounput.
       
   159  * Should we simply delete it?
       
   160  * For now leave it in, in case someone is using an old version of flex.
       
   161  * In any case, the most harm that can result is a warning message
       
   162  * when compiling iec.flex.c:
       
   163  * warning: ‘void yyunput(int, char*)’ defined but not used
       
   164  */
       
   165 #define YY_NO_UNPUT
       
   166 
       
   167 /* Variable defined by the bison parser.
       
   168  * It must be initialised with the location
       
   169  * of the token being parsed.
       
   170  * This is only needed if we want to keep
       
   171  * track of the locations, in order to give
       
   172  * more meaningful error messages!
       
   173  */
       
   174 extern YYLTYPE yylloc;
       
   175 /* Redirect flex's input through GetNextChar(): 'result' receives its return
        * value, and any value <= 0 is reported to flex as YY_NULL (end of input).
        * NOTE(review): GetNextChar() is not declared in this chunk - presumably it
        * comes from stage1_2_priv.hh; confirm.
        */
       
   176 #define YY_INPUT(buf,result,max_size)  {\
       
   177     result = GetNextChar(buf, max_size);\
       
   178     if (  result <= 0  )\
       
   179       result = YY_NULL;\
       
   180     }
       
   181 
       
   182 
       
   183 /* A counter to track the order by which each token is processed.
       
   184  * NOTE: This counter is not exactly linear (i.e., it does not get incremented by 1 for each token).
       
   185  *       i.e.. it may get incremented by more than one between two consecutive tokens.
       
   186  *       This is due to the fact that the counter gets incremented every 'user action' in flex,
       
   187  *       however not every user action will result in a token being passed to bison.
       
   188  *       Nevertheless this is still OK, as we are only interested in the relative
       
   189  *       ordering of tokens...
       
   190  */
       
   191 static long int current_order = 0;
       
   192 
       
   193 
       
   194 /* Macro that flex executes for every matched rule, before the rule's action.
       
   195  * We use it to pass the location of the token
       
   196  * back to the bison parser through yylloc:
        *  - first_line and last_line are both set to current_tracking->lineNumber;
        *  - the columns span currentTokenStart .. currentChar - 1;
        *  - first_file/last_file are set to current_filename;
        *  - first_order/last_order are set to current_order.
        * Afterwards the start of the next token is set to currentChar and
        * current_order is incremented (once per executed action, see above).
       
   197  */
       
   198 #define YY_USER_ACTION {\
       
   199 	yylloc.first_line = current_tracking->lineNumber;			\
       
   200 	yylloc.first_column = current_tracking->currentTokenStart;		\
       
   201 	yylloc.first_file = current_filename;					\
       
   202 	yylloc.first_order = current_order;					\
       
   203 	yylloc.last_line = current_tracking->lineNumber;			\
       
   204 	yylloc.last_column = current_tracking->currentChar - 1;			\
       
   205 	yylloc.last_file = current_filename;					\
       
   206 	yylloc.last_order = current_order;					\
       
   207 	current_tracking->currentTokenStart = current_tracking->currentChar;	\
       
   208 	current_order++;							\
       
   209 	}
       
   210 
       
   211 
       
   212 /* Since this lexical parser we defined only works in ASCII based
       
   213  * systems, we might as well make sure it is being compiled on
       
   214  * one...
       
   215  * Lets check a few random characters...
       
   216  */
       
   217 #if (('a' != 0x61) || ('A' != 0x41) || ('z' != 0x7A) || ('Z' != 0x5A) || \
       
   218      ('0' != 0x30) || ('9' != 0x39) || ('(' != 0x28) || ('[' != 0x5B))
       
   219 #error This lexical analyser is not portable to a non ASCII based system.
       
   220 #endif
       
   221 
       
   222 
       
   223 /* Function only called from within flex, but defined
       
   224  * in iec.y!
       
   225  * We declare it here...
       
   226  *
       
   227  * Search for a symbol in either of the two symbol tables
       
   228  * and return the token id of the first symbol found.
       
   229  * Searches first in the variables, and only if not found
       
   230  * does it continue searching in the library elements
       
   231  */
       
   232 //token_id_t get_identifier_token(const char *identifier_str);
       
   233 int get_identifier_token(const char *identifier_str);
       
   234 %}
       
   235 
       
   236 
       
   237 /***************************************************/
       
   238 /* Forward Declaration of functions defined later. */
       
   239 /***************************************************/
       
   240 
       
   241 %{
       
   242 /* return all the text in the current token back to the input stream. */
       
   243 void unput_text(unsigned int n);
       
   244 %}
       
   245 
       
   246 
       
   247 
       
   248 /****************************/
       
   249 /* Lexical Parser States... */
       
   250 /****************************/
       
   251 
       
   252 /* NOTE: Our parser can parse st or il code, intermixed
       
   253  *       within the same file.
       
   254  *       With IL we come across the issue of the EOL (end of line) token.
       
   255  *       ST, and the declaration parts of IL do not use this token!
       
   256  *       If the lexical analyser were to issue this token during ST
       
   257  *       language parsing, or during the declaration of data types,
       
   258  *       function headers, etc. in IL, the syntax parser would crash.
       
   259  *
       
   260  *       We can solve this issue using one of three methods:
       
   261  *        (1) Augment all the syntax that does not accept the EOL
       
   262  *            token to simply ignore it. This makes the syntax
       
   263  *            definition (in iec.y) very cluttered!
       
   264  *        (2) Let the lexical parser figure out which language
       
   265  *            it is parsing, and decide whether or not to issue
       
   266  *            the EOL token. This requires the lexical parser
       
   267  *            to have knowledge of the syntax!, making for a poor
       
   268  *            overall organisation of the code. It would also make it
       
   269  *            very difficult to understand the lexical parser as it
       
   270  *            would use several states, and a state machine to transition
       
   271  *            between the states. The state transitions would be
       
   272  *            intermingled with the lexical parser definition!
       
   273  *        (3) Use a mixture of (1) and (2). The lexical analyser
       
   274  *            merely distinguishes between function headers and function
       
   275  *            bodies, but no longer makes a distinction between il and
       
   276  *            st language bodies. When parsing a body, it will return
       
   277  *            the EOL token. In other states '\n' will be ignored as
       
   278  *            whitespace.
       
   279  *            The ST language syntax has been augmented in the syntax
       
   280  *            parser configuration to ignore any EOL tokens that it may
       
   281  *            come across!
       
   282  *            This option has both drawbacks of option (1) and (2), but
       
   283  *            much less intensely.
       
   284  *            The syntax that gets cluttered is limited to the ST statements
       
   285  *            (which is rather limited, compared to the function headers and
       
   286  *            data type declarations, etc...), while the state machine in
       
   287  *            the lexical parser becomes very simple. All state transitions
       
   288  *            can be handled within the lexical parser by itself, and can be
       
   289  *            easily identified. Thus knowledge of the syntax required by
       
   290  *            the lexical parser is very limited!
       
   291  *
       
   292  * Amazingly enough, I (Mario) got to implement option (3)
       
   293  * at first, requiring two basic states, decl and body.
       
   294  * The lexical parser will enter the body state when
       
   295  * it is parsing the body of a function/program/function block. The
       
   296  * state transition is done when we find a VAR_END that is not followed
       
   297  * by a VAR! This is the syntax knowledge that gets included in the
       
   298  * lexical analyser with this option!
       
   299  * Unfortunately, getting the st syntax parser to ignore EOL anywhere
       
   300  * where they might appear leads to conflicts. This is due to the fact
       
   301  * that the syntax parser uses the single look-ahead token to remove
       
   302  * possible conflicts. When we insert a possible EOL, the single
       
   303  * look ahead token becomes the EOL, which means the potential conflicts
       
   304  * could no longer be resolved.
       
   305  * Removing these conflicts would make the st syntax parser very convoluted,
       
   306  * and adding the extraneous EOL would make it very cluttered.
       
   307  * This option was therefore dropped in favour of another!
       
   308  *
       
   309  * I ended up implementing (2). Unfortunately the lexical analyser can
       
   310  * not easily distinguish between il and st code, since function
       
   311  * calls in il are very similar to function block calls in st.
       
   312  * We therefore use an extra 'body' state. When the lexical parser
       
   313  * finds that last END_VAR, it enters the body state. This state
       
   314  * must figure out what language is being parsed from the first few
       
   315  * tokens, and switch to the correct state (st, il or sfc) according to the
       
   316  * language. This means that we insert quite a bit of knowledge of the
       
   317  * syntax of the languages into the lexical parser. This is ugly, but it
       
   318  * works, and at least it is possible to keep all the state changes together
       
   319  * to make it easier to remove them later on if need be.
       
   320  * Once the language being parsed has been identified, 
       
   321  * the body state returns any matched text back to the buffer with unput(),
       
   322  * to be later matched correctly by the appropriate language parser (st, il or sfc).
       
   323  *
       
   324  * Additionally, in sfc state it may further recursively enter the body state
       
   325  * once again. This is because an sfc body may contain ACTIONS, which are then
       
   326  * written in one of the three languages (ST, IL or SFC), so once again we need
       
   327  * to figure out which language the ACTION in the SFC was written in. We already
       
   328  * have all that done in the body state, so we recursively transition to the body 
       
   329  * state once again.
       
   330  * Note that in this case, when coming out of the st/il state (whichever language
       
   331  * the action was written in) the sfc state will become active again. This is done by
       
   332  * pushing and popping the previously active state!
       
   333  *
       
   334  * The sfc_qualifier_state is required because when parsing actions within an
       
   335  * sfc, we will be expecting action qualifiers (N, P, R, S, DS, SD, ...). In order
       
   336  * for bison to work correctly, these qualifiers must be returned as tokens. However,
       
   337  * these tokens are not reserved keywords, which means it should be possible to
       
   338  * define variables/functions/FBs with any of these names (including 
       
   339  * S and R which are special because they are also IL operators). So, when we are not
       
   340  * expecting any action qualifiers, flex does not return these tokens, and is free
       
   341  * to interpret them as previously defined variables/functions/... as the case may be.
       
   342  *
       
   343  * The state machine has 7 main states (INITIAL, config, decl, body, st, il, sfc), plus the auxiliary task_init, sfc_qualifier and sfc_priority states.
       
   344  * Possible state changes are:
       
   345  *   INITIAL -> goto(decl_state)
       
   346  *               (when a FUNCTION, FUNCTION_BLOCK, or PROGRAM is found,
       
   347  *                and followed by a VAR declaration)
       
   348  *   INITIAL -> goto(body_state) 
       
   349  *                (when a FUNCTION, FUNCTION_BLOCK, or PROGRAM is found,
       
   350  *                 and _not_ followed by a VAR declaration)
       
   351  *                (This transition is actually commented out, since the syntax
       
   352  *                 does not allow the declaration of functions, FBs, or programs
       
   353  *                 without any VAR declaration!)
       
   354  *   INITIAL -> goto(config_state)
       
   355  *                (when a CONFIGURATION is found)
       
   356  *   decl_state    -> push(decl_state); goto(body_state)
       
   357  *                     (when the last END_VAR is found, i.e. the function body starts)
       
   358  *   decl_state    -> push(decl_state); goto(sfc_state)
       
   359  *                     (when it figures out it is parsing sfc language)
       
   360  *   body_state    -> goto(st_state)
       
   361  *                     (when it figures out it is parsing st language)
       
   362  *   body_state    -> goto(il_state)
       
   363  *                     (when it figures out it is parsing il language)
       
   364  *   st_state      -> pop()
       
   365  *                     (when a END_FUNCTION, END_FUNCTION_BLOCK, END_PROGRAM,
       
   366  *                      END_ACTION or END_TRANSITION is found)
       
   367  *   il_state      -> pop()
       
   368  *                     (when a END_FUNCTION, END_FUNCTION_BLOCK, END_PROGRAM,
       
   369  *                      END_ACTION or END_TRANSITION is found)
       
   370  *   decl_state    -> goto(INITIAL)
       
   371  *                     (when a END_FUNCTION, END_FUNCTION_BLOCK, or END_PROGRAM is found)
       
   372  *   sfc_state     -> goto(INITIAL)
       
   373  *                     (when a END_FUNCTION, END_FUNCTION_BLOCK, or END_PROGRAM is found)
       
   374  *   config_state  -> goto(INITIAL)
       
   375  *                     (when a END_CONFIGURATION is found)
       
   376  *   sfc_state     -> push(sfc_state); goto(body_state)
       
   377  *                     (when parsing an action. This transition is requested by bison)
       
   378  *   sfc_state     -> push(sfc_state); goto(sfc_qualifier_state)
       
   379  *                     (when expecting an action qualifier. This transition is requested by bison)
       
   380  *   sfc_qualifier_state -> pop()
       
   381  *                     (when no longer expecting an action qualifier. This transition is requested by bison)
       
   382  *   config_state  -> push(config_state); goto(task_init_state)
       
   383  *                     (when parsing a task initialisation. This transition is requested by bison)
       
   384  *   task_init_state -> pop()
       
   385  *                     (when no longer parsing task initialisation parameters. This transition is requested by bison)
       
   386  *
       
   387  */
       
   388 
       
   389 
       
   390 /* we are parsing a configuration. */
       
   391 %s config_state
       
   392 
       
   393 /* Inside a configuration, we are parsing a task initialisation parameters */
       
   394 /* This means that PRIORITY, SINGLE and INTERVAL must be handled as
       
   395  * tokens, and not as possible identifiers. Note that the above words
       
   396  * are not keywords.
       
   397  */
       
   398 %s task_init_state
       
   399 
       
   400 /* we are parsing a function, program or function block declaration */
       
   401 %s decl_state
       
   402 
       
   403 /* we will be parsing a function body. Whether il/st is remains unknown */
       
   404 %x body_state
       
   405 
       
   406 /* we are parsing il code -> flex must return the EOL tokens!       */
       
   407 %s il_state
       
   408 
       
   409 /* we are parsing st code -> flex must not return the EOL tokens!   */
       
   410 %s st_state
       
   411 
       
   412 /* we are parsing sfc code -> flex must not return the EOL tokens!  */
       
   413 %s sfc_state
       
   414 
       
   415 /* we are parsing sfc code, and expecting an action qualifier.      */
       
   416 %s sfc_qualifier_state
       
   417 
       
   418 /* we are parsing sfc code, and expecting the priority token.       */
       
   419 %s sfc_priority_state
       
   420 
       
   421 
       
   422 
       
   423 
       
   424 /*******************/
       
   425 /* File #include's */
       
   426 /*******************/
       
   427 
       
   428 /* We extend the IEC 61131-3 standard syntax to allow inclusion
       
   429  * of other files, using the IEC 61131-3 pragma directive...
       
   430  * The accepted syntax is:
       
   431  *  {#include "<filename>"}
       
   432  */
       
   433 
       
   434 /* the "include" states are used for picking up the name of an include file */
       
   435 %x include_beg
       
   436 %x include_filename
       
   437 %x include_end
       
   438 
       
   439 
       
   440 file_include_pragma_filename	[^\"]*
       
   441 file_include_pragma_beg		"{#include"{st_whitespace_only}\"
       
   442 file_include_pragma_end		\"{st_whitespace_only}"}"
       
   443 file_include_pragma			{file_include_pragma_beg}{file_include_pragma_filename}{file_include_pragma_end}
       
   444 
       
   445 
       
   446 %{
       
   447 #define MAX_INCLUDE_DEPTH 16  /* number of entries in include_stack[] */
       
   448 /* One include_stack[] entry: a flex buffer handle, a tracking_t pointer and a file name. NOTE(review): presumably the scanner context saved while an included file is being read - the code using it is outside this chunk; confirm. */
       
   449 typedef struct {
       
   450 	  YY_BUFFER_STATE buffer_state;
       
   451 	  tracking_t* env;
       
   452 	  const char *filename;
       
   453 	} include_stack_t;
       
   454 /* current_tracking supplies the lineNumber, currentTokenStart and currentChar values read by YY_USER_ACTION. include_stack_ptr starts at 0. */
       
   455 tracking_t* current_tracking;
       
   456 include_stack_t include_stack[MAX_INCLUDE_DEPTH];
       
   457 int include_stack_ptr = 0;
       
   458 /* NULL-terminated list of directories, beginning with DEFAULT_LIBDIR. NOTE(review): presumably searched in this order when resolving include file names - confirm against the include handling code. */
       
   459 const char *INCLUDE_DIRECTORIES[] = {
       
   460 	DEFAULT_LIBDIR,
       
   461 	".",
       
   462 	"/lib",
       
   463 	"/usr/lib",
       
   464 	"/usr/lib/iec",
       
   465 	NULL /* must end with NULL!! */
       
   466 	};
       
   467 
       
   468 %}
       
   469 
       
   470 
       
   471 
       
   472 /*****************************/
       
   473 /* Preliminary constructs... */
       
   474 /*****************************/
       
   475 
       
   476 /* In order to allow the declaration of POU prototypes (Function, FB, Program, ...),
       
   477  * especially the prototypes of Functions and FBs defined in the standard
       
   478  * (i.e. standard functions and FBs), we extend the IEC 61131-3 standard syntax 
       
   479  * with two pragmas to indicate that the code is to be parsed (going through the 
       
   480  * lexical, syntactical, and semantic analysers), but no code is to be generated.
       
   481  * 
       
   482  * The accepted syntax is:
       
   483  *  {disable code generation}
       
   484  *    ... prototypes ...
       
   485  *  {enable code generation}
       
   486  * 
       
   487  * When parsing these prototypes the abstract syntax tree will be populated as usual,
       
   488  * allowing the semantic analyser to correctly analyse the semantics of calls to these
       
   489  * functions/FBs. However, stage4 will simply ignore all IEC61131-3 code
       
   490  * between the above two pragmas.
       
   491  */
       
   492 
       
   493 disable_code_generation_pragma	"{disable code generation}"
       
   494 enable_code_generation_pragma	"{enable code generation}"
       
   495 
       
   496 
       
   497 /* Any other pragma... */
       
   498 
       
   499 pragma "{"[^}]*"}"
       
   500 
       
   501 /* NOTE: this seemingly unnecessarily complex definition is required
       
   502  *       to be able to eat up comments such as:
       
   503  *          '(* Testing... ! ***** ******)'
       
   504  *       without using the trailing context command in flex (/{context})
       
   505  *       since {comment} itself will later be used with
       
   506  *       trailing context ({comment}/{context})
        *
        *       {comment_text} matches one step of the comment body: either a
        *       single character that is not '*', or a run of '*' followed by a
        *       character that is neither '*' nor ')'.
        *       {comment} is '(' '*', any number of {comment_text}, and then a
        *       closing run of one or more '*' followed by ')'.
        *       Consequently the first '*' ')' pair always ends the comment
        *       (comments do not nest).
       
   507  */
       
   508 not_asterisk				[^*]
       
   509 not_close_parenthesis_nor_asterisk	[^*)]
       
   510 asterisk				"*"
       
   511 comment_text		{not_asterisk}|(({asterisk}+){not_close_parenthesis_nor_asterisk})
       
   512 
       
   513 comment		"(*"({comment_text}*)({asterisk}+)")"
       
   514 
       
   515 
       
   516 /*
       
   517 3.1 Whitespace
       
   518  (NOTE: Whitespace IS clearly defined, to include newline!!! See section 2.1.4!!!)
       
   519  No definition of whitespace is given, in other words, the characters that may be used to separate language tokens are not precisely defined. One may nevertheless make an intelligent guess of using the space (' '), and other characters also commonly considered whitespace in other programming languages (horizontal tab, vertical tab, form feed, etc.).
       
   520  The main question is whether the newline character should be considered whitespace. IL language statements use an EOL token (End Of Line) to distinguish between some language constructs. The EOL token itself is openly defined as "normally consist[ing] of the 'paragraph separator' ", leaving the final choice open to each implementation. If we choose the newline character to represent the EOL token, it may then not be considered whitespace.
       
   521  On the other hand, some examples that come in a non-normative annex of the specification allow function declarations to span multiple lines, which means that the newline character is being considered as whitespace.
       
   526  Our implementation works around this issue by including the new line character in the whitespace while parsing function declarations and the ST language, and parsing it as the EOL token only while parsing IL language statements. This requires the use of a state machine in the lexical parser that needs at least some knowledge of the syntax itself.
       
   527 */
       
   528 /* NOTE: Our definition of whitespace will only work in ASCII!
       
   529  *
       
   530  *       Since the IL language needs to know the location of newline
       
   531  *       (token EOL -> '\n' ), we need one definition of whitespace
       
   532  *       for each language...
       
   533  */
       
   534 /*
       
   535  * NOTE: we cannot use
       
   536  *         st_whitespace	[:space:]*
       
   537  *       since we use {st_whitespace} as trailing context. In our case
       
   538  *       this would not constitute "dangerous trailing context", but the
       
   539  *       lexical generator (i.e. flex) does not know this (since it does
       
   540  *       not know which characters belong to the set [:space:]), and will
       
   541  *       generate a "dangerous trailing context" warning!
       
   542  *       We use this alternative just to stop the flex utility from
       
   543  *       generating the invalid (in this case) warning...
       
   544  */
       
   545 
       
   546 st_whitespace_only	[ \f\n\r\t\v]*
       
   547 il_whitespace_only	[ \f\r\t\v]*
       
   548 
       
   549 st_whitespace_text	{st_whitespace_only}|{comment}|{pragma}
       
   550 il_whitespace_text	{il_whitespace_only}|{comment}|{pragma}
       
   551 
       
   552 st_whitespace	{st_whitespace_text}*
       
   553 il_whitespace	{il_whitespace_text}*
       
   554 
       
   555 st_whitespace_text_no_pragma	{st_whitespace_only}|{comment}
       
   556 il_whitespace_text_no_pragma	{il_whitespace_only}|{comment}
       
   557 
       
   558 st_whitespace_no_pragma	{st_whitespace_text_no_pragma}*
       
   559 il_whitespace_no_pragma	{il_whitespace_text_no_pragma}*
       
   560 
       
   561 qualified_identifier	{identifier}(\.{identifier})*
       
   562 
       
   563 
       
   564 
       
   565 /*****************************************/
       
   566 /* B.1.1 Letters, digits and identifiers */
       
   567 /*****************************************/
       
   568 /* NOTE: The following definitions only work if the host computer
       
   569  *       is using the ASCII mapping. E.g., with EBCDIC [A-Z]
       
   570  *       contains non-alphabetic characters!
       
   571  *       The correct way of doing it would be to use
       
   572  *       the [:upper:] etc... definitions.
       
   573  *
       
   574  *       Unfortunately, further on we need all printable
       
   575  *       characters (i.e. [:print:]), but excluding '$'.
       
   576  *       Flex does not allow sets to be composed by excluding
       
   577  *       elements. Sets may only be constructed by adding new
       
   578  *       elements, which means that we have to revert to
       
   579  *       [\x20\x21\x23\x25\x26\x28-\x7E] for the definition
       
   580  *       of the printable characters with the required exceptions.
       
   581  *       The above also implies the use of ASCII, but now we have
       
   582  *       no way to work around it!
       
   583  *
       
   584  *       The conclusion is that our parser is limited to ASCII
       
   585  *       based host computers!!
       
   586  */
       
   587 letter		[A-Za-z]
       
   588 digit		[0-9]
       
   589 octal_digit	[0-7]
       
   590 hex_digit	{digit}|[A-F]
       
   591 identifier	({letter}|(_({letter}|{digit})))((_?({letter}|{digit}))*)
       
   592 
       
   593 /*******************/
       
   594 /* B.1.2 Constants */
       
   595 /*******************/
       
   596 
       
   597 /******************************/
       
   598 /* B.1.2.1   Numeric literals */
       
   599 /******************************/
       
   600 integer         {digit}((_?{digit})*)
       
   601 binary_integer  2#{bit}((_?{bit})*)
       
   602 bit		[0-1]
       
   603 octal_integer   8#{octal_digit}((_?{octal_digit})*)
       
   604 hex_integer     16#{hex_digit}((_?{hex_digit})*)
       
   605 exponent        [Ee]([+-]?){integer}
       
   606 /* The correct definition for real would be:
       
   607  * real		{integer}\.{integer}({exponent}?)
       
   608  *
       
   609  * Unfortunately, the spec also defines fixed_point (B 1.2.3.1) as:
       
   610  * fixed_point		{integer}\.{integer}
       
   611  *
       
   612  * This means that {integer}\.{integer} could be interpreted
       
   613  * as either a fixed_point or a real.
       
   614  * I have opted to interpret {integer}\.{integer} as a fixed_point.
       
   615  * In order to do this, the definition of real has been changed to:
       
   616  * real		{integer}\.{integer}{exponent}
       
   617  *
       
   618  * This means that the syntax parser now needs to define a real to be
       
   619  * either a real_token or a fixed_point_token!
       
   620  */
       
   621 real		{integer}\.{integer}{exponent}
       
   622 
       
   623 
       
   624 /*******************************/
       
   625 /* B.1.2.2   Character Strings */
       
   626 /*******************************/
       
   627 /*
       
   628 common_character_representation :=
       
   629 <any printable character except '$', '"' or "'">
       
   630 |'$$'
       
   631 |'$L'|'$N'|'$P'|'$R'|'$T'
       
   632 |'$l'|'$n'|'$p'|'$r'|'$t'
       
   633 
       
   634 NOTE: 	$ = 0x24
       
   635 	" = 0x22
       
   636 	' = 0x27
       
   637 
       
   638 	printable chars in ASCII: 0x20-0x7E
       
   639 */
       
   640 
       
   641 esc_char_u		$L|$N|$P|$R|$T
       
   642 esc_char_l		$l|$n|$p|$r|$t
       
   643 esc_char		$$|{esc_char_u}|{esc_char_l}
       
   644 double_byte_char	(${hex_digit}{hex_digit}{hex_digit}{hex_digit})
       
   645 single_byte_char	(${hex_digit}{hex_digit})
       
   646 
       
   647 /* WARNING:
       
   648  * This definition is only valid in ASCII...
       
   649  *
       
   650  * Flex includes the function print_char() that defines
       
   651  * all printable characters portably (i.e. whatever character
       
   652  * encoding is currently being used , ASCII, EBCDIC, etc...)
       
   653  * Unfortunately, we cannot generate the definition of
       
   654  * common_character_representation portably, since flex
       
   655  * does not allow definition of sets by subtracting
       
   656  * elements in one set from another set.
       
   657  * This means we must build up the definition of
       
   658  * common_character_representation using only set addition,
       
   659  * which leaves us with the only choice of defining the
       
   660  * characters non-portably...
       
   661  */
       
   662 common_character_representation		[\x20\x21\x23\x25\x26\x28-\x7E]|{esc_char}
       
   663 double_byte_character_representation 	$\"|'|{double_byte_char}|{common_character_representation}
       
   664 single_byte_character_representation 	$'|\"|{single_byte_char}|{common_character_representation}
       
   665 
       
   666 
       
   667 double_byte_character_string	\"({double_byte_character_representation}*)\"
       
   668 single_byte_character_string	'({single_byte_character_representation}*)'
       
   669 
       
   670 
       
   671 /************************/
       
   672 /* B 1.2.3.1 - Duration */
       
   673 /************************/
       
   674 fixed_point		{integer}\.{integer}
       
   675 
       
   676 fixed_point_d		{fixed_point}d
       
   677 integer_d		{integer}d
       
   678 
       
   679 fixed_point_h		{fixed_point}h
       
   680 integer_h		{integer}h
       
   681 
       
   682 fixed_point_m		{fixed_point}m
       
   683 integer_m		{integer}m
       
   684 
       
   685 fixed_point_s		{fixed_point}s
       
   686 integer_s		{integer}s
       
   687 
       
   688 fixed_point_ms		{fixed_point}ms
       
   689 integer_ms		{integer}ms
       
   690 
       
   691 
       
   692 /********************************************/
       
   693 /* B.1.4.1   Directly Represented Variables */
       
   694 /********************************************/
       
   695 /* The definition that follows the standard: '%', a location prefix, an optional size prefix, an integer, then zero or more '.'-separated integers. The '.' is escaped because an unescaped '.' in a flex pattern matches any character except newline. */
       
   696 
       
   697 location_prefix			[IQM]
       
   698 size_prefix			[XBWDL]
       
   699 direct_variable_standard	%{location_prefix}({size_prefix}?){integer}((\.{integer})*)
       
   700 
       
   701 
       
   702 /* For the MatPLC, we will accept %<identifier>
       
   703  * as a direct variable, this being mapped onto the MatPLC point
       
   704  * named <identifier>
       
   705  */
       
   706 /* TODO: we should not restrict it to only the accepted syntax
       
   707  * of <identifier> as specified by the standard. MatPLC point names
       
   708  * have a more permissive syntax.
       
   709  *
       
   710  * e.g. "P__234"
       
   711  *    Is a valid MatPLC point name, but not a valid <identifier> !!
       
   712  *    The same happens with names such as "333", "349+23", etc...
       
   713  *    How can we handle these more expressive names in our case?
       
   714  *    Remember that some direct variable may remain anonymous, with
       
   715  *    declarations such as:
       
   716  *    VAR
       
   717  *       AT %I3 : BYTE := 255;
       
   718  *    END_VAR
       
   719  *    in which case we are currently using "%I3" as the variable
       
   720  *    name.
       
   721  */
       
   722 direct_variable_matplc		%{identifier}
       
   723 
       
   724 direct_variable			{direct_variable_standard}|{direct_variable_matplc}
       
   725 
       
   726 /******************************************/
       
   727 /* B 1.4.3 - Declaration & Initialisation */
       
   728 /******************************************/
       
   729 incompl_location	%[IQM]\*
       
   730 
       
   731 
       
   732 
       
   733 
       
   734 %%
       
   735 	/* fprintf(stderr, "flex: state %d\n", YY_START); */
       
   736 
       
   737 	/*****************************************************/
       
   738 	/*****************************************************/
       
   739 	/*****************************************************/
       
   740 	/*****                                           *****/
       
   741 	/*****                                           *****/
       
   742 	/*****   F I R S T    T H I N G S    F I R S T   *****/
       
   743 	/*****                                           *****/
       
   744 	/*****                                           *****/
       
   745 	/*****************************************************/
       
   746 	/*****************************************************/
       
   747 	/*****************************************************/
       
   748 
       
   749 	/***********************************************************/
       
   750 	/* Handle requests sent by bison for flex to change state. */
       
   751 	/***********************************************************/
       
   752 	if (get_goto_body_state()) {
       
   753 	  yy_push_state(body_state);
       
   754 	  rst_goto_body_state();
       
   755 	}
       
   756 
       
   757 	if (get_goto_sfc_qualifier_state()) {
       
   758 	  yy_push_state(sfc_qualifier_state);
       
   759 	  rst_goto_sfc_qualifier_state();
       
   760 	}
       
   761 
       
   762 	if (get_goto_sfc_priority_state()) {
       
   763 	  yy_push_state(sfc_priority_state);
       
   764 	  rst_goto_sfc_priority_state();
       
   765 	}
       
   766 
       
   767 	if (get_goto_task_init_state()) {
       
   768 	  yy_push_state(task_init_state);
       
   769 	  rst_goto_task_init_state();
       
   770 	}
       
   771 
       
   772 	if (get_pop_state()) {
       
   773 	  yy_pop_state();
       
   774 	  rst_pop_state();
       
   775 	}
       
   776 
       
   777 	/***************************/
       
   778 	/* Handle the pragmas!     */
       
   779 	/***************************/
       
   780 
       
   781 	/* We start off by searching for the pragmas we handle in the lexical parser. */
       
   782 <INITIAL>{file_include_pragma}	unput_text(0); yy_push_state(include_beg);
       
   783 
       
   784 	/* Pragmas sent to syntax analyser (bison) */
       
   785 {disable_code_generation_pragma}               return disable_code_generation_pragma_token;
       
   786 {enable_code_generation_pragma}                return enable_code_generation_pragma_token;
       
   787 <body_state>{disable_code_generation_pragma}   return disable_code_generation_pragma_token;
       
   788 <body_state>{enable_code_generation_pragma}    return enable_code_generation_pragma_token;
       
   789 
       
   790 	/* Any other pragma we find, we just pass it up to the syntax parser...   */
       
   791 	/* Note that the <body_state> state is exclusive, so we have to include it here too. */
       
   792 {pragma}	{/* return the pragma without the enclosing '{' and '}' */
       
   793 		 yytext[strlen(yytext)-1] = '\0';	/* overwrite the last matched character (the closing '}') */
       
   794 		 yylval.ID=strdup(yytext+1);	/* copy starts after the first character (the opening '{') */
       
   795 		 return pragma_token;
       
   796 		}
       
   797 <body_state>{pragma} {/* return the pragma without the enclosing '{' and '}' (same action as the rule above, repeated for <body_state>) */
       
   798 		 yytext[strlen(yytext)-1] = '\0';	/* overwrite the last matched character (the closing '}') */
       
   799 		 yylval.ID=strdup(yytext+1);	/* copy starts after the first character (the opening '{') */
       
   800 		 return pragma_token;
       
   801 		}
       
   802 
       
   803 
       
   804 	/*********************************/
       
   805 	/* Handle the file includes!     */
       
   806 	/*********************************/
       
   807 <include_beg>{file_include_pragma_beg}	BEGIN(include_filename);
       
   808 
       
   809 <include_filename>{file_include_pragma_filename}	{
       
   810 			  /* Got the include file name: save the current input context on include_stack, open the file from the first INCLUDE_DIRECTORIES entry where fopen() succeeds, and switch the scanner to it. Exits on any failure. */
       
   811 			  int i;
       
   812 
       
   813 			  if (include_stack_ptr >= MAX_INCLUDE_DEPTH) {
       
   814 			    fprintf(stderr, "Includes nested too deeply\n");
       
   815 			    exit( 1 );
       
   816 			  }
       
   817 			  include_stack[include_stack_ptr].buffer_state = YY_CURRENT_BUFFER;	/* restored by the <<EOF>> rule */
       
   818 			  include_stack[include_stack_ptr].env = current_tracking;
       
   819 			  include_stack[include_stack_ptr].filename = current_filename;
       
   820 			  
       
   821 			  for (i = 0, yyin = NULL; (INCLUDE_DIRECTORIES[i] != NULL) && (yyin == NULL); i++) {	/* stop at the first directory where the file opens */
       
   822 			    char *full_name = strdup3(INCLUDE_DIRECTORIES[i], "/", yytext);
       
   823 			    if (full_name == NULL) {
       
   824 			      fprintf(stderr, "Out of memory!\n");
       
   825 			      exit( 1 );
       
   826 			    }
       
   827 			    yyin = fopen(full_name, "r");
       
   828 			    free(full_name);
       
   829 			  }
       
   830 
       
   831 			  if (!yyin) {
       
   832 			    fprintf(stderr, "Error opening included file %s\n", yytext);
       
   833 			    exit( 1 );
       
   834 			  }
       
   835 
       
   836 			  if ((current_filename = strdup(yytext)) == NULL) {fprintf(stderr, "Out of memory!\n"); exit( 1 );}	/* same out-of-memory handling as for full_name above */
       
   837 			  current_tracking = GetNewTracking(yyin);
       
   838 			  include_stack_ptr++;
       
   839 
       
   840 			  /* switch input buffer to new file... */
       
   841 			  yy_switch_to_buffer(yy_create_buffer(yyin, YY_BUF_SIZE));
       
   842 			  /* switch to whatever state was active before the include file */
       
   843 			  yy_pop_state();
       
   844 			  /* now process the new file... */
       
   845 			}
       
   846 
       
   847 
       
   848 <<EOF>>			{     /* End of the current input: terminate at the end of the main file, otherwise resume the including file.
       
   849 			       * NOTE: We must not change the value of include_stack_ptr
       
   850 			       *       just yet. We must only decrement it if we are NOT
       
   851 			       *       at the end of the main file.
       
   852 			       *       If we have finished parsing the main file, then we
       
   853 			       *       must leave include_stack_ptr at 0, in case the parser
       
   854 			       *       is called once again with a new file (which we currently do!)
       
   855 			       */
       
   856 			  free(current_tracking);
       
   857 			  if (include_stack_ptr == 0) {
       
   858 			      /* yyterminate() terminates the scanner and returns a 0 to the
       
   859 			       * scanner's caller, indicating "all done".
       
   860 			       *
       
   861 			       * Our syntax parser (written with bison) has the token
       
   862 			       * END_OF_INPUT associated to the value 0, so even though
       
   863 			       * we don't explicitly return the token END_OF_INPUT
       
   864 			       * calling yyterminate() is equivalent to doing that.
       
   865 			       */
       
   866 			    yyterminate();
       
   867 			  }      
       
   868  else {
       
   869 			    --include_stack_ptr;
       
   870 			    yy_delete_buffer(YY_CURRENT_BUFFER);	/* NOTE(review): nothing in this handler fclose()s the FILE opened for the include -- confirm it is closed elsewhere */
       
   871 			    yy_switch_to_buffer((include_stack[include_stack_ptr]).buffer_state);
       
   872 			    current_tracking = include_stack[include_stack_ptr].env;
       
   873 			      /* About the (char *) cast in the disabled free() below: removing
       
   874 			       * constness of char * would actually be safe, since the only real
       
   875 			       * const char * that is stored on the stack is the first one (i.e.
       
   876 			       * the one that gets stored in include_stack[0]), which is never free'd!
       
   877 			       */
       
   878 			    /* NOTE: We do __NOT__ free the malloc()'d memory since 
       
   879 			     *       pointers to this filename will be kept by many objects
       
   880 			     *       in the abstract syntax tree.
       
   881 			     *       This will later be used to provide correct error
       
   882 			     *       messages during semantic analysis (stage 3)
       
   883 			     */
       
   884 			    /* free((char *)current_filename); */
       
   885 			    current_filename = include_stack[include_stack_ptr].filename;
       
   886 			    yy_push_state(include_end);	/* popped again by the <include_end> rule */
       
   887 			  }
       
   888 			}
       
   889 
       
   890 <include_end>{file_include_pragma_end}	yy_pop_state();
       
   891 
       
   892 
       
   893 	/*********************************/
       
   894 	/* Handle all the state changes! */
       
   895 	/*********************************/
       
   896 
       
   897 	/* INITIAL -> decl_state */
       
   898 <INITIAL>{
       
   899 	/* FUNCTION, FUNCTION_BLOCK and PROGRAM switch to <decl_state>; CONFIGURATION switches to <config_state>.
       
   900 	 *
       
   901 	 * NOTE: what about functions that do not declare variables, and go directly to the body_state???
       
   902 	 *      - According to Section 2.5.1.3 (Function Declaration), item 2 in the list, a FUNCTION
       
   903 	 *        must have at least one input argument.
       
   904 	 *      - According to Section 2.5.2.2 (Function Block Declaration), a FUNCTION_BLOCK
       
   905 	 *        must have at least one input argument.
       
   906 	 *      - According to Section 2.5.3 (Programs), a PROGRAM must have at least one input
       
   907 	 *        argument.
       
   908 	 *       So a correct declaration will have at least one VAR_INPUT ... END_VAR construct!
       
   909 	 *
       
   910 	 *       All the above means that we needn't worry about PROGRAMs, FUNCTIONs or
       
   911 	 *       FUNCTION_BLOCKs that do not have at least one END_VAR before the body_state.
       
   912 	 *       If the code has an error, and no END_VAR before the body, we will simply
       
   913 	 *       continue in the <decl_state> state, until the end of the FUNCTION, FUNCTION_BLOCK
       
   914 	 *       or PROGRAM.
       
   915 	 */
       
   916 FUNCTION				BEGIN(decl_state); return FUNCTION;
       
   917 FUNCTION_BLOCK				BEGIN(decl_state); return FUNCTION_BLOCK;
       
   918 PROGRAM					BEGIN(decl_state); return PROGRAM;
       
   919 CONFIGURATION				BEGIN(config_state); return CONFIGURATION;
       
   920 }
       
   921 
       
   922 	/* INITIAL -> body_state */
       
   923 	/* required if the function, program, etc.. has no VAR block! */
       
   924 	/* We comment it out since the standard does not allow this.  */
       
   925 	/* NOTE: Even if we were to include the following code, it    */
       
   926 	/*       would have no effect whatsoever since the above      */
       
   927 	/*       rules will take precedence!                          */
       
   928 	/*
       
   929 <INITIAL>{
       
   930 FUNCTION	BEGIN(body_state); return FUNCTION;
       
   931 FUNCTION_BLOCK	BEGIN(body_state); return FUNCTION_BLOCK;
       
   932 PROGRAM		BEGIN(body_state); return PROGRAM;
       
   933 }
       
   934 	*/
       
   935 
       
   936 	/* decl_state -> (body_state | sfc_state) */
       
   937 <decl_state>{
       
   938 END_VAR{st_whitespace}VAR		{unput_text(strlen("END_VAR")); 
       
   939 					 return END_VAR;
       
   940 					}
       
   941 END_VAR{st_whitespace}INITIAL_STEP	{unput_text(strlen("END_VAR")); 
       
   942 					 yy_push_state(sfc_state); 
       
   943 					 return END_VAR;
       
   944 					}
       
   945 END_VAR{st_whitespace}			{unput_text(strlen("END_VAR")); 
       
   946 					 cmd_goto_body_state(); 
       
   947 					 return END_VAR;
       
   948 					}
       
   949 }
       
   950 
       
   951 	/* body_state -> (il_state | st_state) */
       
   952 <body_state>{
       
   953 {st_whitespace_no_pragma}			/* Eat any whitespace */
       
   954 {qualified_identifier}{st_whitespace}":="	  unput_text(0); BEGIN(st_state);
       
   955 {direct_variable_standard}{st_whitespace}":="	  unput_text(0); BEGIN(st_state);
       
   956 {qualified_identifier}"["			unput_text(0); BEGIN(st_state);
       
   957 
       
   958 RETURN					unput_text(0); BEGIN(st_state);
       
   959 IF							unput_text(0); BEGIN(st_state);
       
   960 CASE						unput_text(0); BEGIN(st_state);
       
   961 FOR							unput_text(0); BEGIN(st_state);
       
   962 WHILE						unput_text(0); BEGIN(st_state);
       
   963 REPEAT					unput_text(0); BEGIN(st_state);
       
   964 EXIT						unput_text(0); BEGIN(st_state);
       
   965 
       
   966 	/* ':=' occurs only in transitions, and not Function or FB bodies! */
       
   967 :=							unput_text(0); BEGIN(st_state);
       
   968 
       
   969 	/* Hopefully, the above rules (along with the last one),
       
   970          * used to distinguish ST from IL, are 
       
   971 	 * enough to handle all occurrences. However, if
       
   972 	 * there is some situation where the compiler is getting confused,
       
   973 	 * we add the following rule to detect 'label:' in IL code. This will
       
   974 	 * allow the user to insert a label right at the beginning (which
       
   975 	 * will probably not be used further by his code) simply as a way
       
   976 	 * to force the compiler to interpret his code as IL code.
       
   977 	 */
       
   978 {identifier}{st_whitespace}":"{st_whitespace}	unput_text(0); BEGIN(il_state);
       
   979 
       
   980 {identifier}	{int token = get_identifier_token(yytext);
       
   981 		 if (token == prev_declared_fb_name_token) {
       
   982 		   /* the code has a call to a function block */
       
   983 		   /* NOTE: if we ever decide to allow the user to use IL operator tokens
       
   984 		    * (LD, ST, ...) as identifiers for variable names (including
       
   985 		    * function block instances), then the above inference/conclusion 
       
   986 		    * may be incorrect, and this condition may have to be changed!
       
   987 		    */	
       
   988 		   BEGIN(st_state);
       
   989 		 } else {
       
   990 		   BEGIN(il_state);
       
   991 		 }
       
   992 		 unput_text(0);
       
   993 		}
       
   994 
       
   995 .		unput_text(0); BEGIN(il_state);
       
   996 }	/* end of body_state lexical parser */
       
   997 
       
   998 	/* (il_state | st_state) -> $previous_state (decl_state or sfc_state) */
       
   999 <il_state,st_state>{
       
  1000 END_FUNCTION		yy_pop_state(); unput_text(0);
       
  1001 END_FUNCTION_BLOCK	yy_pop_state(); unput_text(0);
       
  1002 END_PROGRAM		yy_pop_state(); unput_text(0);
       
  1003 END_TRANSITION		yy_pop_state(); unput_text(0);
       
  1004 END_ACTION		yy_pop_state(); unput_text(0);
       
  1005 }
       
  1006 
       
  1007 	/* sfc_state -> INITIAL */
       
  1008 <sfc_state>{
       
  1009 END_FUNCTION		yy_pop_state(); unput_text(0);
       
  1010 END_FUNCTION_BLOCK	yy_pop_state(); unput_text(0);
       
  1011 END_PROGRAM		yy_pop_state(); unput_text(0);
       
  1012 }
       
  1013 
       
  1014 	/* decl_state -> INITIAL */
       
  1015 <decl_state>{
       
  1016 END_FUNCTION		BEGIN(INITIAL); return END_FUNCTION;
       
  1017 END_FUNCTION_BLOCK	BEGIN(INITIAL); return END_FUNCTION_BLOCK;
       
  1018 END_PROGRAM		BEGIN(INITIAL); return END_PROGRAM;
       
  1019 }
       
  1020 	/* config -> INITIAL */
       
  1021 END_CONFIGURATION	BEGIN(INITIAL); return END_CONFIGURATION;
       
  1022 
       
  1023 
       
  1024 
       
  1025 	/***************************************/
       
  1026 	/* Next is to remove all whitespace    */
       
  1027 	/***************************************/
       
  1028 	/* NOTE: pragmas are handled right at the beginning... */
       
  1029 
       
  1030 <INITIAL,config_state,decl_state,st_state,sfc_state,task_init_state,sfc_qualifier_state>{st_whitespace_no_pragma}	/* Eat any whitespace */
       
  1031 <il_state>{il_whitespace_no_pragma}		/* Eat any whitespace */
       
  1032 
       
  1033 
       
  1034 
       
  1035 	/*****************************************/
       
  1036 	/* B.1.1 Letters, digits and identifiers */
       
  1037 	/*****************************************/
       
  1038 	/* NOTE: 'R1', 'IN', etc... are IL operators, and therefore tokens
       
  1039 	 *       On the other hand, the spec does not define them as keywords,
       
  1040 	 *       which means they may be re-used for variable names, etc...!
       
  1041 	 *       The syntax parser already caters for the possibility of these
       
  1042 	 *       tokens being used for variable names in their declarations.
       
  1043 	 *       When they are declared, they will be added to the variable symbol table!
       
  1044 	 *       Further appearances of these tokens must no longer be parsed
       
  1045 	 *       as R1_tokens etc..., but rather as variable_name_tokens!
       
  1046 	 *
       
  1047 	 *       That is why the first thing we do with identifiers, even before
       
  1048 	 *       checking whether they may be a 'keyword', is to check whether
       
  1049 	 *       they have been previously declared as a variable name,
       
  1050 	 *
       
  1051 	 *       However, we have a dilemma! Should we here also check for
       
  1052 	 *       prev_declared_derived_function_name_token?
       
  1053 	 *       If we do, then the 'MOD' default library function (defined in
       
  1054 	 *       the standard) will always be returned as a function name, and
       
  1055 	 *       it will therefore not be possible to use it as an operator as 
       
  1056 	 *       in the following ST expression 'X := Y MOD Z;' !
       
  1057 	 *       If we don't, then it will not even be possible to use 'MOD'
       
  1058 	 *       as a function as in 'X := MOD(Y, Z);'
       
  1059 	 *       We solve this by NOT testing for function names here, and
       
  1060 	 *       handling this function and keyword clash in bison!
       
  1061 	 */
       
  1062  /*
       
  1063 {identifier} 	{int token = get_identifier_token(yytext);
       
  1064 		 // fprintf(stderr, "flex: analysing identifier '%s'...", yytext); 
       
  1065 		 if ((token == prev_declared_variable_name_token) ||
       
  1066 //		     (token == prev_declared_derived_function_name_token) || // DO NOT add this condition!
       
  1067 		     (token == prev_declared_fb_name_token)) {
       
  1068 		 // if (token != identifier_token)
       
  1069 		 // * NOTE: if we replace the above uncommented conditions with
       
  1070                   *       the simple test of (token != identifier_token), then 
       
  1071                   *       'MOD' et al must be removed from the 
       
  1072                   *       library_symbol_table as a default function name!
       
  1073 		  * //
       
  1074 		   yylval.ID=strdup(yytext);
       
  1075 		   // fprintf(stderr, "returning token %d\n", token); 
       
  1076 		   return token;
       
  1077 		 }
       
  1078 		 // otherwise, leave it for the other lexical parser rules... 
       
  1079 		 // fprintf(stderr, "rejecting\n"); 
       
  1080 		 REJECT;
       
  1081 		}
       
  1082  */
       
  1083 
       
  1084 	/******************************************************/
       
  1085 	/******************************************************/
       
  1086 	/******************************************************/
       
  1087 	/*****                                            *****/
       
  1088 	/*****                                            *****/
       
  1089 	/*****   N O W    D O   T H E   K E Y W O R D S   *****/
       
  1090 	/*****                                            *****/
       
  1091 	/*****                                            *****/
       
  1092 	/******************************************************/
       
  1093 	/******************************************************/
       
  1094 	/******************************************************/
       
  1095 
       
  1096 
       
  1097 EN	return EN;			/* Keyword */
       
  1098 ENO	return ENO;			/* Keyword */
       
  1099 
       
  1100 
       
  1101 	/******************************/
       
  1102 	/* B 1.2.1 - Numeric Literals */
       
  1103 	/******************************/
       
  1104 TRUE		return TRUE;		/* Keyword */
       
  1105 BOOL#1  	return boolean_true_literal_token;
       
  1106 BOOL#TRUE	return boolean_true_literal_token;
       
  1107 SAFEBOOL#1	{if (get_opt_safe_extensions()) {return safeboolean_true_literal_token;} else{REJECT;}} /* Keyword (Data Type) */ 
       
  1108 SAFEBOOL#TRUE	{if (get_opt_safe_extensions()) {return safeboolean_true_literal_token;} else{REJECT;}} /* Keyword (Data Type) */
       
  1109 
       
  1110 FALSE		return FALSE;		/* Keyword */
       
  1111 BOOL#0  	return boolean_false_literal_token;
       
  1112 BOOL#FALSE  	return boolean_false_literal_token;
       
  1113 SAFEBOOL#0	{if (get_opt_safe_extensions()) {return safeboolean_false_literal_token;} else{REJECT;}} /* Keyword (Data Type) */ 
       
  1114 SAFEBOOL#FALSE	{if (get_opt_safe_extensions()) {return safeboolean_false_literal_token;} else{REJECT;}} /* Keyword (Data Type) */
       
  1115 
       
  1116 
       
  1117 	/************************/
       
  1118 	/* B 1.2.3.1 - Duration */
       
  1119 	/************************/
       
  1120 t#		return T_SHARP;		/* Delimiter */
       
  1121 T#		return T_SHARP;		/* Delimiter */
       
  1122 TIME		return TIME;		/* Keyword (Data Type) */
       
  1123 
       
  1124 
       
  1125 	/************************************/
       
  1126 	/* B 1.2.3.2 - Time of day and Date */
       
  1127 	/************************************/
       
  1128 TIME_OF_DAY	return TIME_OF_DAY;	/* Keyword (Data Type) */
       
  1129 TOD		return TIME_OF_DAY;	/* Keyword (Data Type) */
       
  1130 DATE		return DATE;		/* Keyword (Data Type) */
       
  1131 d#		return D_SHARP;		/* Delimiter */
       
  1132 D#		return D_SHARP;		/* Delimiter */
       
  1133 DATE_AND_TIME	return DATE_AND_TIME;	/* Keyword (Data Type) */
       
  1134 DT		return DATE_AND_TIME;	/* Keyword (Data Type) */
       
  1135 
       
  1136 
       
  1137 	/***********************************/
       
  1138 	/* B 1.3.1 - Elementary Data Types */
       
  1139 	/***********************************/
       
  1140 BOOL		return BOOL;		/* Keyword (Data Type) */
       
  1141 
       
  1142 BYTE		return BYTE;		/* Keyword (Data Type) */
       
  1143 WORD		return WORD;		/* Keyword (Data Type) */
       
  1144 DWORD		return DWORD;		/* Keyword (Data Type) */
       
  1145 LWORD		return LWORD;		/* Keyword (Data Type) */
       
  1146 
       
  1147 SINT		return SINT;		/* Keyword (Data Type) */
       
  1148 INT		return INT;		/* Keyword (Data Type) */
       
  1149 DINT		return DINT;		/* Keyword (Data Type) */
       
  1150 LINT		return LINT;		/* Keyword (Data Type) */
       
  1151 
       
  1152 USINT		return USINT;		/* Keyword (Data Type) */
       
  1153 UINT		return UINT;		/* Keyword (Data Type) */
       
  1154 UDINT		return UDINT;		/* Keyword (Data Type) */
       
  1155 ULINT		return ULINT;		/* Keyword (Data Type) */
       
  1156 
       
  1157 REAL		return REAL;		/* Keyword (Data Type) */
       
  1158 LREAL		return LREAL;		/* Keyword (Data Type) */
       
  1159 
       
  1160 WSTRING		return WSTRING;		/* Keyword (Data Type) */
       
  1161 STRING		return STRING;		/* Keyword (Data Type) */
       
  1162 
       
  1163 TIME		return TIME;		/* Keyword (Data Type); same pattern as the TIME rule under B 1.2.3.1 above */
       
  1164 DATE		return DATE;		/* Keyword (Data Type); same pattern as the DATE rule under B 1.2.3.2 above */
       
  1165 DT		return DT;		/* Keyword (Data Type); NOTE(review): the earlier DT rule (B 1.2.3.2) returns DATE_AND_TIME and is listed first, so flex should never select this action -- confirm which token is intended */
       
  1166 TOD		return TOD;		/* Keyword (Data Type); NOTE(review): the earlier TOD rule (B 1.2.3.2) returns TIME_OF_DAY and is listed first, so flex should never select this action -- confirm which token is intended */
       
  1167 DATE_AND_TIME	return DATE_AND_TIME;	/* Keyword (Data Type); same pattern as the DATE_AND_TIME rule under B 1.2.3.2 above */
       
  1168 TIME_OF_DAY	return TIME_OF_DAY;	/* Keyword (Data Type); same pattern as the TIME_OF_DAY rule under B 1.2.3.2 above */
       
  1169 
       
  1170 	/*****************************************************************/
       
  1171 	/* Keywords defined in "Safety Software Technical Specification" */
       
  1172 	/*****************************************************************/
       
  1173         /* 
       
  1174          * NOTE: The following keywords are defined in
       
  1175          *       "Safety Software Technical Specification,
       
  1176          *        Part 1: Concepts and Function Blocks,  
       
  1177          *        Version 1.0 – Official Release"
       
  1178          *        written by PLCopen - Technical Committee 5
       
  1179          *
       
  1180          *        We only support these extensions and keywords
       
  1181          *        if the appropriate command line option is given.
       
  1182          */
       
  1183 SAFEBOOL	     {if (get_opt_safe_extensions()) {return SAFEBOOL;}          else {REJECT;}} 
       
  1184 
       
  1185 SAFEBYTE	     {if (get_opt_safe_extensions()) {return SAFEBYTE;}          else {REJECT;}} 
       
  1186 SAFEWORD	     {if (get_opt_safe_extensions()) {return SAFEWORD;}          else {REJECT;}} 
       
  1187 SAFEDWORD	     {if (get_opt_safe_extensions()) {return SAFEDWORD;}         else{REJECT;}}
       
  1188 SAFELWORD	     {if (get_opt_safe_extensions()) {return SAFELWORD;}         else{REJECT;}}
       
  1189                
       
  1190 SAFEREAL	     {if (get_opt_safe_extensions()) {return SAFESINT;}          else{REJECT;}}
       
  1191 SAFELREAL    	     {if (get_opt_safe_extensions()) {return SAFELREAL;}         else{REJECT;}}
       
  1192                   
       
  1193 SAFESINT	     {if (get_opt_safe_extensions()) {return SAFESINT;}          else{REJECT;}}
       
  1194 SAFEINT	             {if (get_opt_safe_extensions()) {return SAFEINT;}           else{REJECT;}}
       
  1195 SAFEDINT	     {if (get_opt_safe_extensions()) {return SAFEDINT;}          else{REJECT;}}
       
  1196 SAFELINT             {if (get_opt_safe_extensions()) {return SAFELINT;}          else{REJECT;}}
       
  1197 
       
  1198 SAFEUSINT            {if (get_opt_safe_extensions()) {return SAFEUSINT;}         else{REJECT;}}
       
  1199 SAFEUINT             {if (get_opt_safe_extensions()) {return SAFEUINT;}          else{REJECT;}}
       
  1200 SAFEUDINT            {if (get_opt_safe_extensions()) {return SAFEUDINT;}         else{REJECT;}}
       
  1201 SAFEULINT            {if (get_opt_safe_extensions()) {return SAFEULINT;}         else{REJECT;}}
       
  1202 
       
  1203  /* SAFESTRING and SAFEWSTRING are not yet supported, i.e. checked correctly, in the semantic analyser (stage 3) */
       
  1204  /*  so it is best not to support them at all... */
       
  1205  /*
       
  1206 SAFEWSTRING          {if (get_opt_safe_extensions()) {return SAFEWSTRING;}       else{REJECT;}}
       
  1207 SAFESTRING           {if (get_opt_safe_extensions()) {return SAFESTRING;}        else{REJECT;}}
       
  1208  */
       
  1209 
       
  1210 SAFETIME             {if (get_opt_safe_extensions()) {return SAFETIME;}          else{REJECT;}}
       
  1211 SAFEDATE             {if (get_opt_safe_extensions()) {return SAFEDATE;}          else{REJECT;}}
       
  1212 SAFEDT               {if (get_opt_safe_extensions()) {return SAFEDT;}            else{REJECT;}}
       
  1213 SAFETOD              {if (get_opt_safe_extensions()) {return SAFETOD;}           else{REJECT;}}
       
  1214 SAFEDATE_AND_TIME    {if (get_opt_safe_extensions()) {return SAFEDATE_AND_TIME;} else{REJECT;}}
       
  1215 SAFETIME_OF_DAY      {if (get_opt_safe_extensions()) {return SAFETIME_OF_DAY;}   else{REJECT;}}
       
  1216 
       
  1217 	/********************************/
       
  1218 	/* B 1.3.2 - Generic data types */
       
  1219 	/********************************/
       
  1220 	/* Strangely, the following symbols do not seem to be required! */
       
  1221 	/* But we include them so they become reserved words, and do not
       
  1222 	 * get passed up to bison as an identifier...
       
  1223 	 */
       
  1224 ANY		return ANY;		/* Keyword (Data Type) */
       
  1225 ANY_DERIVED	return ANY_DERIVED;	/* Keyword (Data Type) */
       
  1226 ANY_ELEMENTARY	return ANY_ELEMENTARY;	/* Keyword (Data Type) */
       
  1227 ANY_MAGNITUDE	return ANY_MAGNITUDE;	/* Keyword (Data Type) */
       
  1228 ANY_NUM		return ANY_NUM;		/* Keyword (Data Type) */
       
  1229 ANY_REAL	return ANY_REAL;	/* Keyword (Data Type) */
       
  1230 ANY_INT		return ANY_INT;		/* Keyword (Data Type) */
       
  1231 ANY_BIT		return ANY_BIT;		/* Keyword (Data Type) */
       
  1232 ANY_STRING	return ANY_STRING;	/* Keyword (Data Type) */
       
  1233 ANY_DATE	return ANY_DATE;	/* Keyword (Data Type) */
       
  1234 
       
  1235 
       
  1236 	/********************************/
       
  1237 	/* B 1.3.3 - Derived data types */
       
  1238 	/********************************/
       
  1239 ":="		return ASSIGN;		/* Delimiter */
       
  1240 ".."		return DOTDOT;		/* Delimiter */
       
  1241 TYPE		return TYPE;		/* Keyword */
       
  1242 END_TYPE	return END_TYPE;	/* Keyword */
       
  1243 ARRAY		return ARRAY;		/* Keyword */
       
  1244 OF		return OF;		/* Keyword */
       
  1245 STRUCT		return STRUCT;		/* Keyword */
       
  1246 END_STRUCT	return END_STRUCT;	/* Keyword */
       
  1247 
       
  1248 
       
  1249 	/*********************/
       
  1250 	/* B 1.4 - Variables */
       
  1251 	/*********************/
       
  1252 
       
  1253 	/******************************************/
       
  1254 	/* B 1.4.3 - Declaration & Initialisation */
       
  1255 	/******************************************/
       
  1256 VAR_INPUT	return VAR_INPUT;	/* Keyword */
       
  1257 VAR_OUTPUT	return VAR_OUTPUT;	/* Keyword */
       
  1258 VAR_IN_OUT	return VAR_IN_OUT;	/* Keyword */
       
  1259 VAR_EXTERNAL	return VAR_EXTERNAL;	/* Keyword */
       
  1260 VAR_GLOBAL	return VAR_GLOBAL;	/* Keyword */
       
  1261 END_VAR		return END_VAR;		/* Keyword */
       
  1262 RETAIN		return RETAIN;		/* Keyword */
       
  1263 NON_RETAIN	return NON_RETAIN;	/* Keyword */
       
  1264 R_EDGE		return R_EDGE;		/* Keyword */
       
  1265 F_EDGE		return F_EDGE;		/* Keyword */
       
  1266 AT		return AT;		/* Keyword */
       
  1267 
       
  1268 
       
  1269 	/***********************/
       
  1270 	/* B 1.5.1 - Functions */
       
  1271 	/***********************/
       
  1272 FUNCTION	return FUNCTION;	/* Keyword */
       
  1273 END_FUNCTION	return END_FUNCTION;	/* Keyword */
       
  1274 VAR		return VAR;		/* Keyword */
       
  1275 CONSTANT	return CONSTANT;	/* Keyword */
       
  1276 
       
  1277 
       
  1278 	/*****************************/
       
  1279 	/* B 1.5.2 - Function Blocks */
       
  1280 	/*****************************/
       
  1281 FUNCTION_BLOCK		return FUNCTION_BLOCK;		/* Keyword */
       
  1282 END_FUNCTION_BLOCK	return END_FUNCTION_BLOCK;	/* Keyword */
       
  1283 VAR_TEMP		return VAR_TEMP;		/* Keyword */
       
  1284 VAR			return VAR;			/* Keyword */
       
  1285 NON_RETAIN		return NON_RETAIN;		/* Keyword */
       
  1286 END_VAR			return END_VAR;			/* Keyword */
       
  1287 
       
  1288 
       
  1289 	/**********************/
       
  1290 	/* B 1.5.3 - Programs */
       
  1291 	/**********************/
       
  1292 PROGRAM		return PROGRAM;			/* Keyword */
       
  1293 END_PROGRAM	return END_PROGRAM;		/* Keyword */
       
  1294 
       
  1295 
       
  1296 	/********************************************/
       
  1297 	/* B 1.6 Sequential Function Chart elements */
       
  1298 	/********************************************/
       
  1299 	/* NOTE: the following identifiers/tokens clash with the R and S IL operators, as well
       
  1300 	 * as other identifiers that may be used as variable names inside IL and ST programs.
       
  1301 	 * They will have to be handled when we include parsing of SFC... For now, simply
       
  1302 	 * ignore them!
       
  1303 	 */
       
  1304 	 
       
  1305 ACTION		return ACTION;			/* Keyword */
       
  1306 END_ACTION	return END_ACTION;		/* Keyword */
       
  1307 
       
  1308 TRANSITION	return TRANSITION;		/* Keyword */
       
  1309 END_TRANSITION	return END_TRANSITION;		/* Keyword */
       
  1310 FROM		return FROM;			/* Keyword */
       
  1311 TO		return TO;			/* Keyword */
       
  1312 
       
  1313 INITIAL_STEP	return INITIAL_STEP;		/* Keyword */
       
  1314 STEP		return STEP;			/* Keyword */
       
  1315 END_STEP	return END_STEP;		/* Keyword */
       
  1316 
       
  1317 	/* PRIORITY is not a keyword, so we only return it when 
       
  1318 	 * it is explicitly required and we are not expecting any identifiers
       
  1319 	 * that could also use the same letter sequence (i.e. an identifier: priority)
       
  1320 	 */
       
  1321 <sfc_priority_state>PRIORITY	return PRIORITY;
       
  1322 
       
  1323 <sfc_qualifier_state>{
       
  1324 L		return L;
       
  1325 D		return D;
       
  1326 SD		return SD;
       
  1327 DS		return DS;
       
  1328 SL		return SL;
       
  1329 N		return N;
       
  1330 P		return P;
       
  1331 R		return R;
       
  1332 S		return S;
       
  1333 }
       
  1334 
       
  1335 
       
  1336 	/********************************/
       
  1337 	/* B 1.7 Configuration elements */
       
  1338 	/********************************/
       
  1339 CONFIGURATION		return CONFIGURATION;		/* Keyword */
       
  1340 END_CONFIGURATION	return END_CONFIGURATION;	/* Keyword */
       
  1341 TASK			return TASK;			/* Keyword */
       
  1342 RESOURCE		return RESOURCE;		/* Keyword */
       
  1343 ON			return ON;			/* Keyword */
       
  1344 END_RESOURCE		return END_RESOURCE;		/* Keyword */
       
  1345 VAR_CONFIG		return VAR_CONFIG;		/* Keyword */
       
  1346 VAR_ACCESS		return VAR_ACCESS;		/* Keyword */
       
  1347 END_VAR			return END_VAR;			/* Keyword */
       
  1348 WITH			return WITH;			/* Keyword */
       
  1349 PROGRAM			return PROGRAM;			/* Keyword */
       
  1350 RETAIN			return RETAIN;			/* Keyword */
       
  1351 NON_RETAIN		return NON_RETAIN;		/* Keyword */
       
  1352 READ_WRITE		return READ_WRITE;		/* Keyword */
       
  1353 READ_ONLY		return READ_ONLY;		/* Keyword */
       
  1354 
       
  1355 	/* PRIORITY, SINGLE and INTERVAL are not keywords, so we only return them when
       
  1356 	 * it is explicitly required and we are not expecting any identifiers
       
  1357 	 * that could also use the same letter sequence (i.e. an identifier: priority, ...)
       
  1358 	 */
       
  1359 <task_init_state>{
       
  1360 PRIORITY		return PRIORITY;
       
  1361 SINGLE			return SINGLE;
       
  1362 INTERVAL		return INTERVAL;
       
  1363 }
       
  1364 
       
  1365 	/***********************************/
       
  1366 	/* B 2.1 Instructions and Operands */
       
  1367 	/***********************************/
       
  1368 <il_state>\n		return EOL;
       
  1369 
       
  1370 
       
  1371 	/*******************/
       
  1372 	/* B 2.2 Operators */
       
  1373 	/*******************/
       
  1374 	/* NOTE: we can't have flex return the same token for
       
  1375 	 *       ANDN and &N, neither for AND and &, since
       
  1376 	 *       AND and ANDN are considered valid variable
       
  1377 	 *       function or functionblock type names!
       
  1378 	 *       This means that the parser may decide that the
       
  1379 	 *       AND or ANDN strings found in the source code
       
  1380 	 *       are being used as variable names
       
  1381 	 *       and not as operators, and will therefore transform
       
  1382 	 *       these tokens into identifier tokens!
       
  1383 	 *       We can't have the parser thinking that the source
       
  1384 	 *       code contained the string AND (which may be interpreted
       
  1385 	 *       as a variable name) when in reality the source code
       
  1386 	 *       merely contained the character &, so we use two
       
  1387 	 *       different tokens for & and AND (and similarly
       
  1388 	 *       ANDN and &N)!
       
  1389 	 */
       
  1390  /* The following tokens clash with ST expression operators and Standard Functions */
       
  1391  /* They are also keywords! */
       
  1392 AND		return AND;		/* Keyword */
       
  1393 MOD		return MOD;		/* Keyword */
       
  1394 OR		return OR;		/* Keyword */
       
  1395 XOR		return XOR;		/* Keyword */
       
  1396 NOT		return NOT;		/* Keyword */
       
  1397 
       
  1398  /* The following tokens clash with Standard Functions */
       
  1399  /* They are keywords because they are a function name */
       
  1400 <il_state>{
       
  1401 ADD		return ADD;		/* Keyword (Standard Function) */
       
  1402 DIV		return DIV;		/* Keyword (Standard Function) */
       
  1403 EQ		return EQ;		/* Keyword (Standard Function) */
       
  1404 GE		return GE;		/* Keyword (Standard Function) */
       
  1405 GT		return GT;		/* Keyword (Standard Function) */
       
  1406 LE		return LE;		/* Keyword (Standard Function) */
       
  1407 LT		return LT;		/* Keyword (Standard Function) */
       
  1408 MUL		return MUL;		/* Keyword (Standard Function) */
       
  1409 NE		return NE;		/* Keyword (Standard Function) */
       
  1410 SUB		return SUB;		/* Keyword (Standard Function) */
       
  1411 }
       
  1412 
       
  1413  /* The following tokens clash with SFC action qualifiers */
       
  1414  /* They are not keywords! */
       
  1415 <il_state>{
       
  1416 S		return S;
       
  1417 R		return R;
       
  1418 }
       
  1419 
       
  1420  /* The following tokens clash with ST expression operators */
       
  1421 &		return AND2;		/* NOT a Delimiter! */
       
  1422 
       
  1423  /* The following tokens have no clashes */
       
  1424  /* They are not keywords! */
       
  1425 <il_state>{
       
  1426 LD		return LD;
       
  1427 LDN		return LDN;
       
  1428 ST		return ST;
       
  1429 STN		return STN;
       
  1430 S1		return S1;
       
  1431 R1		return R1;
       
  1432 CLK		return CLK;
       
  1433 CU		return CU;
       
  1434 CD		return CD;
       
  1435 PV		return PV;
       
  1436 IN		return IN;
       
  1437 PT		return PT;
       
  1438 ANDN		return ANDN;
       
  1439 &N		return ANDN2;
       
  1440 ORN		return ORN;
       
  1441 XORN		return XORN;
       
  1442 CAL		return CAL;
       
  1443 CALC		return CALC;
       
  1444 CALCN		return CALCN;
       
  1445 RET		return RET;
       
  1446 RETC		return RETC;
       
  1447 RETCN		return RETCN;
       
  1448 JMP		return JMP;
       
  1449 JMPC		return JMPC;
       
  1450 JMPCN		return JMPCN;
       
  1451 }
       
  1452 
       
  1453 	/***********************/
       
  1454 	/* B 3.1 - Expressions */
       
  1455 	/***********************/
       
  1456 "**"		return OPER_EXP;	/* NOT a Delimiter! */
       
  1457 "<>"		return OPER_NE;		/* NOT a Delimiter! */
       
  1458 ">="		return OPER_GE;		/* NOT a Delimiter! */
       
  1459 "<="		return OPER_LE;		/* NOT a Delimiter! */
       
  1460 &		return AND2;		/* NOT a Delimiter! */
       
  1461 AND		return AND;		/* Keyword */
       
  1462 XOR		return XOR;		/* Keyword */
       
  1463 OR		return OR;		/* Keyword */
       
  1464 NOT		return NOT;		/* Keyword */
       
  1465 MOD		return MOD;		/* Keyword */
       
  1466 
       
  1467 
       
  1468 	/*****************************************/
       
  1469 	/* B 3.2.2 Subprogram Control Statements */
       
  1470 	/*****************************************/
       
  1471 :=		return ASSIGN;		/* Delimiter */
       
  1472 =>		return SENDTO;		/* Delimiter */
       
  1473 RETURN		return RETURN;		/* Keyword */
       
  1474 
       
  1475 
       
  1476 	/********************************/
       
  1477 	/* B 3.2.3 Selection Statements */
       
  1478 	/********************************/
       
  1479 IF		return IF;		/* Keyword */
       
  1480 THEN		return THEN;		/* Keyword */
       
  1481 ELSIF		return ELSIF;		/* Keyword */
       
  1482 ELSE		return ELSE;		/* Keyword */
       
  1483 END_IF		return END_IF;		/* Keyword */
       
  1484 
       
  1485 CASE		return CASE;		/* Keyword */
       
  1486 OF		return OF;		/* Keyword */
       
  1487 ELSE		return ELSE;		/* Keyword */
       
  1488 END_CASE	return END_CASE;	/* Keyword */
       
  1489 
       
  1490 
       
  1491 	/********************************/
       
  1492 	/* B 3.2.4 Iteration Statements */
       
  1493 	/********************************/
       
  1494 FOR		return FOR;		/* Keyword */
       
  1495 TO		return TO;		/* Keyword */
       
  1496 BY		return BY;		/* Keyword */
       
  1497 DO		return DO;		/* Keyword */
       
  1498 END_FOR		return END_FOR;		/* Keyword */
       
  1499 
       
  1500 WHILE		return WHILE;		/* Keyword */
       
  1501 DO		return DO;		/* Keyword */
       
  1502 END_WHILE	return END_WHILE;	/* Keyword */
       
  1503 
       
  1504 REPEAT		return REPEAT;		/* Keyword */
       
  1505 UNTIL		return UNTIL;		/* Keyword */
       
  1506 END_REPEAT	return END_REPEAT;	/* Keyword */
       
  1507 
       
  1508 EXIT		return EXIT;		/* Keyword */
       
  1509 
       
  1510 
       
  1511 
       
  1512 
       
  1513 
       
  1514 
       
  1515 	/********************************************************/
       
  1516 	/********************************************************/
       
  1517 	/********************************************************/
       
  1518 	/*****                                              *****/
       
  1519 	/*****                                              *****/
       
  1520 	/*****  N O W    W O R K    W I T H    V A L U E S  *****/
       
  1521 	/*****                                              *****/
       
  1522 	/*****                                              *****/
       
  1523 	/********************************************************/
       
  1524 	/********************************************************/
       
  1525 	/********************************************************/
       
  1526 
       
  1527 
       
  1528 	/********************************************/
       
  1529 	/* B.1.4.1   Directly Represented Variables */
       
  1530 	/********************************************/
       
  1531 {direct_variable}   {yylval.ID=strdup(yytext); return get_direct_variable_token(yytext);}
       
  1532 
       
  1533 
       
  1534 	/******************************************/
       
  1535 	/* B 1.4.3 - Declaration & Initialisation */
       
  1536 	/******************************************/
       
  1537 {incompl_location}	{yylval.ID=strdup(yytext); return incompl_location_token;}
       
  1538 
       
  1539 
       
  1540 	/************************/
       
  1541 	/* B 1.2.3.1 - Duration */
       
  1542 	/************************/
       
  1543 {fixed_point}		{yylval.ID=strdup(yytext); return fixed_point_token;}
       
  1544 
       
  1545 {fixed_point_d}		{yylval.ID=strdup(yytext); yylval.ID[yyleng-1] = '\0'; return fixed_point_d_token;}
       
  1546 {integer_d}		{yylval.ID=strdup(yytext); yylval.ID[yyleng-1] = '\0'; return integer_d_token;}
       
  1547 
       
  1548 {fixed_point_h}		{yylval.ID=strdup(yytext); yylval.ID[yyleng-1] = '\0'; return fixed_point_h_token;}
       
  1549 {integer_h}		{yylval.ID=strdup(yytext); yylval.ID[yyleng-1] = '\0'; return integer_h_token;}
       
  1550 
       
  1551 {fixed_point_m}		{yylval.ID=strdup(yytext); yylval.ID[yyleng-1] = '\0'; return fixed_point_m_token;}
       
  1552 {integer_m}		{yylval.ID=strdup(yytext); yylval.ID[yyleng-1] = '\0'; return integer_m_token;}
       
  1553 
       
  1554 {fixed_point_s}		{yylval.ID=strdup(yytext); yylval.ID[yyleng-1] = '\0'; return fixed_point_s_token;}
       
  1555 {integer_s}		{yylval.ID=strdup(yytext); yylval.ID[yyleng-1] = '\0'; return integer_s_token;}
       
  1556 
       
  1557 {fixed_point_ms}	{yylval.ID=strdup(yytext); yylval.ID[yyleng-2] = '\0'; return fixed_point_ms_token;}
       
  1558 {integer_ms}		{yylval.ID=strdup(yytext); yylval.ID[yyleng-2] = '\0'; return integer_ms_token;}
       
  1559 
       
  1560 
       
  1561 	/*******************************/
       
  1562 	/* B.1.2.2   Character Strings */
       
  1563 	/*******************************/
       
  1564 {double_byte_character_string} {yylval.ID=strdup(yytext); return double_byte_character_string_token;}
       
  1565 {single_byte_character_string} {yylval.ID=strdup(yytext); return single_byte_character_string_token;}
       
  1566 
       
  1567 
       
  1568 	/******************************/
       
  1569 	/* B.1.2.1   Numeric literals */
       
  1570 	/******************************/
       
  1571 {integer}		{yylval.ID=strdup(yytext); return integer_token;}
       
  1572 {real}			{yylval.ID=strdup(yytext); return real_token;}
       
  1573 {binary_integer}	{yylval.ID=strdup(yytext); return binary_integer_token;}
       
  1574 {octal_integer} 	{yylval.ID=strdup(yytext); return octal_integer_token;}
       
  1575 {hex_integer} 		{yylval.ID=strdup(yytext); return hex_integer_token;}
       
  1576 
       
  1577 
       
  1578 	/*****************************************/
       
  1579 	/* B.1.1 Letters, digits and identifiers */
       
  1580 	/*****************************************/
       
  1581 <st_state>{identifier}/({st_whitespace})"=>"	{yylval.ID=strdup(yytext); return sendto_identifier_token;}
       
  1582 <il_state>{identifier}/({il_whitespace})"=>"	{yylval.ID=strdup(yytext); return sendto_identifier_token;}
       
  1583 {identifier} 				{yylval.ID=strdup(yytext);
       
  1584 					 // printf("returning identifier...: %s, %d\n", yytext, get_identifier_token(yytext));
       
  1585 					 return get_identifier_token(yytext);}
       
  1586 
       
  1587 
       
  1588 
       
  1589 
       
  1590 
       
  1591 
       
  1592 	/************************************************/
       
  1593 	/************************************************/
       
  1594 	/************************************************/
       
  1595 	/*****                                      *****/
       
  1596 	/*****                                      *****/
       
  1597 	/*****   T H E    L E F T O V E R S . . .   *****/
       
  1598 	/*****                                      *****/
       
  1599 	/*****                                      *****/
       
  1600 	/************************************************/
       
  1601 	/************************************************/
       
  1602 	/************************************************/
       
  1603 
       
  1604 	/* do the single character tokens...
       
  1605 	 *
       
  1606 	 *  e.g.:  ':'  '('  ')'  '+'  '*'  ...
       
  1607 	 */
       
  1608 .	{return yytext[0];}
       
  1609 
       
  1610 
       
  1611 %%
       
  1612 
       
  1613 
       
  1614 /***********************************/
       
  1615 /* Utility function definitions... */
       
  1616 /***********************************/
       
  1617 
       
  1618 /* print the include file stack to stderr... */
       
  1619 void print_include_stack(void) {
       
  1620   int i;
       
  1621 
       
  1622   if ((include_stack_ptr - 1) >= 0)
       
  1623     fprintf (stderr, "in file "); 
       
  1624   for (i = include_stack_ptr - 1; i >= 0; i--)
       
  1625     fprintf (stderr, "included from file %s:%d\n", include_stack[i].filename, include_stack[i].env->lineNumber);
       
  1626 }
       
  1627 
       
  1628 
       
  1629 /* return all the text in the current token back to the input stream, except the first n chars. */
       
  1630 void unput_text(unsigned int n) {
       
  1631   /* it seems that flex has a bug in that it will not correctly count the line numbers
       
  1632    * if we return newlines back to the input stream. These newlines will be re-counted
       
  1633    * a second time when they are processed again by flex.
       
  1634    * We therefore determine how many newlines are in the text we are returning,
       
  1635    * and decrement the line counter acordingly...
       
  1636    */
       
  1637   /*unsigned int i;
       
  1638   
       
  1639   for (i = n; i < strlen(yytext); i++)
       
  1640     if (yytext[i] == '\n')
       
  1641       current_tracking->lineNumber--;*/
       
  1642 
       
  1643   /* now return all the text back to the input stream... */
       
  1644   yyless(n);
       
  1645 }
       
  1646 
       
  1647 
       
  1648 /* Called by flex when it reaches the end-of-file */
       
  1649 int yywrap(void)
       
  1650 {
       
  1651   /* We reached the end of the input file... */
       
  1652 
       
  1653   /* Should we continue with another file? */
       
  1654   /* If so:
       
  1655    *   open the new file...
       
  1656    *   return 0;
       
  1657    */
       
  1658 
       
  1659   /* to we stop processing...
       
  1660    *
       
  1661    *   return 1;
       
  1662    */
       
  1663 
       
  1664 
       
  1665   return 1;  /* Stop scanning at end of input file. */
       
  1666 }
       
  1667 
       
  1668 
       
  1669 
       
  1670 /*************************************/
       
  1671 /* Include a main() function to test */
       
  1672 /* the token parsing by flex....     */
       
  1673 /*************************************/
       
  1674 #ifdef TEST_MAIN
       
  1675 
       
  1676 #include "../util/symtable.hh"
       
  1677 
       
  1678 yystype yylval;
       
  1679 YYLTYPE yylloc;
       
  1680 
       
  1681 const char *current_filename;
       
  1682 
       
  1683 
       
  1684 
       
  1685 int get_identifier_token(const char *identifier_str) {return 0;}
       
  1686 int get_direct_variable_token(const char *direct_variable_str) {return 0;}
       
  1687 
       
  1688 
       
  1689 int main(int argc, char **argv) {
       
  1690 
       
  1691   FILE *in_file;
       
  1692   int res;
       
  1693 	
       
  1694   if (argc == 1) {
       
  1695     /* Work as an interactive (command line) parser... */
       
  1696     while((res=yylex()))
       
  1697       fprintf(stderr, "(line %d)token: %d\n", yylineno, res);
       
  1698   } else {
       
  1699     /* Work as non-interactive (file) parser... */
       
  1700     if((in_file = fopen(argv[1], "r")) == NULL) {
       
  1701       char *errmsg = strdup2("Error opening main file ", argv[1]);
       
  1702       perror(errmsg);
       
  1703       free(errmsg);
       
  1704       return -1;
       
  1705     }
       
  1706 
       
  1707     /* parse the file... */
       
  1708     yyin = in_file;
       
  1709     current_filename = argv[1];
       
  1710     while(1) {
       
  1711       res=yylex();
       
  1712       fprintf(stderr, "(line %d)token: %d (%s)\n", yylineno, res, yylval.ID);
       
  1713     }
       
  1714   }
       
  1715 	
       
  1716 	return 0;
       
  1717 
       
  1718 }
       
  1719 #endif