stage1_2/iec.flex
changeset 0 fb772792efd1
child 1 5d893a68be6e
equal deleted inserted replaced
-1:000000000000 0:fb772792efd1
       
     1 /*
       
     2  * (c) 2003 Mario de Sousa
       
     3  *
       
     4  * Offered to the public under the terms of the GNU General Public License
       
     5  * as published by the Free Software Foundation; either version 2 of the
       
     6  * License, or (at your option) any later version.
       
     7  *
       
     8  * This program is distributed in the hope that it will be useful, but
       
     9  * WITHOUT ANY WARRANTY; without even the implied warranty of
       
    10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
       
    11  * Public License for more details.
       
    12  *
       
    13  * This code is made available on the understanding that it will not be
       
    14  * used in safety-critical situations without a full and competent review.
       
    15  */
       
    16 
       
    17 /*
       
    18  * An IEC 61131-3 IL and ST compiler.
       
    19  *
       
    20  * Based on the
       
    21  * FINAL DRAFT - IEC 61131-3, 2nd Ed. (2001-12-10)
       
    22  *
       
    23  */
       
    24 
       
    25 /*
       
    26  * Stage 1
       
    27  * =======
       
    28  *
       
    29  * This file contains the lexical tokens definitions, from which
       
    30  * the flex utility will generate a lexical parser function.
       
    31  */
       
    32 
       
    33 
       
    34 
       
    35 
       
    36 /*****************************/
       
    37 /* Lexical Parser Options... */
       
    38 /*****************************/
       
    39 
       
    40 /* The lexical analyser will never work in interactive mode,
       
    41  * i.e., it will only process programs saved to files, and never
       
    42  * programs being written inter-actively by the user.
       
    43  * This option saves the resulting parser from calling the
       
    44  * isatty() function, that seems to be generating some compile
       
    45  * errors under some (older?) versions of flex.
       
    46  */
       
    47 %option never-interactive
       
    48 
       
    49 /* Have the lexical analyser use a 'char *yytext' instead of an
       
    50  * array of char 'char yytext[??]' to store the lexical token.
       
    51  */
       
    52 %pointer
       
    53 
       
    54 
       
    55 /* Have the lexical analyser ignore the case of letters.
       
    56  * This will occur for all the tokens and keywords, but
       
    57  * the resulting text handed up to the syntax parser
       
    58  * will not be changed, and keep the original case
       
    59  * of the letters in the input file.
       
    60  */
       
    61 %option case-insensitive
       
    62 
       
    63 /* Have the generated lexical analyser keep track of the
       
    64  * line number it is currently analysing.
       
    65  * This is used to pass up to the syntax parser
       
    66  * the number of the line on which the current
       
    67  * token was found. It will enable the syntax parser
       
    68  * to generate more informative error messages...
       
    69  */
       
    70 %option yylineno
       
    71 
       
    72 /* required for the use of the yy_pop_state() and
       
    73  * yy_push_state() functions
       
    74  */
       
    75 %option stack
       
    76 
       
    77 /* The '%option stack' also requests the inclusion of 
       
    78  * the yy_top_state(), however this function is not
       
    79  * currently being used. This means that the compiler
       
    80  * is complaining about the existence of this function.
       
    81  * The following option removes the yy_top_state()
       
    82  * function from the resulting c code, so the compiler 
       
    83  * no longer complains.
       
    84  */
       
    85 %option noyy_top_state
       
    86 
       
    87 /**************************************************/
       
    88 /* External Variable and Function declarations... */
       
    89 /**************************************************/
       
    90 
       
    91 
       
    92 %{
       
    93 /* Define TEST_MAIN to include a main() function.
       
    94  * Useful for testing the parser generated by flex.
       
    95  */
       
    96 /*
       
    97 #define TEST_MAIN
       
    98 */
       
    99 /* If lexical parser is compiled by itself, we need to define the following
       
   100  * constant to some string. Under normal circumstances LIBDIRECTORY is set
       
   101  * in the syntax parser header file...
       
   102  */
       
   103 #ifdef TEST_MAIN
       
   104 #define LIBDIRECTORY "just_testing"
       
   105 #endif
       
   106 
       
   107 
       
   108 
       
   109 /* Required for strdup() */
       
   110 #include <string.h>
       
   111 
       
   112 /* Required only for the declaration of abstract syntax classes
       
   113  * (class symbol_c; class token_c; class list_c;)
       
   114  * These will not be used in flex, but the token type union defined
       
   115  * in iec.hh contains pointers to these classes, so we must include
       
   116  * it here.
       
   117  */
       
   118 #include "../absyntax/absyntax.hh"
       
   119 
       
   120 /* generated by bison.
       
   121  * Contains the definition of the token constants, and the
       
   122  * token value type YYSTYPE (in our case, a 'const char *')
       
   123  */
       
   124 #include "iec.y.hh"
       
   125 
       
   126 /* Variable defined by the bison parser,
       
   127  * where the value of the tokens will be stored
       
   128  */
       
   129 extern YYSTYPE yylval;
       
   130 
       
   131 /* The name of the file currently being parsed...
       
   132  * This variable is declared and read from the code generated by bison!
       
   133  * Note that flex accesses and updates this global variable
       
   134  * appropriately whenever it comes across an {#include "<filename>"}
       
   135  * directive...
       
   136  */
       
   137 extern const char *current_filename;
       
   138 
       
   139 /* We will not be using unput() in our flex code... */
       
   140 #define YY_NO_UNPUT
       
   141 
       
   142 /* Variable defined by the bison parser.
       
   143  * It must be initialised with the location
       
   144  * of the token being parsed.
       
   145  * This is only needed if we want to keep
       
   146  * track of the locations, in order to give
       
   147  * more meaningful error messages!
       
   148  */
       
   149 extern YYLTYPE yylloc;
       
   150 
       
   151 /* Macro that is executed for every action.
       
   152  * We use it to pass the location of the token
       
   153  * back to the bison parser...
       
   154  */
       
   155 #define YY_USER_ACTION { 					\
       
   156 	yylloc.first_line = yylloc.last_line = yylineno;	\
       
   157 	yylloc.first_column = yylloc.last_column = 0;		\
       
   158 	}
       
   159 
       
   160 
       
   161 /* Since this lexical parser we defined only works in ASCII based
       
   162  * systems, we might as well make sure it is being compiled on
       
   163  * one...
       
   164  * Lets check a few random characters...
       
   165  */
       
   166 #if (('a' != 0x61) || ('A' != 0x41) || ('z' != 0x7A) || ('Z' != 0x5A) || \
       
   167      ('0' != 0x30) || ('9' != 0x39) || ('(' != 0x28) || ('[' != 0x5B))
       
   168 #error This lexical analyser is not portable to a non ASCII based system.
       
   169 #endif
       
   170 
       
   171 
       
   172 /* Function only called from within flex, but defined
       
   173  * in iec.y!
       
   174  * We declare it here...
       
   175  *
       
   176  * Search for a symbol in either of the two symbol tables
       
   177  * and return the token id of the first symbol found.
       
   178  * Searches first in the variables, and only if not found
       
   179  * does it continue searching in the library elements
       
   180  */
       
   181 //token_id_t get_identifier_token(const char *identifier_str);
       
   182 int get_identifier_token(const char *identifier_str);
       
   183 %}
       
   184 
       
   185 
       
   186 /***************************************************/
       
   187 /* Forward Declaration of functions defined later. */
       
   188 /***************************************************/
       
   189 
       
   190 %{
       
   191 /* return all the text in the current token back to the input stream. */
       
   192 void unput_text(unsigned int n);
       
   193 %}
       
   194 
       
   195 
       
   196 
       
   197 /****************************/
       
   198 /* Lexical Parser States... */
       
   199 /****************************/
       
   200 
       
   201 /* NOTE: Our parser can parse st or il code, intermixed
       
   202  *       within the same file.
       
   203  *       With IL we come across the issue of the EOL (end of line) token.
       
   204  *       ST, and the declaration parts of IL do not use this token!
       
   205  *       If the lexical analyser were to issue this token during ST
       
   206  *       language parsing, or during the declaration of data types,
       
   207  *       function headers, etc. in IL, the syntax parser would crash.
       
   208  *
       
   209  *       We can solve this issue using one of three methods:
       
   210  *        (1) Augment all the syntax that does not accept the EOL
       
   211  *            token to simply ignore it. This makes the syntax
       
   212  *            definition (in iec.y) very cluttered!
       
   213  *        (2) Let the lexical parser figure out which language
       
   214  *            it is parsing, and decide whether or not to issue
       
   215  *            the EOL token. This requires the lexical parser
       
   216  *            to have knowledge of the syntax!, making for a poor
       
   217  *            overall organisation of the code. It would also make it
       
   218  *            very difficult to understand the lexical parser as it
       
   219  *            would use several states, and a state machine to transition
       
   220  *            between the states. The state transitions would be
       
   221  *            intermingled with the lexical parser definition!
       
   222  *        (3) Use a mixture of (1) and (2). The lexical analyser
       
   223  *            merely distinguishes between function headers and function
       
   224  *            bodies, but no longer makes a distinction between il and
       
   225  *            st language bodies. When parsing a body, it will return
       
   226  *            the EOL token. In other states '\n' will be ignored as
       
   227  *            whitespace.
       
   228  *            The ST language syntax has been augmented in the syntax
       
   229  *            parser configuration to ignore any EOL tokens that it may
       
   230  *            come across!
       
   231  *            This option has both drawbacks of option (1) and (2), but
       
   232  *            much less intensely.
       
   233  *            The syntax that gets cluttered is limited to the ST statements
       
   234  *            (which is rather limited, compared to the function headers and
       
   235  *            data type declarations, etc...), while the state machine in
       
   236  *            the lexical parser becomes very simple. All state transitions
       
   237  *            can be handled within the lexical parser by itself, and can be
       
   238  *            easily identified. Thus knowledge of the syntax required by
       
   239  *            the lexical parser is very limited!
       
   240  *
       
   241  * Amazingly enough, I (Mario) got to implement option (3)
       
   242  * at first, requiring two basic states, decl and body.
       
   243  * The lexical parser will enter the body state when
       
   244  * it is parsing the body of a function/program/function block. The
       
   245  * state transition is done when we find a VAR_END that is not followed
       
   246  * by a VAR! This is the syntax knowledge that gets included in the
       
   247  * lexical analyser with this option!
       
   248  * Unfortunately, getting the st syntax parser to ignore EOL anywhere
       
   249  * where they might appear leads to conflicts. This is due to the fact
       
   250  * that the syntax parser uses the single look-ahead token to remove
       
   251  * possible conflicts. When we insert a possible EOL, the single
       
   252  * look ahead token becomes the EOL, which means the potential conflicts
       
   253  * could no longer be resolved.
       
   254  * Removing these conflicts would make the st syntax parser very convoluted,
       
   255  * and adding the extraneous EOL would make it very cluttered.
       
   256  * This option was therefore dropped in favour of another!
       
   257  *
       
   258  * I ended up implementing (2). Unfortunately the lexical analyser can
       
   259  * not easily distinguish between il and st code, since function
       
   260  * calls in il are very similar to function block calls in st.
       
   261  * We therefore use an extra 'body' state. When the lexical parser
       
   262  * finds that last END_VAR, it enters the body state. This state
       
   263  * must figure out what language is being parsed from the first few
       
   264  * tokens, and switch to the correct state (st or il) according to the
       
   265  * language. This means that we insert quite a bit of knowledge of the
       
   266  * syntax of the languages into the lexical parser. This is ugly, but it
       
   267  * works, and at least it is possible to keep all the state changes together
       
   268  * to make it easier to remove them later on if need be.
       
   269  * The body state returns any matched text back to the buffer with unput(),
       
   270  * to be later matched correctly by the appropriate language parser (st or il).
       
   271  * The state machine has 6 possible states (INITIAL, config, decl, body, st, il)
       
   272  * Possible state changes are:
       
   273  *   INITIAL -> decl (when a FUNCTION, FUNCTION_BLOCK, or PROGRAM is found,
       
   274  *                    and followed by a VAR declaration)
       
   275  *   INITIAL -> body (when a FUNCTION, FUNCTION_BLOCK, or PROGRAM is found,
       
   276  *                    and _not_ followed by a VAR declaration)
       
   277  *   INITIAL -> config (when a CONFIGURATION is found)
       
   278  *   decl    -> body (when the last END_VAR is found, i.e. the function body starts)
       
   279  *   body    -> st (when it figures out it is parsing st language)
       
   280  *   body    -> il (when it figures out it is parsing il language)
       
   281  *   decl    -> INITIAL (when a END_FUNCTION, END_FUNCTION_BLOCK, or END_PROGRAM is found)
       
   282  *   st      -> INITIAL (when a END_FUNCTION, END_FUNCTION_BLOCK, or END_PROGRAM is found)
       
   283  *   il      -> INITIAL (when a END_FUNCTION, END_FUNCTION_BLOCK, or END_PROGRAM is found)
       
   284  *   config  -> INITIAL (when a END_CONFIGURATION is found)
       
   285  */
       
   286 /* we are parsing a configuration. */
       
   287 %s config
       
   288 
       
   289 /* we are parsing a function, program or function block declaration */
       
   290 %s decl
       
   291 
       
   292 /* we will be parsing a function body. Whether it is il or st remains unknown */
       
   293 %x body
       
   294 
       
   295 /* we are parsing il code -> flex must return the EOL tokens!       */
       
   296 %s il
       
   297 
       
   298 /* we are parsing st code -> flex must not return the EOL tokens!   */
       
   299 %s st
       
   300 
       
   301 
       
   302 
       
   303 
       
   304 /*******************/
       
   305 /* File #include's */
       
   306 /*******************/
       
   307 
       
   308 /* We extend the IEC 61131-3 standard syntax to allow inclusion
       
   309  * of other files, using the IEC 61131-3 pragma directive...
       
   310  * The accepted syntax is:
       
   311  *  {#include "<filename>"}
       
   312  */
       
   313 
       
   314 /* the "include" states are used for picking up the name of an include file */
       
   315 %x include_beg
       
   316 %x include_filename
       
   317 %x include_end
       
   318 
       
   319 
       
   320 file_include_pragma_filename	[^\"]*
       
   321 file_include_pragma_beg		"{#include"{st_whitespace_only}\"
       
   322 file_include_pragma_end		\"{st_whitespace_only}"}"
       
   323 file_include_pragma			{file_include_pragma_beg}{file_include_pragma_filename}{file_include_pragma_end}
       
   324 
       
   325 
       
   326 %{
       
   327 #define MAX_INCLUDE_DEPTH 16	/* capacity of the include_stack[] array declared below */
       
   328 
       
   329 typedef struct {	/* element type of include_stack[]: one record per nested include file */
       
   330 	  YY_BUFFER_STATE buffer_state;	/* flex input buffer handle; presumably the buffer of the including file -- TODO confirm against the include_* rules */
       
   331 	  int lineno;	/* presumably the line number to restore for that file -- TODO confirm */
       
   332 	  const char *filename;	/* presumably the file name to restore into current_filename -- TODO confirm */
       
   333 	} include_stack_t;
       
   334 
       
   335 include_stack_t include_stack[MAX_INCLUDE_DEPTH];	/* fixed-size stack of at most MAX_INCLUDE_DEPTH entries */
       
   336 int include_stack_ptr = 0;	/* starts at 0; presumably the number of entries in use -- TODO confirm */
       
   337 
       
   338 const char *INCLUDE_DIRECTORIES[] = {	/* NULL-terminated list of directory prefixes; presumably tried in order when opening an included file -- TODO confirm */
       
   339 	"",	/* empty prefix */
       
   340 	"lib/",
       
   341 	"/lib/",
       
   342 	"/usr/lib/",
       
   343 	"/usr/lib/iec/",
       
   344 	LIBDIRECTORY "/",	/* LIBDIRECTORY is "just_testing" under TEST_MAIN, otherwise it comes from the syntax parser header */
       
   345 	NULL /* must end with NULL!! */
       
   346 	};
       
   347 
       
   348 
       
   349 /*
       
   350  * Join two strings together: returns a new malloc(3)'ed buffer holding a followed by b, or NULL if the allocation fails.
       
   351  * Nothing in this function frees the buffer, so releasing it is up to the caller. */
       
   352 static char *strdup2(const char *a, const char *b) {
       
   353   char *res = (char *)malloc(strlen(a) + strlen(b) + 1);
       
   354 
       
   355   if (!res)
       
   356     return NULL;
       
   357   return strcat(strcpy(res, a), b);  /* safe: res was sized above for both strings plus the terminating NUL */
       
   358 }
       
   359 %}
       
   360 
       
   361 
       
   362 
       
   363 /*****************************/
       
   364 /* Preliminary constructs... */
       
   365 /*****************************/
       
   366 
       
   367 
       
   368 /* A pragma... */
       
   369 
       
   370 pragma "{"[^}]*"}"
       
   371 
       
   372 /* NOTE: this seemingly unnecessary complex definition is required
       
   373  *       to be able to eat up comments such as:
       
   374  *          '(* Testing... ! ***** ******)'
       
   375  *       without using the trailing context command in flex (/{context})
       
   376  *       since {comment} itself will later be used with
       
   377  *       trailing context ({comment}/{context})
       
   378  */
       
   379 not_asterisk				[^*]
       
   380 not_close_parenthesis_nor_asterisk	[^*)]
       
   381 asterisk				"*"
       
   382 comment_text		{not_asterisk}|(({asterisk}+){not_close_parenthesis_nor_asterisk})
       
   383 
       
   384 comment		"(*"({comment_text}*)({asterisk}+)")"
       
   385 
       
   386 
       
   387 /*
       
   388 3.1 Whitespace
       
   389  (NOTE: Whitespace IS clearly defined, to include newline!!! See section 2.1.4!!!)
       
   390  No definition of whitespace is given, in other words, the characters that may be used to separate language tokens are not precisely defined.
       
   391  One may nevertheless make an intelligent guess of using the space (' '), and other characters also commonly considered whitespace in other programming languages (horizontal tab, vertical tab, form feed, etc.).
       
   392  The main question is whether the newline character should be considered whitespace. IL language statements use an EOL token (End Of Line) to distinguish between some language constructs.
       
   393  The EOL token itself is openly defined as "normally consist[ing] of the 'paragraph separator' ", leaving the final choice open to each implementation.
       
   394  If we choose the newline character to represent the EOL token, it may then not be considered whitespace.
       
   395  On the other hand, some examples that come in a non-normative annex of the specification allow function declarations to span multiple lines, which means that the newline character is being considered as whitespace.
       
   396  Our implementation works around this issue by including the new line character in the whitespace while parsing function declarations and the ST language, and parsing it as the EOL token only while parsing IL language statements.
       
   397  This requires the use of a state machine in the lexical parser that needs at least some knowledge of the syntax itself.
       
   398 */
       
   399 /* NOTE: Our definition of whitespace will only work in ASCII!
       
   400  *
       
   401  *       Since the IL language needs to know the location of newline
       
   402  *       (token EOL -> '\n' ), we need one definition of whitespace
       
   403  *       for each language...
       
   404  */
       
   405 /*
       
   406  * NOTE: we cannot use
       
   407  *         st_whitespace	[:space:]*
       
   408  *       since we use {st_whitespace} as trailing context. In our case
       
   409  *       this would not constitute "dangerous trailing context", but the
       
   410  *       lexical generator (i.e. flex) does not know this (since it does
       
   411  *       not know which characters belong to the set [:space:]), and will
       
   412  *       generate a "dangerous trailing context" warning!
       
   413  *       We use this alternative just to stop the flex utility from
       
   414  *       generating the invalid (in this case) warning...
       
   415  */
       
   416 
       
   417 st_whitespace_only	[ \f\n\r\t\v]*
       
   418 il_whitespace_only	[ \f\r\t\v]*
       
   419 
       
   420 st_whitespace_text	{st_whitespace_only}|{comment}|{pragma}
       
   421 il_whitespace_text	{il_whitespace_only}|{comment}|{pragma}
       
   422 
       
   423 st_whitespace	{st_whitespace_text}*
       
   424 il_whitespace	{il_whitespace_text}*
       
   425 
       
   426 st_whitespace_text_no_pragma	{st_whitespace_only}|{comment}
       
   427 il_whitespace_text_no_pragma	{il_whitespace_only}|{comment}
       
   428 
       
   429 st_whitespace_no_pragma	{st_whitespace_text_no_pragma}*
       
   430 il_whitespace_no_pragma	{il_whitespace_text_no_pragma}*
       
   431 
       
   432 qualified_identifier	{identifier}(\.{identifier})?
       
   433 
       
   434 
       
   435 
       
   436 /*****************************************/
       
   437 /* B.1.1 Letters, digits and identifiers */
       
   438 /*****************************************/
       
   439 /* NOTE: The following definitions only work if the host computer
       
   440  *       is using the ASCII mapping. E.g., with EBCDIC [A-Z]
       
   441  *       contains non-alphabetic characters!
       
   442  *       The correct way of doing it would be to use
       
   443  *       the [:upper:] etc... definitions.
       
   444  *
       
   445  *       Unfortunately, further on we need all printable
       
   446  *       characters (i.e. [:print:]), but excluding '$', '"' and "'".
       
   447  *       Flex does not allow sets to be composed by excluding
       
   448  *       elements. Sets may only be constructed by adding new
       
   449  *       elements, which means that we have to revert to
       
   450  *       [\x20\x21\x23\x25\x26\x28-\x7E] for the definition
       
   451  *       of the printable characters with the required exceptions.
       
   452  *       The above also implies the use of ASCII, but now we have
       
   453  *       no way to work around it!
       
   454  *
       
   455  *       The conclusion is that our parser is limited to ASCII
       
   456  *       based host computers!!
       
   457  */
       
   458 letter		[A-Za-z]
       
   459 digit		[0-9]
       
   460 octal_digit	[0-7]
       
   461 hex_digit	{digit}|[A-F]
       
   462 identifier	({letter}|(_({letter}|{digit})))((_?({letter}|{digit}))*)
       
   463 
       
   464 
       
   465 /*******************/
       
   466 /* B.1.2 Constants */
       
   467 /*******************/
       
   468 
       
   469 /******************************/
       
   470 /* B.1.2.1   Numeric literals */
       
   471 /******************************/
       
   472 integer         {digit}((_?{digit})*)
       
   473 binary_integer  2#{bit}((_?{bit})*)
       
   474 bit		[0-1]
       
   475 octal_integer   8#{octal_digit}((_?{octal_digit})*)
       
   476 hex_integer     16#{hex_digit}((_?{hex_digit})*)
       
   477 exponent        [Ee]([+-]?){integer}
       
   478 /* The correct definition for real would be:
       
   479  * real		{integer}\.{integer}({exponent}?)
       
   480  *
       
   481  * Unfortunately, the spec also defines fixed_point (B 1.2.3.1) as:
       
   482  * fixed_point		{integer}\.{integer}
       
   483  *
       
   484  * This means that {integer}\.{integer} could be interpreted
       
   485  * as either a fixed_point or a real.
       
   486  * I have opted to interpret {integer}\.{integer} as a fixed_point.
       
   487  * In order to do this, the definition of real has been changed to:
       
   488  * real		{integer}\.{integer}{exponent}
       
   489  *
       
   490  * This means that the syntax parser now needs to define a real to be
       
   491  * either a real_token or a fixed_point_token!
       
   492  */
       
   493 real		{integer}\.{integer}{exponent}
       
   494 
       
   495 
       
   496 /*******************************/
       
   497 /* B.1.2.2   Character Strings */
       
   498 /*******************************/
       
   499 /*
       
   500 common_character_representation :=
       
   501 <any printable character except '$', '"' or "'">
       
   502 |'$$'
       
   503 |'$L'|'$N'|'$P'|'$R'|'$T'
       
   504 |'$l'|'$n'|'$p'|'$r'|'$t'
       
   505 
       
   506 NOTE: 	$ = 0x24
       
   507 	" = 0x22
       
   508 	' = 0x27
       
   509 
       
   510 	printable chars in ASCII: 0x20-0x7E
       
   511 */
       
   512 
       
   513 esc_char_u		$L|$N|$P|$R|$T
       
   514 esc_char_l		$l|$n|$p|$r|$t
       
   515 esc_char		$$|{esc_char_u}|{esc_char_l}
       
   516 double_byte_char	(${hex_digit}{hex_digit}{hex_digit}{hex_digit})
       
   517 single_byte_char	(${hex_digit}{hex_digit})
       
   518 
       
   519 /* WARNING:
       
   520  * This definition is only valid in ASCII...
       
   521  *
       
   522  * Flex includes the function print_char() that defines
       
   523  * all printable characters portably (i.e. whatever character
       
   524  * encoding is currently being used , ASCII, EBCDIC, etc...)
       
   525  * Unfortunately, we cannot generate the definition of
       
   526  * common_character_representation portably, since flex
       
   527  * does not allow definition of sets by subtracting
       
   528  * elements in one set from another set.
       
   529  * This means we must build up the definition of
       
   530  * common_character_representation using only set addition,
       
   531  * which leaves us with the only choice of defining the
       
   532  * characters non-portably...
       
   533  */
       
   534 common_character_representation		[\x20\x21\x23\x25\x26\x28-\x7E]|{esc_char}
       
   535 double_byte_character_representation 	$\"|'|{double_byte_char}|{common_character_representation}
       
   536 single_byte_character_representation 	$'|\"|{single_byte_char}|{common_character_representation}
       
   537 
       
   538 
       
   539 double_byte_character_string	\"({double_byte_character_representation}*)\"
       
   540 single_byte_character_string	'({single_byte_character_representation}*)'
       
   541 
       
   542 
       
   543 /************************/
       
   544 /* B 1.2.3.1 - Duration */
       
   545 /************************/
       
   546 fixed_point		{integer}\.{integer}
       
   547 
       
   548 fixed_point_d		{fixed_point}d
       
   549 integer_d		{integer}d
       
   550 
       
   551 fixed_point_h		{fixed_point}h
       
   552 integer_h		{integer}h
       
   553 
       
   554 fixed_point_m		{fixed_point}m
       
   555 integer_m		{integer}m
       
   556 
       
   557 fixed_point_s		{fixed_point}s
       
   558 integer_s		{integer}s
       
   559 
       
   560 fixed_point_ms		{fixed_point}ms
       
   561 integer_ms		{integer}ms
       
   562 
       
   563 
       
   564 /********************************************/
       
   565 /* B.1.4.1   Directly Represented Variables */
       
   566 /********************************************/
       
   567 /* The correct definition, if the standard were to be followed... */
       
   568 /*
       
   569 location_prefix		[IQM]
       
   570 size_prefix		[XBWDL]
       
   571 direct_variable		%{location_prefix}({size_prefix}?){integer}((.{integer})*)
       
   572 */
       
   573 
       
   574 /* For the MatPLC, we will accept %<identifier>
       
   575  * as a direct variable, this being mapped onto the MatPLC point
       
   576  * named <identifier>
       
   577  */
       
   578 /* TODO: we should not restrict it to only the accepted syntax
       
   579  * of <identifier> as specified by the standard. MatPLC point names
       
   580  * have a more permissive syntax.
       
   581  *
       
   582  * e.g. "P__234"
       
   583  *    Is a valid MatPLC point name, but not a valid <identifier> !!
       
   584  *    The same happens with names such as "333", "349+23", etc...
       
   585  *    How can we handle these more expressive names in our case?
       
   586  *    Remember that some direct variable may remain anonymous, with
       
   587  *    declarations such as:
       
   588  *    VAR
       
   589  *       AT %I3 : BYTE := 255;
       
   590  *    END_VAR
       
   591  *    in which case we are currently using "I3" as the variable
       
   592  *    name. For the other names, this would create havoc!!!
       
   593  */
       
   594 direct_variable		%{identifier}
       
   595 
       
   596 /******************************************/
       
   597 /* B 1.4.3 - Declaration & Initialisation */
       
   598 /******************************************/
       
   599 incompl_location	%[IQM]\*
       
   600 
       
   601 
       
   602 
       
   603 
       
   604 %%
       
   605 	/* fprintf(stderr, "flex: state %d\n", YY_START); */
       
   606 
       
   607 	/*****************************************************/
       
   608 	/*****************************************************/
       
   609 	/*****************************************************/
       
   610 	/*****                                           *****/
       
   611 	/*****                                           *****/
       
   612 	/*****   F I R S T    T H I N G S    F I R S T   *****/
       
   613 	/*****                                           *****/
       
   614 	/*****                                           *****/
       
   615 	/*****************************************************/
       
   616 	/*****************************************************/
       
   617 	/*****************************************************/
       
   618 
       
   619 	/*********************************/
       
   620 	/* Handle the pragmas!     */
       
   621 	/*********************************/
       
   622 
       
   623 	/* We start off by searching for the pragmas we handle in the lexical parser. */
       
       	/* The include pragma is not consumed here: unput_text(0) hands the whole
       
       	 * match back to the input and the include_beg state is pushed, so the
       
       	 * same text is scanned again by the <include_beg> rule.
       
       	 */
       
   624 <INITIAL>{file_include_pragma}	unput_text(0); yy_push_state(include_beg);
       
   625 
       
   626 	/* Any other pragma we find, we just pass it up to the syntax parser...   */
       
   627 	/* Note that the <body> state is exclusive, so we have to include it here too. */
       
   628 {pragma}	{/* return the pragma without the enclosing '{' and '}' */
       
       		 /* The text is cut in place at index strlen(yytext)-2 and the copy
       
       		  * handed to the parser starts at yytext+1 (first character skipped).
       
       		  * NOTE(review): strlen-2 is the character *before* the last one, so
       
       		  * if the match ends in a single closing brace the last character of
       
       		  * the pragma text is dropped too - confirm against the definition
       
       		  * of pragma (not visible in this part of the file).
       
       		  */
       
   629 		 yytext[strlen(yytext)-2] = '\0';
       
   630 		 yylval.ID=strdup(yytext+1);
       
   631 		 return pragma_token;
       
   632 		}
       
   633 <body>{pragma} {/* return the pragma without the enclosing '{' and '}' */
       
       		 /* Same action as the rule above, repeated for the <body> state;
       
       		  * the NOTE(review) about index strlen-2 applies here as well.
       
       		  */
       
   634 		 yytext[strlen(yytext)-2] = '\0';
       
   635 		 yylval.ID=strdup(yytext+1);
       
   636 		 return pragma_token;
       
   637 		}
       
   638 
       
   639 
       
   640 	/*********************************/
       
   641 	/* Handle the file includes!     */
       
   642 	/*********************************/
       
       	/* Once the opening part of the include pragma has been matched, switch
       
       	 * to the include_filename state to read the file name itself.
       
       	 */
       
   643 <include_beg>{file_include_pragma_beg}	BEGIN(include_filename);
       
   644 
       
   645 <include_filename>{file_include_pragma_filename}	{
       
   646 			  /* got the include file name */
       
   647 			  int i;
       
   648 
       
       			  /* refuse to nest deeper than the include stack can hold */
       
   649 			  if (include_stack_ptr >= MAX_INCLUDE_DEPTH) {
       
   650 			    fprintf(stderr, "Includes nested too deeply\n");
       
   651 			    exit( 1 );
       
   652 			  }
       
   653 
       
       			  /* remember buffer, line number and file name of the including
       
       			   * file; the <<EOF>> rule restores them from include_stack.
       
       			   */
       
   654 			  (include_stack[include_stack_ptr]).buffer_state = YY_CURRENT_BUFFER;
       
   655 			  (include_stack[include_stack_ptr]).lineno = yylineno;
       
   656 			  (include_stack[include_stack_ptr]).filename = current_filename;
       
   657 			  include_stack_ptr++;
       
       			  /* line counting restarts for the included file; the copy of the
       
       			   * name made here is the one the <<EOF>> rule later free()s.
       
       			   */
       
   658 			  yylineno = 1;
       
   659 			  current_filename = strdup(yytext);
       
   660 
       
       			  /* try each entry of the NULL terminated INCLUDE_DIRECTORIES list
       
       			   * in turn and stop at the first one where fopen() succeeds.
       
       			   * strdup2() is defined elsewhere - presumably it returns a newly
       
       			   * allocated "directory + file name" string; TODO confirm.
       
       			   */
       
   661 			  for (i = 0, yyin = NULL; (INCLUDE_DIRECTORIES[i] != NULL) && (yyin == NULL); i++) {
       
   662 			    char *full_name = strdup2(INCLUDE_DIRECTORIES[i], yytext);
       
   663 			    if (full_name == NULL) {
       
   664 			      fprintf(stderr, "Out of memory!\n");
       
   665 			      exit( 1 );
       
   666 			    }
       
   667 			    yyin = fopen(full_name, "r");
       
   668 			    free(full_name);
       
   669 			  }
       
   670 
       
   671 			  if (!yyin) {
       
   672 			    fprintf(stderr, "Error opening included file %s\n", yytext);
       
   673 			    exit( 1 );
       
   674 			  }
       
   675 
       
   676 			  /* switch input buffer to new file... */
       
       			  /* NOTE(review): no fclose() for this stream is visible in the
       
       			   * <<EOF>> rule, which only calls yy_delete_buffer() - confirm the
       
       			   * FILE is released somewhere.
       
       			   */
       
   677 			  yy_switch_to_buffer(yy_create_buffer(yyin, YY_BUF_SIZE));
       
   678 			  /* switch to whatever state was active before the include file */
       
   679 			  yy_pop_state();
       
   680 			  /* now process the new file... */
       
   681 			}
       
   682 
       
   683 
       
       	/* End of the current input: either the outermost file is finished and
       
       	 * scanning stops, or an included file has ended and scanning resumes in
       
       	 * the file that included it, using the entry saved on include_stack.
       
       	 */
       
   684 <<EOF>>			{
       
   685 			  if (--include_stack_ptr < 0) {
       
       			    /* nothing left on the include stack */
       
   686 			    yyterminate();
       
   687 			  } else {
       
       			    /* drop the buffer of the included file and go back to the
       
       			     * buffer and line number saved when the include was entered
       
       			     */
       
   688 			    yy_delete_buffer(YY_CURRENT_BUFFER);
       
   689 			    yy_switch_to_buffer((include_stack[include_stack_ptr]).buffer_state);
       
   690 			    yylineno = include_stack[include_stack_ptr].lineno;
       
   691 			      /* removing constness of char *. This is safe actually,
       
   692 			       * since the only real const char * that is stored on the stack is
       
   693 			       * the first one (i.e. the one that gets stored in include_stack[0]),
       
   694 			       * which is never free'd!
       
   695 			       */
       
   696 			    free((char *)current_filename);
       
   697 			    current_filename = include_stack[include_stack_ptr].filename;
       
       			    /* presumably the tail of the include pragma is still unread
       
       			     * in the including file; it is matched in the include_end state
       
       			     */
       
   698 			    yy_push_state(include_end);
       
   699 			  }
       
   700 			}
       
   701 
       
       	/* closing part of the include pragma: return to the state pushed before */
       
   702 <include_end>{file_include_pragma_end}	yy_pop_state();
       
   703 
       
   704 
       
   705 	/*********************************/
       
   706 	/* Handle all the state changes! */
       
   707 	/*********************************/
       
   708 
       
   709 	/* INITIAL -> decl */
       
   710 <INITIAL>{
       
   711 	/* NOTE: how about functions that do not declare variables, and go directly to the body???
       
   712 	 *      - According to Section 2.5.1.3 (Function Declaration), item 2 in the list, a FUNCTION
       
   713 	 *        must have at least one input argument, so a correct declaration will have at least
       
   714 	 *        one VAR_INPUT ... END_VAR construct!
       
   715 	 *      - According to Section 2.5.2.2 (Function Block Declaration), a FUNCTION_BLOCK
       
   716 	 *        must have at least one input argument, so a correct declaration will have at least
       
   717 	 *        one VAR_INPUT ... END_VAR construct!
       
   718 	 *      - According to Section 2.5.3 (Programs), a PROGRAM must have at least one input
       
   719 	 *        argument, so a correct declaration will have at least one VAR_INPUT ... END_VAR
       
   720 	 *        construct!
       
   721 	 *
       
   722 	 *       All the above means that we needn't worry about PROGRAMs, FUNCTIONs or
       
   723 	 *       FUNCTION_BLOCKs that do not have at least one END_VAR before the body.
       
   724 	 *       If the code has an error, and no END_VAR before the body, we will simply
       
   725 	 *       continue in the <decl> state, until the end of the FUNCTION, FUNCTION_BLOCK
       
   726 	 *       or PROGRAM.
       
   727 	 */
       
   728 FUNCTION				BEGIN(decl); return FUNCTION;
       
   729 FUNCTION_BLOCK				BEGIN(decl); return FUNCTION_BLOCK;
       
   730 PROGRAM					BEGIN(decl); return PROGRAM;
       
   731 CONFIGURATION				BEGIN(config); return CONFIGURATION;
       
   732 }
       
   733 
       
   734 	/* INITIAL -> body */
       
   735 	/* required if the function, program, etc.. has no VAR block! */
       
       	/* NOTE(review): FUNCTION, FUNCTION_BLOCK and PROGRAM are already matched
       
       	 * by the <INITIAL> rules listed just above. Among matches of equal
       
       	 * length flex picks the rule listed first, so the three rules below
       
       	 * look unreachable (the scanner always enters <decl>) - confirm.
       
       	 */
       
   736 <INITIAL>{
       
   737 FUNCTION	BEGIN(body); return FUNCTION;
       
   738 FUNCTION_BLOCK	BEGIN(body); return FUNCTION_BLOCK;
       
   739 PROGRAM		BEGIN(body); return PROGRAM;
       
   740 }
       
   741 
       
   742 	/* decl -> body */
       
       	/* Both rules read past END_VAR but return only the keyword itself:
       
       	 * unput_text(strlen("END_VAR")) hands everything after the keyword
       
       	 * back to the input. When the text following the whitespace starts
       
       	 * with VAR (first, longer pattern) the scanner stays in <decl>;
       
       	 * otherwise it switches to <body>.
       
       	 */
       
   743 <decl>{
       
   744 END_VAR{st_whitespace}VAR			unput_text(strlen("END_VAR")); return END_VAR;
       
   745 END_VAR{st_whitespace}				unput_text(strlen("END_VAR")); BEGIN(body); return END_VAR;
       
   746 }
       
   747 
       
   748 	/* body -> (il | st) */
       
       	/* The <body> state only decides which language the body is written in.
       
       	 * Every rule pushes its whole match back with unput_text(0) and returns
       
       	 * no token, so the text is scanned again by the <st> or <il> rules.
       
       	 */
       
   749 <body>{
       
       	/* an assignment or an array subscript at the start: treat the body as ST */
       
   750 {qualified_identifier}{st_whitespace}":="	unput_text(0); BEGIN(st);
       
   751 {qualified_identifier}"["			unput_text(0); BEGIN(st);
       
   752 
       
       	/* words that start an ST statement */
       
   753 RETURN						unput_text(0); BEGIN(st);
       
   754 IF						unput_text(0); BEGIN(st);
       
   755 CASE						unput_text(0); BEGIN(st);
       
   756 FOR						unput_text(0); BEGIN(st);
       
   757 WHILE						unput_text(0); BEGIN(st);
       
   758 REPEAT						unput_text(0); BEGIN(st);
       
   759 EXIT						unput_text(0); BEGIN(st);
       
   760 
       
   761 
       
       	/* any other identifier: ST if it names a previously declared function
       
       	 * block (as reported by get_identifier_token), IL otherwise
       
       	 */
       
   762 {identifier}	{int token = get_identifier_token(yytext);
       
   763 		 if (token == prev_declared_fb_name_token) {
       
   764 		   /* the code has a call to a function block */
       
   765 		   BEGIN(st);
       
   766 		 } else {
       
   767 		   BEGIN(il);
       
   768 		 }
       
   769 		 unput_text(0);
       
   770 		}
       
       	/* any other single character: treat the body as IL */
       
   771 .		unput_text(0); BEGIN(il);
       
   772 
       
   773 }	/* end of body lexical parser */
       
   774 
       
   775 	/* (decl | body | il | st) -> INITIAL */
       
       	/* NOTE(review): END_FUNCTION, END_FUNCTION_BLOCK, END_PROGRAM and
       
       	 * END_CONFIGURATION are listed again among the keyword rules further
       
       	 * down with a plain 'return' action. Being listed later, those copies
       
       	 * should never be chosen over the rules here - confirm.
       
       	 */
       
   776 END_FUNCTION		BEGIN(INITIAL); return END_FUNCTION;
       
   777 END_FUNCTION_BLOCK	BEGIN(INITIAL); return END_FUNCTION_BLOCK;
       
   778 END_PROGRAM		BEGIN(INITIAL); return END_PROGRAM;
       
   779 
       
   780 	/* config -> INITIAL */
       
   781 END_CONFIGURATION	BEGIN(INITIAL); return END_CONFIGURATION;
       
   782 
       
   783 
       
   784 
       
   785 	/***************************************/
       
   786 	/* Next is to remove all whitespace    */
       
   787 	/***************************************/
       
   788 	/* NOTE: pragmas are handled right at the beginning... */
       
   789 
       
   790 <INITIAL,config,decl,st,body>{st_whitespace_no_pragma}	/* Eat any whitespace */
       
   791 <il,body>{il_whitespace_no_pragma}		/* Eat any whitespace */
       
   792 
       
   793 
       
   794 	/*****************************************/
       
   795 	/* B.1.1 Letters, digits and identifiers */
       
   796 	/*****************************************/
       
   797 	/* NOTE: 'R1', 'IN', etc... are IL operators, and therefore tokens
       
   798 	 *       On the other hand, the spec does not define them as keywords,
       
   799 	 *       which means they may be re-used for variable names, etc...!
       
   800 	 *       The syntax parser already caters for the possibility of these
       
   801 	 *       tokens being used for variable names in their declarations.
       
   802 	 *       When they are declared, they will be added to the variable symbol table!
       
   803 	 *       Further appearances of these tokens must no longer be parsed
       
   804 	 *       as R1_tokens etc..., but rather as variable_name_tokens!
       
   805 	 *
       
   806 	 *       That is why the first thing we do with identifiers, even before
       
   807 	 *       checking whether they may be a 'keyword', is to check whether
       
   808 	 *       they have been previously declared as a variable name,
       
   809 	 *
       
   810 	 *      TODO: how about function names?
       
   811 	 */
       
   812 {identifier} 	{int token = get_identifier_token(yytext);
       
       		 /* Only names that get_identifier_token() reports as a previously
       
       		  * declared variable or function block are returned from this rule,
       
       		  * with a copy of the text in yylval.ID.
       
       		  */
       
   813 		 if ((token == prev_declared_variable_name_token) ||
       
   814 		     (token == prev_declared_fb_name_token)) {
       
   815 		 /*
       
   816 		 if (token != identifier_token)
       
   817 		 */
       
   818 		 /* NOTE: if we use the above line, then 'MOD' et al must be removed
       
   819 		  * from the library_symbol_table as a default function name!
       
   820 		  */
       
   821 		   yylval.ID=strdup(yytext);
       
   822 		   return token;
       
   823 		 }
       
   824 		 /* otherwise, leave it for the other lexical parser rules... */
       
       		 /* (REJECT makes flex continue with the next best matching rule,
       
       		  * i.e. the keyword rules and the general identifier rule below)
       
       		  */
       
   825 		 REJECT;
       
   826 		}
       
   827 
       
   828 
       
   829 	/******************************************************/
       
   830 	/******************************************************/
       
   831 	/******************************************************/
       
   832 	/*****                                            *****/
       
   833 	/*****                                            *****/
       
   834 	/*****   N O W    D O   T H E   K E Y W O R D S   *****/
       
   835 	/*****                                            *****/
       
   836 	/*****                                            *****/
       
   837 	/******************************************************/
       
   838 	/******************************************************/
       
   839 	/******************************************************/
       
   840 
       
   841 
       
   842 EN	return EN;
       
   843 ENO	return ENO;
       
   844 
       
   845 
       
   846 	/******************************/
       
   847 	/* B 1.2.1 - Numeric Literals */
       
   848 	/******************************/
       
   849 TRUE		return TRUE;
       
   850 BOOL#1  	return TRUE;
       
   851 FALSE		return FALSE;
       
   852 BOOL#0  	return FALSE;
       
   853 
       
   854 
       
   855 	/************************/
       
   856 	/* B 1.2.3.1 - Duration */
       
   857 	/************************/
       
   858 t#		return T_SHARP;
       
   859 T#		return T_SHARP;
       
   860 TIME		return TIME;
       
   861 
       
   862 
       
   863 	/************************************/
       
   864 	/* B 1.2.3.2 - Time of day and Date */
       
   865 	/************************************/
       
   866 TIME_OF_DAY	return TIME_OF_DAY;
       
   867 TOD		return TIME_OF_DAY;
       
   868 DATE		return DATE;
       
   869 d#		return D_SHARP;
       
   870 D#		return D_SHARP;
       
   871 DATE_AND_TIME	return DATE_AND_TIME;
       
   872 DT		return DATE_AND_TIME;
       
   873 
       
   874 
       
   875 	/***********************************/
       
   876 	/* B 1.3.1 - Elementary Data Types */
       
   877 	/***********************************/
       
   878 BYTE		return BYTE;
       
   879 WORD		return WORD;
       
   880 DWORD		return DWORD;
       
   881 LWORD		return LWORD;
       
   882 
       
   883 
       
   884 	/********************************/
       
   885 	/* B 1.3.2 - Generic data types */
       
   886 	/********************************/
       
   887 	/* Strangely, the following symbols do not seem to be required! */
       
   888 	/* But we include them so they become reserved words, and do not
       
   889 	 * get passed up to bison as an identifier...
       
   890 	 */
       
   891 ANY		return ANY;
       
   892 ANY_DERIVED	return ANY_DERIVED;
       
   893 ANY_ELEMENTARY	return ANY_ELEMENTARY;
       
   894 ANY_MAGNITUDE	return ANY_MAGNITUDE;
       
   895 ANY_NUM		return ANY_NUM;
       
   896 ANY_REAL	return ANY_REAL;
       
   897 ANY_INT		return ANY_INT;
       
   898 ANY_BIT		return ANY_BIT;
       
   899 ANY_STRING	return ANY_STRING;
       
   900 ANY_DATE	return ANY_DATE;
       
   901 
       
   902 
       
   903 	/********************************/
       
   904 	/* B 1.3.3 - Derived data types */
       
   905 	/********************************/
       
   906 ":="		return ASSIGN;
       
   907 ".."		return DOTDOT;
       
   908 TYPE		return TYPE;
       
   909 END_TYPE	return END_TYPE;
       
   910 ARRAY		return ARRAY;
       
   911 OF		return OF;
       
   912 STRUCT		return STRUCT;
       
   913 END_STRUCT	return END_STRUCT;
       
   914 
       
   915 
       
   916 	/*********************/
       
   917 	/* B 1.4 - Variables */
       
   918 	/*********************/
       
   919 REAL		return REAL;
       
   920 LREAL		return LREAL;
       
   921 
       
   922 SINT		return SINT;
       
   923 INT		return INT;
       
   924 DINT		return DINT;
       
   925 LINT		return LINT;
       
   926 
       
   927 USINT		return USINT;
       
   928 UINT		return UINT;
       
   929 UDINT		return UDINT;
       
   930 ULINT		return ULINT;
       
   931 
       
   932 
       
   933 WSTRING		return WSTRING;
       
   934 STRING		return STRING;
       
   935 BOOL		return BOOL;
       
   936 
       
   937 TIME		return TIME;
       
   938 DATE		return DATE;
       
   939 DT		return DT;
       
   940 TOD		return TOD;
       
   941 DATE_AND_TIME	return DATE_AND_TIME;
       
   942 TIME_OF_DAY	return TIME_OF_DAY;
       
   943 
       
   944 
       
   945 	/******************************************/
       
   946 	/* B 1.4.3 - Declaration & Initialisation */
       
   947 	/******************************************/
       
   948 VAR_INPUT	return VAR_INPUT;
       
   949 VAR_OUTPUT	return VAR_OUTPUT;
       
   950 VAR_IN_OUT	return VAR_IN_OUT;
       
   951 VAR_EXTERNAL	return VAR_EXTERNAL;
       
   952 VAR_GLOBAL	return VAR_GLOBAL;
       
   953 END_VAR		return END_VAR;
       
   954 RETAIN		return RETAIN;
       
   955 NON_RETAIN	return NON_RETAIN;
       
   956 R_EDGE		return R_EDGE;
       
   957 F_EDGE		return F_EDGE;
       
   958 AT		return AT;
       
   959 
       
   960 
       
   961 	/***********************/
       
   962 	/* B 1.5.1 - Functions */
       
   963 	/***********************/
       
   964 FUNCTION	return FUNCTION;
       
   965 END_FUNCTION	return END_FUNCTION;
       
   966 VAR		return VAR;
       
   967 CONSTANT	return CONSTANT;
       
   968 
       
   969 
       
   970 	/*****************************/
       
   971 	/* B 1.5.2 - Function Blocks */
       
   972 	/*****************************/
       
   973 FUNCTION_BLOCK		return FUNCTION_BLOCK;
       
   974 END_FUNCTION_BLOCK	return END_FUNCTION_BLOCK;
       
   975 VAR_TEMP		return VAR_TEMP;
       
   976 VAR			return VAR;
       
   977 NON_RETAIN		return NON_RETAIN;
       
   978 END_VAR			return END_VAR;
       
   979 
       
   980 
       
   981 	/**********************/
       
   982 	/* B 1.5.3 - Programs */
       
   983 	/**********************/
       
   984 PROGRAM		return PROGRAM;
       
   985 END_PROGRAM	return END_PROGRAM;
       
   986 
       
   987 
       
   988 	/********************************************/
       
   989 	/* B 1.6 Sequential Function Chart elements */
       
   990 	/********************************************/
       
   991 	/* NOTE: the following identifiers/tokens clash with the R and S IL operators, as well
       
   992 	 * as other identifiers that may be used as variable names inside IL and ST programs.
       
   993 	 * They will have to be handled when we include parsing of SFC... For now, simply
       
   994 	 * ignore them!
       
   995 	 */
       
   996 	 /*
       
   997 ACTION		return ACTION;
       
   998 END_ACTION	return END_ACTION;
       
   999 
       
  1000 TRANSITION	return TRANSITION;
       
  1001 END_TRANSITION	return END_TRANSITION;
       
  1002 FROM		return FROM;
       
  1003 TO		return TO;
       
  1004 PRIORITY	return PRIORITY;
       
  1005 
       
  1006 INITIAL_STEP	return INITIAL_STEP;
       
  1007 STEP		return STEP;
       
  1008 END_STEP	return END_STEP;
       
  1009 
       
  1010 L		return L;
       
  1011 D		return D;
       
  1012 SD		return SD;
       
  1013 DS		return DS;
       
  1014 SL		return SL;
       
  1015 
       
  1016 N		return N;
       
  1017 P		return P;
       
  1018 
       
  1019 R		return R;
       
  1020 S		return S;
       
  1021 	*/
       
  1022 
       
  1023 
       
  1024 	/********************************/
       
  1025 	/* B 1.7 Configuration elements */
       
  1026 	/********************************/
       
  1027 CONFIGURATION		return CONFIGURATION;
       
  1028 END_CONFIGURATION	return END_CONFIGURATION;
       
  1029 TASK			return TASK;
       
  1030 RESOURCE		return RESOURCE;
       
  1031 ON			return ON;
       
  1032 END_RESOURCE		return END_RESOURCE;
       
  1033 VAR_CONFIG		return VAR_CONFIG;
       
  1034 VAR_ACCESS		return VAR_ACCESS;
       
  1035 END_VAR			return END_VAR;
       
  1036 WITH			return WITH;
       
  1037 PROGRAM			return PROGRAM;
       
  1038 RETAIN			return RETAIN;
       
  1039 NON_RETAIN		return NON_RETAIN;
       
  1040 PRIORITY		return PRIORITY;
       
  1041 SINGLE			return SINGLE;
       
  1042 INTERVAL		return INTERVAL;
       
  1043 READ_WRITE		return READ_WRITE;
       
  1044 READ_ONLY		return READ_ONLY;
       
  1045 
       
  1046 
       
  1047 	/***********************************/
       
  1048 	/* B 2.1 Instructions and Operands */
       
  1049 	/***********************************/
       
  1050 <il>\n		return EOL;
       
  1051 
       
  1052 
       
  1053 	/*******************/
       
  1054 	/* B 2.2 Operators */
       
  1055 	/*******************/
       
  1056 	/* NOTE: we can't have flex return the same token for
       
  1057 	 *       ANDN and &N, neither for AND and &, since
       
  1058 	 *       AND and ANDN are considered valid variable,
       
  1059 	 *       function or functionblock type names!
       
  1060 	 *       This means that the parser may decide that the
       
  1061 	 *       AND or ANDN strings found in the source code
       
  1062 	 *       are being used as variable names
       
  1063 	 *       and not as operators, and will therefore transform
       
  1064 	 *       these tokens into identifier tokens!
       
  1065 	 *       We can't have the parser thinking that the source
       
  1066 	 *       code contained the string AND (which may be interpreted
       
  1067 	 *       as a variable name) when in reality the source code
       
  1068 	 *       merely contained the character &, so we use two
       
  1069 	 *       different tokens for & and AND (and similarly
       
  1070 	 *       ANDN and &N)!
       
  1071 	 */
       
  1072 LD		return LD;
       
  1073 LDN		return LDN;
       
  1074 ST		return ST;
       
  1075 STN		return STN;
       
  1076 NOT		return NOT;
       
  1077 S		return S;
       
  1078 R		return R;
       
  1079 S1		return S1;
       
  1080 R1		return R1;
       
  1081 CLK		return CLK;
       
  1082 CU		return CU;
       
  1083 CD		return CD;
       
  1084 PV		return PV;
       
  1085 IN		return IN;
       
  1086 PT		return PT;
       
  1087 AND		return AND;
       
  1088 &		return AND2;
       
  1089 OR		return OR;
       
  1090 XOR		return XOR;
       
  1091 ANDN		return ANDN;
       
  1092 &N		return ANDN2;
       
  1093 ORN		return ORN;
       
  1094 XORN		return XORN;
       
  1095 ADD		return ADD;
       
  1096 SUB		return SUB;
       
  1097 MUL		return MUL;
       
  1098 DIV		return DIV;
       
  1099 MOD		return MOD;
       
  1100 GT		return GT;
       
  1101 GE		return GE;
       
  1102 EQ		return EQ;
       
  1103 LT		return LT;
       
  1104 LE		return LE;
       
  1105 NE		return NE;
       
  1106 CAL		return CAL;
       
  1107 CALC		return CALC;
       
  1108 CALCN		return CALCN;
       
  1109 RET		return RET;
       
  1110 RETC		return RETC;
       
  1111 RETCN		return RETCN;
       
  1112 JMP		return JMP;
       
  1113 JMPC		return JMPC;
       
  1114 JMPCN		return JMPCN;
       
  1115 
       
  1116 
       
  1117 	/***********************/
       
  1118 	/* B 3.1 - Expressions */
       
  1119 	/***********************/
       
  1120 "**"		return OPER_EXP;
       
  1121 "<>"		return OPER_NE;
       
  1122 ">="		return OPER_GE;
       
  1123 "<="		return OPER_LE;
       
  1124 AND		return AND;
       
  1125 XOR		return XOR;
       
  1126 OR		return OR;
       
  1127 NOT		return NOT;
       
  1128 MOD		return MOD;
       
  1129 
       
  1130 
       
  1131 	/*****************************************/
       
  1132 	/* B 3.2.2 Subprogram Control Statements */
       
  1133 	/*****************************************/
       
  1134 :=		return ASSIGN;
       
  1135 =>		return SENDTO;
       
  1136 RETURN		return RETURN;
       
  1137 
       
  1138 
       
  1139 	/********************************/
       
  1140 	/* B 3.2.3 Selection Statements */
       
  1141 	/********************************/
       
  1142 IF		return IF;
       
  1143 THEN		return THEN;
       
  1144 ELSIF		return ELSIF;
       
  1145 ELSE		return ELSE;
       
  1146 END_IF		return END_IF;
       
  1147 
       
  1148 CASE		return CASE;
       
  1149 OF		return OF;
       
  1150 ELSE		return ELSE;
       
  1151 END_CASE	return END_CASE;
       
  1152 
       
  1153 
       
  1154 	/********************************/
       
  1155 	/* B 3.2.4 Iteration Statements */
       
  1156 	/********************************/
       
  1157 FOR		return FOR;
       
  1158 TO		return TO;
       
  1159 BY		return BY;
       
  1160 DO		return DO;
       
  1161 END_FOR		return END_FOR;
       
  1162 
       
  1163 WHILE		return WHILE;
       
  1164 DO		return DO;
       
  1165 END_WHILE	return END_WHILE;
       
  1166 
       
  1167 REPEAT		return REPEAT;
       
  1168 UNTIL		return UNTIL;
       
  1169 END_REPEAT	return END_REPEAT;
       
  1170 
       
  1171 EXIT		return EXIT;
       
  1172 
       
  1173 
       
  1174 
       
  1175 
       
  1176 
       
  1177 	/********************************************************/
       
  1178 	/********************************************************/
       
  1179 	/********************************************************/
       
  1180 	/*****                                              *****/
       
  1181 	/*****                                              *****/
       
  1182 	/*****  N O W    W O R K    W I T H    V A L U E S  *****/
       
  1183 	/*****                                              *****/
       
  1184 	/*****                                              *****/
       
  1185 	/********************************************************/
       
  1186 	/********************************************************/
       
  1187 	/********************************************************/
       
  1188 
       
  1189 
       
  1190 	/********************************************/
       
  1191 	/* B.1.4.1   Directly Represented Variables */
       
  1192 	/********************************************/
       
  1193 {direct_variable}	{yylval.ID=strdup(yytext); return direct_variable_token;}
       
  1194 
       
  1195 
       
  1196 	/******************************************/
       
  1197 	/* B 1.4.3 - Declaration & Initialisation */
       
  1198 	/******************************************/
       
  1199 {incompl_location}	{yylval.ID=strdup(yytext); return incompl_location_token;}
       
  1200 
       
  1201 
       
  1202 	/************************/
       
  1203 	/* B 1.2.3.1 - Duration */
       
  1204 	/************************/
       
  1205 {fixed_point}		{yylval.ID=strdup(yytext); return fixed_point_token;}
       
  1206 
       
       	/* Each rule below copies the matched text into yylval.ID and then cuts
       
       	 * the trailing character(s) off the copy: one character for the
       
       	 * _d/_h/_m/_s rules, two for the _ms rules (e.g. the 's' and 'ms' that
       
       	 * end the integer_s and integer_ms definitions), so the parser
       
       	 * receives the number only.
       
       	 * NOTE(review): the result of strdup() is indexed without a NULL check.
       
       	 */
       
  1207 {fixed_point_d}		{yylval.ID=strdup(yytext); yylval.ID[yyleng-1] = '\0'; return fixed_point_d_token;}
       
  1208 {integer_d}		{yylval.ID=strdup(yytext); yylval.ID[yyleng-1] = '\0'; return integer_d_token;}
       
  1209 
       
  1210 {fixed_point_h}		{yylval.ID=strdup(yytext); yylval.ID[yyleng-1] = '\0'; return fixed_point_h_token;}
       
  1211 {integer_h}		{yylval.ID=strdup(yytext); yylval.ID[yyleng-1] = '\0'; return integer_h_token;}
       
  1212 
       
  1213 {fixed_point_m}		{yylval.ID=strdup(yytext); yylval.ID[yyleng-1] = '\0'; return fixed_point_m_token;}
       
  1214 {integer_m}		{yylval.ID=strdup(yytext); yylval.ID[yyleng-1] = '\0'; return integer_m_token;}
       
  1215 
       
  1216 {fixed_point_s}		{yylval.ID=strdup(yytext); yylval.ID[yyleng-1] = '\0'; return fixed_point_s_token;}
       
  1217 {integer_s}		{yylval.ID=strdup(yytext); yylval.ID[yyleng-1] = '\0'; return integer_s_token;}
       
  1218 
       
  1219 {fixed_point_ms}	{yylval.ID=strdup(yytext); yylval.ID[yyleng-2] = '\0'; return fixed_point_ms_token;}
       
  1220 {integer_ms}		{yylval.ID=strdup(yytext); yylval.ID[yyleng-2] = '\0'; return integer_ms_token;}
       
  1221 
       
  1222 
       
  1223 	/*******************************/
       
  1224 	/* B.1.2.2   Character Strings */
       
  1225 	/*******************************/
       
  1226 {double_byte_character_string} {yylval.ID=strdup(yytext); return double_byte_character_string_token;}
       
  1227 {single_byte_character_string} {yylval.ID=strdup(yytext); return single_byte_character_string_token;}
       
  1228 
       
  1229 
       
  1230 	/******************************/
       
  1231 	/* B.1.2.1   Numeric literals */
       
  1232 	/******************************/
       
  1233 {integer}		{yylval.ID=strdup(yytext); return integer_token;}
       
  1234 {real}			{yylval.ID=strdup(yytext); return real_token;}
       
  1235 {binary_integer}	{yylval.ID=strdup(yytext); return binary_integer_token;}
       
  1236 {octal_integer} 	{yylval.ID=strdup(yytext); return octal_integer_token;}
       
  1237 {hex_integer} 		{yylval.ID=strdup(yytext); return hex_integer_token;}
       
  1238 
       
  1239 
       
  1240 	/*****************************************/
       
  1241 	/* B.1.1 Letters, digits and identifiers */
       
  1242 	/*****************************************/
       
       	/* An identifier followed by optional whitespace and "=>" is returned as
       
       	 * sendto_identifier_token. The part after '/' is trailing context: it
       
       	 * must be present for the rule to match but is not part of yytext, so
       
       	 * the "=>" itself is still scanned afterwards.
       
       	 */
       
  1243 <st>{identifier}/({st_whitespace})"=>"	{yylval.ID=strdup(yytext); return sendto_identifier_token;}
       
  1244 <il>{identifier}/({il_whitespace})"=>"	{yylval.ID=strdup(yytext); return sendto_identifier_token;}
       
       	/* Every other identifier: the token type is whatever
       
       	 * get_identifier_token() returns for the text.
       
       	 */
       
  1245 {identifier} 				{yylval.ID=strdup(yytext);
       
  1246 					 /*printf("returning identifier...: %s, %d\n", yytext, get_identifier_token(yytext));*/
       
  1247 					 return get_identifier_token(yytext);}
       
  1248 
       
  1249 
       
  1250 
       
  1251 
       
  1252 
       
  1253 
       
  1254 	/************************************************/
       
  1255 	/************************************************/
       
  1256 	/************************************************/
       
  1257 	/*****                                      *****/
       
  1258 	/*****                                      *****/
       
  1259 	/*****   T H E    L E F T O V E R S . . .   *****/
       
  1260 	/*****                                      *****/
       
  1261 	/*****                                      *****/
       
  1262 	/************************************************/
       
  1263 	/************************************************/
       
  1264 	/************************************************/
       
  1265 
       
  1266 	/* do the single character tokens...
       
  1267 	 *
       
  1268 	 *  e.g.:  ':'  '('  ')'  '+'  '*'  ...
       
  1269 	 */
       
  1270 .	{return yytext[0];}
       
  1271 
       
  1272 
       
  1273 %%
       
  1274 
       
  1275 
       
  1276 
       
  1277 
       
  1278 
       
  1279 /***********************************/
       
  1280 /* Utility function definitions... */
       
  1281 /***********************************/
       
  1282 
       
  1283 /* Dump the chain of files currently on the include stack to stderr.
       
        *
       
        * Writes the text "in file " once when the stack holds at least one
       
        * entry, then one "included from file <name>:<line>" line per entry,
       
        * starting with the most recently pushed one.
       
        */
       
  1284 void print_include_stack(void) {
       
  1285   int level = include_stack_ptr - 1;  /* index of the newest entry, -1 if none */
       
  1286 
       
  1287   if (level >= 0)
       
  1288     fprintf (stderr, "in file ");
       
  1289   while (level >= 0) {
       
  1290     fprintf (stderr, "included from file %s:%d\n", include_stack[level].filename, include_stack[level].lineno);
       
           level--;
       
         }
       
  1291 }
       
  1292 
       
  1293 
       
  1294 /* Return all the text in the current token back to the input stream, except the first n chars.
       
        *
       
        * n: number of leading characters of yytext that stay consumed;
       
        *    unput_text(0) hands the whole token back.
       
        *
       
        * yylineno is decremented once for every newline that is handed back.
       
        */
       
  1295 void unput_text(unsigned int n) {
       
  1296   /* it seems that flex has a bug in that it will not correctly count the line numbers
       
  1297    * if we return newlines back to the input stream. These newlines will be re-counted
       
  1298    * a second time when they are processed again by flex.
       
  1299    * We therefore determine how many newlines are in the text we are returning,
       
  1300    * and decrement the line counter accordingly...
       
  1301    */
       
  1302   unsigned int i;
       
         /* yytext is not modified by the loop: measure the token once, not per iteration */
       
         const size_t len = strlen(yytext);
       
  1303 
       
  1304   for (i = n; i < len; i++)
       
  1305     if (yytext[i] == '\n')
       
  1306       yylineno--;
       
  1307 
       
  1308   /* now return all the text back to the input stream... */
       
  1309   yyless(n);
       
  1310 }
       
  1311 
       
  1312 
       
  1313 /* Called by flex when it reaches the end-of-file */
       
  1314 int yywrap(void)
       
  1315 {
       
  1316   /* We reached the end of the input file... */
       
  1317 
       
  1318   /* Should we continue with another file? */
       
  1319   /* If so:
       
  1320    *   open the new file...
       
  1321    *   return 0;
       
  1322    */
       
  1323 
       
  1324   /* to we stop processing...
       
  1325    *
       
  1326    *   return 1;
       
  1327    */
       
  1328 
       
  1329 
       
  1330   return 1;  /* Stop scanning at end of input file. */
       
  1331 }
       
  1332 
       
  1333 
       
  1334 
       
  1335 /*************************************/
       
  1336 /* Include a main() function to test */
       
  1337 /* the token parsing by flex....     */
       
  1338 /*************************************/
       
  1339 #ifdef TEST_MAIN
       
  1340 
       
  1341 #include "../util/symtable.hh"
       
  1342 
       
  1343 yystype yylval;
       
  1344 YYLTYPE yylloc;
       
  1345 
       
  1346 const char *current_filename;
       
  1347 
       
  1348 int get_identifier_token(const char *identifier_str) {return 0;}
       
  1349 
       
  1350 
       
  1351 
       
  1352 
       
  1353 
       
  1354 int main(int argc, char **argv) {
       
  1355 
       
  1356   FILE *in_file;
       
  1357   int res;
       
  1358 
       
  1359   if (argc == 1) {
       
  1360     /* Work as an interactive (command line) parser... */
       
  1361     while((res=yylex()))
       
  1362       fprintf(stderr, "(line %d)token: %d\n", yylineno, res);
       
  1363   } else {
       
  1364     /* Work as non-interactive (file) parser... */
       
  1365     if((in_file = fopen(argv[1], "r")) == NULL) {
       
  1366       char *errmsg = strdup2("Error opening main file ", argv[1]);
       
  1367       perror(errmsg);
       
  1368       free(errmsg);
       
  1369       return -1;
       
  1370     }
       
  1371 
       
  1372     /* parse the file... */
       
  1373     yyin = in_file;
       
  1374     current_filename = argv[1];
       
  1375     while(1) {
       
  1376       res=yylex();
       
  1377       fprintf(stderr, "(line %d)token: %d (%s)\n", yylineno, res, yylval.ID);
       
  1378     }
       
  1379   }
       
  1380 
       
  1381   return 0;
       
  1382 
       
  1383 }
       
  1384 #endif