etisserant@0: /*
msousa@264:  *  matiec - a compiler for the programming languages defined in IEC 61131-3
msousa@264:  *
msousa@264:  *  Copyright (C) 2003-2011  Mario de Sousa (msousa@fe.up.pt)
msousa@264:  *
msousa@264:  *  This program is free software: you can redistribute it and/or modify
msousa@264:  *  it under the terms of the GNU General Public License as published by
mjsousa@866:  *  the Free Software Foundation, either version 3 of the License, or
msousa@264:  *  (at your option) any later version.
msousa@264:  *
msousa@264:  *  This program is distributed in the hope that it will be useful,
msousa@264:  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
msousa@264:  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
msousa@264:  *  GNU General Public License for more details.
msousa@264:  *
msousa@264:  *  You should have received a copy of the GNU General Public License
msousa@264:  *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
msousa@264:  *
etisserant@0:  *
etisserant@0:  * This code is made available on the understanding that it will not be
etisserant@0:  * used in safety-critical situations without a full and competent review.
etisserant@0:  */
etisserant@0: 
etisserant@0: /*
msousa@264:  * An IEC 61131-3 compiler.
etisserant@0:  *
etisserant@0:  * Based on the
etisserant@0:  * FINAL DRAFT - IEC 61131-3, 2nd Ed. (2001-12-10)
etisserant@0:  *
etisserant@0:  */
etisserant@0: 
etisserant@0: /*
etisserant@0:  * Stage 1
etisserant@0:  * =======
etisserant@0:  *
etisserant@0:  * This file contains the lexical tokens definitions, from which
etisserant@0:  * the flex utility will generate a lexical parser function.
etisserant@0:  */
etisserant@0: 
etisserant@0: 
etisserant@0: 
etisserant@0: 
etisserant@0: /*****************************/
etisserant@0: /* Lexical Parser Options... */
etisserant@0: /*****************************/
etisserant@0: 
etisserant@0: /* The lexical analyser will never work in interactive mode,
etisserant@0:  * i.e., it will only process programs saved to files, and never
etisserant@0:  * programs being written inter-actively by the user.
etisserant@0:  * This option saves the resulting parser from calling the
etisserant@0:  * isatty() function, that seems to be generating some compile
etisserant@0:  * errors under some (older?) versions of flex.
etisserant@0:  */
etisserant@0: %option never-interactive
etisserant@0: 
etisserant@0: /* Have the lexical analyser use a 'char *yytext' instead of an
etisserant@0:  * array of char 'char yytext[??]' to store the lexical token.
etisserant@0:  */
etisserant@0: %pointer
etisserant@0: 
etisserant@0: 
etisserant@0: /* Have the lexical analyser ignore the case of letters.
etisserant@0:  * This will occur for all the tokens and keywords, but
etisserant@0:  * the resulting text handed up to the syntax parser
etisserant@0:  * will not be changed, and keep the original case
etisserant@0:  * of the letters in the input file.
etisserant@0:  */
etisserant@0: %option case-insensitive
etisserant@0: 
etisserant@0: /* Have the generated lexical analyser keep track of the
etisserant@0:  * line number it is currently analysing.
etisserant@0:  * This is used to pass up to the syntax parser
etisserant@0:  * the number of the line on which the current
etisserant@0:  * token was found. It will enable the syntax parser
etisserant@0:  * to generate more informative error messages...
etisserant@0:  */
etisserant@0: %option yylineno
etisserant@0: 
etisserant@0: /* required for the use of the yy_pop_state() and
etisserant@0:  * yy_push_state() functions
etisserant@0:  */
etisserant@0: %option stack
etisserant@0: 
etisserant@0: /* The '%option stack' also requests the inclusion of 
etisserant@0:  * the yy_top_state(), however this function is not
etisserant@0:  * currently being used. This means that the compiler
etisserant@0:  * is complaining about the existence of this function.
etisserant@0:  * The following option removes the yy_top_state()
etisserant@0:  * function from the resulting c code, so the compiler 
etisserant@0:  * no longer complains.
etisserant@0:  */
etisserant@0: %option noyy_top_state
etisserant@0: 
msousa@547: /* We will be using unput() in our flex code, so we cannot set the following option!... */
msousa@547: /*
msousa@267: %option nounput
msousa@547: */
msousa@267: 
andrej@1050: /* The '%option debug' makes the generated scanner run in
andrej@1050:  * debug mode.
andrej@1050: %option debug
andrej@1050:  */
andrej@1050: 
etisserant@0: /**************************************************/
etisserant@0: /* External Variable and Function declarations... */
etisserant@0: /**************************************************/
etisserant@0: 
etisserant@0: 
etisserant@0: %{
etisserant@0: /* Define TEST_MAIN to include a main() function.
etisserant@0:  * Useful for testing the parser generated by flex.
etisserant@0:  */
etisserant@0: /*
etisserant@0: #define TEST_MAIN
etisserant@0: */
etisserant@0: /* If lexical parser is compiled by itself, we need to define the following
etisserant@0:  * constant to some string. Under normal circumstances DEFAULT_LIBDIR is set
etisserant@0:  * in the syntax parser header file...
etisserant@0:  */
etisserant@0: #ifdef TEST_MAIN
etisserant@40: #define DEFAULT_LIBDIR "just_testing"
etisserant@0: #endif
etisserant@0: 
etisserant@0: 
etisserant@0: 
etisserant@0: /* Required for strdup() */
etisserant@0: #include <string.h>
etisserant@0: 
etisserant@0: /* Required only for the declaration of abstract syntax classes
etisserant@0:  * (class symbol_c; class token_c; class list_c;)
etisserant@0:  * These will not be used in flex, but the token type union defined
Edouard@822:  * in iec_bison.hh contains pointers to these classes, so we must include
etisserant@0:  * it here.
etisserant@0:  */
etisserant@0: #include "../absyntax/absyntax.hh"
etisserant@0: 
mario@15: 
Edouard@822: /* iec_bison.hh is generated by bison.
etisserant@0:  * Contains the definition of the token constants, and the
etisserant@0:  * token value type YYSTYPE (in our case, a 'const char *')
etisserant@0:  */
Edouard@822: #include "iec_bison.hh"
mario@15: #include "stage1_2_priv.hh"
mario@15: 
etisserant@0: 
etisserant@0: /* Variable defined by the bison parser,
etisserant@0:  * where the value of the tokens will be stored
etisserant@0:  */
etisserant@0: extern YYSTYPE yylval;
etisserant@0: 
etisserant@0: /* The name of the file currently being parsed...
etisserant@0:  * Note that flex accesses and updates this global variable
msousa@757:  * appropriately whenever it comes across an {#include "<filename>"} directive...
msousa@757:  */
msousa@757: const char *current_filename = NULL;
msousa@757: 
mario@15: 
etisserant@0: 
etisserant@0: /* Variable defined by the bison parser.
etisserant@0:  * It must be initialised with the location
etisserant@0:  * of the token being parsed.
etisserant@0:  * This is only needed if we want to keep
etisserant@0:  * track of the locations, in order to give
etisserant@0:  * more meaningful error messages!
etisserant@0:  */
conti@415: /*
conti@415:  *extern YYLTYPE yylloc;
conti@415:  */
/* Have flex obtain its input by calling GetNextChar(buf, max_size).
 * A result of zero or less is handed to flex as YY_NULL,
 * which flex treats as the end of the input.
 */
lbessard@136: #define YY_INPUT(buf,result,max_size)  {\
lbessard@136:     result = GetNextChar(buf, max_size);\
lbessard@136:     if (  result <= 0  )\
lbessard@136:       result = YY_NULL;\
lbessard@136:     }
lbessard@136: 
msousa@287: 
etisserant@0: /* Macro that is executed for every action.
etisserant@0:  * We use it to pass the location of the token
etisserant@0:  * back to the bison parser...
 *
 * The start of the token (yylloc.first_*) is read from the tracking data
 * before UpdateTracking(yytext) is called, and the end of the token
 * (yylloc.last_*) is read after that call. previous_tracking keeps a copy
 * of the tracking data as it was before the call, and current_order is
 * incremented once for every action executed.
etisserant@0:  */
lbessard@136: #define YY_USER_ACTION {\
msousa@1055: 	previous_tracking   =*current_tracking;					\
msousa@1055: 	yylloc.first_line   = current_tracking->lineNumber;			\
msousa@1055: 	yylloc.first_column = current_tracking->currentChar;			\
msousa@1055: 	yylloc.first_file   = current_filename;					\
msousa@1055: 	yylloc.first_order  = current_order;					\
msousa@1055: 	\
msousa@1055: 	UpdateTracking(yytext);							\
msousa@1055: 	\
msousa@1055: 	yylloc.last_line    = current_tracking->lineNumber;			\
msousa@1055: 	yylloc.last_column  = current_tracking->currentChar - 1;		\
msousa@1055: 	yylloc.last_file    = current_filename;					\
msousa@1055: 	yylloc.last_order   = current_order;					\
msousa@1055: 	\
msousa@287: 	current_tracking->currentTokenStart = current_tracking->currentChar;	\
msousa@287: 	current_order++;							\
etisserant@0: 	}
etisserant@0: 
mjsousa@879: 
etisserant@0: /* Since the lexical parser we define here only works on ASCII based
etisserant@0:  * systems, we might as well make sure it is being compiled on
etisserant@0:  * one...
etisserant@0:  * Let's check a few random characters...
etisserant@0:  */
etisserant@0: #if (('a' != 0x61) || ('A' != 0x41) || ('z' != 0x7A) || ('Z' != 0x5A) || \
etisserant@0:      ('0' != 0x30) || ('9' != 0x39) || ('(' != 0x28) || ('[' != 0x5B))
etisserant@0: #error This lexical analyser is not portable to a non ASCII based system.
etisserant@0: #endif
etisserant@0: 
etisserant@0: 
etisserant@0: /* Function only called from within flex, but defined
etisserant@0:  * in iec.y!
lbessard@3:  * We declare it here...
etisserant@0:  *
etisserant@0:  * Search for a symbol in either of the two symbol tables
etisserant@0:  * and return the token id of the first symbol found.
etisserant@0:  * Searches first in the variables, and only if not found
etisserant@0:  * does it continue searching in the library elements
etisserant@0:  */
etisserant@0: //token_id_t get_identifier_token(const char *identifier_str);
etisserant@0: int get_identifier_token(const char *identifier_str);
etisserant@0: %}
etisserant@0: 
etisserant@0: 
etisserant@0: /***************************************************/
etisserant@0: /* Forward Declaration of functions defined later. */
etisserant@0: /***************************************************/
etisserant@0: 
etisserant@0: %{
msousa@1055: void UpdateTracking(const char *text);
msousa@1055: /* return the character back to the input stream. */
msousa@1055: void unput_char(const char c);
etisserant@0: /* return all the text in the current token back to the input stream. */
msousa@1055: void unput_text(int n);
msousa@547: /* return all the text in the current token back to the input stream, 
msousa@547:  * but first return to the stream an additional character to mark the end of the token. 
msousa@547:  */
msousa@1055: void unput_and_mark(const char mark_char);
msousa@756: 
msousa@756: void include_file(const char *include_filename);
msousa@757: 
mjsousa@1016: /* The body_state tries to find a ';' before a END_PROGRAM, END_FUNCTION or END_FUNCTION_BLOCK or END_ACTION
msousa@1055:  * and ignores ';' inside comments and pragmas. This means that we cannot do this in a single lex rule.
msousa@1055:  * Body_state therefore stores ALL text we consume in every rule, so we can push it back into the buffer
mjsousa@1016:  * once we have decided if we are parsing ST or IL code. The following functions manage that buffer used by
mjsousa@1016:  * the body_state.
mjsousa@1016:  */
msousa@1056: void  append_bodystate_buffer(const char *text, int is_whitespace = 0);
mjsousa@1016: void   unput_bodystate_buffer(void);
mjsousa@1016: int  isempty_bodystate_buffer(void);
mjsousa@1016: 
msousa@757: int GetNextChar(char *b, int maxBuffer);
etisserant@0: %}
etisserant@0: 
etisserant@0: 
etisserant@0: 
etisserant@0: /****************************/
etisserant@0: /* Lexical Parser States... */
etisserant@0: /****************************/
etisserant@0: 
etisserant@0: /* NOTE: Our parser can parse st or il code, intermixed
etisserant@0:  *       within the same file.
etisserant@0:  *       With IL we come across the issue of the EOL (end of line) token.
etisserant@0:  *       ST, and the declaration parts of IL do not use this token!
etisserant@0:  *       If the lexical analyser were to issue this token during ST
etisserant@0:  *       language parsing, or during the declaration of data types,
etisserant@0:  *       function headers, etc. in IL, the syntax parser would crash.
etisserant@0:  *
etisserant@0:  *       We can solve this issue using one of three methods:
etisserant@0:  *        (1) Augment all the syntax that does not accept the EOL
etisserant@0:  *            token to simply ignore it. This makes the syntax
etisserant@0:  *            definition (in iec.y) very cluttered!
etisserant@0:  *        (2) Let the lexical parser figure out which language
etisserant@0:  *            it is parsing, and decide whether or not to issue
etisserant@0:  *            the EOL token. This requires the lexical parser
etisserant@0:  *            to have knowledge of the syntax!, making for a poor
etisserant@0:  *            overall organisation of the code. It would also make it
etisserant@0:  *            very difficult to understand the lexical parser as it
etisserant@0:  *            would use several states, and a state machine to transition
etisserant@0:  *            between the states. The state transitions would be
etisserant@0:  *            intermingled with the lexical parser definition!
etisserant@0:  *        (3) Use a mixture of (1) and (2). The lexical analyser
etisserant@0:  *            merely distinguishes between function headers and function
etisserant@0:  *            bodies, but no longer makes a distinction between il and
etisserant@0:  *            st language bodies. When parsing a body, it will return
etisserant@0:  *            the EOL token. In other states '\n' will be ignored as
etisserant@0:  *            whitespace.
etisserant@0:  *            The ST language syntax has been augmented in the syntax
etisserant@0:  *            parser configuration to ignore any EOL tokens that it may
etisserant@0:  *            come across!
etisserant@0:  *            This option has both drawbacks of option (1) and (2), but
etisserant@0:  *            much less intensely.
etisserant@0:  *            The syntax that gets cluttered is limited to the ST statements
etisserant@0:  *            (which is rather limited, compared to the function headers and
etisserant@0:  *            data type declarations, etc...), while the state machine in
etisserant@0:  *            the lexical parser becomes very simple. All state transitions
etisserant@0:  *            can be handled within the lexical parser by itself, and can be
etisserant@0:  *            easily identified. Thus knowledge of the syntax required by
etisserant@0:  *            the lexical parser is very limited!
etisserant@0:  *
etisserant@0:  * Amazingly enough, I (Mario) got to implement option (3)
etisserant@0:  * at first, requiring two basic states, decl and body.
etisserant@0:  * The lexical parser will enter the body state when
etisserant@0:  * it is parsing the body of a function/program/function block. The
etisserant@0:  * state transition is done when we find a VAR_END that is not followed
etisserant@0:  * by a VAR! This is the syntax knowledge that gets included in the
etisserant@0:  * lexical analyser with this option!
etisserant@0:  * Unfortunately, getting the st syntax parser to ignore EOL anywhere
etisserant@0:  * where they might appear leads to conflicts. This is due to the fact
etisserant@0:  * that the syntax parser uses the single look-ahead token to remove
etisserant@0:  * possible conflicts. When we insert a possible EOL, the single
etisserant@0:  * look ahead token becomes the EOL, which means the potential conflicts
etisserant@0:  * could no longer be resolved.
etisserant@0:  * Removing these conflicts would make the st syntax parser very convoluted,
etisserant@0:  * and adding the extraneous EOL would make it very cluttered.
etisserant@0:  * This option was therefore dropped in favour of another!
etisserant@0:  *
etisserant@0:  * I ended up implementing (2). Unfortunately the lexical analyser can
etisserant@0:  * not easily distinguish between il and st code, since function
etisserant@0:  * calls in il are very similar to function block calls in st.
etisserant@0:  * We therefore use an extra 'body' state. When the lexical parser
etisserant@0:  * finds that last END_VAR, it enters the body state. This state
etisserant@0:  * must figure out what language is being parsed from the first few
mario@68:  * tokens, and switch to the correct state (st, il or sfc) according to the
etisserant@0:  * language. This means that we insert quite a bit of knowledge of the
etisserant@0:  * syntax of the languages into the lexical parser. This is ugly, but it
etisserant@0:  * works, and at least it is possible to keep all the state changes together
etisserant@0:  * to make it easier to remove them later on if need be.
mario@68:  * Once the language being parsed has been identified, 
mario@68:  * the body state returns any matched text back to the buffer with unput(),
mario@68:  * to be later matched correctly by the appropriate language parser (st, il or sfc).
mario@68:  *
mario@68:  * Additionally, in sfc state it may further recursively enter the body state
mario@68:  * once again. This is because an sfc body may contain ACTIONS, which are then
mario@68:  * written in one of the three languages (ST, IL or SFC), so once again we need
mario@68:  * to figure out which language the ACTION in the SFC was written in. We already
mario@68:  * have all that done in the body state, so we recursively transition to the body 
mario@68:  * state once again.
mario@68:  * Note that in this case, when coming out of the st/il state (whichever language
mario@68:  * the action was written in) the sfc state will become active again. This is done by
mario@68:  * pushing and popping the previously active state!
mario@68:  *
mario@68:  * The sfc_qualifier_state is required because when parsing actions within an
mario@68:  * sfc, we will be expecting action qualifiers (N, P, R, S, DS, SD, ...). In order
mario@68:  * for bison to work correctly, these qualifiers must be returned as tokens. However,
mario@68:  * these tokens are not reserved keywords, which means it should be possible to
mario@68:  * define variables/functions/FBs with any of these names (including 
mario@68:  * S and R which are special because they are also IL operators). So, when we are not
mario@68:  * expecting any action qualifiers, flex does not return these tokens, and is free
mario@68:  * to interpret them as previously defined variables/functions/... as the case may be.
mario@68:  *
msousa@547:  * The time_literal_state is required because TIME# literals are decomposed into 
msousa@547:  * portions, and we want to send these portions one by one to bison. Each portion will 
msousa@547:  * represent the value in days/hours/minutes/seconds/ms.
msousa@547:  * Unfortunately, some of these portions may also be lexically analysed as an identifier. So,
msousa@547:  * we need to disable lexical identification of identifiers while parsing TIME# literals!
msousa@547:  * e.g.:  TIME#55d_4h_56m
msousa@547:  *       We would like to return to bison the tokens 'TIME' '#' '55d' '_' '4h' '_' '56m'
msousa@547:  *       Unfortunately, flex will join '_' and '4h' to create a legal {identifier} '_4h',
msousa@547:  *       and return that identifier instead! So, we added this state!
msousa@547:  *
mjsousa@952:  * The ignore_pou_state state is only used when bison says it is doing the pre-parsing.
mjsousa@952:  * During pre-parsing, the main state machine will only transition between
mjsousa@952:  * INITIAL and ignore_pou_state, and from here back to INITIAL. All other
mjsousa@952:  * transitions are inhibited. This inhibition is actually just enforced by making
mjsousa@952:  * sure that the INITIAL ---> ignore_pou_state transition is tested before all other
mjsousa@952:  * transitions coming out of INITIAL state. All other transitions are unaffected, as they
mjsousa@952:  * never get a chance to be evaluated when bison is doing pre-parsing.
mjsousa@952:  * Pre-parsing is a first quick scan through the whole input source code simply
mjsousa@952:  * to determine the list of POUs and datatypes that will be defined in that
mjsousa@952:  * code. Basically, the objective is to fill up the previously_declared_xxxxx
mjsousa@952:  * maps, without processing the code itself. Once these maps have been filled up,
mjsousa@952:  * bison will throw away the AST (abstract syntax tree) created up to that point, 
mjsousa@952:  * and scan through the same source code again, but this time creating a correct AST.
mjsousa@952:  * This pre-scan allows the source code to reference POUs and datatypes that are
mjsousa@952:  * only declared after they are used!
mjsousa@868:  * 
mjsousa@952:  *
mjsousa@952:  * Here is a main state machine...
mjsousa@952:  *                                                                         --+  
mjsousa@952:  *                                                                           |  these states are
mjsousa@952:  *              +------------> get_pou_name_state  ----> ignore_pou_state    |  only active 
mjsousa@952:  *              |                                            |               |  when bison is 
mjsousa@952:  *              |  ------------------------------------------+               |  doing the 
mjsousa@952:  *              |  |                                                         |  pre-parsing!!
mjsousa@952:  *              |  v                                                       --+
mjsousa@868:  *       +---> INITIAL <-------> config
mjsousa@868:  *       |        \
mjsousa@868:  *       |        V
mjsousa@868:  *       |   header_state
mjsousa@868:  *       |        |
mjsousa@868:  *       |        V
mjsousa@868:  *     vardecl_list_state <------> var_decl
mjsousa@868:  *       ^        | 
mjsousa@868:  *       |        | [using push()]
mjsousa@868:  *       |        |
mjsousa@868:  *       |        V
mjsousa@868:  *       |       body, 
mjsousa@868:  *       |        |
mjsousa@868:  *       |        | 
mjsousa@868:  *       |   -------------------
mjsousa@868:  *       |   |       |         |
mjsousa@868:  *       |   v       v         v
mjsousa@868:  *       |  st      il        sfc
mjsousa@868:  *       |   |       |         |  [using pop() when leaving st/il/sfc => goes to vardecl_list_state]
mjsousa@868:  *       |   |       |         |
mjsousa@868:  *       -----------------------
mjsousa@868:  *
mjsousa@868:  * NOTE:- When inside sfc, and an action or transition in ST/IL is found, then 
mjsousa@868:  *        we also push() to the body state. This means that sometimes, when pop()ing
mjsousa@868:  *        from st and il, the state machine may return to the sfc state!
mjsousa@868:  *      - The transitions from sfc to body will be decided by bison, which will
mjsousa@868:  *        tell flex to do the transition by calling cmd_goto_body_state().
mjsousa@868:  *   
mjsousa@866:  * 
etisserant@0:  * Possible state changes are:
mjsousa@952:  *   INITIAL -> goto(ignore_pou_state)
mjsousa@952:  *               (This transition state is only used when bison says it is doing the pre-parsing.)
mjsousa@952:  *               (This transition takes precedence over all other transitions!)
mjsousa@952:  *               (when a FUNCTION, FUNCTION_BLOCK, PROGRAM or CONFIGURATION is found)
mjsousa@952:  * 
mario@68:  *   INITIAL -> goto(config_state)
mario@68:  *                (when a CONFIGURATION is found)
mjsousa@866:  * 
mjsousa@866:  *   INITIAL -> goto(header_state)
mjsousa@866:  *               (when a FUNCTION, FUNCTION_BLOCK, or PROGRAM is found)
mjsousa@952:  * 
mjsousa@866:  *   header_state -> goto(vardecl_list_state)
mjsousa@866:  *               (When the first VAR token is found, i.e. at beginning of first VAR .. END_VAR declaration)
mjsousa@866:  * 
mjsousa@866:  *  vardecl_list_state -> push current state (vardecl_list_state), and goto(vardecl_state) 
mjsousa@866:  *                (when a VAR token is found)
mjsousa@866:  *   vardecl_state -> pop() to (vardecl_list_state) 
mjsousa@866:  *                (when a END_VAR token is found)
mjsousa@866:  * 
mjsousa@868:  *   vardecl_list_state -> push current state (vardecl_list_state), and goto(body_state) 
mjsousa@866:  *                (when the last END_VAR is found!)
mjsousa@866:  *
mjsousa@868:  *   body_state    -> goto(sfc_state)
mario@68:  *                     (when it figures out it is parsing sfc language)
mjsousa@868:  *   body_state    -> goto(st_state)
mario@68:  *                     (when it figures out it is parsing st language)
mjsousa@868:  *   body_state    -> goto(il_state)
mario@68:  *                     (when it figures out it is parsing il language)
mjsousa@868:  *   st_state      -> pop() to vardecl_list_state
mario@68:  *                     (when a END_FUNCTION, END_FUNCTION_BLOCK, END_PROGRAM,
mario@68:  *                      END_ACTION or END_TRANSITION is found)
mjsousa@868:  *   il_state      -> pop() to vardecl_list_state
mario@68:  *                     (when a END_FUNCTION, END_FUNCTION_BLOCK, END_PROGRAM,
mario@68:  *                      END_ACTION or END_TRANSITION is found)
mjsousa@868:  *   sfc_state     -> pop() to vardecl_list_state
mario@68:  *                     (when a END_FUNCTION, END_FUNCTION_BLOCK, or END_PROGRAM is found)
mjsousa@866:  * 
mjsousa@952:  *   ignore_pou_state   -> goto(INITIAL)
mjsousa@952:  *                         (when a END_FUNCTION, END_FUNCTION_BLOCK, END_PROGRAM or END_CONFIGURATION is found)
mjsousa@868:  *   vardecl_list_state -> goto(INITIAL)
mjsousa@952:  *                         (when a END_FUNCTION, END_FUNCTION_BLOCK, or END_PROGRAM is found)
mjsousa@952:  *   config_state       -> goto(INITIAL)
mjsousa@952:  *                         (when a END_CONFIGURATION is found)
mjsousa@866:  * 
mjsousa@866:  *  
mjsousa@866:  *   sfc_state     -> push current state(sfc_state); goto(body_state)
mario@68:  *                     (when parsing an action. This transition is requested by bison)
mjsousa@866:  *   sfc_state     -> push current state(sfc_state); goto(sfc_qualifier_state)
mario@68:  *                     (when expecting an action qualifier. This transition is requested by bison)
mjsousa@866:  *   sfc_qualifier_state -> pop() to sfc_state
mario@68:  *                     (when no longer expecting an action qualifier. This transition is requested by bison)
mjsousa@866:  *
mario@74:  *   config_state  -> push(config_state); goto(task_init_state)
mario@74:  *                     (when parsing a task initialisation. This transition is requested by bison)
mario@74:  *   task_init_state -> pop()
mario@74:  *                     (when no longer parsing task initialisation parameters. This transition is requested by bison)
mario@74:  *
mjsousa@866:  * 
mjsousa@866:  * There is another secondary state machine for parsing comments, another for file_includes, 
mjsousa@866:  * and yet another for time literals.
mario@74:  */
mario@68: 
mario@68: 
mjsousa@952: /* Bison is in the pre-parsing stage, and we are parsing a POU. Ignore everything up to the end of the POU! */
mjsousa@952: %x ignore_pou_state
mjsousa@952: %x get_pou_name_state
mjsousa@952: 
etisserant@0: /* we are parsing a configuration. */
lbessard@3: %s config_state
etisserant@0: 
mario@74: /* Inside a configuration, we are parsing a task initialisation parameters */
mario@74: /* This means that PRIORITY, SINGLE and INTERVAL must be handled as
mario@74:  * tokens, and not as possible identifiers. Note that the above words
mario@74:  * are not keywords.
mario@74:  */
mario@74: %s task_init_state
mario@74: 
mjsousa@866: /* we are looking for the first VAR inside a function's, program's or function block's declaration */
mjsousa@868: /* This is not exclusive (%x) as we must be able to parse the identifier and data types of a function/FB */
mjsousa@866: %s header_state
mjsousa@866: 
mjsousa@866: /* we are parsing a function, program or function block sequence of VAR..END_VAR declarations */
mjsousa@866: %x vardecl_list_state 
mjsousa@866: /* a substate of the vardecl_list_state: we are inside a specific VAR .. END_VAR */
mjsousa@866: %s vardecl_state
etisserant@0: 
mjsousa@868: /* we will be parsing a function body/action/transition. Whether il/st/sfc remains to be determined */
mario@68: %x body_state
etisserant@0: 
etisserant@0: /* we are parsing il code -> flex must return the EOL tokens!       */
lbessard@3: %s il_state
etisserant@0: 
etisserant@0: /* we are parsing st code -> flex must not return the EOL tokens!   */
lbessard@3: %s st_state
etisserant@0: 
mario@68: /* we are parsing sfc code -> flex must not return the EOL tokens!  */
lbessard@3: %s sfc_state
etisserant@0: 
mario@68: /* we are parsing sfc code, and expecting an action qualifier.      */
mario@68: %s sfc_qualifier_state
etisserant@0: 
mario@86: /* we are parsing sfc code, and expecting the priority token.       */
mario@86: %s sfc_priority_state
etisserant@0: 
msousa@547: /* we are parsing a TIME# literal. We must not return any {identifier} tokens. */
msousa@547: %x time_literal_state
mario@75: 
mjsousa@866: /* we are parsing a comment. */
mjsousa@866: %x comment_state
mjsousa@866: 
mario@75: 
etisserant@0: /*******************/
etisserant@0: /* File #include's */
etisserant@0: /*******************/
etisserant@0: 
etisserant@0: /* We extend the IEC 61131-3 standard syntax to allow inclusion
etisserant@0:  * of other files, using the IEC 61131-3 pragma directive...
etisserant@0:  * The accepted syntax is:
etisserant@0:  *  {#include "<filename>"}
etisserant@0:  */
etisserant@0: 
etisserant@0: /* the "include" states are used for picking up the name of an include file */
etisserant@0: %x include_beg
etisserant@0: %x include_filename
etisserant@0: %x include_end
etisserant@0: 
etisserant@0: 
etisserant@0: file_include_pragma_filename	[^\"]*
mjsousa@866: file_include_pragma_beg		"{#include"{st_whitespace}\"
mjsousa@866: file_include_pragma_end		\"{st_whitespace}"}"
etisserant@0: file_include_pragma			{file_include_pragma_beg}{file_include_pragma_filename}{file_include_pragma_end}
etisserant@0: 
etisserant@0: 
etisserant@0: %{
mjsousa@879: 
mjsousa@879: /* A counter to track the order by which each token is processed.
mjsousa@879:  * NOTE: This counter is not exactly linear (i.e., it does not get incremented by 1 for each token).
mjsousa@879:  *       i.e.. it may get incremented by more than one between two consecutive tokens.
mjsousa@879:  *       This is due to the fact that the counter gets incremented every 'user action' in flex,
mjsousa@879:  *       however not every user action will result in a token being passed to bison.
mjsousa@879:  *       Nevertheless this is still OK, as we are only interested in the relative
mjsousa@879:  *       ordering of tokens...
mjsousa@879:  */
mjsousa@879: static long int current_order = 0;
mjsousa@879:   
etisserant@0: typedef struct {  /* input position tracking data; pointed to by current_tracking and by include_stack_t.env */
msousa@757:     int eof;                /* NOTE(review): presumably set once the end of in_file has been reached -- confirm */
msousa@757:     int lineNumber;         /* copied into yylloc.first_line / yylloc.last_line by YY_USER_ACTION */
msousa@757:     int currentChar;        /* copied into yylloc.first_column; yylloc.last_column is set to currentChar - 1 */
msousa@757:     int lineLength;         /* NOTE(review): presumably the length of the line currently buffered -- confirm */
msousa@757:     int currentTokenStart;  /* set to currentChar at the end of every YY_USER_ACTION */
msousa@757:     FILE *in_file;          /* NOTE(review): presumably the stream GetNextChar() reads from -- confirm */
msousa@757:   } tracking_t;
msousa@757: 
mjsousa@879: /* A forward declaration of a function defined at the end of this file. */
mjsousa@879: void FreeTracking(tracking_t *tracking);
mjsousa@879: 
mjsousa@879: 
mjsousa@879: #define MAX_INCLUDE_DEPTH 16
mjsousa@879: 
msousa@757: typedef struct {
etisserant@0: 	  YY_BUFFER_STATE buffer_state;
msousa@757: 	  tracking_t *env;
etisserant@0: 	  const char *filename;
etisserant@0: 	} include_stack_t;
etisserant@0: 
msousa@1055: tracking_t * current_tracking = NULL;
msousa@1055: tracking_t  previous_tracking;
etisserant@0: include_stack_t include_stack[MAX_INCLUDE_DEPTH];
etisserant@0: int include_stack_ptr = 0;
etisserant@0: 
etisserant@0: const char *INCLUDE_DIRECTORIES[] = {  /* NULL-terminated list of directories. NOTE(review): presumably searched in this order by include_file() -- confirm */
etisserant@40: 	DEFAULT_LIBDIR,
etisserant@40: 	".",
etisserant@40: 	"/lib",
etisserant@40: 	"/usr/lib",
etisserant@40: 	"/usr/lib/iec",
etisserant@0: 	NULL /* must end with NULL!! */
etisserant@0: 	};
etisserant@0: %}
etisserant@0: 
etisserant@0: 
etisserant@0: 
etisserant@0: /*****************************/
etisserant@0: /* Preliminary constructs... */
etisserant@0: /*****************************/
etisserant@0: 
mjsousa@866: /* PRAGMAS */
mjsousa@866: /* ======= */
msousa@267: /* In order to allow the declaration of POU prototypes (Function, FB, Program, ...),
msousa@267:  * especially the prototypes of Functions and FBs defined in the standard
msousa@267:  * (i.e. standard functions and FBs), we extend the IEC 61131-3 standard syntax 
msousa@267:  * with two pragmas to indicate that the code is to be parsed (going through the 
msousa@267:  * lexical, syntactical, and semantic analysers), but no code is to be generated.
msousa@267:  * 
msousa@267:  * The accepted syntax is:
msousa@267:  *  {disable code generation}
msousa@267:  *    ... prototypes ...
msousa@267:  *  {enable code generation}
msousa@267:  * 
msousa@267:  * When parsing these prototypes the abstract syntax tree will be populated as usual,
msousa@267:  * allowing the semantic analyser to correctly analyse the semantics of calls to these
msousa@267:  * functions/FBs. However, stage4 will simply ignore all IEC61131-3 code
msousa@267:  * between the above two pragmas.
msousa@267:  */
msousa@267: 
msousa@267: disable_code_generation_pragma	"{disable code generation}"
msousa@267: enable_code_generation_pragma	"{enable code generation}"
msousa@267: 
msousa@267: 
msousa@267: /* Any other pragma: either {...} containing no '}', or {{...}} which may contain single '}' characters. */
mjsousa@869: pragma ("{"[^}]*"}")|("{{"([^}]|"}"[^}])*"}}")
mjsousa@868: 
mjsousa@868: 
mjsousa@866: 
mjsousa@866: /* COMMENTS */
mjsousa@866: /* ======== */
mjsousa@866: 
mjsousa@866: /* In order to allow nested comments, comments are handled by a specific comment_state state */
mjsousa@866: /* Whenever a "(*" is found, we push the current state onto the stack, and enter a new instance of the comment_state state.
mjsousa@866:  * Whenever a "*)" is found, we pop a state off the stack
mjsousa@866:  */
mjsousa@866: 
mjsousa@866: /* comments... */
mjsousa@866: comment_beg  "(*"
mjsousa@866: comment_end  "*)"
mjsousa@866: 
mjsousa@866: /* However, bison has a shift/reduce conflict when parsing formal function/FB
mjsousa@866:  * invocations with the 'NOT <variable_name> =>' syntax (which needs two look ahead 
mjsousa@866:  * tokens to be parsed correctly - and bison being LALR(1) only supports one).
mjsousa@866:  * The current work around requires flex to completely parse the '<variable_name> =>'
mjsousa@866:  * sequence. This sequence includes whitespace and/or comments between the 
mjsousa@866:  * <variable_name> and the "=>" token.
mjsousa@866:  * 
mjsousa@866:  * This flex rule (sendto_identifier_token) uses the whitespace/comment as trailing context,
mjsousa@866:  * which means we can not use the comment_state method of specifying/finding and ignoring 
mjsousa@866:  * comments.
mjsousa@866:  * 
mjsousa@866:  * For this reason only, we must also define what a complete comment looks like, so
mjsousa@866:  * it may be used in this rule. Since the rule uses the whitespace_or_comment
mjsousa@866:  * construct as trailing context, this definition of comment must not use any
mjsousa@866:  * trailing context either.
mjsousa@866:  * 
mjsousa@866:  * Additionally, it is not possible to define nested comments in flex without the use of
mjsousa@866:  * states, so for this particular location, we do NOT support nested comments.
mjsousa@866:  */
etisserant@0: /* NOTE: this seemingly unnecessary complex definition is required
etisserant@0:  *       to be able to eat up comments such as:
etisserant@0:  *          '(* Testing... ! ***** ******)'
etisserant@0:  *       without using the trailing context command in flex (/{context})
etisserant@0:  *       since {comment} itself will later be used with
etisserant@0:  *       trailing context ({comment}/{context})
etisserant@0:  */
etisserant@0: not_asterisk				[^*]
etisserant@0: not_close_parenthesis_nor_asterisk	[^*)]
etisserant@0: asterisk				"*"
mjsousa@866: comment_text	({not_asterisk})|(({asterisk}+){not_close_parenthesis_nor_asterisk})
etisserant@0: comment		"(*"({comment_text}*)({asterisk}+)")"
etisserant@0: 
etisserant@0: 
mjsousa@866: 
mjsousa@866: /* 3.1 Whitespace */
mjsousa@866: /* ============== */
etisserant@0: /*
mjsousa@866:  * Whitespace is clearly defined (see IEC 61131-3 v2, section 2.1.4)
mjsousa@866:  * 
mjsousa@866:  * Whitespace definition includes the newline character.
mjsousa@866:  * 
mjsousa@866:  * However, the standard is inconsistent in that in IL the newline character 
mjsousa@866:  * is considered a token (EOL - end of line). 
mjsousa@866:  * In our implementation we therefore have two definitions of whitespace
mjsousa@866:  *   - one for ST, that includes the newline character
mjsousa@866:  *   - one for IL without the newline character.
msousa@1065:  *
msousa@1065:  * IL whitespace is only active while parsing IL code, whereas ST whitespace
msousa@1065:  * is used in all other circumstances. Additionally, when parsing IL, the newline
msousa@1065:  * character is treated as the EOL token.
msousa@1065:  * The above requires the use of a state machine in the lexical parser to track which
msousa@1065:  * language is being parsed. This requires that the lexical parser (i.e. flex)
msousa@1065:  * have some knowledge of the syntax itself.
mjsousa@866:  *
mjsousa@866:  * NOTE: Our definition of whitespace will only work in ASCII!
mjsousa@866:  *
etisserant@0:  * NOTE: we cannot use
etisserant@0:  *         st_whitespace	[:space:]*
etisserant@0:  *       since we use {st_whitespace} as trailing context. In our case
etisserant@0:  *       this would not constitute "dangerous trailing context", but the
etisserant@0:  *       lexical generator (i.e. flex) does not know this (since it does
etisserant@0:  *       not know which characters belong to the set [:space:]), and will
etisserant@0:  *       generate a "dangerous trailing context" warning!
etisserant@0:  *       We use this alternative just to stop the flex utility from
etisserant@0:  *       generating the invalid (in this case) warning...
etisserant@0:  */
msousa@1065: /* NOTE: il_whitespace_char is not currently used, but we include it for completeness */ 
msousa@1065: st_whitespace_char		[ \f\n\r\t\v]
msousa@1065: il_whitespace_char		[ \f\r\t\v]
etisserant@0: 
mjsousa@866: st_whitespace			[ \f\n\r\t\v]*
mjsousa@866: il_whitespace			[ \f\r\t\v]*
mjsousa@866: 
mjsousa@866: st_whitespace_or_pragma_or_commentX	({st_whitespace})|({pragma})|({comment})
mjsousa@866: il_whitespace_or_pragma_or_commentX	({il_whitespace})|({pragma})|({comment})
mjsousa@866: /* The ...X definitions above match a single item; the definitions below match any (possibly empty) sequence of them. */
mjsousa@866: st_whitespace_or_pragma_or_comment	{st_whitespace_or_pragma_or_commentX}*
mjsousa@866: il_whitespace_or_pragma_or_comment	{il_whitespace_or_pragma_or_commentX}*
mjsousa@866: 
mjsousa@866: 
mjsousa@866: /* An identifier followed by one or more '.'-prefixed identifiers, e.g. a.b or a.b.c ({identifier} is defined further below). */
mjsousa@866: qualified_identifier	{identifier}(\.{identifier})+
etisserant@0: 
etisserant@0: 
etisserant@0: 
etisserant@0: /*****************************************/
etisserant@0: /* B.1.1 Letters, digits and identifiers */
etisserant@0: /*****************************************/
etisserant@0: /* NOTE: The following definitions only work if the host computer
etisserant@0:  *       is using the ASCII mapping. For e.g., with EBCDIC [A-Z]
etisserant@0:  *       contains non-alphabetic characters!
etisserant@0:  *       The correct way of doing it would be to use
etisserant@0:  *       the [:upper:] etc... definitions.
etisserant@0:  *
etisserant@0:  *       Unfortunately, further on we need all printable
etisserant@0:  *       characters (i.e. [:print:]), but excluding '$'.
etisserant@0:  *       Flex does not allow sets to be composed by excluding
etisserant@0:  *       elements. Sets may only be constructed by adding new
etisserant@0:  *       elements, which means that we have to revert to
etisserant@0:  *       [\x20\x21\x23\x25\x26\x28-\x7E] for the definition
etisserant@0:  *       of the printable characters with the required exceptions.
etisserant@0:  *       The above also implies the use of ASCII, but now we have
etisserant@0:  *       no way to work around it!
etisserant@0:  *
etisserant@0:  *       The conclusion is that our parser is limited to ASCII
etisserant@0:  *       based host computers!!
etisserant@0:  */
etisserant@0: letter		[A-Za-z]
etisserant@0: digit		[0-9]
etisserant@0: octal_digit	[0-7]
etisserant@0: hex_digit	{digit}|[A-F]
etisserant@0: identifier	({letter}|(_({letter}|{digit})))((_?({letter}|{digit}))*)
etisserant@0: 
etisserant@0: /*******************/
etisserant@0: /* B.1.2 Constants */
etisserant@0: /*******************/
etisserant@0: 
etisserant@0: /******************************/
etisserant@0: /* B.1.2.1   Numeric literals */
etisserant@0: /******************************/
etisserant@0: integer         {digit}((_?{digit})*)
msousa@547: 
msousa@547: /* Some helper symbols for parsing TIME literals... */
msousa@547: integer_0_59    (0(_?))*([0-5](_?))?{digit}
msousa@547: integer_0_19    (0(_?))*([0-1](_?))?{digit}
msousa@547: integer_20_23   (0(_?))*2(_?)[0-3]
msousa@547: integer_0_23    {integer_0_19}|{integer_20_23}
msousa@547: integer_0_999   {digit}((_?{digit})?)((_?{digit})?)
msousa@547: /* NOTE: integer_0_59, integer_0_19 and integer_20_23 accept any number of leading zeros, each optionally followed by '_'. */
msousa@547: /* NOTE: binary_integer refers to {bit}, which is only defined on the line that follows it. */
etisserant@0: binary_integer  2#{bit}((_?{bit})*)
etisserant@0: bit		[0-1]
etisserant@0: octal_integer   8#{octal_digit}((_?{octal_digit})*)
etisserant@0: hex_integer     16#{hex_digit}((_?{hex_digit})*)
etisserant@0: exponent        [Ee]([+-]?){integer}
etisserant@0: /* The correct definition for real would be:
etisserant@0:  * real		{integer}\.{integer}({exponent}?)
etisserant@0:  *
etisserant@0:  * Unfortunately, the spec also defines fixed_point (B 1.2.3.1) as:
etisserant@0:  * fixed_point		{integer}\.{integer}
etisserant@0:  *
etisserant@0:  * This means that {integer}\.{integer} could be interpreted
etisserant@0:  * as either a fixed_point or a real.
etisserant@0:  * I have opted to interpret {integer}\.{integer} as a fixed_point.
etisserant@0:  * In order to do this, the definition of real has been changed to:
etisserant@0:  * real		{integer}\.{integer}{exponent}
etisserant@0:  *
etisserant@0:  * This means that the syntax parser now needs to define a real to be
etisserant@0:  * either a real_token or a fixed_point_token!
etisserant@0:  */
etisserant@0: real		{integer}\.{integer}{exponent}
etisserant@0: 
etisserant@0: 
etisserant@0: /*******************************/
etisserant@0: /* B.1.2.2   Character Strings */
etisserant@0: /*******************************/
etisserant@0: /*
etisserant@0: common_character_representation :=
etisserant@0: <any printable character except '$', '"' or "'">
etisserant@0: |'$$'
etisserant@0: |'$L'|'$N'|'$P'|'$R'|'$T'
etisserant@0: |'$l'|'$n'|'$p'|'$r'|'$t'
etisserant@0: 
etisserant@0: NOTE: 	$ = 0x24
etisserant@0: 	" = 0x22
etisserant@0: 	' = 0x27
etisserant@0: 
etisserant@0: 	printable chars in ASCII: 0x20-0x7E
etisserant@0: */
etisserant@0: 
etisserant@0: esc_char_u		$L|$N|$P|$R|$T
etisserant@0: esc_char_l		$l|$n|$p|$r|$t
etisserant@0: esc_char		$$|{esc_char_u}|{esc_char_l}
etisserant@0: double_byte_char	(${hex_digit}{hex_digit}{hex_digit}{hex_digit})
etisserant@0: single_byte_char	(${hex_digit}{hex_digit})
etisserant@0: 
etisserant@0: /* WARNING:
etisserant@0:  * This definition is only valid in ASCII...
etisserant@0:  *
etisserant@0:  * Flex includes the function print_char() that defines
etisserant@0:  * all printable characters portably (i.e. whatever character
etisserant@0:  * encoding is currently being used , ASCII, EBCDIC, etc...)
etisserant@0:  * Unfortunately, we cannot generate the definition of
etisserant@0:  * common_character_representation portably, since flex
etisserant@0:  * does not allow definition of sets by subtracting
etisserant@0:  * elements in one set from another set.
etisserant@0:  * This means we must build up the definition of
etisserant@0:  * common_character_representation using only set addition,
etisserant@0:  * which leaves us with the only choice of defining the
etisserant@0:  * characters non-portably...
etisserant@0:  */
etisserant@0: common_character_representation		[\x20\x21\x23\x25\x26\x28-\x7E]|{esc_char}
etisserant@0: double_byte_character_representation 	$\"|'|{double_byte_char}|{common_character_representation}
etisserant@0: single_byte_character_representation 	$'|\"|{single_byte_char}|{common_character_representation}
etisserant@0: 
etisserant@0: 
etisserant@0: double_byte_character_string	\"({double_byte_character_representation}*)\"
etisserant@0: single_byte_character_string	'({single_byte_character_representation}*)'
etisserant@0: 
etisserant@0: 
etisserant@0: /************************/
etisserant@0: /* B 1.2.3.1 - Duration */
etisserant@0: /************************/
etisserant@0: fixed_point		{integer}\.{integer}
etisserant@0: /* NOTE: unlike the grammar quoted below, this fixed_point always requires the fractional part; see fixed_point_or_integer for the optional form. */
msousa@547: 
msousa@547: /* NOTE: The IEC 61131-3 v2 standard has an incorrect formal syntax definition of duration,
msousa@547:  *       as its definition does not match the standard's text.
msousa@547:  *       IEC 61131-3 v3 (committee draft) seems to have this fixed, so we use that
msousa@547:  *       definition instead!
msousa@547:  *
msousa@547:  *       duration::= ('T' | 'TIME') '#' ['+'|'-'] interval
msousa@547:  *       interval::= days | hours | minutes | seconds | milliseconds
msousa@547:  *       fixed_point  ::= integer [ '.' integer]
msousa@547:  *       days         ::= fixed_point 'd' | integer 'd' ['_'] [ hours ]
msousa@547:  *       hours        ::= fixed_point 'h' | integer 'h' ['_'] [ minutes ]
msousa@547:  *       minutes      ::= fixed_point 'm' | integer 'm' ['_'] [ seconds ]
msousa@547:  *       seconds      ::= fixed_point 's' | integer 's' ['_'] [ milliseconds ]
msousa@547:  *       milliseconds ::= fixed_point 'ms'
msousa@547:  * 
msousa@547:  * 
msousa@547:  *  The original IEC 61131-3 v2 definition is:
msousa@547:  *       duration ::= ('T' | 'TIME') '#' ['-'] interval
msousa@547:  *       interval ::= days | hours | minutes | seconds | milliseconds
msousa@547:  *       fixed_point  ::= integer [ '.' integer]
msousa@547:  *       days         ::= fixed_point 'd' | integer 'd' ['_'] hours
msousa@547:  *       hours        ::= fixed_point 'h' | integer 'h' ['_'] minutes
msousa@547:  *       minutes      ::= fixed_point 'm' | integer 'm' ['_'] seconds
msousa@547:  *       seconds      ::= fixed_point 's' | integer 's' ['_'] milliseconds
msousa@547:  *       milliseconds ::= fixed_point 'ms'
msousa@547:  */
msousa@547: 
msousa@547: interval_ms_X		({integer_0_999}(\.{integer})?)ms
msousa@686: interval_s_X		{integer_0_59}s(_?{interval_ms_X})?|({integer_0_59}(\.{integer})?s)
msousa@686: interval_m_X		{integer_0_59}m(_?{interval_s_X})?|({integer_0_59}(\.{integer})?m)
msousa@686: interval_h_X		{integer_0_23}h(_?{interval_m_X})?|({integer_0_23}(\.{integer})?h)
msousa@547: 
msousa@547: interval_ms		{integer}ms|({fixed_point}ms)
msousa@547: interval_s		{integer}s(_?{interval_ms_X})?|({fixed_point}s)
msousa@547: interval_m		{integer}m(_?{interval_s_X})?|({fixed_point}m)
msousa@547: interval_h		{integer}h(_?{interval_m_X})?|({fixed_point}h)
msousa@547: interval_d		{integer}d(_?{interval_h_X})?|({fixed_point}d)
msousa@547: 
msousa@547: interval		{interval_ms}|{interval_s}|{interval_m}|{interval_h}|{interval_d}
msousa@547: 
msousa@686: 
msousa@547: /* to help provide nice error messages, we also parse an incorrect but plausible interval... */
msousa@547: /* NOTE that this erroneous interval will be parsed outside the time_literal_state, so must not 
msousa@547:  *      be able to parse any other legal lexical construct (besides a legal interval, but that
msousa@547:  *      is OK as this rule will appear _after_ the rule to parse legal intervals!).
msousa@547:  */
msousa@547: fixed_point_or_integer  {fixed_point}|{integer}
msousa@547: erroneous_interval	({fixed_point_or_integer}d_?)?({fixed_point_or_integer}h_?)?({fixed_point_or_integer}m_?)?({fixed_point_or_integer}s_?)?({fixed_point_or_integer}ms)?
etisserant@0: 
etisserant@0: /********************************************/
etisserant@0: /* B.1.4.1   Directly Represented Variables */
etisserant@0: /********************************************/
etisserant@0: /* The correct definition, if the standard were to be followed... */
mario@11: /* NOTE: the separator between the integers must be a literal '.', hence the escape: a bare '.' matches any character except newline. */
mario@11: location_prefix			[IQM]
mario@11: size_prefix			[XBWDL]
mario@11: direct_variable_standard	%{location_prefix}({size_prefix}?){integer}((\.{integer})*)
mario@11: 
etisserant@0: 
etisserant@0: /* For the MatPLC, we will accept %<identifier>
etisserant@0:  * as a direct variable, this being mapped onto the MatPLC point
etisserant@0:  * named <identifier>
etisserant@0:  */
etisserant@0: /* TODO: we should not restrict it to only the accepted syntax
etisserant@0:  * of <identifier> as specified by the standard. MatPLC point names
etisserant@0:  * have a more permissive syntax.
etisserant@0:  *
etisserant@0:  * e.g. "P__234"
etisserant@0:  *    Is a valid MatPLC point name, but not a valid <identifier> !!
etisserant@0:  *    The same happens with names such as "333", "349+23", etc...
etisserant@0:  *    How can we handle these more expressive names in our case?
etisserant@0:  *    Remember that some direct variable may remain anonymous, with
etisserant@0:  *    declarations such as:
etisserant@0:  *    VAR
etisserant@0:  *       AT %I3 : BYTE := 255;
etisserant@0:  *    END_VAR
mario@11:  *    in which case we are currently using "%I3" as the variable
mario@11:  *    name.
mario@11:  */
msousa@547: /* direct_variable_matplc		%{identifier} */
msousa@547: /* direct_variable			{direct_variable_standard}|{direct_variable_matplc} */
msousa@547: direct_variable			{direct_variable_standard}
etisserant@0: /* NOTE: the MatPLC variant above is commented out, so only the standard syntax is currently accepted. */
etisserant@0: /******************************************/
etisserant@0: /* B 1.4.3 - Declaration & Initialisation */
etisserant@0: /******************************************/
etisserant@0: incompl_location	%[IQM]\*
etisserant@0: /* incompl_location: '%', then one of I, Q or M, then a literal '*'. */
etisserant@0: 
etisserant@0: 
etisserant@0: 
etisserant@0: %%
etisserant@0: 	/* fprintf(stderr, "flex: state %d\n", YY_START); */
etisserant@0: 
etisserant@0: 	/*****************************************************/
etisserant@0: 	/*****************************************************/
etisserant@0: 	/*****************************************************/
etisserant@0: 	/*****                                           *****/
etisserant@0: 	/*****                                           *****/
etisserant@0: 	/*****   F I R S T    T H I N G S    F I R S T   *****/
etisserant@0: 	/*****                                           *****/
etisserant@0: 	/*****                                           *****/
etisserant@0: 	/*****************************************************/
etisserant@0: 	/*****************************************************/
etisserant@0: 	/*****************************************************/
etisserant@0: 
mario@68: 	/***********************************************************/
mario@68: 	/* Handle requests sent by bison for flex to change state. */
mario@68: 	/***********************************************************/
mario@13: 	if (get_goto_body_state()) {   /* this code precedes the first rule, so flex runs it on every entry to the scanning routine */
mario@68: 	  yy_push_state(body_state);
mario@13: 	  rst_goto_body_state();       /* each request is reset as soon as it has been honoured */
mario@6: 	}
lbessard@3: 
mario@68: 	if (get_goto_sfc_qualifier_state()) {
mario@68: 	  yy_push_state(sfc_qualifier_state);
mario@68: 	  rst_goto_sfc_qualifier_state();
mario@68: 	}
mario@68: 
mario@86: 	if (get_goto_sfc_priority_state()) {
mario@86: 	  yy_push_state(sfc_priority_state);
mario@86: 	  rst_goto_sfc_priority_state();
mario@86: 	}
mario@86: 
mario@74: 	if (get_goto_task_init_state()) {
mario@74: 	  yy_push_state(task_init_state);
mario@74: 	  rst_goto_task_init_state();
mario@74: 	}
mario@74: 
mario@68: 	if (get_pop_state()) {         /* NOTE: checked after the push requests above, so a pop pending together with a push undoes that push */
mario@68: 	  yy_pop_state();
mario@68: 	  rst_pop_state();
mario@68: 	}
mario@68: 
mario@68: 	/***************************/
etisserant@0: 	/* Handle the pragmas!     */
mario@68: 	/***************************/
etisserant@0: 
etisserant@0: 	/* We start off by searching for the pragmas we handle in the lexical parser. */
etisserant@0: <INITIAL>{file_include_pragma}	unput_text(0); yy_push_state(include_beg);
etisserant@0: 
msousa@267: 	/* Pragmas sent to syntax analyser (bison) */
mjsousa@1016: 	/* NOTE: In the vardecl_list_state we only process the pragmas between two consecutive VAR .. END_VAR blocks.
mjsousa@1016: 	 *       We do not process any pragmas trailing after the last END_VAR. We leave that to the body_state.
mjsousa@1016: 	 *       This is because the pragmas are stored in a statement_list or instruction_list (in bison),
mjsousa@1016: 	 *       but these lists must start with the special tokens start_IL_body_token/start_ST_body_token.
mjsousa@1016: 	 *       This means that these special tokens must be generated (by the body_state) before processing
mjsousa@1016: 	 *       the pragma => we cannot process the trailing pragmas in the vardecl_list_state state.
mjsousa@1016: 	 */
mjsousa@1016: {disable_code_generation_pragma}				return disable_code_generation_pragma_token;
mjsousa@1016: {enable_code_generation_pragma}					return enable_code_generation_pragma_token;
mjsousa@1016: <vardecl_list_state>{disable_code_generation_pragma}/(VAR)	return disable_code_generation_pragma_token; 
mjsousa@1016: <vardecl_list_state>{enable_code_generation_pragma}/(VAR)	return enable_code_generation_pragma_token;  
mjsousa@1016: <body_state>{disable_code_generation_pragma}			append_bodystate_buffer(yytext); /* in body state we do not process any tokens, we simply store them for later processing! */
mjsousa@1016: <body_state>{enable_code_generation_pragma}			append_bodystate_buffer(yytext); /* in body state we do not process any tokens, we simply store them for later processing! */
etisserant@0: 	/* Any other pragma we find, we just pass it up to the syntax parser...   */
mario@68: 	/* Note that the <body_state> state is exclusive, so we have to include it here too. */
mjsousa@1016: <body_state>{pragma}					append_bodystate_buffer(yytext); /* in body state we do not process any tokens, we simply store them for later processing! */
etisserant@0: {pragma}	{/* return the pragma without the enclosing '{' and '}' (or "{{" and "}}") */
mjsousa@868: 		 size_t len = strlen(yytext); int cut = (len >= 4 && yytext[1]=='{' && yytext[len-2]=='}') ? 2 : 1; /* strip two braces only when the text really is "{{" ... "}}"; "{{abc}" matched by the single-brace form keeps its inner '{' */
Edouard@634: 		 yytext[len-cut] = '\0';
Edouard@634: 		 yylval.ID=strdup(yytext+cut);
etisserant@0: 		 return pragma_token;
etisserant@0: 		}
mjsousa@1016: <vardecl_list_state>{pragma}/(VAR) {/* return the pragma without the enclosing '{' and '}' (or "{{" and "}}") */
Laurent@701: 		 size_t len = strlen(yytext); int cut = (len >= 4 && yytext[1]=='{' && yytext[len-2]=='}') ? 2 : 1; /* same brace-stripping rule as the unconditional {pragma} action */
mjsousa@866: 		 yytext[len-cut] = '\0';
Laurent@701: 		 yylval.ID=strdup(yytext+cut);
etisserant@0: 		 return pragma_token;
etisserant@0: 		}
etisserant@0: 
etisserant@0: 
etisserant@0: 	/*********************************/
etisserant@0: 	/* Handle the file includes!     */
etisserant@0: 	/*********************************/
etisserant@0: <include_beg>{file_include_pragma_beg}	BEGIN(include_filename);
etisserant@0: /* NOTE(review): include_file() is defined outside this section; judging by the <<EOF>> rule it presumably saves the current buffer in include_stack[] -- confirm. */
etisserant@0: <include_filename>{file_include_pragma_filename}	{
msousa@756: 			  /* set the internal state variables of lexical analyser to process a new include file */
msousa@756: 			  include_file(yytext);
etisserant@0: 			  /* switch to whatever state was active before the include file */
etisserant@0: 			  yy_pop_state();
etisserant@0: 			  /* now process the new file... */
etisserant@0: 			}
etisserant@0: 
etisserant@0: 
mjsousa@761: <<EOF>>			{     /* NOTE: Currently bison is incorrectly using END_OF_INPUT in many rules
mjsousa@761: 			       *       when checking for syntax errors in the input source code.
mjsousa@761: 			       *       This means that in reality flex will be asked to carry on reading the input
mjsousa@761: 			       *       even after it has reached the end of all (including the main) input files.
mjsousa@761: 			       *       In other words, we will be called to return more tokens, even after we have
mjsousa@761: 			       *       already returned an END_OF_INPUT token. In this case, we must carry on returning
mjsousa@761: 			       *       more END_OF_INPUT tokens.
mjsousa@761: 			       * 
mjsousa@761: 			       *       However, in the above case we will be asked to carry on reading more tokens 
mjsousa@761: 			       *       from the main input file, after we have reached the end. For this to work
mjsousa@761: 			       *       correctly, we cannot close the main input file!
mjsousa@761: 			       * 
mjsousa@761: 			       *       This is why we WILL be called with include_stack_ptr == 0 multiple times,
mjsousa@761: 			       *       and why we must handle it as a special case
mjsousa@761: 			       *       that leaves the include_stack_ptr unchanged, and returns END_OF_INPUT once again.
mjsousa@761: 			       * 
mjsousa@761: 			       *       As a corollary, flex can never safely close the main input file, and we must ask
mjsousa@761: 			       *       bison to close it!
mario@76: 			       */
mario@76: 			  if (include_stack_ptr == 0) {
mjsousa@761: 			      // fclose(yyin);           // Must not do this!!
mjsousa@879: 			      // FreeTracking(current_tracking); // Must not do this!!
mario@73: 			      /* yyterminate() terminates the scanner and returns a 0 to the 
mario@73: 			       * scanner's  caller, indicating "all done".
mario@73: 			       *	
mario@73: 			       * Our syntax parser (written with bison) has the token	
mario@73: 			       * END_OF_INPUT associated to the value 0, so even though
mario@73: 			       * we don't explicitly return the token END_OF_INPUT
mario@73: 			       * calling yyterminate() is equivalent to doing that. 
mario@73: 			       */ 	
etisserant@0: 			    yyterminate();
msousa@737: 			  } else {
mjsousa@761: 			    fclose(yyin);   /* end of an included file: release it and its tracking state ... */
mjsousa@879: 			    FreeTracking(current_tracking);
lbessard@136: 			    --include_stack_ptr;   /* ... then restore the buffer, tracking state and file name saved at this stack slot */
etisserant@0: 			    yy_delete_buffer(YY_CURRENT_BUFFER);
etisserant@0: 			    yy_switch_to_buffer((include_stack[include_stack_ptr]).buffer_state);
lbessard@136: 			    current_tracking = include_stack[include_stack_ptr].env;
etisserant@0: 			      /* removing constness of char *. This is safe actually,
etisserant@0: 			       * since the only real const char * that is stored on the stack is
etisserant@1: 			       * the first one (i.e. the one that gets stored in include_stack[0],
etisserant@0: 			       * which is never free'd!
etisserant@0: 			       */
msousa@286: 			    /* NOTE: We do __NOT__ free the malloc()'d memory since 
msousa@286: 			     *       pointers to this filename will be kept by many objects
msousa@286: 			     *       in the abstract syntax tree.
msousa@286: 			     *       This will later be used to provide correct error
msousa@286: 			     *       messages during semantic analysis (stage 3)
msousa@286: 			     */
msousa@286: 			    /* free((char *)current_filename); */
etisserant@0: 			    current_filename = include_stack[include_stack_ptr].filename;
etisserant@0: 			    yy_push_state(include_end);   /* the include_end rules pop this state again */
etisserant@0: 			  }
etisserant@0: 			}
etisserant@0: 
etisserant@0: <include_end>{file_include_pragma_end}	yy_pop_state();
msousa@756: 	/* handle the artificial file includes created by include_string(), which do not end with a '}' */
msousa@756: <include_end>.				unput_text(0); yy_pop_state(); 
etisserant@0: 
etisserant@0: 
etisserant@0: 	/*********************************/
etisserant@0: 	/* Handle all the state changes! */
etisserant@0: 	/*********************************/
etisserant@0: 
mjsousa@866: 	/* INITIAL -> header_state */
etisserant@0: <INITIAL>{
mjsousa@1016: FUNCTION{st_whitespace} 		{ BEGIN(get_preparse_state() ? get_pou_name_state : header_state); return FUNCTION; }       /* pre-parsing only wants the POU name */
mjsousa@1016: FUNCTION_BLOCK{st_whitespace}		{ BEGIN(get_preparse_state() ? get_pou_name_state : header_state); return FUNCTION_BLOCK; }
mjsousa@1016: PROGRAM{st_whitespace}			{ BEGIN(get_preparse_state() ? get_pou_name_state : header_state); return PROGRAM; }
mjsousa@1016: CONFIGURATION{st_whitespace}		{ BEGIN(get_preparse_state() ? get_pou_name_state : config_state); return CONFIGURATION; }  /* configurations use config_state instead of header_state */
mjsousa@1016: }
mjsousa@1016: 
mjsousa@1016: <get_pou_name_state>{
mjsousa@1016: {identifier}			{ yylval.ID = strdup(yytext); BEGIN(ignore_pou_state); return identifier_token; }  /* hand the POU name to bison, then skip the POU */
mjsousa@1016: .				{ BEGIN(ignore_pou_state); unput_text(0); }  /* anything else: give the text back and skip the POU as well */
mjsousa@1016: }
mjsousa@1016: 
mjsousa@1016: <ignore_pou_state>{
mjsousa@1016: END_FUNCTION			| /* all four END_xxx keywords share the action of the last rule in this chain */
mjsousa@1016: END_FUNCTION_BLOCK		|
mjsousa@1016: END_PROGRAM			|
mjsousa@1016: END_CONFIGURATION		{ unput_text(0); BEGIN(INITIAL); }
mjsousa@1016: .|\n				{ /* Skip everything else inside the POU, one character at a time (the '\n' character included) */ }
mjsousa@1016: }
mjsousa@1016: 
mjsousa@1016: 
mjsousa@1016: 	/* header_state -> (vardecl_list_state) */
mjsousa@1016: 	/* NOTE: This transition assumes that all POUs with code (Function, FB, and Program) will always contain
mjsousa@1016: 	 *       at least one VAR_XXX block.
mjsousa@1016: 	 *      How about functions that do not declare variables, and go directly to the body_state???
etisserant@0: 	 *      - According to Section 2.5.1.3 (Function Declaration), item 2 in the list, a FUNCTION
etisserant@0: 	 *        must have at least one input argument, so a correct declaration will have at least
etisserant@0: 	 *        one VAR_INPUT ... END_VAR construct!
etisserant@0: 	 *      - According to Section 2.5.2.2 (Function Block Declaration), a FUNCTION_BLOCK
etisserant@0: 	 *        must have at least one input argument, so a correct declaration will have at least
etisserant@0: 	 *        one VAR_INPUT ... END_VAR construct!
etisserant@0: 	 *      - According to Section 2.5.3 (Programs), a PROGRAM must have at least one input
etisserant@0: 	 *        argument, so a correct declaration will have at least one VAR_INPUT ... END_VAR
etisserant@0: 	 *        construct!
etisserant@0: 	 *
etisserant@0: 	 *       All the above means that we needn't worry about PROGRAMs, FUNCTIONs or
mario@68: 	 *       FUNCTION_BLOCKs that do not have at least one END_VAR before the body_state.
etisserant@0: 	 *       If the code has an error, and no END_VAR before the body, we will simply
mjsousa@1016: 	 *       continue in the <vardecl_state> state, until the end of the FUNCTION, FUNCTION_BLOCK
etisserant@0: 	 *       or PROGRAM.
mjsousa@1016: 	 * 
mjsousa@1016: 	 * WARNING: From 2016-05 (May 2016) onwards, matiec supports a non-standard option in which a Function
mjsousa@1016: 	 *          may be declared with no Input, Output or IN_OUT variables. This means that the above 
mjsousa@1016: 	 *          assumption is no longer valid.
msousa@1057: 	 * 
msousa@1057: 	 * NOTE: Some code being parsed may be erroneous and not contain any VAR END_VAR block.
msousa@1057: 	 *       To generate error messages that make sense, the flex state machine should not get lost
msousa@1057: 	 *       in these situations. We therefore consider the possibility of finding 
msousa@1057: 	 *       END_FUNCTION, END_FUNCTION_BLOCK or END_PROGRAM when inside the header_state.
etisserant@0: 	 */
mjsousa@866: <header_state>{
mjsousa@868: VAR				| /* execute the next rule's action, i.e. fall-through! */
mjsousa@868: VAR_INPUT			|
mjsousa@868: VAR_OUTPUT			|
mjsousa@868: VAR_IN_OUT			|
mjsousa@868: VAR_EXTERNAL			|
mjsousa@868: VAR_GLOBAL			|
mjsousa@868: VAR_TEMP			|
mjsousa@868: VAR_CONFIG			|
msousa@1057: VAR_ACCESS			unput_text(0); BEGIN(vardecl_list_state);
msousa@1057: 
msousa@1057: END_FUNCTION			| /* execute the next rule's action, i.e. fall-through! */
msousa@1057: END_FUNCTION_BLOCK		| 
msousa@1057: END_PROGRAM			unput_text(0); BEGIN(vardecl_list_state); 
msousa@1057: 				/* Notice that we do NOT go directly to body_state, as that requires a push().
msousa@1057: 				 * If we were to push to body_state here, then the corresponding pop() at the
msousa@1057: 				 * end of body_state would return to header_state.
msousa@1057: 				 * After this pop() header_state would not return to INITIAL as it should, but
msousa@1057: 				 * would instead enter an infinite loop push()ing again to body_state
msousa@1057: 				 */
mjsousa@868: }
mjsousa@868: 
mjsousa@868: 
mjsousa@868: 	/* vardecl_list_state -> (vardecl_state | body_state | INITIAL) */
mjsousa@866: <vardecl_list_state>{
msousa@1065: 				/* NOTE: vardecl_list_state is an exclusive state, i.e. when in this state
msousa@1065: 				 *       default rules do not apply! This means that when in this state identifiers
msousa@1065: 				 *       are not recognised!
msousa@1065: 				 * NOTE: Notice that we only change to vardecl_state if the VAR*** is followed by 
msousa@1065: 				 *       at least one whitespace. This is to distinguish the VAR declaration
msousa@1065: 				 *       from identifiers starting with 'var' (e.g. a variable named 'varint')
msousa@1065: 				 * NOTE: Notice that we cannot use st_whitespace here, as it can legally be empty.
msousa@1065: 				 *       We therefore use st_whitespace_char instead.
msousa@1065: 				 */  
msousa@1065: VAR_INPUT{st_whitespace_char}		| /* execute the next rule's action, i.e. fall-through! */
msousa@1065: VAR_OUTPUT{st_whitespace_char}		|
msousa@1065: VAR_IN_OUT{st_whitespace_char}		|
msousa@1065: VAR_EXTERNAL{st_whitespace_char}	|
msousa@1065: VAR_GLOBAL{st_whitespace_char}		|
msousa@1065: VAR_TEMP{st_whitespace_char}		|
msousa@1065: VAR_CONFIG{st_whitespace_char}		|
msousa@1065: VAR_ACCESS{st_whitespace_char}		|
msousa@1065: VAR{st_whitespace_char}			unput_text(0); yy_push_state(vardecl_state); //printf("\nChanging to vardecl_state\n");
msousa@1065: 
msousa@1065: END_FUNCTION{st_whitespace}		unput_text(0); BEGIN(INITIAL); /* st_whitespace may be empty, so the bare keyword matches too */
msousa@1065: END_FUNCTION_BLOCK{st_whitespace}	unput_text(0); BEGIN(INITIAL);
msousa@1065: END_PROGRAM{st_whitespace}		unput_text(0); BEGIN(INITIAL);
mjsousa@868: 
msousa@1055: 				/* NOTE: Handling of whitespace...
msousa@1055: 				 *   - Must come __before__ the next rule for any single character '.'
msousa@1055: 				 *   - If the rules were reversed, any whitespace with a single space (' ') 
msousa@1055: 				 *     would be handled by the '.' rule instead of the {st_whitespace} rule!
msousa@1055: 				 */
msousa@1055: {st_whitespace}			/* Eat any whitespace */ 
msousa@1055: 
msousa@1055: 				/* anything else, just change to body_state! */
msousa@1055: .				unput_text(0); yy_push_state(body_state); //printf("\nChanging to body_state\n");
mjsousa@868: }
mjsousa@868: 
mjsousa@868: 
mjsousa@868: 	/* vardecl_state -> pop to $previous_state (vardecl_list_state) */
mjsousa@866: <vardecl_state>{
mjsousa@948: END_VAR				yy_pop_state(); return END_VAR; /* pop back to vardecl_list_state */
mjsousa@866: }
mjsousa@866: 
etisserant@0: 
mjsousa@868: 	/* body_state -> (il_state | st_state | sfc_state) */
mario@68: <body_state>{
mjsousa@1020: {st_whitespace}			{/* In body state we do not process any tokens,
mjsousa@1020: 				  * we simply store them for later processing!
msousa@1055: 				  * NOTE: we must return ALL text when in body_state, including
msousa@1055: 				  * all comments and whitespace, so as not
msousa@1055: 				  * to lose track of the line_number and column number
msousa@1055: 				  * used when printing debugging messages.
msousa@1056: 				  * NOTE: some of the following rules depend on the fact that 
msousa@1055: 				  * the body state buffer is either empty or only contains white space up to
msousa@1056: 				  * that point. Since the vardecl_list_state will eat up all
msousa@1055: 				  * whitespace before entering the body_state, the contents of the bodystate_buffer
msousa@1056: 				  * will _never_ start with whitespace if the previous state was vardecl_list_state. 
msousa@1056: 				  * However, it is possible to enter the body_state from other states (e.g. when 
msousa@1056: 				  * parsing SFC code, that contains transitions or actions in other languages)
mjsousa@1020: 				  */
msousa@1056: 				 append_bodystate_buffer(yytext, 1 /* is whitespace */); 
mjsousa@1020: 				}
mjsousa@1016: 	/* 'INITIAL_STEP' always used in beginning of SFCs !! */
mjsousa@1016: INITIAL_STEP			{ if (isempty_bodystate_buffer())	{unput_text(0); BEGIN(sfc_state);}
mjsousa@1016: 				  else					{append_bodystate_buffer(yytext);}
mjsousa@1016: 				}
mjsousa@1016:  
mjsousa@1016: 	/* ':=', at the very beginning of a 'body', occurs only in transitions and not Function, FB, or Program bodies! */
mjsousa@1016: :=				{ if (isempty_bodystate_buffer())	{unput_text(0); BEGIN(st_state);} /* We do _not_ return a start_ST_body_token here, as bison does not expect it! */
mjsousa@1016: 				  else				 	{append_bodystate_buffer(yytext);}
mjsousa@1016: 				}
mjsousa@1016:  
andrej@1031: 	/* check if ';' occurs before an END_FUNCTION, END_FUNCTION_BLOCK, END_PROGRAM, END_ACTION or END_TRANSITION. (If true => we are parsing ST; If false => parsing IL). */
mjsousa@1016: END_ACTION			| /* execute the next rule's action, i.e. fall-through! */
mjsousa@1016: END_FUNCTION			|
mjsousa@1016: END_FUNCTION_BLOCK		|
andrej@1031: END_TRANSITION   		|
mjsousa@1016: END_PROGRAM			{ append_bodystate_buffer(yytext); unput_bodystate_buffer(); BEGIN(il_state); /*printf("returning start_IL_body_token\n");*/ return start_IL_body_token;}
mjsousa@1016: .|\n				{ append_bodystate_buffer(yytext); /* keep buffering one character at a time until a ';' decides for ST */
mjsousa@1016: 				  if (strcmp(yytext, ";") == 0)
mjsousa@1016: 				    {unput_bodystate_buffer(); BEGIN(st_state); /*printf("returning start_ST_body_token\n");*/ return start_ST_body_token;}
mjsousa@1016: 				}
mjsousa@1016: 	/* The following rules are not really necessary. They just make compilation faster in case the ST Statement List starts with one of the following... */
mjsousa@1016: RETURN				| /* execute the next rule's action, i.e. fall-through! */
mjsousa@1016: IF				|
mjsousa@1016: CASE				|
mjsousa@1016: FOR				|
mjsousa@1016: WHILE				|
mjsousa@1016: EXIT				|
mjsousa@1016: REPEAT				{ if (isempty_bodystate_buffer())	{unput_text(0); BEGIN(st_state); return start_ST_body_token;}
mjsousa@1016: 				  else				 	{append_bodystate_buffer(yytext);}
mjsousa@1016: 				}
mjsousa@1016: 
mario@68: }	/* end of body_state lexical parser */
lbessard@3: 
mjsousa@866: 
mjsousa@868: 	/* (il_state | st_state) -> pop to $previous_state (vardecl_list_state or sfc_state) */
lbessard@3: <il_state,st_state>{
lbessard@3: END_FUNCTION		| /* all body terminators share the action of the last rule */
lbessard@3: END_FUNCTION_BLOCK	|
lbessard@3: END_PROGRAM		|
lbessard@3: END_TRANSITION		|
mario@6: END_ACTION		{ yy_pop_state(); unput_text(0); }
lbessard@3: }
lbessard@3: 
mjsousa@868: 	/* sfc_state -> pop to $previous_state (vardecl_list_state or sfc_state) */
lbessard@4: <sfc_state>{
lbessard@4: END_FUNCTION		| /* all POU terminators share the action of the last rule */
lbessard@4: END_FUNCTION_BLOCK	|
lbessard@4: END_PROGRAM		{ yy_pop_state(); unput_text(0); }
lbessard@4: }
lbessard@4: 
etisserant@0: 	/* config -> INITIAL */
etisserant@0: END_CONFIGURATION	{ BEGIN(INITIAL); return END_CONFIGURATION; }
etisserant@0: 
etisserant@0: 
etisserant@0: 
etisserant@0: 	/***************************************/
etisserant@0: 	/* Next is to remove all whitespace    */
etisserant@0: 	/***************************************/
etisserant@0: 	/* NOTE: pragmas are handled right at the beginning... */
etisserant@0: 
mjsousa@866: 	/* The whitespace */
msousa@1055: <INITIAL,header_state,config_state,vardecl_state,st_state,sfc_state,task_init_state,sfc_qualifier_state>{st_whitespace}	{ /* Discard: no token is produced for whitespace */ }
mjsousa@866: <il_state>{il_whitespace}		{ /* Discard: no token is produced for whitespace */ }
mjsousa@1020:  /* NOTE: Due to the need of having the following rule have higher priority,
mjsousa@1020:   *        the following rule was moved to an earlier position in this file.
mjsousa@1020: <body_state>{st_whitespace}		{...}
mjsousa@1020:  */
mjsousa@866: 
mjsousa@866: 	/* The comments */
mjsousa@952: <get_pou_name_state,ignore_pou_state,body_state,vardecl_list_state>{comment_beg}		| /* same action as the next rule */
mjsousa@867: {comment_beg}						{ yy_push_state(comment_state); }
mjsousa@866: <comment_state>{
mjsousa@867: {comment_beg}						{ if (get_opt_nested_comments()) { yy_push_state(comment_state); } }
mjsousa@867: {comment_end}						{ yy_pop_state(); }
mjsousa@867: .|\n							{ /* Ignore text inside comment! */ }
mjsousa@866: }
msousa@267: 
etisserant@0: 	/*****************************************/
etisserant@0: 	/* B.1.1 Letters, digits and identifiers */
etisserant@0: 	/*****************************************/
etisserant@0: 	/* NOTE: 'R1', 'IN', etc... are IL operators, and therefore tokens
etisserant@0: 	 *       On the other hand, the spec does not define them as keywords,
etisserant@0: 	 *       which means they may be re-used for variable names, etc...!
etisserant@0: 	 *       The syntax parser already caters for the possibility of these
etisserant@0: 	 *       tokens being used for variable names in their declarations.
etisserant@0: 	 *       When they are declared, they will be added to the variable symbol table!
etisserant@0: 	 *       Further appearances of these tokens must no longer be parsed
etisserant@0: 	 *       as R1_tokens etc..., but rather as variable_name_tokens!
etisserant@0: 	 *
etisserant@0: 	 *       That is why the first thing we do with identifiers, even before
etisserant@0: 	 *       checking whether they may be a 'keyword', is to check whether
etisserant@0: 	 *       they have been previously declared as a variable name,
etisserant@0: 	 *
mario@13: 	 *       However, we have a dilemma! Should we here also check for
mario@13: 	 *       prev_declared_derived_function_name_token?
mario@13: 	 *       If we do, then the 'MOD' default library function (defined in
mario@13: 	 *       the standard) will always be returned as a function name, and
mario@13: 	 *       it will therefore not be possible to use it as an operator as 
mario@13: 	 *       in the following ST expression 'X := Y MOD Z;' !
mario@13: 	 *       If we don't, then it will not even be possible to use 'MOD'
mario@13: 	 *       as a function as in 'X := MOD(Y, Z);'
mario@13: 	 *       We solve this by NOT testing for function names here, and
mario@13: 	 *       handling this function and keyword clash in bison!
etisserant@0: 	 */
mjsousa@1016: 	/* NOTE: The following code has been commented out as most users do not want matiec
mjsousa@1016: 	 *       to allow the use of 'R1', 'IN' ... IL operators as identifiers, 
mjsousa@1016: 	 *       even though a literal reading of the standard allows this.
mjsousa@1016: 	 *       We could add this as a command line option, but it is not yet done.
mjsousa@1016: 	 *       For now we just comment out the code, but leave the commented code
mjsousa@1016: 	 *       in so we can re-activate it quickly (without having to go through old commits
mjsousa@1016: 	 *       in the mercurial repository to figure out the missing code)!
mjsousa@1016: 	 */
mario@83:  /*
etisserant@0: {identifier} 	{int token = get_identifier_token(yytext);
mario@81: 		 // fprintf(stderr, "flex: analysing identifier '%s'...", yytext); 
etisserant@0: 		 if ((token == prev_declared_variable_name_token) ||
mario@13: //		     (token == prev_declared_derived_function_name_token) || // DO NOT add this condition!
etisserant@0: 		     (token == prev_declared_fb_name_token)) {
mario@83: 		 // if (token != identifier_token)
mario@83: 		 // * NOTE: if we replace the above uncommented conditions with
mario@13:                   *       the simple test of (token != identifier_token), then 
mario@13:                   *       'MOD' et al must be removed from the 
mario@13:                   *       library_symbol_table as a default function name!
mario@83: 		  * //
etisserant@0: 		   yylval.ID=strdup(yytext);
mario@81: 		   // fprintf(stderr, "returning token %d\n", token); 
etisserant@0: 		   return token;
etisserant@0: 		 }
mario@83: 		 // otherwise, leave it for the other lexical parser rules... 
mario@81: 		 // fprintf(stderr, "rejecting\n"); 
etisserant@0: 		 REJECT;
etisserant@0: 		}
mario@83:  */
etisserant@0: 
etisserant@0: 	/******************************************************/
etisserant@0: 	/******************************************************/
etisserant@0: 	/******************************************************/
etisserant@0: 	/*****                                            *****/
etisserant@0: 	/*****                                            *****/
etisserant@0: 	/*****   N O W    D O   T H E   K E Y W O R D S   *****/
etisserant@0: 	/*****                                            *****/
etisserant@0: 	/*****                                            *****/
etisserant@0: 	/******************************************************/
etisserant@0: 	/******************************************************/
etisserant@0: 	/******************************************************/
etisserant@0: 
etisserant@0: 
mjsousa@934: REF	{ if (!get_opt_ref_standard_extensions()) {REJECT;}  return REF; }		/* Keyword in IEC 61131-3 v3 */
mjsousa@934: DREF	{ if (!get_opt_ref_standard_extensions()) {REJECT;}  return DREF; }		/* Keyword in IEC 61131-3 v3 */
mjsousa@934: REF_TO	{ if (!get_opt_ref_standard_extensions()) {REJECT;}  return REF_TO; }		/* Keyword in IEC 61131-3 v3 */
mjsousa@934: NULL	{ if (!get_opt_ref_standard_extensions()) {REJECT;}  return NULL_token; }	/* Keyword in IEC 61131-3 v3 */
mjsousa@873: 
mario@82: EN	{ return EN; }			/* Keyword */
mario@82: ENO	{ return ENO; }			/* Keyword */
etisserant@0: 
etisserant@0: 
etisserant@0: 	/******************************/
etisserant@0: 	/* B 1.2.1 - Numeric Literals */
etisserant@0: 	/******************************/
mario@82: TRUE		{ return TRUE; }		/* Keyword */
msousa@257: BOOL#1  	| /* same action as the next rule */
msousa@257: BOOL#TRUE	{ return boolean_true_literal_token; }
msousa@257: SAFEBOOL#1	| /* same action as the next rule */
msousa@257: SAFEBOOL#TRUE	{ if (!get_opt_safe_extensions()) {REJECT;}  return safeboolean_true_literal_token; } /* Keyword (Data Type) */
msousa@257: 
mario@82: FALSE		{ return FALSE; }		/* Keyword */
msousa@257: BOOL#0  	| /* same action as the next rule */
msousa@257: BOOL#FALSE  	{ return boolean_false_literal_token; }
msousa@257: SAFEBOOL#0	| /* same action as the next rule */
msousa@257: SAFEBOOL#FALSE	{ if (!get_opt_safe_extensions()) {REJECT;}  return safeboolean_false_literal_token; } /* Keyword (Data Type) */
etisserant@0: 
etisserant@0: 
etisserant@0: 	/************************/
etisserant@0: 	/* B 1.2.3.1 - Duration */
etisserant@0: 	/************************/
mario@82: t#		| /* lower- and upper-case prefix share one action */
mario@82: T#		{ return T_SHARP; }	/* Delimiter */
mario@82: TIME		{ return TIME; }	/* Keyword (Data Type) */
etisserant@0: 
etisserant@0: 
etisserant@0: 	/************************************/
etisserant@0: 	/* B 1.2.3.2 - Time of day and Date */
etisserant@0: 	/************************************/
mario@82: TIME_OF_DAY	| /* long and short spelling share one action */
mario@82: TOD		{ return TIME_OF_DAY; }		/* Keyword (Data Type) */
mario@82: DATE		{ return DATE; }		/* Keyword (Data Type) */
mario@82: d#		| /* lower- and upper-case prefix share one action */
mario@82: D#		{ return D_SHARP; }		/* Delimiter */
mario@82: DATE_AND_TIME	| /* long and short spelling share one action */
mario@82: DT		{ return DATE_AND_TIME; }	/* Keyword (Data Type) */
etisserant@0: 
etisserant@0: 
etisserant@0: 	/***********************************/
etisserant@0: 	/* B 1.3.1 - Elementary Data Types */
etisserant@0: 	/***********************************/
msousa@257: BOOL		{ return BOOL; }		/* Keyword (Data Type) */
msousa@257: 
mario@82: BYTE		{ return BYTE; }		/* Keyword (Data Type) */
mario@82: WORD		{ return WORD; }		/* Keyword (Data Type) */
mario@82: DWORD		{ return DWORD; }		/* Keyword (Data Type) */
mario@82: LWORD		{ return LWORD; }		/* Keyword (Data Type) */
etisserant@0: 
msousa@257: SINT		{ return SINT; }		/* Keyword (Data Type) */
msousa@257: INT		{ return INT; }			/* Keyword (Data Type) */
msousa@257: DINT		{ return DINT; }		/* Keyword (Data Type) */
msousa@257: LINT		{ return LINT; }		/* Keyword (Data Type) */
msousa@257: 
msousa@257: USINT		{ return USINT; }		/* Keyword (Data Type) */
msousa@257: UINT		{ return UINT; }		/* Keyword (Data Type) */
msousa@257: UDINT		{ return UDINT; }		/* Keyword (Data Type) */
msousa@257: ULINT		{ return ULINT; }		/* Keyword (Data Type) */
msousa@257: 
msousa@257: REAL		{ return REAL; }		/* Keyword (Data Type) */
msousa@257: LREAL		{ return LREAL; }		/* Keyword (Data Type) */
msousa@257: 
msousa@257: WSTRING		{ return WSTRING; }		/* Keyword (Data Type) */
msousa@257: STRING		{ return STRING; }		/* Keyword (Data Type) */
msousa@257: 
msousa@257: TIME		{ return TIME; }		/* Keyword (Data Type) - same pattern as the earlier TIME rule (B 1.2.3.1), which is listed first and therefore wins */
msousa@257: DATE		{ return DATE; }		/* Keyword (Data Type) - same pattern as the earlier DATE rule (B 1.2.3.2), which wins */
msousa@257: DT		{ return DT; }			/* Keyword (Data Type) - shadowed: the earlier DT rule returns DATE_AND_TIME first */
msousa@257: TOD		{ return TOD; }			/* Keyword (Data Type) - shadowed: the earlier TOD rule returns TIME_OF_DAY first */
msousa@257: DATE_AND_TIME	{ return DATE_AND_TIME; }	/* Keyword (Data Type) - same pattern as the earlier DATE_AND_TIME rule, which wins */
msousa@257: TIME_OF_DAY	{ return TIME_OF_DAY; }		/* Keyword (Data Type) - same pattern as the earlier TIME_OF_DAY rule, which wins */
msousa@257: 
mjsousa@1014: 					/* A non-standard extension! */
mjsousa@1014: VOID		{ if (!runtime_options.allow_void_datatype) {REJECT;}  return VOID; }
mjsousa@1014: 
mjsousa@1014: 
msousa@257: 	/*****************************************************************/
msousa@257: 	/* Keywords defined in "Safety Software Technical Specification" */
msousa@257: 	/*****************************************************************/
msousa@257:         /* 
msousa@257:          * NOTE: The following keywords are defined in 
msousa@257:          *       "Safety Software Technical Specification,
msousa@257:          *        Part 1: Concepts and Function Blocks,  
msousa@257:          *        Version 1.0 – Official Release"
msousa@257:          *        written by PLCopen - Technical Committee 5
msousa@257:          *
msousa@257:          *        We only support these extensions and keywords
msousa@257:          *        if the appropriate command line option is given.
msousa@257:          *
msousa@257:          *        Every rule below returns the token named after its own keyword
msousa@257:          *        when get_opt_safe_extensions() is true; otherwise it REJECTs, so
msousa@257:          *        that the next best matching rule handles the text instead.
msousa@257:          */
msousa@257: SAFEBOOL	     {if (get_opt_safe_extensions()) {return SAFEBOOL;}          else {REJECT;}} 
msousa@257: 
msousa@257: SAFEBYTE	     {if (get_opt_safe_extensions()) {return SAFEBYTE;}          else {REJECT;}} 
msousa@257: SAFEWORD	     {if (get_opt_safe_extensions()) {return SAFEWORD;}          else {REJECT;}} 
msousa@257: SAFEDWORD	     {if (get_opt_safe_extensions()) {return SAFEDWORD;}         else{REJECT;}}
msousa@257: SAFELWORD	     {if (get_opt_safe_extensions()) {return SAFELWORD;}         else{REJECT;}}
msousa@257:                
msousa@257: SAFEREAL	     {if (get_opt_safe_extensions()) {return SAFEREAL;}          else{REJECT;}}	/* was returning SAFESINT (copy-paste slip) */
msousa@257: SAFELREAL    	     {if (get_opt_safe_extensions()) {return SAFELREAL;}         else{REJECT;}}
msousa@257:                   
msousa@257: SAFESINT	     {if (get_opt_safe_extensions()) {return SAFESINT;}          else{REJECT;}}
msousa@257: SAFEINT	             {if (get_opt_safe_extensions()) {return SAFEINT;}           else{REJECT;}}
msousa@257: SAFEDINT	     {if (get_opt_safe_extensions()) {return SAFEDINT;}          else{REJECT;}}
msousa@257: SAFELINT             {if (get_opt_safe_extensions()) {return SAFELINT;}          else{REJECT;}}
msousa@257: 
msousa@257: SAFEUSINT            {if (get_opt_safe_extensions()) {return SAFEUSINT;}         else{REJECT;}}
msousa@257: SAFEUINT             {if (get_opt_safe_extensions()) {return SAFEUINT;}          else{REJECT;}}
msousa@257: SAFEUDINT            {if (get_opt_safe_extensions()) {return SAFEUDINT;}         else{REJECT;}}
msousa@257: SAFEULINT            {if (get_opt_safe_extensions()) {return SAFEULINT;}         else{REJECT;}}
msousa@257: 
msousa@257:  /* SAFESTRING and SAFEWSTRING are not yet supported, i.e. checked correctly, in the semantic analyser (stage 3) */
msousa@257:  /*  so it is best not to support them at all... */
msousa@257:  /*
msousa@257: SAFEWSTRING          {if (get_opt_safe_extensions()) {return SAFEWSTRING;}       else{REJECT;}}
msousa@257: SAFESTRING           {if (get_opt_safe_extensions()) {return SAFESTRING;}        else{REJECT;}}
msousa@257:  */
msousa@257: 
msousa@257: SAFETIME             {if (get_opt_safe_extensions()) {return SAFETIME;}          else{REJECT;}}
msousa@257: SAFEDATE             {if (get_opt_safe_extensions()) {return SAFEDATE;}          else{REJECT;}}
msousa@257: SAFEDT               {if (get_opt_safe_extensions()) {return SAFEDT;}            else{REJECT;}}
msousa@257: SAFETOD              {if (get_opt_safe_extensions()) {return SAFETOD;}           else{REJECT;}}
msousa@257: SAFEDATE_AND_TIME    {if (get_opt_safe_extensions()) {return SAFEDATE_AND_TIME;} else{REJECT;}}
msousa@257: SAFETIME_OF_DAY      {if (get_opt_safe_extensions()) {return SAFETIME_OF_DAY;}   else{REJECT;}}
etisserant@0: 
etisserant@0: 	/********************************/
etisserant@0: 	/* B 1.3.2 - Generic data types */
etisserant@0: 	/********************************/
etisserant@0: 	/* Strangely, the following symbols do not seem to be required! */
etisserant@0: 	/* But we include them so they become reserved words, and do not
etisserant@0: 	 * get passed up to bison as an identifier...
etisserant@0: 	 */
mario@82: ANY		{ return ANY; }			/* Keyword (Data Type) */
mario@82: ANY_DERIVED	{ return ANY_DERIVED; }		/* Keyword (Data Type) */
mario@82: ANY_ELEMENTARY	{ return ANY_ELEMENTARY; }	/* Keyword (Data Type) */
mario@82: ANY_MAGNITUDE	{ return ANY_MAGNITUDE; }	/* Keyword (Data Type) */
mario@82: ANY_NUM		{ return ANY_NUM; }		/* Keyword (Data Type) */
mario@82: ANY_REAL	{ return ANY_REAL; }		/* Keyword (Data Type) */
mario@82: ANY_INT		{ return ANY_INT; }		/* Keyword (Data Type) */
mario@82: ANY_BIT		{ return ANY_BIT; }		/* Keyword (Data Type) */
mario@82: ANY_STRING	{ return ANY_STRING; }		/* Keyword (Data Type) */
mario@82: ANY_DATE	{ return ANY_DATE; }		/* Keyword (Data Type) */
etisserant@0: 
etisserant@0: 
etisserant@0: 	/********************************/
etisserant@0: 	/* B 1.3.3 - Derived data types */
etisserant@0: 	/********************************/
mario@82: ":="		{ return ASSIGN; }		/* Delimiter */
mario@82: ".."		{ return DOTDOT; }		/* Delimiter */
mario@82: TYPE		{ return TYPE; }		/* Keyword */
mario@82: END_TYPE	{ return END_TYPE; }		/* Keyword */
mario@82: ARRAY		{ return ARRAY; }		/* Keyword */
mario@82: OF		{ return OF; }			/* Keyword */
mario@82: STRUCT		{ return STRUCT; }		/* Keyword */
mario@82: END_STRUCT	{ return END_STRUCT; }		/* Keyword */
etisserant@0: 
etisserant@0: 
etisserant@0: 	/*********************/
etisserant@0: 	/* B 1.4 - Variables */
etisserant@0: 	/*********************/
etisserant@0: 
etisserant@0: 	/******************************************/
etisserant@0: 	/* B 1.4.3 - Declaration & Initialisation */
etisserant@0: 	/******************************************/
mario@82: VAR_INPUT	{ return VAR_INPUT; }		/* Keyword */
mario@82: VAR_OUTPUT	{ return VAR_OUTPUT; }		/* Keyword */
mario@82: VAR_IN_OUT	{ return VAR_IN_OUT; }		/* Keyword */
mario@82: VAR_EXTERNAL	{ return VAR_EXTERNAL; }	/* Keyword */
mario@82: VAR_GLOBAL	{ return VAR_GLOBAL; }		/* Keyword */
mario@82: END_VAR		{ return END_VAR; }		/* Keyword */
mario@82: RETAIN		{ return RETAIN; }		/* Keyword */
mario@82: NON_RETAIN	{ return NON_RETAIN; }		/* Keyword */
mario@82: R_EDGE		{ return R_EDGE; }		/* Keyword */
mario@82: F_EDGE		{ return F_EDGE; }		/* Keyword */
mario@82: AT		{ return AT; }			/* Keyword */
etisserant@0: 
etisserant@0: 
etisserant@0: 	/***********************/
etisserant@0: 	/* B 1.5.1 - Functions */
etisserant@0: 	/***********************/
mjsousa@1010: 	/* Note: The following END_FUNCTION rule includes a BEGIN(INITIAL); command.
mjsousa@1016: 	 *       This is necessary in case the input program being parsed has syntax errors that force
mjsousa@1010: 	 *       flex's main state machine to never change to the il_state or the st_state
mjsousa@1010: 	 *       after changing to the body_state.
mjsousa@1010: 	 *       This BEGIN(INITIAL) command forces the flex state machine to re-synchronise with 
mjsousa@1010: 	 *       the input stream even in the presence of buggy code!
mjsousa@1010: 	 */
mjsousa@1010: FUNCTION			{ return FUNCTION; }				/* Keyword */
mjsousa@1010: END_FUNCTION			{ BEGIN(INITIAL); return END_FUNCTION; }	/* Keyword */  /* resets to INITIAL to re-synchronise on buggy input */
mjsousa@1010: VAR				{ return VAR; }					/* Keyword */
mjsousa@1010: CONSTANT			{ return CONSTANT; }				/* Keyword */
etisserant@0: 
etisserant@0: 
etisserant@0: 	/*****************************/
etisserant@0: 	/* B 1.5.2 - Function Blocks */
etisserant@0: 	/*****************************/
mjsousa@1010: 	/* Note: The following END_FUNCTION_BLOCK rule includes a BEGIN(INITIAL); command.
mjsousa@1016: 	 *       This is necessary in case the input program being parsed has syntax errors that force
mjsousa@1010: 	 *       flex's main state machine to never change to the il_state or the st_state
mjsousa@1010: 	 *       after changing to the body_state.
mjsousa@1010: 	 *       This BEGIN(INITIAL) command forces the flex state machine to re-synchronise with 
mjsousa@1010: 	 *       the input stream even in the presence of buggy code!
mjsousa@1010: 	 */
mjsousa@1010: FUNCTION_BLOCK				{ return FUNCTION_BLOCK; }			/* Keyword */
mjsousa@1010: END_FUNCTION_BLOCK			{ BEGIN(INITIAL); return END_FUNCTION_BLOCK; }	/* Keyword */  /* resets to INITIAL to re-synchronise on buggy input */
mjsousa@1010: VAR_TEMP				{ return VAR_TEMP; }				/* Keyword */
mjsousa@1010: VAR					{ return VAR; }					/* Keyword */
mjsousa@1010: NON_RETAIN				{ return NON_RETAIN; }				/* Keyword */
mjsousa@1010: END_VAR					{ return END_VAR; }				/* Keyword */
etisserant@0: 
etisserant@0: 
etisserant@0: 	/**********************/
etisserant@0: 	/* B 1.5.3 - Programs */
etisserant@0: 	/**********************/
mjsousa@1010: 	/* Note: The following END_PROGRAM rule includes a BEGIN(INITIAL); command.
mjsousa@1016: 	 *       This is necessary in case the input program being parsed has syntax errors that force
mjsousa@1010: 	 *       flex's main state machine to never change to the il_state or the st_state
mjsousa@1010: 	 *       after changing to the body_state.
mjsousa@1010: 	 *       This BEGIN(INITIAL) command forces the flex state machine to re-synchronise with 
mjsousa@1010: 	 *       the input stream even in the presence of buggy code!
mjsousa@1010: 	 */
mjsousa@1010: PROGRAM				{ return PROGRAM; }				/* Keyword */
mjsousa@1010: END_PROGRAM			{ BEGIN(INITIAL); return END_PROGRAM; }		/* Keyword */  /* resets to INITIAL to re-synchronise on buggy input */
etisserant@0: 
etisserant@0: 
etisserant@0: 	/********************************************/
etisserant@0: 	/* B 1.6 Sequential Function Chart elements */
etisserant@0: 	/********************************************/
etisserant@0: 	/* NOTE: the following identifiers/tokens clash with the R and S IL operators, as well
etisserant@0: 	 * as other identifiers that may be used as variable names inside IL and ST programs.
etisserant@0: 	 * They will have to be handled when we include parsing of SFC... For now, simply
etisserant@0: 	 * ignore them!
etisserant@0: 	 */
etisserant@1: 	 
mario@82: ACTION		{ return ACTION; }		/* Keyword */
mario@82: END_ACTION	{ return END_ACTION; }		/* Keyword */
mario@82: 
mario@82: TRANSITION	{ return TRANSITION; }		/* Keyword */
mario@82: END_TRANSITION	{ return END_TRANSITION; }	/* Keyword */
mario@82: FROM		{ return FROM; }		/* Keyword */
mario@82: TO		{ return TO; }			/* Keyword */
mario@82: 
mario@82: INITIAL_STEP	{ return INITIAL_STEP; }	/* Keyword */
mario@82: STEP		{ return STEP; }		/* Keyword */
mario@82: END_STEP	{ return END_STEP; }		/* Keyword */
etisserant@0: 
mario@74: 	/* PRIORITY is not a keyword, so we only return it when 
mario@74: 	 * it is explicitly required and we are not expecting any identifiers
mario@74: 	 * that could also use the same letter sequence (e.g. an identifier named priority)
mario@74: 	 */
mario@86: <sfc_priority_state>PRIORITY	{ return PRIORITY; }	/* only recognised while in sfc_priority_state */
mario@74: 
mario@68: <sfc_qualifier_state>{
etisserant@0: L		{ return L; }
etisserant@0: D		{ return D; }
etisserant@0: SD		{ return SD; }
etisserant@0: DS		{ return DS; }
etisserant@0: SL		{ return SL; }
etisserant@0: N		{ return N; }
etisserant@0: P		{ return P; }
Laurent@627: P0		{ return P0; }
Laurent@627: P1		{ return P1; }
etisserant@0: R		{ return R; }
etisserant@0: S		{ return S; }
etisserant@1: }
etisserant@0: 
etisserant@0: 
etisserant@0: 	/********************************/
etisserant@0: 	/* B 1.7 Configuration elements */
etisserant@0: 	/********************************/
mjsousa@1010: 	/* Note: The following END_CONFIGURATION rule will never get to be used, as we have
mjsousa@1010: 	 *       another identical rule above (closer to the rules handling the transitions
mjsousa@1010: 	 *       of the main state machine) that will always execute before this one.
mjsousa@1010: 	 * Note: The following END_CONFIGURATION rule includes a BEGIN(INITIAL); command.
mjsousa@1010: 	 *       This is not strictly necessary, but I place it here so it follows the same
mjsousa@1010: 	 *       pattern used in END_FUNCTION, END_PROGRAM, and END_FUNCTION_BLOCK
mjsousa@1010: 	 */
mjsousa@1010: CONFIGURATION				{ return CONFIGURATION; }			/* Keyword */
mjsousa@1010: END_CONFIGURATION			{ BEGIN(INITIAL); return END_CONFIGURATION; }	/* Keyword */   /* shadowed by the identical END_CONFIGURATION rule listed earlier in this file */
mjsousa@1010: TASK					{ return TASK; }				/* Keyword */
mjsousa@1010: RESOURCE				{ return RESOURCE; }				/* Keyword */
mjsousa@1010: ON					{ return ON; }					/* Keyword */
mjsousa@1010: END_RESOURCE				{ return END_RESOURCE; }			/* Keyword */
mjsousa@1010: VAR_CONFIG				{ return VAR_CONFIG; }				/* Keyword */
mjsousa@1010: VAR_ACCESS				{ return VAR_ACCESS; }				/* Keyword */
mjsousa@1010: END_VAR					{ return END_VAR; }				/* Keyword */
mjsousa@1010: WITH					{ return WITH; }				/* Keyword */
mjsousa@1010: PROGRAM					{ return PROGRAM; }				/* Keyword */
mjsousa@1010: RETAIN					{ return RETAIN; }				/* Keyword */
mjsousa@1010: NON_RETAIN				{ return NON_RETAIN; }				/* Keyword */
mjsousa@1010: READ_WRITE				{ return READ_WRITE; }				/* Keyword */
mjsousa@1010: READ_ONLY				{ return READ_ONLY; }				/* Keyword */
mario@74: 
mario@74: 	/* PRIORITY, SINGLE and INTERVAL are not keywords, so we only return them when 
mario@74: 	 * they are explicitly required and we are not expecting any identifiers
mario@74: 	 * that could also use the same letter sequence (e.g. an identifier named priority, ...)
mario@74: 	 */
mario@74: <task_init_state>{
etisserant@0: PRIORITY		{ return PRIORITY; }
etisserant@0: SINGLE			{ return SINGLE; }
etisserant@0: INTERVAL		{ return INTERVAL; }
mario@74: }
etisserant@0: 
etisserant@0: 	/***********************************/
etisserant@0: 	/* B 2.1 Instructions and Operands */
etisserant@0: 	/***********************************/
lbessard@3: <il_state>\n		{ return EOL; }	/* in il_state a newline is returned as the EOL token */
etisserant@0: 
etisserant@0: 
etisserant@0: 	/*******************/
etisserant@0: 	/* B 2.2 Operators */
etisserant@0: 	/*******************/
etisserant@0: 	/* NOTE: we can't have flex return the same token for
etisserant@0: 	 *       ANDN and &N, neither for AND and &, since
etisserant@0: 	 *       AND and ANDN are considered valid variable
etisserant@0: 	 *       function or functionblock type names!
etisserant@0: 	 *       This means that the parser may decide that the
etisserant@0: 	 *       AND or ANDN strings found in the source code
etisserant@0: 	 *       are being used as variable names
etisserant@0: 	 *       and not as operators, and will therefore transform
etisserant@0: 	 *       these tokens into identifier tokens!
etisserant@0: 	 *       We can't have the parser thinking that the source
etisserant@0: 	 *       code contained the string AND (which may be interpreted
etisserant@0: 	 *       as a variable name) when in reality the source code
etisserant@0: 	 *       merely contained the character &, so we use two
etisserant@0: 	 *       different tokens for & and AND (and similarly
etisserant@0: 	 *       ANDN and &N)!
etisserant@0: 	 */
mario@68:  /* The following tokens clash with ST expression operators and Standard Functions */
mario@73:  /* They are also keywords! */
mario@82: AND		{ return AND; }		/* Keyword */
mario@82: MOD		{ return MOD; }		/* Keyword */
mario@82: OR		{ return OR; }		/* Keyword */
mario@82: XOR		{ return XOR; }		/* Keyword */
mario@82: NOT		{ return NOT; }		/* Keyword */
mario@68: 
mario@68:  /* The following tokens clash with Standard Functions */
mario@82:  /* They are keywords because they are a function name */
mario@73: <il_state>{
mario@82: ADD		return ADD;		/* Keyword (Standard Function) */
mario@82: DIV		return DIV;		/* Keyword (Standard Function) */
mario@82: EQ		return EQ;		/* Keyword (Standard Function) */
mario@82: GE		return GE;		/* Keyword (Standard Function) */
mario@82: GT		return GT;		/* Keyword (Standard Function) */
mario@82: LE		return LE;		/* Keyword (Standard Function) */
mario@82: LT		return LT;		/* Keyword (Standard Function) */
mario@82: MUL		return MUL;		/* Keyword (Standard Function) */
mario@82: NE		return NE;		/* Keyword (Standard Function) */
mario@82: SUB		return SUB;		/* Keyword (Standard Function) */
mario@73: }
mario@68: 
mario@68:  /* The following tokens clash with SFC action qualifiers */
mario@82:  /* They are not keywords! */
mario@73: <il_state>{
mario@68: S		return S;
mario@68: R		return R;
mario@73: }
mario@68: 
mario@68:  /* The following tokens clash with ST expression operators */
mario@82: &		return AND2;		/* NOT a Delimiter! */
mario@68: 
mario@68:  /* The following tokens have no clashes */
mario@82:  /* They are not keywords! */
mario@73: <il_state>{
etisserant@0: LD		return LD;
etisserant@0: LDN		return LDN;
etisserant@0: ST		return ST;
etisserant@0: STN		return STN;
etisserant@0: S1		return S1;
etisserant@0: R1		return R1;
etisserant@0: CLK		return CLK;
etisserant@0: CU		return CU;
etisserant@0: CD		return CD;
etisserant@0: PV		return PV;
etisserant@0: IN		return IN;
etisserant@0: PT		return PT;
etisserant@0: ANDN		return ANDN;
etisserant@0: &N		return ANDN2;
etisserant@0: ORN		return ORN;
etisserant@0: XORN		return XORN;
etisserant@0: CAL		return CAL;
etisserant@0: CALC		return CALC;
etisserant@0: CALCN		return CALCN;
etisserant@0: RET		return RET;
etisserant@0: RETC		return RETC;
etisserant@0: RETCN		return RETCN;
etisserant@0: JMP		return JMP;
etisserant@0: JMPC		return JMPC;
etisserant@0: JMPCN		return JMPCN;
mario@73: }
etisserant@0: 
etisserant@0: 	/***********************/
etisserant@0: 	/* B 3.1 - Expressions */
etisserant@0: 	/***********************/
mario@82: "**"		return OPER_EXP;	/* NOT a Delimiter! */
mario@82: "<>"		return OPER_NE;		/* NOT a Delimiter! */
mario@82: ">="		return OPER_GE;		/* NOT a Delimiter! */
mario@82: "<="		return OPER_LE;		/* NOT a Delimiter! */
mario@82: &		return AND2;		/* NOT a Delimiter! Already listed under B 2.2: flex uses the earlier of two identical patterns, so this copy (and the five below) never fires */
mario@82: AND		return AND;		/* Keyword (already listed under B 2.2) */
mario@82: XOR		return XOR;		/* Keyword (already listed under B 2.2) */
mario@82: OR		return OR;		/* Keyword (already listed under B 2.2) */
mario@82: NOT		return NOT;		/* Keyword (already listed under B 2.2) */
mario@82: MOD		return MOD;		/* Keyword (already listed under B 2.2) */
etisserant@0: 
etisserant@0: 
etisserant@0: 	/*****************************************/
etisserant@0: 	/* B 3.2.2 Subprogram Control Statements */
etisserant@0: 	/*****************************************/
mario@82: :=		return ASSIGN;		/* Delimiter */
mario@82: =>		return SENDTO;		/* Delimiter */
mario@82: RETURN		return RETURN;		/* Keyword */
etisserant@0: 
etisserant@0: 
etisserant@0: 	/********************************/
etisserant@0: 	/* B 3.2.3 Selection Statements */
etisserant@0: 	/********************************/
mario@82: IF		return IF;		/* Keyword */
mario@82: THEN		return THEN;		/* Keyword */
mario@82: ELSIF		return ELSIF;		/* Keyword */
mario@82: ELSE		return ELSE;		/* Keyword */
mario@82: END_IF		return END_IF;		/* Keyword */
mario@82: 
mario@82: CASE		return CASE;		/* Keyword */
mario@82: OF		return OF;		/* Keyword */
mario@82: ELSE		return ELSE;		/* Keyword (same pattern as the ELSE rule of the IF statement above, which is the one flex uses) */
mario@82: END_CASE	return END_CASE;	/* Keyword */
etisserant@0: 
etisserant@0: 
etisserant@0: 	/********************************/
etisserant@0: 	/* B 3.2.4 Iteration Statements */
etisserant@0: 	/********************************/
mario@82: FOR		return FOR;		/* Keyword */
mario@82: TO		return TO;		/* Keyword */
mario@82: BY		return BY;		/* Keyword */
mario@82: DO		return DO;		/* Keyword */
mario@82: END_FOR		return END_FOR;		/* Keyword */
mario@82: 
mario@82: WHILE		return WHILE;		/* Keyword */
mario@82: DO		return DO;		/* Keyword (same pattern as the DO rule of the FOR statement above, which is the one flex uses) */
mario@82: END_WHILE	return END_WHILE;	/* Keyword */
mario@82: 
mario@82: REPEAT		return REPEAT;		/* Keyword */
mario@82: UNTIL		return UNTIL;		/* Keyword */
mario@82: END_REPEAT	return END_REPEAT;	/* Keyword */
mario@82: 
mario@82: EXIT		return EXIT;		/* Keyword */
etisserant@0: 
etisserant@0: 
msousa@257: 
etisserant@0: 
etisserant@0: 
etisserant@0: 
etisserant@0: 	/********************************************************/
etisserant@0: 	/********************************************************/
etisserant@0: 	/********************************************************/
etisserant@0: 	/*****                                              *****/
etisserant@0: 	/*****                                              *****/
etisserant@0: 	/*****  N O W    W O R K    W I T H    V A L U E S  *****/
etisserant@0: 	/*****                                              *****/
etisserant@0: 	/*****                                              *****/
etisserant@0: 	/********************************************************/
etisserant@0: 	/********************************************************/
etisserant@0: 	/********************************************************/
etisserant@0: 
etisserant@0: 
etisserant@0: 	/********************************************/
etisserant@0: 	/* B.1.4.1   Directly Represented Variables */
etisserant@0: 	/********************************************/
lbessard@175: {direct_variable}   {/* yylval.ID receives a strdup()ed copy of the matched text; the token is chosen by get_direct_variable_token() */ yylval.ID=strdup(yytext); return get_direct_variable_token(yytext);}
etisserant@0: 
etisserant@0: 
etisserant@0: 	/******************************************/
etisserant@0: 	/* B 1.4.3 - Declaration & Initialisation */
etisserant@0: 	/******************************************/
etisserant@0: {incompl_location}	{yylval.ID=strdup(yytext); return incompl_location_token;}
etisserant@0: 
etisserant@0: 
etisserant@0: 	/************************/
etisserant@0: 	/* B 1.2.3.1 - Duration */
etisserant@0: 	/************************/
etisserant@0: {fixed_point}		{yylval.ID=strdup(yytext); return fixed_point_token;}
msousa@547: {interval}		{/* push the whole match back, followed by a '#' end marker, and re-scan it piece by piece in time_literal_state */ /*fprintf(stderr, "entering time_literal_state ##%s##\n", yytext);*/ unput_and_mark('#'); yy_push_state(time_literal_state);}
msousa@547: {erroneous_interval}	{return erroneous_interval_token;}
msousa@547: 
msousa@547: <time_literal_state>{
msousa@547: {integer}d		{/* the value is the matched text without its 1 character unit suffix */ yylval.ID=strdup(yytext); yylval.ID[yyleng-1] = '\0'; return integer_d_token;}
msousa@547: {integer}h		{yylval.ID=strdup(yytext); yylval.ID[yyleng-1] = '\0'; return integer_h_token;}
msousa@547: {integer}m		{yylval.ID=strdup(yytext); yylval.ID[yyleng-1] = '\0'; return integer_m_token;}
msousa@547: {integer}s		{yylval.ID=strdup(yytext); yylval.ID[yyleng-1] = '\0'; return integer_s_token;}
msousa@547: {integer}ms		{/* 'ms' is a 2 character suffix, so two characters are cut off */ yylval.ID=strdup(yytext); yylval.ID[yyleng-2] = '\0'; return integer_ms_token;}
msousa@547: {fixed_point}d		{yylval.ID=strdup(yytext); yylval.ID[yyleng-1] = '\0'; return fixed_point_d_token;}
msousa@547: {fixed_point}h		{yylval.ID=strdup(yytext); yylval.ID[yyleng-1] = '\0'; return fixed_point_h_token;}
msousa@547: {fixed_point}m		{yylval.ID=strdup(yytext); yylval.ID[yyleng-1] = '\0'; return fixed_point_m_token;}
msousa@547: {fixed_point}s		{yylval.ID=strdup(yytext); yylval.ID[yyleng-1] = '\0'; return fixed_point_s_token;}
msousa@547: {fixed_point}ms		{yylval.ID=strdup(yytext); yylval.ID[yyleng-2] = '\0'; return fixed_point_ms_token;}
msousa@547: 
msousa@547: _			/* do nothing - eat it up!*/
msousa@616: \#			{/* the '#' end marker: leave time_literal_state */ /*fprintf(stderr, "popping from time_literal_state (###)\n");*/ yy_pop_state(); return end_interval_token;}
msousa@616: .			{/*fprintf(stderr, "time_literal_state: found invalid character '%s'. Aborting!\n", yytext);*/ ERROR;}
msousa@547: \n			{ERROR;}
msousa@547: }
etisserant@0: 	/*******************************/
etisserant@0: 	/* B.1.2.2   Character Strings */
etisserant@0: 	/*******************************/
etisserant@0: {double_byte_character_string} {yylval.ID=strdup(yytext); return double_byte_character_string_token;}
etisserant@0: {single_byte_character_string} {yylval.ID=strdup(yytext); return single_byte_character_string_token;}
etisserant@0: 
etisserant@0: 
etisserant@0: 	/******************************/
etisserant@0: 	/* B.1.2.1   Numeric literals */
etisserant@0: 	/******************************/
etisserant@0: {integer}		{yylval.ID=strdup(yytext); return integer_token;}
etisserant@0: {real}			{yylval.ID=strdup(yytext); return real_token;}
etisserant@0: {binary_integer}	{yylval.ID=strdup(yytext); return binary_integer_token;}
etisserant@0: {octal_integer} 	{yylval.ID=strdup(yytext); return octal_integer_token;}
etisserant@0: {hex_integer} 		{yylval.ID=strdup(yytext); return hex_integer_token;}
etisserant@0: 
etisserant@0: 
etisserant@0: 	/*****************************************/
etisserant@0: 	/* B.1.1 Letters, digits and identifiers */
etisserant@0: 	/*****************************************/
mjsousa@866: <st_state>{identifier}/({st_whitespace_or_pragma_or_comment})"=>"	{/* trailing context: only matches an identifier that is followed by "=>"; what comes after the identifier is not consumed */ yylval.ID=strdup(yytext); return sendto_identifier_token;}
mjsousa@866: <il_state>{identifier}/({il_whitespace_or_pragma_or_comment})"=>"	{/* same as the st_state rule above, using the il_state definition of whitespace */ yylval.ID=strdup(yytext); return sendto_identifier_token;}
etisserant@0: {identifier} 				{yylval.ID=strdup(yytext);
mario@75: 					 // printf("returning identifier...: %s, %d\n", yytext, get_identifier_token(yytext));
etisserant@0: 					 return get_identifier_token(yytext);}
etisserant@0: 
etisserant@0: 
etisserant@0: 
etisserant@0: 
etisserant@0: 
etisserant@0: 
etisserant@0: 	/************************************************/
etisserant@0: 	/************************************************/
etisserant@0: 	/************************************************/
etisserant@0: 	/*****                                      *****/
etisserant@0: 	/*****                                      *****/
etisserant@0: 	/*****   T H E    L E F T O V E R S . . .   *****/
etisserant@0: 	/*****                                      *****/
etisserant@0: 	/*****                                      *****/
etisserant@0: 	/************************************************/
etisserant@0: 	/************************************************/
etisserant@0: 	/************************************************/
etisserant@0: 
etisserant@0: 	/* do the single character tokens...
etisserant@0: 	 *
etisserant@0: 	 *  e.g.:  ':'  '('  ')'  '+'  '*'  ...
etisserant@0: 	 *
etisserant@0: 	 * Any single character (other than a newline) that reaches this last rule
etisserant@0: 	 * is returned as-is: the token value is the character code itself.
etisserant@0: 	 */
etisserant@0: .	{return yytext[0];}
etisserant@0: 
etisserant@0: 
etisserant@0: %%
etisserant@0: 
etisserant@0: 
msousa@757: /*************************/
msousa@757: /* Tracking Functions... */
msousa@757: /*************************/
msousa@757: 
mjsousa@880: #define MAX_LINE_LENGTH 1024  /* NOTE(review): not referenced by any of the tracking functions below - confirm it is still needed */
msousa@757: 
msousa@757: /* Allocate (with new) and initialise the tracking data for the stream <in_file>.
msousa@757:  * The position starts at line 1 with every other counter set to 0.
msousa@757:  * Returns the new object; FreeTracking() deletes such an object.
msousa@757:  */
msousa@757: tracking_t *GetNewTracking(FILE* in_file) {
msousa@757:   tracking_t *tracking = new tracking_t;
msousa@757: 
msousa@757:   /* the stream this tracking data refers to */
msousa@757:   tracking->in_file           = in_file;
msousa@757:   /* position: first line, nothing consumed yet */
msousa@1055:   tracking->lineNumber        = 1;
msousa@757:   tracking->currentChar       = 0;
msousa@757:   tracking->currentTokenStart = 0;
msousa@1055:   tracking->lineLength        = 0;
msousa@1055:   tracking->eof               = 0;
msousa@757: 
msousa@757:   return tracking;
msousa@757: }
msousa@757: 
msousa@757: 
mjsousa@879: void FreeTracking(tracking_t *tracking) {  /* release a tracking_t that was allocated with new (as GetNewTracking() does) */
mjsousa@879:   delete tracking;
mjsousa@879: }
mjsousa@879: 
mjsousa@879: 
msousa@1055: /* Advance the position stored in current_tracking over the characters of <text>:
msousa@1055:  *  - a '\n' moves to the next line and sets the column (currentChar) to 1,
msousa@1055:  *  - any other character advances the column by one.
msousa@1055:  * As a result, only the characters after the last '\n' contribute to the final column.
msousa@1055:  */
msousa@1055: void UpdateTracking(const char *text) {
msousa@1055:   for (const char *p = text; *p != '\0'; p++) {
msousa@1055:     if (*p == '\n') {
msousa@1055:       current_tracking->lineNumber++;
msousa@1055:       current_tracking->currentChar = 1;
msousa@1055:     } else {
msousa@1055:       current_tracking->currentChar++;
msousa@1055:     }
msousa@1055:   }
msousa@1055: }
msousa@1055: 
msousa@1055: 
msousa@757: /* GetNextChar: reads one character from the file being tracked (current_tracking->in_file)
msousa@757:  *   b         - where the character is stored
msousa@757:  *   maxBuffer - not used: at most one character is ever stored
msousa@757:  * Returns the number of characters stored in b: 1, or 0 when fgetc() reports EOF.
msousa@757:  */
msousa@757: int GetNextChar(char *b, int maxBuffer) {
msousa@1055:   const int next = fgetc(current_tracking->in_file);
msousa@1055:   if (next != EOF) {
msousa@1055:     *b = (char)next;
msousa@1055:     return 1;
msousa@1055:   }
msousa@757:   return 0;
msousa@1055: }
msousa@757: 
msousa@757: 
msousa@757: 
etisserant@0: /***********************************/
etisserant@0: /* Utility function definitions... */
etisserant@0: /***********************************/
etisserant@0: 
etisserant@0: /* print the include file stack to stderr...
etisserant@0:  * One "included from file <name>:<line>" line is written per stack entry, starting
etisserant@0:  * with the entry that was pushed last. If the stack is not empty, the text
etisserant@0:  * "in file " is written first.
etisserant@0:  */
etisserant@0: void print_include_stack(void) {
etisserant@0:   if ((include_stack_ptr - 1) >= 0)
etisserant@0:     fprintf (stderr, "in file "); 
etisserant@0: 
etisserant@0:   /* walk the stack from its top entry down to entry 0 */
etisserant@0:   int level = include_stack_ptr;
etisserant@0:   while (--level >= 0)
lbessard@136:     fprintf (stderr, "included from file %s:%d\n", include_stack[level].filename, include_stack[level].env->lineNumber);
etisserant@0: }
etisserant@0: 
etisserant@0: 
msousa@756: 
msousa@756: /* set the internal state variables of lexical analyser to process a new include file
msousa@756:  *   filehandle - already opened stream from which the included source is read
msousa@756:  *   filename   - name stored (as a copy) in current_filename while that source is parsed
msousa@756:  * The flex buffer, tracking data and file name in use when this function is called
msousa@756:  * are saved on include_stack before they are replaced.
msousa@756:  * Exits the program when includes are nested deeper than MAX_INCLUDE_DEPTH, or when
msousa@756:  * there is no memory left to copy the file name.
msousa@756:  */
msousa@756: void handle_include_file_(FILE *filehandle, const char *filename) {
msousa@756:   if (include_stack_ptr >= MAX_INCLUDE_DEPTH) {
msousa@756:     fprintf(stderr, "Includes nested too deeply\n");
msousa@756:     exit( 1 );
msousa@756:   }
msousa@756:   
msousa@756:   yyin = filehandle;
msousa@756:   
msousa@756:   /* save the state of the file we are suspending... */
msousa@756:   include_stack[include_stack_ptr].buffer_state = YY_CURRENT_BUFFER;
msousa@756:   include_stack[include_stack_ptr].env = current_tracking;
msousa@756:   include_stack[include_stack_ptr].filename = current_filename;
msousa@756:   
msousa@756:   current_filename = strdup(filename);
msousa@756:   if (current_filename == NULL) {
msousa@756:     /* same handling as the failed strdup3() in include_file() */
msousa@756:     fprintf(stderr, "Out of memory!\n");
msousa@756:     exit( 1 );
msousa@756:   }
msousa@756:   current_tracking = GetNewTracking(yyin);
msousa@756:   include_stack_ptr++;
msousa@756: 
msousa@756:   /* switch input buffer to new file... */
msousa@756:   yy_switch_to_buffer(yy_create_buffer(yyin, YY_BUF_SIZE));
msousa@756: }
msousa@756: 
msousa@756: 
msousa@756: 
msousa@756: /* insert the code (in <source_code>) into the source code we are parsing.
msousa@756:  * This is done by creating an artificial file with that new source code, and then 'including' the file
msousa@756:  */
msousa@757: void include_string_(const char *source_code) {
msousa@756:   FILE *tmp_file = tmpfile();
msousa@756:   
msousa@756:   if(tmp_file == NULL) {
msousa@756:     perror("Error creating temp file.");
msousa@756:     exit(EXIT_FAILURE);
msousa@756:   }
msousa@756: 
msousa@756:   fwrite((void *)source_code, 1, strlen(source_code), tmp_file);
msousa@756:   rewind(tmp_file);
msousa@756: 
msousa@756:   /* now parse the tmp file, by asking flex to handle it as if it had been included with the (*#include ... *) pragma... */
msousa@756:   handle_include_file_(tmp_file, "");
msousa@756: //fclose(tmp_file);  /* do NOT close file. It must only be closed when we finish reading from it! */
msousa@756: }
msousa@756: 
msousa@756: 
msousa@756: 
msousa@756: /* Open an include file, and set the internal state variables of lexical analyser to process it.
msousa@756:  * <filename> is looked up in each directory of the NULL terminated INCLUDE_DIRECTORIES
msousa@756:  * list, in order; the first one in which it can be opened for reading is used.
msousa@756:  * Exits the program if the file cannot be opened in any directory (or on out of memory).
msousa@756:  */
msousa@756: void include_file(const char *filename) {
msousa@756:   FILE *filehandle = NULL;
msousa@756:   
msousa@756:   for (int i = 0; INCLUDE_DIRECTORIES[i] != NULL; i++) {
msousa@756:     char *full_name = strdup3(INCLUDE_DIRECTORIES[i], "/", filename);
msousa@756:     if (NULL == full_name) {
msousa@756:       fprintf(stderr, "Out of memory!\n");
msousa@756:       exit( 1 );
msousa@756:     }
msousa@756:     filehandle = fopen(full_name, "r");
msousa@756:     free(full_name);
msousa@756:     if (filehandle != NULL)
msousa@756:       break;  /* found it: no need to try the remaining directories */
msousa@756:   }
msousa@756: 
msousa@756:   if (filehandle == NULL) {
msousa@756:     fprintf(stderr, "Error opening included file %s\n", filename);
msousa@756:     exit( 1 );
msousa@756:   }
msousa@756: 
msousa@756:   /* now process the new file... */
msousa@756:   handle_include_file_(filehandle, filename);
msousa@756: }
msousa@756: 
msousa@756: 
msousa@756: 
msousa@1055: /* return the specified character to the input stream */
msousa@1055: /* WARNING: this function destroys the contents of yytext */
msousa@1055: void unput_char(const char c) {
msousa@1055:   /* NOTE: The following commented-out code is not necessary as we currently use a different algorithm:
msousa@1055:    *          - make a backup/snapshot of the current tracking data (in previous_tracking variable)
msousa@1055:    *             (done in YY_USER_ACTION)
msousa@1055:    *          - restore the previous tracking state when we unput any text...
msousa@1055:    *             (in unput_text() and unput_and_mark() )
msousa@1055:    */
msousa@1055: //   /* We will later be processing this same character again when it is read from the input stream,
msousa@1055: //    * and therefore we will be incrementing the line number and character column accordingly.
msousa@1055: //    * We must therefore try to 'undo' the changes to the line number and character column
msousa@1055: //    * so this character is not counted twice!
msousa@1055: //    */
msousa@1055: //   if        (c == '\n') {
msousa@1055: //     current_tracking->lineNumber--;
msousa@1055: //     /* We should now set the current_tracking->currentChar to the length of the previous line
msousa@1055: //      * But we currently have no way of knowing it, so we simply set it to 0.
msousa@1055: //      * I (msousa) don't think this is currently an issue because I don't believe the code
msousa@1055: //      * ever calls unput_char() with a '\n', so we leave it for now
msousa@1055: //      */
msousa@1055: //     current_tracking->currentChar = 0;
msousa@1055: //   } else if (current_tracking->currentChar > 0) {
msousa@1055: //     current_tracking->currentChar--;
msousa@1055: //   }
msousa@1055: 
msousa@1055:   unput(c); // unput() destroys the contents of yytext !!
msousa@1055: }
msousa@756: 
msousa@756: 
etisserant@0: /* return all the text in the current token back to the input stream, except the first n chars. */
msousa@1055: void unput_text(int n) {
msousa@1055:   if (n < 0) ERROR;
msousa@1055:   signed int i; // must be signed! The iterartion may end with -1 when this function is called with n=0 !!
msousa@1055: 
msousa@1055:   char *yycopy = strdup( yytext ); /* unput_char() destroys yytext, so we copy it first */
msousa@1055:   for (int i = yyleng-1; i >= n; i--)
msousa@1055:     unput_char(yycopy[i]);
msousa@1055: 
msousa@1055:   *current_tracking = previous_tracking;
msousa@1055:   yycopy[n] = '\0';
msousa@1055:   UpdateTracking(yycopy);
lbessard@136:   
msousa@1055:   free(yycopy);
msousa@1055: }
msousa@1055: 
etisserant@0: 
etisserant@0: 
msousa@547: /* return all the text in the current token back to the input stream, 
msousa@547:  * but first return to the stream an additional character to mark the end of the token. 
msousa@547:  */
msousa@1055: void unput_and_mark(const char mark_char) {
msousa@1055:   char *yycopy = strdup( yytext ); /* unput_char() destroys yytext, so we copy it first */
msousa@1055:   unput_char(mark_char);
msousa@547:   for (int i = yyleng-1; i >= 0; i--)
msousa@1055:     unput_char(yycopy[i]);
msousa@547: 
msousa@547:   free(yycopy);
msousa@1055:   *current_tracking = previous_tracking;
msousa@547: }
msousa@547: 
msousa@547: 
msousa@547: 
mjsousa@1016: /* The body_state tries to find a ';' before a END_PROGRAM, END_FUNCTION or END_FUNCTION_BLOCK or END_ACTION
msousa@1055:  * and ignores ';' inside comments and pragmas. This means that we cannot do this in a single lex rule.
msousa@1055:  * Body_state therefore stores ALL text we consume in every rule, so we can push it back into the buffer
mjsousa@1016:  * once we have decided if we are parsing ST or IL code. The following functions manage that buffer used by
mjsousa@1016:  * the body_state.
mjsousa@1016:  */
mjsousa@1016: /* The buffer used by the body_state state */
msousa@1055: char *bodystate_buffer        = NULL; // NULL while empty; otherwise a realloc()ed, NUL terminated string.
msousa@1055: bool  bodystate_is_whitespace = 1; // TRUE (1) if buffer is empty, or only contains whitespace.
msousa@1055: tracking_t bodystate_init_tracking; // copy of *current_tracking, taken when text is appended to an empty buffer.
mjsousa@1016: 
mjsousa@1016: /* append text to bodystate_buffer
mjsousa@1016:  *   text          - NUL terminated text that is copied to the end of the buffer
mjsousa@1016:  *   is_whitespace - when 0, the buffer is flagged as containing non-whitespace
mjsousa@1016:  * When the buffer is empty (NULL) a new one is started: the current tracking data is
mjsousa@1016:  * backed up in bodystate_init_tracking and the whitespace flag is set again.
mjsousa@1016:  */
msousa@1056: void  append_bodystate_buffer(const char *text, int is_whitespace) {
msousa@1055:   const bool starting_new_buffer = (NULL == bodystate_buffer);
msousa@1055: 
msousa@1055:   if (starting_new_buffer) {
msousa@1055:     bodystate_init_tracking = *current_tracking;  // backup, restored by unput_bodystate_buffer()
msousa@1056:     bodystate_is_whitespace = 1;
msousa@1055:   }
msousa@1056:   // adding anything that is not white space clears the flag until the buffer is emptied
msousa@1056:   if (!is_whitespace)
msousa@1056:     bodystate_is_whitespace = 0;
msousa@1055: 
mjsousa@1016:   const long int old_len = starting_new_buffer ? 0 : (long int)strlen(bodystate_buffer);
mjsousa@1016:   bodystate_buffer = (char *)realloc(bodystate_buffer, old_len + strlen(text) + 1);
mjsousa@1016:   if (NULL == bodystate_buffer) ERROR;
mjsousa@1016:   strcpy(bodystate_buffer + old_len, text);
mjsousa@1016: }
mjsousa@1016: 
mjsousa@1016: /* Return all data in bodystate_buffer back to flex, and empty bodystate_buffer.
mjsousa@1016:  * The tracking data is restored to the backup made when the buffer was started.
mjsousa@1016:  * Calling this with an empty (NULL) buffer is an ERROR.
mjsousa@1016:  */
mjsousa@1016: void   unput_bodystate_buffer(void) {
mjsousa@1016:   if (NULL == bodystate_buffer) ERROR;
mjsousa@1016:   
mjsousa@1016:   /* characters are handed back last-to-first */
mjsousa@1016:   long int pos = strlen(bodystate_buffer);
mjsousa@1016:   while (pos > 0)
msousa@1055:     unput_char(bodystate_buffer[--pos]);
mjsousa@1016:   
mjsousa@1016:   free(bodystate_buffer);
msousa@1056:   bodystate_buffer        = NULL;
msousa@1056:   bodystate_is_whitespace = 1;  
msousa@1055:   *current_tracking = bodystate_init_tracking;
msousa@1055: }
msousa@1055: 
msousa@1055: 
msousa@1055: /* Return true (1) if bodystate_buffer is empty or only contains whitespace, 0 otherwise. */
mjsousa@1016: int  isempty_bodystate_buffer(void) {
msousa@1055:   const bool nothing_but_whitespace = (NULL == bodystate_buffer) || bodystate_is_whitespace;
msousa@1055:   return nothing_but_whitespace ? 1 : 0;
mjsousa@1016: }
mjsousa@1016: 
mjsousa@1016: 
mjsousa@1016: 
mjsousa@1016: 
etisserant@0: /* Called by flex when it reaches the end-of-file.
etisserant@0:  * This implementation always returns 1 (stop scanning): it never supplies
etisserant@0:  * another input file from here.
etisserant@0:  */
etisserant@0: int yywrap(void)
etisserant@0: {
etisserant@0:   /* We reached the end of the input file... */
etisserant@0: 
etisserant@0:   /* Should we continue with another file? */
etisserant@0:   /* If so:
etisserant@0:    *   open the new file...
etisserant@0:    *   return 0;
etisserant@0:    */
etisserant@0: 
msousa@737:   /* to stop processing...
etisserant@0:    *   return 1;
etisserant@0:    */
etisserant@0: 
etisserant@0:   return 1;  /* Stop scanning at end of input file. */
etisserant@0: }
etisserant@0: 
etisserant@0: 
etisserant@0: 
msousa@757: /*******************************/
msousa@757: /* Public Interface for Bison. */
msousa@757: /*******************************/
msousa@757: 
msousa@757: /* The following functions will be called from inside bison code! */
msousa@757: 
msousa@757: void include_string(const char *source_code) {include_string_(source_code);}  /* public entry point: simply forwards to include_string_() */
msousa@757: 
msousa@757: 
msousa@757: /* Tell flex which file to parse. This function will not immediately start parsing the file.
msousa@757:  * To parse the file, you then need to call yyparse()
msousa@757:  *
mjsousa@761:  * Returns NULL on error opening the file (and a valid errno), or the handle
mjsousa@761:  * of the opened file on success.
mjsousa@761:  * Caller must close the file!
mjsousa@761:  */
mjsousa@761: FILE *parse_file(const char *filename) {
msousa@757:   FILE *filehandle = fopen(filename, "r");
mjsousa@761:   if (NULL == filehandle)
mjsousa@761:     return NULL;  /* the lexer state is left untouched when the file cannot be opened */
mjsousa@761: 
mjsousa@761:   /* make the new file the current input of the lexer */
mjsousa@761:   yyin = filehandle;
mjsousa@761:   current_filename = strdup(filename);
mjsousa@761:   current_tracking = GetNewTracking(yyin);
mjsousa@761:   return filehandle;
msousa@757: }
msousa@757: 
msousa@757: 
msousa@757: 
msousa@757: 
msousa@757: 
msousa@757: 
etisserant@0: /*************************************/
etisserant@0: /* Include a main() function to test */
etisserant@0: /* the token parsing by flex....     */
etisserant@0: /*************************************/
etisserant@0: #ifdef TEST_MAIN
etisserant@0: 
etisserant@0: #include "../util/symtable.hh"
etisserant@0: 
etisserant@0: yystype yylval;  /* defined here for the stand-alone TEST_MAIN build; presumably supplied by the parser otherwise - TODO confirm */
etisserant@0: YYLTYPE yylloc;
etisserant@0: 
etisserant@0: 
mario@15: 
mario@15: 
etisserant@0: int get_identifier_token(const char *identifier_str) {return 0;}  /* test stub: every identifier yields token value 0 */
lbessard@175: int get_direct_variable_token(const char *direct_variable_str) {return 0;}  /* test stub: every direct variable yields token value 0 */
etisserant@0: 
etisserant@0: 
etisserant@0: int main(int argc, char **argv) {  /* test driver: prints every token returned by yylex() to stderr */
etisserant@0: 
etisserant@0:   FILE *in_file;
etisserant@0:   int res;
lbessard@136: 	
etisserant@0:   if (argc == 1) {
etisserant@0:     /* Work as an interactive (command line) parser...
etisserant@0:      * The loop ends as soon as yylex() returns 0.
etisserant@0:      */
etisserant@0:     while((res=yylex()))
etisserant@0:       fprintf(stderr, "(line %d)token: %d\n", yylineno, res);
etisserant@0:   } else {
etisserant@0:     /* Work as non-interactive (file) parser... */
etisserant@0:     if((in_file = fopen(argv[1], "r")) == NULL) {
etisserant@0:       char *errmsg = strdup2("Error opening main file ", argv[1]);
etisserant@0:       perror(errmsg);
etisserant@0:       free(errmsg);
etisserant@0:       return -1;
etisserant@0:     }
etisserant@0: 
etisserant@0:     /* parse the file...
etisserant@0:      * NOTE(review): this loop has no exit condition, so yylex() keeps being called
etisserant@0:      * after the end of the file, in_file is never closed and the 'return 0' below
etisserant@0:      * is not reached from this branch. yylval.ID is printed for every token, whether
etisserant@0:      * or not the matching rule assigned it - confirm this is the intended behaviour.
etisserant@0:      */
etisserant@0:     yyin = in_file;
etisserant@0:     current_filename = argv[1];
etisserant@0:     while(1) {
etisserant@0:       res=yylex();
etisserant@0:       fprintf(stderr, "(line %d)token: %d (%s)\n", yylineno, res, yylval.ID);
etisserant@0:     }
etisserant@0:   }
lbessard@136: 	
lbessard@136: 	return 0;
etisserant@0: 
etisserant@0: }
etisserant@0: #endif