etisserant@0: /*
msousa@264:  *  matiec - a compiler for the programming languages defined in IEC 61131-3
msousa@264:  *
msousa@264:  *  Copyright (C) 2003-2011  Mario de Sousa (msousa@fe.up.pt)
msousa@264:  *
msousa@264:  *  This program is free software: you can redistribute it and/or modify
msousa@264:  *  it under the terms of the GNU General Public License as published by
mjsousa@866:  *  the Free Software Foundation, either version 3 of the License, or
msousa@264:  *  (at your option) any later version.
msousa@264:  *
msousa@264:  *  This program is distributed in the hope that it will be useful,
msousa@264:  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
msousa@264:  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
msousa@264:  *  GNU General Public License for more details.
msousa@264:  *
msousa@264:  *  You should have received a copy of the GNU General Public License
msousa@264:  *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
msousa@264:  *
etisserant@0:  *
etisserant@0:  * This code is made available on the understanding that it will not be
etisserant@0:  * used in safety-critical situations without a full and competent review.
etisserant@0:  */
etisserant@0: 
etisserant@0: /*
msousa@264:  * An IEC 61131-3 compiler.
etisserant@0:  *
etisserant@0:  * Based on the
etisserant@0:  * FINAL DRAFT - IEC 61131-3, 2nd Ed. (2001-12-10)
etisserant@0:  *
etisserant@0:  */
etisserant@0: 
etisserant@0: /*
etisserant@0:  * Stage 1
etisserant@0:  * =======
etisserant@0:  *
etisserant@0:  * This file contains the lexical tokens definitions, from which
etisserant@0:  * the flex utility will generate a lexical parser function.
etisserant@0:  */
etisserant@0: 
etisserant@0: 
etisserant@0: 
etisserant@0: 
etisserant@0: /*****************************/
etisserant@0: /* Lexical Parser Options... */
etisserant@0: /*****************************/
etisserant@0: 
etisserant@0: /* The lexical analyser will never work in interactive mode,
etisserant@0:  * i.e., it will only process programs saved to files, and never
etisserant@0:  * programs being written inter-actively by the user.
etisserant@0:  * This option saves the resulting parser from calling the
etisserant@0:  * isatty() function, that seems to be generating some compile
etisserant@0:  * errors under some (older?) versions of flex.
etisserant@0:  */
etisserant@0: %option never-interactive
etisserant@0: 
etisserant@0: /* Have the lexical analyser use a 'char *yytext' instead of an
etisserant@0:  * array of char 'char yytext[??]' to store the lexical token.
etisserant@0:  */
etisserant@0: %pointer
etisserant@0: 
etisserant@0: 
etisserant@0: /* Have the lexical analyser ignore the case of letters.
etisserant@0:  * This will occur for all the tokens and keywords, but
etisserant@0:  * the resulting text handed up to the syntax parser
etisserant@0:  * will not be changed, and keep the original case
etisserant@0:  * of the letters in the input file.
etisserant@0:  */
etisserant@0: %option case-insensitive
etisserant@0: 
etisserant@0: /* Have the generated lexical analyser keep track of the
etisserant@0:  * line number it is currently analysing.
etisserant@0:  * This is used to pass up to the syntax parser
etisserant@0:  * the number of the line on which the current
etisserant@0:  * token was found. It will enable the syntax parser
etisserant@0:  * to generate more informative error messages...
etisserant@0:  */
etisserant@0: %option yylineno
etisserant@0: 
etisserant@0: /* required for the use of the yy_pop_state() and
etisserant@0:  * yy_push_state() functions
etisserant@0:  */
etisserant@0: %option stack
etisserant@0: 
etisserant@0: /* The '%option stack' also requests the inclusion of 
etisserant@0:  * the yy_top_state(), however this function is not
etisserant@0:  * currently being used. This means that the compiler
etisserant@0:  * is complaining about the existence of this function.
etisserant@0:  * The following option removes the yy_top_state()
etisserant@0:  * function from the resulting c code, so the compiler 
etisserant@0:  * no longer complains.
etisserant@0:  */
etisserant@0: %option noyy_top_state
etisserant@0: 
msousa@547: /* We will be using unput() in our flex code, so we cannot set the following option!... */
msousa@547: /*
msousa@267: %option nounput
msousa@547: */
msousa@267: 
etisserant@0: /**************************************************/
etisserant@0: /* External Variable and Function declarations... */
etisserant@0: /**************************************************/
etisserant@0: 
etisserant@0: 
etisserant@0: %{
etisserant@0: /* Define TEST_MAIN to include a main() function.
etisserant@0:  * Useful for testing the parser generated by flex.
etisserant@0:  */
etisserant@0: /*
etisserant@0: #define TEST_MAIN
etisserant@0: */
etisserant@0: /* If lexical parser is compiled by itself, we need to define the following
etisserant@0:  * constant to some string. Under normal circumstances DEFAULT_LIBDIR is set
etisserant@0:  * in the syntax parser header file...
etisserant@0:  */
etisserant@0: #ifdef TEST_MAIN
etisserant@40: #define DEFAULT_LIBDIR "just_testing"
etisserant@0: #endif
etisserant@0: 
etisserant@0: 
etisserant@0: 
etisserant@0: /* Required for strdup() */
etisserant@0: #include <string.h>
etisserant@0: 
etisserant@0: /* Required only for the declaration of abstract syntax classes
etisserant@0:  * (class symbol_c; class token_c; class list_c;)
etisserant@0:  * These will not be used in flex, but the token type union defined
Edouard@822:  * in iec_bison.hh contains pointers to these classes, so we must include
etisserant@0:  * it here.
etisserant@0:  */
etisserant@0: #include "../absyntax/absyntax.hh"
etisserant@0: 
mario@15: 
Edouard@822: /* iec_bison.hh is generated by bison.
etisserant@0:  * Contains the definition of the token constants, and the
etisserant@0:  * token value type YYSTYPE (in our case, a 'const char *')
etisserant@0:  */
Edouard@822: #include "iec_bison.hh"
mario@15: #include "stage1_2_priv.hh"
mario@15: 
etisserant@0: 
etisserant@0: /* Variable defined by the bison parser,
etisserant@0:  * where the value of the tokens will be stored
etisserant@0:  */
etisserant@0: extern YYSTYPE yylval;
etisserant@0: 
etisserant@0: /* The name of the file currently being parsed...
etisserant@0:  * Note that flex accesses and updates this global variable
msousa@757:  * appropriately whenever it comes across an {#include "<filename>"} directive...
msousa@757:  */
msousa@757: const char *current_filename = NULL;
msousa@757: 
mario@15: 
etisserant@0: 
etisserant@0: /* Historical: unput() was not used when this define was added. NOTE(review): the comment at the commented-out '%option nounput' says unput() IS now used -- confirm this define is still wanted. */
msousa@267: /* NOTE: it seems that this #define is no longer needed. It has been 
msousa@267:  * replaced by %option nounput.
msousa@267:  * Should we simply delete it?
msousa@267:  * For now leave it in, in case someone is using an old version of flex.
msousa@267:  * In any case, the most harm that can result is a warning message
msousa@267:  * when compiling iec.flex.c:
msousa@267:  * warning: ‘void yyunput(int, char*)’ defined but not used
msousa@267:  */
etisserant@0: #define YY_NO_UNPUT
etisserant@0: 
etisserant@0: /* Variable defined by the bison parser.
etisserant@0:  * It must be initialised with the location
etisserant@0:  * of the token being parsed.
etisserant@0:  * This is only needed if we want to keep
etisserant@0:  * track of the locations, in order to give
etisserant@0:  * more meaningful error messages!
etisserant@0:  */
conti@415: /*
conti@415:  *extern YYLTYPE yylloc;
conti@415:  */
lbessard@136: #define YY_INPUT(buf,result,max_size)  { /* flex input hook: fills buf by calling GetNextChar() */ \
lbessard@136:     result = GetNextChar(buf, max_size); /* result receives GetNextChar()'s return value */ \
lbessard@136:     if (  result <= 0  ) /* a non-positive return value is reported to flex as YY_NULL */ \
lbessard@136:       result = YY_NULL;\
lbessard@136:     }
lbessard@136: 
msousa@287: 
etisserant@0: /* Macro that is executed for every action. It passes the token's location back
etisserant@0:  * to bison: yylloc gets the line, first/last column, file name and order counter;
etisserant@0:  * then the next token start is set to currentChar and current_order is incremented.
etisserant@0:  */
lbessard@136: #define YY_USER_ACTION {\
msousa@287: 	yylloc.first_line = current_tracking->lineNumber;			\
msousa@287: 	yylloc.first_column = current_tracking->currentTokenStart;		\
msousa@287: 	yylloc.first_file = current_filename;					\
msousa@287: 	yylloc.first_order = current_order;					\
msousa@287: 	yylloc.last_line = current_tracking->lineNumber;			\
msousa@287: 	yylloc.last_column = current_tracking->currentChar - 1;			\
msousa@287: 	yylloc.last_file = current_filename;					\
msousa@287: 	yylloc.last_order = current_order;					\
msousa@287: 	current_tracking->currentTokenStart = current_tracking->currentChar;	\
msousa@287: 	current_order++;							\
etisserant@0: 	}
etisserant@0: 
mjsousa@879: 
mjsousa@879: 
etisserant@0: /* Since this lexical parser we defined only works in ASCII based
etisserant@0:  * systems, we might as well make sure it is being compiled on
etisserant@0:  * one...
etisserant@0:  * Lets check a few random characters...
etisserant@0:  */
etisserant@0: #if (('a' != 0x61) || ('A' != 0x41) || ('z' != 0x7A) || ('Z' != 0x5A) || \
etisserant@0:      ('0' != 0x30) || ('9' != 0x39) || ('(' != 0x28) || ('[' != 0x5B))
etisserant@0: #error This lexical analyser is not portable to a non ASCII based system.
etisserant@0: #endif
etisserant@0: 
etisserant@0: 
etisserant@0: /* Function only called from within flex, but defined
etisserant@0:  * in iec.y!
lbessard@3:  * We declare it here...
etisserant@0:  *
etisserant@0:  * Search for a symbol in either of the two symbol tables
etisserant@0:  * and return the token id of the first symbol found.
etisserant@0:  * Searches first in the variables, and only if not found
etisserant@0:  * does it continue searching in the library elements
etisserant@0:  */
etisserant@0: //token_id_t get_identifier_token(const char *identifier_str);
etisserant@0: int get_identifier_token(const char *identifier_str);
etisserant@0: %}
etisserant@0: 
etisserant@0: 
etisserant@0: /***************************************************/
etisserant@0: /* Forward Declaration of functions defined later. */
etisserant@0: /***************************************************/
etisserant@0: 
etisserant@0: %{
etisserant@0: /* return all the text in the current token back to the input stream. */
etisserant@0: void unput_text(unsigned int n);
msousa@547: /* return all the text in the current token back to the input stream, 
msousa@547:  * but first return to the stream an additional character to mark the end of the token. 
msousa@547:  */
msousa@547: void unput_and_mark(const char c);
msousa@756: 
msousa@756: void include_file(const char *include_filename);
msousa@757: 
msousa@757: int GetNextChar(char *b, int maxBuffer);
etisserant@0: %}
etisserant@0: 
etisserant@0: 
etisserant@0: 
etisserant@0: /****************************/
etisserant@0: /* Lexical Parser States... */
etisserant@0: /****************************/
etisserant@0: 
etisserant@0: /* NOTE: Our parser can parse st or il code, intermixed
etisserant@0:  *       within the same file.
etisserant@0:  *       With IL we come across the issue of the EOL (end of line) token.
etisserant@0:  *       ST, and the declaration parts of IL do not use this token!
etisserant@0:  *       If the lexical analyser were to issue this token during ST
etisserant@0:  *       language parsing, or during the declaration of data types,
etisserant@0:  *       function headers, etc. in IL, the syntax parser would crash.
etisserant@0:  *
etisserant@0:  *       We can solve this issue using one of three methods:
etisserant@0:  *        (1) Augment all the syntax that does not accept the EOL
etisserant@0:  *            token to simply ignore it. This makes the syntax
etisserant@0:  *            definition (in iec.y) very cluttered!
etisserant@0:  *        (2) Let the lexical parser figure out which language
etisserant@0:  *            it is parsing, and decide whether or not to issue
etisserant@0:  *            the EOL token. This requires the lexical parser
etisserant@0:  *            to have knowledge of the syntax!, making for a poor
etisserant@0:  *            overall organisation of the code. It would also make it
etisserant@0:  *            very difficult to understand the lexical parser as it
etisserant@0:  *            would use several states, and a state machine to transition
etisserant@0:  *            between the states. The state transitions would be
etisserant@0:  *            intermingled with the lexical parser definition!
etisserant@0:  *        (3) Use a mixture of (1) and (2). The lexical analyser
etisserant@0:  *            merely distinguishes between function headers and function
etisserant@0:  *            bodies, but no longer makes a distinction between il and
etisserant@0:  *            st language bodies. When parsing a body, it will return
etisserant@0:  *            the EOL token. In other states '\n' will be ignored as
etisserant@0:  *            whitespace.
etisserant@0:  *            The ST language syntax has been augmented in the syntax
etisserant@0:  *            parser configuration to ignore any EOL tokens that it may
etisserant@0:  *            come across!
etisserant@0:  *            This option has both drawbacks of option (1) and (2), but
etisserant@0:  *            much less intensely.
etisserant@0:  *            The syntax that gets cluttered is limited to the ST statements
etisserant@0:  *            (which is rather limited, compared to the function headers and
etisserant@0:  *            data type declarations, etc...), while the state machine in
etisserant@0:  *            the lexical parser becomes very simple. All state transitions
etisserant@0:  *            can be handled within the lexical parser by itself, and can be
etisserant@0:  *            easily identified. Thus knowledge of the syntax required by
etisserant@0:  *            the lexical parser is very limited!
etisserant@0:  *
etisserant@0:  * Amazingly enough, I (Mario) got to implement option (3)
etisserant@0:  * at first, requiring two basic states, decl and body.
etisserant@0:  * The lexical parser will enter the body state when
etisserant@0:  * it is parsing the body of a function/program/function block. The
etisserant@0:  * state transition is done when we find a VAR_END that is not followed
etisserant@0:  * by a VAR! This is the syntax knowledge that gets included in the
etisserant@0:  * lexical analyser with this option!
etisserant@0:  * Unfortunately, getting the st syntax parser to ignore EOL anywhere
etisserant@0:  * where they might appear leads to conflicts. This is due to the fact
etisserant@0:  * that the syntax parser uses the single look-ahead token to remove
etisserant@0:  * possible conflicts. When we insert a possible EOL, the single
etisserant@0:  * look ahead token becomes the EOL, which means the potential conflicts
etisserant@0:  * could no longer be resolved.
etisserant@0:  * Removing these conflicts would make the st syntax parser very convoluted,
etisserant@0:  * and adding the extraneous EOL would make it very cluttered.
etisserant@0:  * This option was therefore dropped in favour of another!
etisserant@0:  *
etisserant@0:  * I ended up implementing (2). Unfortunately the lexical analyser can
etisserant@0:  * not easily distinguish between il and st code, since function
etisserant@0:  * calls in il are very similar to function block calls in st.
etisserant@0:  * We therefore use an extra 'body' state. When the lexical parser
etisserant@0:  * finds that last END_VAR, it enters the body state. This state
etisserant@0:  * must figure out what language is being parsed from the first few
mario@68:  * tokens, and switch to the correct state (st, il or sfc) according to the
etisserant@0:  * language. This means that we insert quite a bit of knowledge of the
etisserant@0:  * syntax of the languages into the lexical parser. This is ugly, but it
etisserant@0:  * works, and at least it is possible to keep all the state changes together
etisserant@0:  * to make it easier to remove them later on if need be.
mario@68:  * Once the language being parsed has been identified, 
mario@68:  * the body state returns any matched text back to the buffer with unput(),
mario@68:  * to be later matched correctly by the appropriate language parser (st, il or sfc).
mario@68:  *
mario@68:  * Additionally, in sfc state it may further recursively enter the body state
mario@68:  * once again. This is because an sfc body may contain ACTIONS, which are then
mario@68:  * written in one of the three languages (ST, IL or SFC), so once again we need
mario@68:  * to figure out which language the ACTION in the SFC was written in. We already
mario@68:  * have all that done in the body state, so we recursively transition to the body 
mario@68:  * state once again.
mario@68:  * Note that in this case, when coming out of the st/il state (whichever language
mario@68:  * the action was written in) the sfc state will become active again. This is done by
mario@68:  * pushing and popping the previously active state!
mario@68:  *
mario@68:  * The sfc_qualifier_state is required because when parsing actions within an
mario@68:  * sfc, we will be expecting action qualifiers (N, P, R, S, DS, SD, ...). In order
mario@68:  * to bison to work correctly, these qualifiers must be returned as tokens. However,
mario@68:  * these tokens are not reserved keywords, which means it should be possible to
mario@68:  * define variables/functions/FBs with any of these names (including 
mario@68:  * S and R which are special because they are also IL operators). So, when we are not
mario@68:  * expecting any action qualifiers, flex does not return these tokens, and is free
mario@68:  * to interpret them as previously defined variables/functions/... as the case may be.
mario@68:  *
msousa@547:  * The time_literal_state is required because TIME# literals are decomposed into 
msousa@547:  * portions, and we want to send these portions one by one to bison. Each portion will 
msousa@547:  * represent the value in days/hours/minutes/seconds/ms.
msousa@547:  * Unfortunately, some of these portions may also be lexically analysed as an identifier. So,
msousa@547:  * we need to disable lexical identification of identifiers while parsing TIME# literals!
msousa@547:  * e.g.:  TIME#55d_4h_56m
msousa@547:  *       We would like to return to bison the tokens 'TIME' '#' '55d' '_' '4h' '_' '56m'
msousa@547:  *       Unfortunately, flex will join '_' and '4h' to create a legal {identifier} '_4h',
msousa@547:  *       and return that identifier instead! So, we added this state!
msousa@547:  *
mjsousa@868:  * There is a main state machine...
mjsousa@868:  * 
mjsousa@868:  *       +---> INITIAL <-------> config
mjsousa@868:  *       |        \
mjsousa@868:  *       |        V
mjsousa@868:  *       |   header_state
mjsousa@868:  *       |        |
mjsousa@868:  *       |        V
mjsousa@868:  *     vardecl_list_state <------> var_decl
mjsousa@868:  *       ^        | 
mjsousa@868:  *       |        | [using push()]
mjsousa@868:  *       |        |
mjsousa@868:  *       |        V
mjsousa@868:  *       |       body, 
mjsousa@868:  *       |        |
mjsousa@868:  *       |        | 
mjsousa@868:  *       |   -------------------
mjsousa@868:  *       |   |       |         |
mjsousa@868:  *       |   v       v         v
mjsousa@868:  *       |  st      il        sfc
mjsousa@868:  *       |   |       |         |  [using pop() when leaving st/il/sfc => goes to vardecl_list_state]
mjsousa@868:  *       |   |       |         |
mjsousa@868:  *       -----------------------
mjsousa@868:  *
mjsousa@868:  * NOTE:- When inside sfc, and an action or transition in ST/IL is found, then 
mjsousa@868:  *        we also push() to the body state. This means that sometimes, when pop()ing
mjsousa@868:  *        from st and il, the state machine may return to the sfc state!
mjsousa@868:  *      - The transitions from sfc to body will be decided by bison, which will
mjsousa@868:  *        tell flex to do the transition by calling cmd_goto_body_state().
mjsousa@868:  *   
mjsousa@866:  * 
etisserant@0:  * Possible state changes are:
mario@68:  *   INITIAL -> goto(config_state)
mario@68:  *                (when a CONFIGURATION is found)
mjsousa@866:  * 
mjsousa@866:  *   INITIAL -> goto(header_state)
mjsousa@866:  *               (when a FUNCTION, FUNCTION_BLOCK, or PROGRAM is found)
mjsousa@866:  *   header_state -> goto(vardecl_list_state)
mjsousa@866:  *               (When the first VAR token is found, i.e. at beginning of first VAR .. END_VAR declaration)
mjsousa@866:  * 
mjsousa@866:  *  vardecl_list_state -> push current state (vardecl_list_state), and goto(vardecl_state) 
mjsousa@866:  *                (when a VAR token is found)
mjsousa@866:  *   vardecl_state -> pop() to (vardecl_list_state) 
mjsousa@866:  *                (when a END_VAR token is found)
mjsousa@866:  * 
mjsousa@868:  *   vardecl_list_state -> push current state (vardecl_list_state), and goto(body_state) 
mjsousa@866:  *                (when the last END_VAR is found!)
mjsousa@866:  *
mjsousa@868:  *   body_state    -> goto(sfc_state)
mario@68:  *                     (when it figures out it is parsing sfc language)
mjsousa@868:  *   body_state    -> goto(st_state)
mario@68:  *                     (when it figures out it is parsing st language)
mjsousa@868:  *   body_state    -> goto(il_state)
mario@68:  *                     (when it figures out it is parsing il language)
mjsousa@868:  *   st_state      -> pop() to vardecl_list_state
mario@68:  *                     (when a END_FUNCTION, END_FUNCTION_BLOCK, END_PROGRAM,
mario@68:  *                      END_ACTION or END_TRANSITION is found)
mjsousa@868:  *   il_state      -> pop() to vardecl_list_state
mario@68:  *                     (when a END_FUNCTION, END_FUNCTION_BLOCK, END_PROGRAM,
mario@68:  *                      END_ACTION or END_TRANSITION is found)
mjsousa@868:  *   sfc_state     -> pop() to vardecl_list_state
mario@68:  *                     (when a END_FUNCTION, END_FUNCTION_BLOCK, or END_PROGRAM is found)
mjsousa@866:  * 
mjsousa@868:  *   vardecl_list_state -> goto(INITIAL)
mario@68:  *                     (when a END_FUNCTION, END_FUNCTION_BLOCK, or END_PROGRAM is found)
mario@68:  *   config_state  -> goto(INITIAL)
mario@68:  *                     (when a END_CONFIGURATION is found)
mjsousa@866:  * 
mjsousa@866:  *  
mjsousa@866:  *   sfc_state     -> push current state(sfc_state); goto(body_state)
mario@68:  *                     (when parsing an action. This transition is requested by bison)
mjsousa@866:  *   sfc_state     -> push current state(sfc_state); goto(sfc_qualifier_state)
mario@68:  *                     (when expecting an action qualifier. This transition is requested by bison)
mjsousa@866:  *   sfc_qualifier_state -> pop() to sfc_state
mario@68:  *                     (when no longer expecting an action qualifier. This transition is requested by bison)
mjsousa@866:  *
mario@74:  *   config_state  -> push(config_state); goto(task_init_state)
mario@74:  *                     (when parsing a task initialisation. This transition is requested by bison)
mario@74:  *   task_init_state -> pop()
mario@74:  *                     (when no longer parsing task initialisation parameters. This transition is requested by bison)
mario@74:  *
mjsousa@866:  * 
mjsousa@866:  * There is another secondary state machine for parsing comments, another for file_includes, 
mjsousa@866:  * and yet another for time literals.
mario@74:  */
mario@68: 
mario@68: 
etisserant@0: /* we are parsing a configuration. */
lbessard@3: %s config_state
etisserant@0: 
mario@74: /* Inside a configuration, we are parsing a task initialisation parameters */
mario@74: /* This means that PRIORITY, SINGLE and INTERVAL must be handled as
mario@74:  * tokens, and not as possible identifiers. Note that the above words
mario@74:  * are not keywords.
mario@74:  */
mario@74: %s task_init_state
mario@74: 
mjsousa@866: /* we are looking for the first VAR inside a function's, program's or function block's declaration */
mjsousa@868: /* This is not exclusive (%x) as we must be able to parse the identifier and data types of a function/FB */
mjsousa@866: %s header_state
mjsousa@866: 
mjsousa@866: /* we are parsing a function, program or function block sequence of VAR..END_VAR declarations */
mjsousa@866: %x vardecl_list_state 
mjsousa@866: /* a substate of the vardecl_list_state: we are inside a specific VAR .. END_VAR */
mjsousa@866: %s vardecl_state
etisserant@0: 
mjsousa@868: /* we will be parsing a function body/action/transition. Whether il/st/sfc remains to be determined */
mario@68: %x body_state
etisserant@0: 
etisserant@0: /* we are parsing il code -> flex must return the EOL tokens!       */
lbessard@3: %s il_state
etisserant@0: 
etisserant@0: /* we are parsing st code -> flex must not return the EOL tokens!   */
lbessard@3: %s st_state
etisserant@0: 
mario@68: /* we are parsing sfc code -> flex must not return the EOL tokens!  */
lbessard@3: %s sfc_state
etisserant@0: 
mario@68: /* we are parsing sfc code, and expecting an action qualifier.      */
mario@68: %s sfc_qualifier_state
etisserant@0: 
mario@86: /* we are parsing sfc code, and expecting the priority token.       */
mario@86: %s sfc_priority_state
etisserant@0: 
msousa@547: /* we are parsing a TIME# literal. We must not return any {identifier} tokens. */
msousa@547: %x time_literal_state
mario@75: 
mjsousa@866: /* we are parsing a comment. */
mjsousa@866: %x comment_state
mjsousa@866: 
mario@75: 
etisserant@0: /*******************/
etisserant@0: /* File #include's */
etisserant@0: /*******************/
etisserant@0: 
etisserant@0: /* We extend the IEC 61131-3 standard syntax to allow inclusion
etisserant@0:  * of other files, using the IEC 61131-3 pragma directive...
etisserant@0:  * The accepted syntax is:
etisserant@0:  *  {#include "<filename>"}
etisserant@0:  */
etisserant@0: 
etisserant@0: /* the "include" states are used for picking up the name of an include file */
etisserant@0: %x include_beg
etisserant@0: %x include_filename
etisserant@0: %x include_end
etisserant@0: 
etisserant@0: 
etisserant@0: file_include_pragma_filename	[^\"]*
mjsousa@866: file_include_pragma_beg		"{#include"{st_whitespace}\"
mjsousa@866: file_include_pragma_end		\"{st_whitespace}"}"
etisserant@0: file_include_pragma			{file_include_pragma_beg}{file_include_pragma_filename}{file_include_pragma_end}
etisserant@0: 
etisserant@0: 
etisserant@0: %{
mjsousa@879: 
mjsousa@879: /* A counter to track the order by which each token is processed.
mjsousa@879:  * NOTE: This counter is not exactly linear (i.e., it does not get incremented by 1 for each token).
mjsousa@879:  *       i.e., it may get incremented by more than one between two consecutive tokens.
mjsousa@879:  *       This is due to the fact that the counter gets incremented every 'user action' in flex,
mjsousa@879:  *       however not every user action will result in a token being passed to bison.
mjsousa@879:  *       Nevertheless this is still OK, as we are only interested in the relative
mjsousa@879:  *       ordering of tokens...
mjsousa@879:  */
mjsousa@879: static long int current_order = 0;
mjsousa@879:   
etisserant@0: typedef struct {  /* read-position bookkeeping; the active instance is pointed to by current_tracking */
msousa@757:     int eof;                /* NOTE(review): presumably non-zero once in_file is exhausted -- confirm */
msousa@757:     int lineNumber;         /* line reported in yylloc.first_line/last_line by YY_USER_ACTION */
msousa@757:     int currentChar;        /* column position; YY_USER_ACTION uses currentChar - 1 as the token's last column */
msousa@757:     int lineLength;         /* NOTE(review): presumably the length of the line held in buffer -- confirm */
msousa@757:     int currentTokenStart;  /* first column of the current token; reset to currentChar after every action */
msousa@757:     char *buffer;           /* NOTE(review): presumably the text of the line being read -- confirm */
msousa@757:     FILE *in_file;          /* NOTE(review): presumably the stream this state reads from -- confirm */
msousa@757:   } tracking_t;
msousa@757: 
mjsousa@879: /* A forward declaration of a function defined at the end of this file. */
mjsousa@879: void FreeTracking(tracking_t *tracking);
mjsousa@879: 
mjsousa@879: 
mjsousa@879: #define MAX_INCLUDE_DEPTH 16  /* number of entries in the include_stack[] array */
mjsousa@879: 
msousa@757: typedef struct {  /* one entry of the include_stack[] array */
etisserant@0: 	  YY_BUFFER_STATE buffer_state;  /* flex buffer handle stored for this entry */
msousa@757: 	  tracking_t *env;  /* tracking state stored for this entry */
etisserant@0: 	  const char *filename;  /* file name stored for this entry */
etisserant@0: 	} include_stack_t;
etisserant@0: 
msousa@757: tracking_t *current_tracking = NULL;  /* tracking state read by YY_USER_ACTION to fill in yylloc */
etisserant@0: include_stack_t include_stack[MAX_INCLUDE_DEPTH];  /* fixed-size array of include_stack_t entries */
etisserant@0: int include_stack_ptr = 0;  /* NOTE(review): presumably the index of the next free include_stack slot -- confirm */
etisserant@0: 
etisserant@0: const char *INCLUDE_DIRECTORIES[] = {  /* NULL-terminated list of directory names; NOTE(review): presumably the search path for included files -- confirm */
etisserant@40: 	DEFAULT_LIBDIR,
etisserant@40: 	".",
etisserant@40: 	"/lib",
etisserant@40: 	"/usr/lib",
etisserant@40: 	"/usr/lib/iec",
etisserant@0: 	NULL /* must end with NULL!! */
etisserant@0: 	};
etisserant@0: %}
etisserant@0: 
etisserant@0: 
etisserant@0: 
etisserant@0: /*****************************/
etisserant@0: /* Preliminary constructs... */
etisserant@0: /*****************************/
etisserant@0: 
mjsousa@866: /* PRAGMAS */
mjsousa@866: /* ======= */
msousa@267: /* In order to allow the declaration of POU prototypes (Function, FB, Program, ...),
msousa@267:  * especially the prototypes of Functions and FBs defined in the standard
msousa@267:  * (i.e. standard functions and FBs), we extend the IEC 61131-3 standard syntax 
msousa@267:  * with two pragmas to indicate that the code is to be parsed (going through the 
msousa@267:  * lexical, syntactical, and semantic analysers), but no code is to be generated.
msousa@267:  * 
msousa@267:  * The accepted syntax is:
msousa@267:  *  {disable code generation}
msousa@267:  *    ... prototypes ...
msousa@267:  *  {enable code generation}
msousa@267:  * 
msousa@267:  * When parsing these prototypes the abstract syntax tree will be populated as usual,
msousa@267:  * allowing the semantic analyser to correctly analyse the semantics of calls to these
msousa@267:  * functions/FBs. However, stage4 will simply ignore all IEC61131-3 code
msousa@267:  * between the above two pragmas.
msousa@267:  */
msousa@267: 
msousa@267: disable_code_generation_pragma	"{disable code generation}"
msousa@267: enable_code_generation_pragma	"{enable code generation}"
msousa@267: 
msousa@267: 
msousa@267: /* Any other pragma... */
mjsousa@869: pragma ("{"[^}]*"}")|("{{"([^}]|"}"[^}])*"}}")
mjsousa@868: 
mjsousa@868: 
mjsousa@866: 
mjsousa@866: /* COMMENTS */
mjsousa@866: /* ======== */
mjsousa@866: 
mjsousa@866: /* In order to allow nested comments, comments are handled by a specific comment_state state */
mjsousa@866: /* Whenever a "(*" is found, we push the current state onto the stack, and enter a new instance of the comment_state state.
mjsousa@866:  * Whenever a "*)" is found, we pop a state off the stack
mjsousa@866:  */
mjsousa@866: 
mjsousa@866: /* comments... */
mjsousa@866: comment_beg  "(*"
mjsousa@866: comment_end  "*)"
mjsousa@866: 
mjsousa@866: /* However, bison has a shift/reduce conflict when parsing formal function/FB
mjsousa@866:  * invocations with the 'NOT <variable_name> =>' syntax (which needs two look ahead 
mjsousa@866:  * tokens to be parsed correctly - and bison being LALR(1) only supports one).
mjsousa@866:  * The current work around requires flex to completely parse the '<variable_name> =>'
mjsousa@866:  * sequence. This sequence includes whitespace and/or comments between the 
mjsousa@866:  * <variable_name> and the "=>" token.
mjsousa@866:  * 
mjsousa@866:  * This flex rule (sendto_identifier_token) uses the whitespace/comment as trailing context,
mjsousa@866:  * which means we can not use the comment_state method of specifying/finding and ignoring 
mjsousa@866:  * comments.
mjsousa@866:  * 
mjsousa@866:  * For this reason only, we must also define what a complete comment looks like, so
mjsousa@866:  * it may be used in this rule. Since the rule uses the whitespace_or_comment
mjsousa@866:  * construct as trailing context, this definition of comment must not use any
mjsousa@866:  * trailing context either.
mjsousa@866:  * 
mjsousa@866:  * Additionally, it is not possible to define nested comments in flex without the use of
mjsousa@866:  * states, so for this particular location, we do NOT support nested comments.
mjsousa@866:  */
etisserant@0: /* NOTE: this seemingly unnecessarily complex definition is required
etisserant@0:  *       to be able to eat up comments such as:
etisserant@0:  *          '(* Testing... ! ***** ******)'
etisserant@0:  *       without using the trailing context command in flex (/{context})
etisserant@0:  *       since {comment} itself will later be used with
etisserant@0:  *       trailing context ({comment}/{context})
etisserant@0:  */
etisserant@0: not_asterisk				[^*]
etisserant@0: not_close_parenthesis_nor_asterisk	[^*)]
etisserant@0: asterisk				"*"
mjsousa@866: comment_text	({not_asterisk})|(({asterisk}+){not_close_parenthesis_nor_asterisk})
etisserant@0: comment		"(*"({comment_text}*)({asterisk}+)")"
etisserant@0: 
etisserant@0: 
mjsousa@866: 
mjsousa@866: /* 3.1 Whitespace */
mjsousa@866: /* ============== */
etisserant@0: /*
mjsousa@866:  * Whitespace is clearly defined (see IEC 61131-3 v2, section 2.1.4)
mjsousa@866:  * 
mjsousa@866:  * Whitespace definition includes the newline character.
mjsousa@866:  * 
mjsousa@866:  * However, the standard is inconsistent in that in IL the newline character 
mjsousa@866:  * is considered a token (EOL - end of line). 
mjsousa@866:  * In our implementation we therefore have two definitions of whitespace
mjsousa@866:  *   - one for ST, that includes the newline character
mjsousa@866:  *   - one for IL without the newline character.
mjsousa@866:  * Additionally, when parsing IL, the newline character is treated as the EOL token.
mjsousa@866:  * This requires the use of a state machine in the lexical parser that needs at least 
mjsousa@866:  * some knowledge of the syntax itself.
mjsousa@866:  *
mjsousa@866:  * NOTE: Our definition of whitespace will only work in ASCII!
mjsousa@866:  *
etisserant@0:  * NOTE: we cannot use
etisserant@0:  *         st_whitespace	[:space:]*
etisserant@0:  *       since we use {st_whitespace} as trailing context. In our case
etisserant@0:  *       this would not constitute "dangerous trailing context", but the
etisserant@0:  *       lexical generator (i.e. flex) does not know this (since it does
etisserant@0:  *       not know which characters belong to the set [:space:]), and will
etisserant@0:  *       generate a "dangerous trailing context" warning!
etisserant@0:  *       We use this alternative just to stop the flex utility from
etisserant@0:  *       generating the invalid (in this case) warning...
etisserant@0:  */
etisserant@0: 
mjsousa@866: st_whitespace			[ \f\n\r\t\v]*
mjsousa@866: il_whitespace			[ \f\r\t\v]*
mjsousa@866: 
mjsousa@866: st_whitespace_or_pragma_or_commentX	({st_whitespace})|({pragma})|({comment})
mjsousa@866: il_whitespace_or_pragma_or_commentX	({il_whitespace})|({pragma})|({comment})
mjsousa@866: 
mjsousa@866: st_whitespace_or_pragma_or_comment	{st_whitespace_or_pragma_or_commentX}*
mjsousa@866: il_whitespace_or_pragma_or_comment	{il_whitespace_or_pragma_or_commentX}*
mjsousa@866: 
mjsousa@866: 
mjsousa@866: 
mjsousa@866: qualified_identifier	{identifier}(\.{identifier})+
etisserant@0: 
etisserant@0: 
etisserant@0: 
etisserant@0: /*****************************************/
etisserant@0: /* B.1.1 Letters, digits and identifiers */
etisserant@0: /*****************************************/
etisserant@0: /* NOTE: The following definitions only work if the host computer
etisserant@0:  *       is using the ASCII mapping. E.g., with EBCDIC [A-Z]
etisserant@0:  *       contains non-alphabetic characters!
etisserant@0:  *       The correct way of doing it would be to use
etisserant@0:  *       the [:upper:] etc... definitions.
etisserant@0:  *
etisserant@0:  *       Unfortunately, further on we need all printable
etisserant@0:  *       characters (i.e. [:print:]), but excluding '$'.
etisserant@0:  *       Flex does not allow sets to be composed by excluding
etisserant@0:  *       elements. Sets may only be constructed by adding new
etisserant@0:  *       elements, which means that we have to revert to
etisserant@0:  *       [\x20\x21\x23\x25\x26\x28-\x7E] for the definition
etisserant@0:  *       of the printable characters with the required exceptions.
etisserant@0:  *       The above also implies the use of ASCII, but now we have
etisserant@0:  *       no way to work around it!
etisserant@0:  *
etisserant@0:  *       The conclusion is that our parser is limited to ASCII
etisserant@0:  *       based host computers!!
etisserant@0:  */
etisserant@0: letter		[A-Za-z]
etisserant@0: digit		[0-9]
etisserant@0: octal_digit	[0-7]
etisserant@0: hex_digit	{digit}|[A-F]
etisserant@0: identifier	({letter}|(_({letter}|{digit})))((_?({letter}|{digit}))*)
etisserant@0: 
etisserant@0: /*******************/
etisserant@0: /* B.1.2 Constants */
etisserant@0: /*******************/
etisserant@0: 
etisserant@0: /******************************/
etisserant@0: /* B.1.2.1   Numeric literals */
etisserant@0: /******************************/
etisserant@0: integer         {digit}((_?{digit})*)
msousa@547: 
msousa@547: /* Some helper symbols for parsing TIME literals... */
msousa@547: integer_0_59    (0(_?))*([0-5](_?))?{digit}
msousa@547: integer_0_19    (0(_?))*([0-1](_?))?{digit}
msousa@547: integer_20_23   (0(_?))*2(_?)[0-3]
msousa@547: integer_0_23    {integer_0_19}|{integer_20_23}
msousa@547: integer_0_999   {digit}((_?{digit})?)((_?{digit})?)
msousa@547: 
msousa@547: 
etisserant@0: binary_integer  2#{bit}((_?{bit})*)
etisserant@0: bit		[0-1]
etisserant@0: octal_integer   8#{octal_digit}((_?{octal_digit})*)
etisserant@0: hex_integer     16#{hex_digit}((_?{hex_digit})*)
etisserant@0: exponent        [Ee]([+-]?){integer}
etisserant@0: /* The correct definition for real would be:
etisserant@0:  * real		{integer}\.{integer}({exponent}?)
etisserant@0:  *
etisserant@0:  * Unfortunately, the spec also defines fixed_point (B 1.2.3.1) as:
etisserant@0:  * fixed_point		{integer}\.{integer}
etisserant@0:  *
etisserant@0:  * This means that {integer}\.{integer} could be interpreted
etisserant@0:  * as either a fixed_point or a real.
etisserant@0:  * I have opted to interpret {integer}\.{integer} as a fixed_point.
etisserant@0:  * In order to do this, the definition of real has been changed to:
etisserant@0:  * real		{integer}\.{integer}{exponent}
etisserant@0:  *
etisserant@0:  * This means that the syntax parser now needs to define a real to be
etisserant@0:  * either a real_token or a fixed_point_token!
etisserant@0:  */
etisserant@0: real		{integer}\.{integer}{exponent}
etisserant@0: 
etisserant@0: 
etisserant@0: /*******************************/
etisserant@0: /* B.1.2.2   Character Strings */
etisserant@0: /*******************************/
etisserant@0: /*
etisserant@0: common_character_representation :=
etisserant@0: <any printable character except '$', '"' or "'">
etisserant@0: |'$$'
etisserant@0: |'$L'|'$N'|'$P'|'$R'|'$T'
etisserant@0: |'$l'|'$n'|'$p'|'$r'|'$t'
etisserant@0: 
etisserant@0: NOTE: 	$ = 0x24
etisserant@0: 	" = 0x22
etisserant@0: 	' = 0x27
etisserant@0: 
etisserant@0: 	printable chars in ASCII: 0x20-0x7E
etisserant@0: */
etisserant@0: 
etisserant@0: esc_char_u		$L|$N|$P|$R|$T
etisserant@0: esc_char_l		$l|$n|$p|$r|$t
etisserant@0: esc_char		$$|{esc_char_u}|{esc_char_l}
etisserant@0: double_byte_char	(${hex_digit}{hex_digit}{hex_digit}{hex_digit})
etisserant@0: single_byte_char	(${hex_digit}{hex_digit})
etisserant@0: 
etisserant@0: /* WARNING:
etisserant@0:  * This definition is only valid in ASCII...
etisserant@0:  *
etisserant@0:  * Flex includes the function print_char() that defines
etisserant@0:  * all printable characters portably (i.e. whatever character
etisserant@0:  * encoding is currently being used , ASCII, EBCDIC, etc...)
etisserant@0:  * Unfortunately, we cannot generate the definition of
etisserant@0:  * common_character_representation portably, since flex
etisserant@0:  * does not allow definition of sets by subtracting
etisserant@0:  * elements in one set from another set.
etisserant@0:  * This means we must build up the definition of
etisserant@0:  * common_character_representation using only set addition,
etisserant@0:  * which leaves us with the only choice of defining the
etisserant@0:  * characters non-portably...
etisserant@0:  */
etisserant@0: common_character_representation		[\x20\x21\x23\x25\x26\x28-\x7E]|{esc_char}
etisserant@0: double_byte_character_representation 	$\"|'|{double_byte_char}|{common_character_representation}
etisserant@0: single_byte_character_representation 	$'|\"|{single_byte_char}|{common_character_representation}
etisserant@0: 
etisserant@0: 
etisserant@0: double_byte_character_string	\"({double_byte_character_representation}*)\"
etisserant@0: single_byte_character_string	'({single_byte_character_representation}*)'
etisserant@0: 
etisserant@0: 
etisserant@0: /************************/
etisserant@0: /* B 1.2.3.1 - Duration */
etisserant@0: /************************/
etisserant@0: fixed_point		{integer}\.{integer}
etisserant@0: 
msousa@547: 
msousa@547: /* NOTE: The IEC 61131-3 v2 standard has an incorrect formal syntax definition of duration,
msousa@547:  *       as its definition does not match the standard's text.
msousa@547:  *       IEC 61131-3 v3 (committee draft) seems to have this fixed, so we use that
msousa@547:  *       definition instead!
msousa@547:  *
msousa@547:  *       duration::= ('T' | 'TIME') '#' ['+'|'-'] interval
msousa@547:  *       interval::= days | hours | minutes | seconds | milliseconds
msousa@547:  *       fixed_point  ::= integer [ '.' integer]
msousa@547:  *       days         ::= fixed_point 'd' | integer 'd' ['_'] [ hours ]
msousa@547:  *       hours        ::= fixed_point 'h' | integer 'h' ['_'] [ minutes ]
msousa@547:  *       minutes      ::= fixed_point 'm' | integer 'm' ['_'] [ seconds ]
msousa@547:  *       seconds      ::= fixed_point 's' | integer 's' ['_'] [ milliseconds ]
msousa@547:  *       milliseconds ::= fixed_point 'ms'
msousa@547:  * 
msousa@547:  * 
msousa@547:  *  The original IEC 61131-3 v2 definition is:
msousa@547:  *       duration ::= ('T' | 'TIME') '#' ['-'] interval
msousa@547:  *       interval ::= days | hours | minutes | seconds | milliseconds
msousa@547:  *       fixed_point  ::= integer [ '.' integer]
msousa@547:  *       days         ::= fixed_point 'd' | integer 'd' ['_'] hours
msousa@547:  *       hours        ::= fixed_point 'h' | integer 'h' ['_'] minutes
msousa@547:  *       minutes      ::= fixed_point 'm' | integer 'm' ['_'] seconds
msousa@547:  *       seconds      ::= fixed_point 's' | integer 's' ['_'] milliseconds
msousa@547:  *       milliseconds ::= fixed_point 'ms'
msousa@547:  */
msousa@547: 
msousa@547: interval_ms_X		({integer_0_999}(\.{integer})?)ms
msousa@686: interval_s_X		{integer_0_59}s(_?{interval_ms_X})?|({integer_0_59}(\.{integer})?s)
msousa@686: interval_m_X		{integer_0_59}m(_?{interval_s_X})?|({integer_0_59}(\.{integer})?m)
msousa@686: interval_h_X		{integer_0_23}h(_?{interval_m_X})?|({integer_0_23}(\.{integer})?h)
msousa@547: 
msousa@547: interval_ms		{integer}ms|({fixed_point}ms)
msousa@547: interval_s		{integer}s(_?{interval_ms_X})?|({fixed_point}s)
msousa@547: interval_m		{integer}m(_?{interval_s_X})?|({fixed_point}m)
msousa@547: interval_h		{integer}h(_?{interval_m_X})?|({fixed_point}h)
msousa@547: interval_d		{integer}d(_?{interval_h_X})?|({fixed_point}d)
msousa@547: 
msousa@547: interval		{interval_ms}|{interval_s}|{interval_m}|{interval_h}|{interval_d}
msousa@547: 
msousa@686: 
msousa@547: /* to help provide nice error messages, we also parse an incorrect but plausible interval... */
msousa@547: /* NOTE that this erroneous interval will be parsed outside the time_literal_state, so must not 
msousa@547:  *      be able to parse any other legal lexical construct (besides a legal interval, but that
msousa@547:  *      is OK as this rule will appear _after_ the rule to parse legal intervals!).
msousa@547:  */
msousa@547: fixed_point_or_integer  {fixed_point}|{integer}
msousa@547: erroneous_interval	({fixed_point_or_integer}d_?)?({fixed_point_or_integer}h_?)?({fixed_point_or_integer}m_?)?({fixed_point_or_integer}s_?)?({fixed_point_or_integer}ms)?
etisserant@0: 
etisserant@0: /********************************************/
etisserant@0: /* B.1.4.1   Directly Represented Variables */
etisserant@0: /********************************************/
etisserant@0: /* Standard form: '%' location [size] integer ('.' integer)*  -- the '.' must be escaped, a bare '.' is flex's match-any-char. */
mario@11: 
mario@11: location_prefix			[IQM]
mario@11: size_prefix			[XBWDL]
mario@11: direct_variable_standard	%{location_prefix}({size_prefix}?){integer}((\.{integer})*)
mario@11: 
etisserant@0: 
etisserant@0: /* For the MatPLC, we will accept %<identifier>
etisserant@0:  * as a direct variable, this being mapped onto the MatPLC point
etisserant@0:  * named <identifier>
etisserant@0:  */
etisserant@0: /* TODO: we should not restrict it to only the accepted syntax
etisserant@0:  * of <identifier> as specified by the standard. MatPLC point names
etisserant@0:  * have a more permissive syntax.
etisserant@0:  *
etisserant@0:  * e.g. "P__234"
etisserant@0:  *    Is a valid MatPLC point name, but not a valid <identifier> !!
etisserant@0:  *    The same happens with names such as "333", "349+23", etc...
etisserant@0:  *    How can we handle these more expressive names in our case?
etisserant@0:  *    Remember that some direct variable may remain anonymous, with
etisserant@0:  *    declarations such as:
etisserant@0:  *    VAR
etisserant@0:  *       AT %I3 : BYTE := 255;
etisserant@0:  *    END_VAR
mario@11:  *    in which case we are currently using "%I3" as the variable
mario@11:  *    name.
mario@11:  */
msousa@547: /* direct_variable_matplc		%{identifier} */
msousa@547: /* direct_variable			{direct_variable_standard}|{direct_variable_matplc} */
msousa@547: direct_variable			{direct_variable_standard}
etisserant@0: 
etisserant@0: /******************************************/
etisserant@0: /* B 1.4.3 - Declaration & Initialisation */
etisserant@0: /******************************************/
etisserant@0: incompl_location	%[IQM]\*
etisserant@0: 
etisserant@0: 
etisserant@0: 
etisserant@0: 
etisserant@0: %%
etisserant@0: 	/* fprintf(stderr, "flex: state %d\n", YY_START); */
etisserant@0: 
etisserant@0: 	/*****************************************************/
etisserant@0: 	/*****************************************************/
etisserant@0: 	/*****************************************************/
etisserant@0: 	/*****                                           *****/
etisserant@0: 	/*****                                           *****/
etisserant@0: 	/*****   F I R S T    T H I N G S    F I R S T   *****/
etisserant@0: 	/*****                                           *****/
etisserant@0: 	/*****                                           *****/
etisserant@0: 	/*****************************************************/
etisserant@0: 	/*****************************************************/
etisserant@0: 	/*****************************************************/
etisserant@0: 
mario@68: 	/***********************************************************/
mario@68: 	/* Handle requests sent by bison for flex to change state. */
mario@68: 	/***********************************************************/
mario@13: 	if (get_goto_body_state()) {
mario@68: 	  yy_push_state(body_state);
mario@13: 	  rst_goto_body_state();
mario@6: 	}
lbessard@3: 
mario@68: 	if (get_goto_sfc_qualifier_state()) {
mario@68: 	  yy_push_state(sfc_qualifier_state);
mario@68: 	  rst_goto_sfc_qualifier_state();
mario@68: 	}
mario@68: 
mario@86: 	if (get_goto_sfc_priority_state()) {
mario@86: 	  yy_push_state(sfc_priority_state);
mario@86: 	  rst_goto_sfc_priority_state();
mario@86: 	}
mario@86: 
mario@74: 	if (get_goto_task_init_state()) {
mario@74: 	  yy_push_state(task_init_state);
mario@74: 	  rst_goto_task_init_state();
mario@74: 	}
mario@74: 
mario@68: 	if (get_pop_state()) {
mario@68: 	  yy_pop_state();
mario@68: 	  rst_pop_state();
mario@68: 	}
mario@68: 
mario@68: 	/***************************/
etisserant@0: 	/* Handle the pragmas!     */
mario@68: 	/***************************/
etisserant@0: 
etisserant@0: 	/* We start off by searching for the pragmas we handle in the lexical parser. */
etisserant@0: <INITIAL>{file_include_pragma}	unput_text(0); yy_push_state(include_beg);
etisserant@0: 
msousa@267: 	/* Pragmas sent to syntax analyser (bison) */
msousa@267: {disable_code_generation_pragma}               return disable_code_generation_pragma_token;
msousa@267: {enable_code_generation_pragma}                return enable_code_generation_pragma_token;
mjsousa@866: <body_state,vardecl_list_state>{disable_code_generation_pragma}   return disable_code_generation_pragma_token;
mjsousa@866: <body_state,vardecl_list_state>{enable_code_generation_pragma}    return enable_code_generation_pragma_token;
msousa@267: 
etisserant@0: 	/* Any other pragma we find, we just pass it up to the syntax parser...   */
mario@68: 	/* Note that the <body_state> state is exclusive, so we have to include it here too. */
etisserant@0: {pragma}	{/* return the pragma without the enclosing '{' and '}' */
mjsousa@868: 		 int cut = yytext[1]=='{'?2:1; /* chars to drop at each end: 2 when the second char is also '{', otherwise 1 */
Edouard@634: 		 yytext[strlen(yytext)-cut] = '\0'; /* truncate the closing delimiter in place */
Edouard@634: 		 yylval.ID=strdup(yytext+cut); /* copy of the text starting after the opening delimiter */
etisserant@0: 		 return pragma_token;
etisserant@0: 		}
mjsousa@866: <body_state,vardecl_list_state>{pragma} {/* return the pragma without the enclosing '{' and '}' */
Laurent@701: 		 int cut = yytext[1]=='{'?2:1; /* chars to drop at each end: 2 when the second char is also '{', otherwise 1 */
mjsousa@866: 		 yytext[strlen(yytext)-cut] = '\0'; /* truncate the closing delimiter in place */
Laurent@701: 		 yylval.ID=strdup(yytext+cut); /* copy of the text starting after the opening delimiter */
etisserant@0: 		 return pragma_token;
etisserant@0: 		}
etisserant@0: 
etisserant@0: 
etisserant@0: 	/*********************************/
etisserant@0: 	/* Handle the file includes!     */
etisserant@0: 	/*********************************/
etisserant@0: <include_beg>{file_include_pragma_beg}	BEGIN(include_filename);
etisserant@0: 
etisserant@0: <include_filename>{file_include_pragma_filename}	{
msousa@756: 			  /* set the internal state variables of lexical analyser to process a new include file */
msousa@756: 			  include_file(yytext);
etisserant@0: 			  /* switch to whatever state was active before the include file */
etisserant@0: 			  yy_pop_state();
etisserant@0: 			  /* now process the new file... */
etisserant@0: 			}
etisserant@0: 
etisserant@0: 
mjsousa@761: <<EOF>>			{     /* NOTE: Currently bison is incorrectly using END_OF_INPUT in many rules
mjsousa@761: 			       *       when checking for syntax errors in the input source code.
mjsousa@761: 			       *       This means that in reality flex will be asked to carry on reading the input
mjsousa@761: 			       *       even after it has reached the end of all (including the main) input files.
mjsousa@761: 			       *       In other words, we will be called to return more tokens, even after we have
mjsousa@761: 			       *       already returned an END_OF_INPUT token. In this case, we must carry on returning
mjsousa@761: 			       *       more END_OF_INPUT tokens.
mjsousa@761: 			       * 
mjsousa@761: 			       *       However, in the above case we will be asked to carry on reading more tokens 
mjsousa@761: 			       *       from the main input file, after we have reached the end. For this to work
mjsousa@761: 			       *       correctly, we cannot close the main input file!
mjsousa@761: 			       * 
mjsousa@761: 			       *       This is why we WILL be called with include_stack_ptr == 0 multiple times,
mjsousa@761: 			       *       and why we must handle it as a special case
mjsousa@761: 			       *       that leaves the include_stack_ptr unchanged, and returns END_OF_INPUT once again.
mjsousa@761: 			       * 
mjsousa@761: 			       *       As a corollary, flex can never safely close the main input file, and we must ask
mjsousa@761: 			       *       bison to close it!
mario@76: 			       */
mario@76: 			  if (include_stack_ptr == 0) {
mjsousa@761: 			      // fclose(yyin);           // Must not do this!!
mjsousa@879: 			      // FreeTracking(current_tracking); // Must not do this!!
mario@73: 			      /* yyterminate() terminates the scanner and returns a 0 to the 
mario@73: 			       * scanner's  caller, indicating "all done".
mario@73: 			       *	
mario@73: 			       * Our syntax parser (written with bison) has the token	
mario@73: 			       * END_OF_INPUT associated to the value 0, so even though
mario@73: 			       * we don't explicitly return the token END_OF_INPUT
mario@73: 			       * calling yyterminate() is equivalent to doing that. 
mario@73: 			       */ 	
etisserant@0: 			    yyterminate();
msousa@737: 			  } else {
mjsousa@761: 			    fclose(yyin);
mjsousa@879: 			    FreeTracking(current_tracking);
lbessard@136: 			    --include_stack_ptr; /* step back to the stack entry whose buffer, tracking and filename are restored below */
etisserant@0: 			    yy_delete_buffer(YY_CURRENT_BUFFER);
etisserant@0: 			    yy_switch_to_buffer((include_stack[include_stack_ptr]).buffer_state);
lbessard@136: 			    current_tracking = include_stack[include_stack_ptr].env;
etisserant@0: 			      /* removing constness of char *. This is safe actually,
etisserant@0: 			       * since the only real const char * that is stored on the stack is
etisserant@1: 			       * the first one (i.e. the one that gets stored in include_stack[0],
etisserant@0: 			       * which is never free'd!
etisserant@0: 			       */
msousa@286: 			    /* NOTE: We do __NOT__ free the malloc()'d memory since 
msousa@286: 			     *       pointers to this filename will be kept by many objects
msousa@286: 			     *       in the abstract syntax tree.
msousa@286: 			     *       This will later be used to provide correct error
msousa@286: 			     *       messages during semantic analysis (stage 3)
msousa@286: 			     */
msousa@286: 			    /* free((char *)current_filename); */
etisserant@0: 			    current_filename = include_stack[include_stack_ptr].filename;
etisserant@0: 			    yy_push_state(include_end); /* the <include_end> rules pop this state again */
etisserant@0: 			  }
etisserant@0: 			}
etisserant@0: 
etisserant@0: <include_end>{file_include_pragma_end}	yy_pop_state();
msousa@756: 	/* handle the artificial file includes created by include_string(), which do not end with a '}' */
msousa@756: <include_end>.				unput_text(0); yy_pop_state(); 
etisserant@0: 
etisserant@0: 
etisserant@0: 	/*********************************/
etisserant@0: 	/* Handle all the state changes! */
etisserant@0: 	/*********************************/
etisserant@0: 
mjsousa@866: 	/* INITIAL -> header_state */
etisserant@0: <INITIAL>{
mario@68: 	/* NOTE: how about functions that do not declare variables, and go directly to the body_state???
etisserant@0: 	 *      - According to Section 2.5.1.3 (Function Declaration), item 2 in the list, a FUNCTION
etisserant@0: 	 *        must have at least one input argument, so a correct declaration will have at least
etisserant@0: 	 *        one VAR_INPUT ... END_VAR construct!
etisserant@0: 	 *      - According to Section 2.5.2.2 (Function Block Declaration), a FUNCTION_BLOCK
etisserant@0: 	 *        must have at least one input argument, so a correct declaration will have at least
etisserant@0: 	 *        one VAR_INPUT ... END_VAR construct!
etisserant@0: 	 *      - According to Section 2.5.3 (Programs), a PROGRAM must have at least one input
etisserant@0: 	 *        argument, so a correct declaration will have at least one VAR_INPUT ... END_VAR
etisserant@0: 	 *        construct!
etisserant@0: 	 *
etisserant@0: 	 *       All the above means that we needn't worry about PROGRAMs, FUNCTIONs or
mario@68: 	 *       FUNCTION_BLOCKs that do not have at least one END_VAR before the body_state.
etisserant@0: 	 *       If the code has an error, and no END_VAR before the body, we will simply
mjsousa@866: 	 *       continue in the <vardecl_state> state, until the end of the FUNCTION, FUNCTION_BLOCK
etisserant@0: 	 *       or PROGRAM.
etisserant@0: 	 */
mjsousa@868: FUNCTION				yy_push_state(header_state); return FUNCTION;
mjsousa@868: FUNCTION_BLOCK				yy_push_state(header_state); return FUNCTION_BLOCK;
mjsousa@868: PROGRAM					yy_push_state(header_state); return PROGRAM;
lbessard@3: CONFIGURATION				BEGIN(config_state); return CONFIGURATION;
etisserant@0: }
etisserant@0: 
mario@68: 	/* INITIAL -> body_state */
etisserant@0: 	/* required if the function, program, etc.. has no VAR block! */
mario@6: 	/* We comment it out since the standard does not allow this.  */
mario@6: 	/* NOTE: Even if we were to include the following code, it    */
mario@6: 	/*       would have no effect whatsoever since the above      */
mario@6: 	/*       rules will take precedence!                          */
mario@6: 	/*
etisserant@0: <INITIAL>{
mario@68: FUNCTION	BEGIN(body_state); return FUNCTION;
mario@68: FUNCTION_BLOCK	BEGIN(body_state); return FUNCTION_BLOCK;
mario@68: PROGRAM		BEGIN(body_state); return PROGRAM;
etisserant@0: }
mario@6: 	*/
mario@6: 
mjsousa@868: 	/* header_state -> (vardecl_list_state) */
mjsousa@866: <header_state>{
mjsousa@868: VAR				| /* execute the next rule's action, i.e. fall-through! */
mjsousa@868: VAR_INPUT			|
mjsousa@868: VAR_OUTPUT			|
mjsousa@868: VAR_IN_OUT			|
mjsousa@868: VAR_EXTERNAL			|
mjsousa@868: VAR_GLOBAL			|
mjsousa@868: VAR_TEMP			|
mjsousa@868: VAR_CONFIG			|
mjsousa@868: VAR_ACCESS			unput_text(0); BEGIN(vardecl_list_state);
mjsousa@868: }
mjsousa@868: 
mjsousa@868: 
mjsousa@868: 	/* vardecl_list_state -> (vardecl_state | body_state | INITIAL) */
mjsousa@866: <vardecl_list_state>{
mjsousa@868: VAR_INPUT			| /* execute the next rule's action, i.e. fall-through! */
mjsousa@868: VAR_OUTPUT			|
mjsousa@868: VAR_IN_OUT			|
mjsousa@868: VAR_EXTERNAL			|
mjsousa@868: VAR_GLOBAL			|
mjsousa@868: VAR_TEMP			|
mjsousa@868: VAR_CONFIG			|
mjsousa@868: VAR_ACCESS			|
mjsousa@866: VAR				unput_text(0); yy_push_state(vardecl_state);
mjsousa@868: 
mjsousa@868: END_FUNCTION			unput_text(0); BEGIN(INITIAL); 
mjsousa@868: END_FUNCTION_BLOCK		unput_text(0); BEGIN(INITIAL); 
mjsousa@868: END_PROGRAM			unput_text(0); BEGIN(INITIAL); 
mjsousa@868: 
mjsousa@868: .				unput_text(0); yy_push_state(body_state); /* anything else, just change to body_state! */
mjsousa@868: }
mjsousa@868: 
mjsousa@868: 
mjsousa@868: 	/* vardecl_state -> pop to $previous_state (vardecl_list_state) */
mjsousa@866: <vardecl_state>{
mjsousa@866: END_VAR				yy_pop_state(); return END_VAR; /* pop back to the state that pushed vardecl_state (vardecl_list_state) */
mjsousa@866: }
mjsousa@866: 
etisserant@0: 
mjsousa@868: 	/* body_state -> (il_state | st_state | sfc_state) */
mario@68: <body_state>{
mjsousa@868: INITIAL_STEP			unput_text(0); BEGIN(sfc_state); 
mjsousa@868: 
mjsousa@868: {qualified_identifier}		unput_text(0); BEGIN(st_state); /* will always be followed by '[' for an array access, or ':=' as the left hand of an assignment statement */
mjsousa@868: {direct_variable_standard}	unput_text(0); BEGIN(st_state); /* will always be followed by ':=' as the left hand of an assignment statement */
mjsousa@868: 
mjsousa@868: RETURN				unput_text(0); BEGIN(st_state);
mjsousa@868: IF				unput_text(0); BEGIN(st_state);
mjsousa@868: CASE				unput_text(0); BEGIN(st_state);
mjsousa@868: FOR				unput_text(0); BEGIN(st_state);
mjsousa@868: WHILE				unput_text(0); BEGIN(st_state);
mjsousa@868: EXIT				unput_text(0); BEGIN(st_state);
mjsousa@868: REPEAT				unput_text(0); BEGIN(st_state);
lbessard@151: 
mario@6: 	/* ':=' occurs only in transitions, and not Function or FB bodies! */
mjsousa@868: :=				unput_text(0); BEGIN(st_state);
etisserant@0: 
etisserant@0: {identifier}	{int token = get_identifier_token(yytext);
mjsousa@866: 		 if ((token == prev_declared_fb_name_token) || (token == prev_declared_variable_name_token)) {
mjsousa@866: 		   /* the code has a call to a function block OR has an assignment with a variable as the lvalue */
mjsousa@868: 		   unput_text(0); BEGIN(st_state);
mjsousa@866: 		 } else
mjsousa@866:  		 if (token == prev_declared_derived_function_name_token) {
mjsousa@866: 		   /* the code has a call to a function - must be IL */
mjsousa@868: 		   unput_text(0); BEGIN(il_state);
etisserant@0: 		 } else {
mjsousa@866: 		   /* Might be a label in IL, or a bug in ST/IL code. We jump to IL */
mjsousa@868: 		   unput_text(0); BEGIN(il_state);
etisserant@0: 		 }
etisserant@0: 		}
mario@68: 
mjsousa@868: .		unput_text(0); BEGIN(il_state); /* Don't know what it could be. This is most likely a bug. Let's just jump to an arbitrary state (il_state)... */
mario@68: }	/* end of body_state lexical parser */
lbessard@3: 
mjsousa@866: 
mjsousa@866: 
mjsousa@868: 	/* (il_state | st_state) -> pop to $previous_state (vardecl_list_state or sfc_state) */
lbessard@3: <il_state,st_state>{
lbessard@3: END_FUNCTION		yy_pop_state(); unput_text(0);
lbessard@3: END_FUNCTION_BLOCK	yy_pop_state(); unput_text(0);
lbessard@3: END_PROGRAM		yy_pop_state(); unput_text(0);
lbessard@3: END_TRANSITION		yy_pop_state(); unput_text(0);
mario@6: END_ACTION		yy_pop_state(); unput_text(0);
lbessard@3: }
lbessard@3: 
mjsousa@868: 	/* sfc_state -> pop to $previous_state (vardecl_list_state or sfc_state) */
lbessard@4: <sfc_state>{
lbessard@4: END_FUNCTION		yy_pop_state(); unput_text(0);
lbessard@4: END_FUNCTION_BLOCK	yy_pop_state(); unput_text(0);
lbessard@4: END_PROGRAM		yy_pop_state(); unput_text(0);
lbessard@4: }
lbessard@4: 
etisserant@0: 	/* config -> INITIAL */
etisserant@0: END_CONFIGURATION	BEGIN(INITIAL); return END_CONFIGURATION;
etisserant@0: 
etisserant@0: 
etisserant@0: 
etisserant@0: 	/***************************************/
etisserant@0: 	/* Next is to remove all whitespace    */
etisserant@0: 	/***************************************/
etisserant@0: 	/* NOTE: pragmas are handled right at the beginning... */
etisserant@0: 
mjsousa@866: 	/* The whitespace */
mjsousa@866: <INITIAL,header_state,config_state,body_state,vardecl_list_state,vardecl_state,st_state,sfc_state,task_init_state,sfc_qualifier_state>{st_whitespace}	/* Eat any whitespace */
mjsousa@866: <il_state>{il_whitespace}		/* Eat any whitespace */
mjsousa@866: 
mjsousa@866: 	/* The comments */
mjsousa@867: <body_state,vardecl_list_state>{comment_beg}		yy_push_state(comment_state);
mjsousa@867: {comment_beg}						yy_push_state(comment_state);
mjsousa@866: <comment_state>{
mjsousa@867: {comment_beg}						{if (get_opt_nested_comments()) yy_push_state(comment_state);}
mjsousa@867: {comment_end}						yy_pop_state();
mjsousa@867: .							/* Ignore text inside comment! */
mjsousa@867: \n							/* Ignore text inside comment! */
mjsousa@866: }
msousa@267: 
etisserant@0: 	/*****************************************/
etisserant@0: 	/* B.1.1 Letters, digits and identifiers */
etisserant@0: 	/*****************************************/
etisserant@0: 	/* NOTE: 'R1', 'IN', etc... are IL operators, and therefore tokens
etisserant@0: 	 *       On the other hand, the spec does not define them as keywords,
etisserant@0: 	 *       which means they may be re-used for variable names, etc...!
etisserant@0: 	 *       The syntax parser already caters for the possibility of these
etisserant@0: 	 *       tokens being used for variable names in their declarations.
etisserant@0: 	 *       When they are declared, they will be added to the variable symbol table!
etisserant@0: 	 *       Further appearances of these tokens must no longer be parsed
etisserant@0: 	 *       as R1_tokens etc..., but rather as variable_name_tokens!
etisserant@0: 	 *
etisserant@0: 	 *       That is why the first thing we do with identifiers, even before
etisserant@0: 	 *       checking whether they may be a 'keyword', is to check whether
etisserant@0: 	 *       they have been previously declared as a variable name,
etisserant@0: 	 *
mario@13: 	 *       However, we have a dilemma! Should we here also check for
mario@13: 	 *       prev_declared_derived_function_name_token?
mario@13: 	 *       If we do, then the 'MOD' default library function (defined in
mario@13: 	 *       the standard) will always be returned as a function name, and
mario@13: 	 *       it will therefore not be possible to use it as an operator as 
mario@13: 	 *       in the following ST expression 'X := Y MOD Z;' !
mario@13: 	 *       If we don't, then it will not even be possible to use 'MOD'
mario@13: 	 *       as a function as in 'X := MOD(Y, Z);'
mario@13: 	 *       We solve this by NOT testing for function names here, and
mario@13: 	 *       handling this function and keyword clash in bison!
etisserant@0: 	 */
mario@83:  /*
etisserant@0: {identifier} 	{int token = get_identifier_token(yytext);
mario@81: 		 // fprintf(stderr, "flex: analysing identifier '%s'...", yytext); 
etisserant@0: 		 if ((token == prev_declared_variable_name_token) ||
mario@13: //		     (token == prev_declared_derived_function_name_token) || // DO NOT add this condition!
etisserant@0: 		     (token == prev_declared_fb_name_token)) {
mario@83: 		 // if (token != identifier_token)
mario@83: 		 // * NOTE: if we replace the above uncommented conditions with
mario@13:                   *       the simple test of (token != identifier_token), then 
mario@13:                   *       'MOD' et al must be removed from the 
mario@13:                   *       library_symbol_table as a default function name!
mario@83: 		  * //
etisserant@0: 		   yylval.ID=strdup(yytext);
mario@81: 		   // fprintf(stderr, "returning token %d\n", token); 
etisserant@0: 		   return token;
etisserant@0: 		 }
mario@83: 		 // otherwise, leave it for the other lexical parser rules... 
mario@81: 		 // fprintf(stderr, "rejecting\n"); 
etisserant@0: 		 REJECT;
etisserant@0: 		}
mario@83:  */
etisserant@0: 
etisserant@0: 	/******************************************************/
etisserant@0: 	/******************************************************/
etisserant@0: 	/******************************************************/
etisserant@0: 	/*****                                            *****/
etisserant@0: 	/*****                                            *****/
etisserant@0: 	/*****   N O W    D O   T H E   K E Y W O R D S   *****/
etisserant@0: 	/*****                                            *****/
etisserant@0: 	/*****                                            *****/
etisserant@0: 	/******************************************************/
etisserant@0: 	/******************************************************/
etisserant@0: 	/******************************************************/
etisserant@0: 
etisserant@0: 
mjsousa@919: REF	{if (get_opt_ref_operator()) return REF;        else{REJECT;}}		/* Keyword in IEC 61131-3 v3 */
mjsousa@919: REF_TO	{if (get_opt_ref_operator()) return REF_TO;     else{REJECT;}}		/* Keyword in IEC 61131-3 v3 */
mjsousa@919: NULL	{if (get_opt_ref_operator()) return NULL_token; else{REJECT;}}		/* Keyword in IEC 61131-3 v3 */
mjsousa@873: 
mario@82: EN	return EN;			/* Keyword */
mario@82: ENO	return ENO;			/* Keyword */
etisserant@0: 
etisserant@0: 
etisserant@0: 	/******************************/
etisserant@0: 	/* B 1.2.1 - Numeric Literals */
etisserant@0: 	/******************************/
mario@82: TRUE		return TRUE;		/* Keyword */
msousa@257: BOOL#1  	return boolean_true_literal_token;
msousa@257: BOOL#TRUE	return boolean_true_literal_token;
msousa@257: SAFEBOOL#1	{if (get_opt_safe_extensions()) {return safeboolean_true_literal_token;} else{REJECT;}} /* Keyword (Data Type) */ 
msousa@257: SAFEBOOL#TRUE	{if (get_opt_safe_extensions()) {return safeboolean_true_literal_token;} else{REJECT;}} /* Keyword (Data Type) */
msousa@257: 
mario@82: FALSE		return FALSE;		/* Keyword */
msousa@257: BOOL#0  	return boolean_false_literal_token;
msousa@257: BOOL#FALSE  	return boolean_false_literal_token;
msousa@257: SAFEBOOL#0	{if (get_opt_safe_extensions()) {return safeboolean_false_literal_token;} else{REJECT;}} /* Keyword (Data Type) */ 
msousa@257: SAFEBOOL#FALSE	{if (get_opt_safe_extensions()) {return safeboolean_false_literal_token;} else{REJECT;}} /* Keyword (Data Type) */
etisserant@0: 
etisserant@0: 
etisserant@0: 	/************************/
etisserant@0: 	/* B 1.2.3.1 - Duration */
etisserant@0: 	/************************/
mario@82: t#		return T_SHARP;		/* Delimiter */
mario@82: T#		return T_SHARP;		/* Delimiter */
mario@82: TIME		return TIME;		/* Keyword (Data Type) */
etisserant@0: 
etisserant@0: 
etisserant@0: 	/************************************/
etisserant@0: 	/* B 1.2.3.2 - Time of day and Date */
etisserant@0: 	/************************************/
mario@82: TIME_OF_DAY	return TIME_OF_DAY;	/* Keyword (Data Type) */
mario@82: TOD		return TIME_OF_DAY;	/* Keyword (Data Type) */
mario@82: DATE		return DATE;		/* Keyword (Data Type) */
mario@82: d#		return D_SHARP;		/* Delimiter */
mario@82: D#		return D_SHARP;		/* Delimiter */
mario@82: DATE_AND_TIME	return DATE_AND_TIME;	/* Keyword (Data Type) */
mario@82: DT		return DATE_AND_TIME;	/* Keyword (Data Type) */
etisserant@0: 
etisserant@0: 
etisserant@0: 	/***********************************/
etisserant@0: 	/* B 1.3.1 - Elementary Data Types */
etisserant@0: 	/***********************************/
msousa@257: BOOL		return BOOL;		/* Keyword (Data Type) */
msousa@257: 
mario@82: BYTE		return BYTE;		/* Keyword (Data Type) */
mario@82: WORD		return WORD;		/* Keyword (Data Type) */
mario@82: DWORD		return DWORD;		/* Keyword (Data Type) */
mario@82: LWORD		return LWORD;		/* Keyword (Data Type) */
etisserant@0: 
msousa@257: SINT		return SINT;		/* Keyword (Data Type) */
msousa@257: INT		return INT;		/* Keyword (Data Type) */
msousa@257: DINT		return DINT;		/* Keyword (Data Type) */
msousa@257: LINT		return LINT;		/* Keyword (Data Type) */
msousa@257: 
msousa@257: USINT		return USINT;		/* Keyword (Data Type) */
msousa@257: UINT		return UINT;		/* Keyword (Data Type) */
msousa@257: UDINT		return UDINT;		/* Keyword (Data Type) */
msousa@257: ULINT		return ULINT;		/* Keyword (Data Type) */
msousa@257: 
msousa@257: REAL		return REAL;		/* Keyword (Data Type) */
msousa@257: LREAL		return LREAL;		/* Keyword (Data Type) */
msousa@257: 
msousa@257: WSTRING		return WSTRING;		/* Keyword (Data Type) */
msousa@257: STRING		return STRING;		/* Keyword (Data Type) */
msousa@257: 
msousa@257: TIME		return TIME;		/* Keyword (Data Type) */
msousa@257: DATE		return DATE;		/* Keyword (Data Type) */
msousa@257: DT		return DT;		/* Keyword (Data Type) */
msousa@257: TOD		return TOD;		/* Keyword (Data Type) */
msousa@257: DATE_AND_TIME	return DATE_AND_TIME;	/* Keyword (Data Type) */
msousa@257: TIME_OF_DAY	return TIME_OF_DAY;	/* Keyword (Data Type) */
msousa@257: 
msousa@257: 	/*****************************************************************/
msousa@257: 	/* Keywords defined in "Safety Software Technical Specification" */
msousa@257: 	/*****************************************************************/
msousa@257:         /* 
msousa@257:          * NOTE: The following keywords are defined in 
msousa@257:          *       "Safety Software Technical Specification,
msousa@257:          *        Part 1: Concepts and Function Blocks,  
msousa@257:          *        Version 1.0 – Official Release"
msousa@257:          *        written by PLCopen - Technical Committee 5
msousa@257:          *
msousa@257:          *        We only support these extensions and keywords
msousa@257:          *        if the appropriate command line option is given.
msousa@257:          */
msousa@257: SAFEBOOL	     {if (get_opt_safe_extensions()) {return SAFEBOOL;}          else {REJECT;}} 
msousa@257: 
msousa@257: SAFEBYTE	     {if (get_opt_safe_extensions()) {return SAFEBYTE;}          else {REJECT;}} 
msousa@257: SAFEWORD	     {if (get_opt_safe_extensions()) {return SAFEWORD;}          else {REJECT;}} 
msousa@257: SAFEDWORD	     {if (get_opt_safe_extensions()) {return SAFEDWORD;}         else{REJECT;}}
msousa@257: SAFELWORD	     {if (get_opt_safe_extensions()) {return SAFELWORD;}         else{REJECT;}}
msousa@257:                
msousa@257: SAFEREAL	     {if (get_opt_safe_extensions()) {return SAFEREAL;}          else{REJECT;}} /* Keyword (Data Type) */
msousa@257: SAFELREAL    	     {if (get_opt_safe_extensions()) {return SAFELREAL;}         else{REJECT;}}
msousa@257:                   
msousa@257: SAFESINT	     {if (get_opt_safe_extensions()) {return SAFESINT;}          else{REJECT;}}
msousa@257: SAFEINT	             {if (get_opt_safe_extensions()) {return SAFEINT;}           else{REJECT;}}
msousa@257: SAFEDINT	     {if (get_opt_safe_extensions()) {return SAFEDINT;}          else{REJECT;}}
msousa@257: SAFELINT             {if (get_opt_safe_extensions()) {return SAFELINT;}          else{REJECT;}}
msousa@257: 
msousa@257: SAFEUSINT            {if (get_opt_safe_extensions()) {return SAFEUSINT;}         else{REJECT;}}
msousa@257: SAFEUINT             {if (get_opt_safe_extensions()) {return SAFEUINT;}          else{REJECT;}}
msousa@257: SAFEUDINT            {if (get_opt_safe_extensions()) {return SAFEUDINT;}         else{REJECT;}}
msousa@257: SAFEULINT            {if (get_opt_safe_extensions()) {return SAFEULINT;}         else{REJECT;}}
msousa@257: 
msousa@257:  /* SAFESTRING and SAFEWSTRING are not yet supported, i.e. checked correctly, in the semantic analyser (stage 3) */
msousa@257:  /*  so it is best not to support them at all... */
msousa@257:  /*
msousa@257: SAFEWSTRING          {if (get_opt_safe_extensions()) {return SAFEWSTRING;}       else{REJECT;}}
msousa@257: SAFESTRING           {if (get_opt_safe_extensions()) {return SAFESTRING;}        else{REJECT;}}
msousa@257:  */
msousa@257: 
msousa@257: SAFETIME             {if (get_opt_safe_extensions()) {return SAFETIME;}          else{REJECT;}}
msousa@257: SAFEDATE             {if (get_opt_safe_extensions()) {return SAFEDATE;}          else{REJECT;}}
msousa@257: SAFEDT               {if (get_opt_safe_extensions()) {return SAFEDT;}            else{REJECT;}}
msousa@257: SAFETOD              {if (get_opt_safe_extensions()) {return SAFETOD;}           else{REJECT;}}
msousa@257: SAFEDATE_AND_TIME    {if (get_opt_safe_extensions()) {return SAFEDATE_AND_TIME;} else{REJECT;}}
msousa@257: SAFETIME_OF_DAY      {if (get_opt_safe_extensions()) {return SAFETIME_OF_DAY;}   else{REJECT;}}
etisserant@0: 
etisserant@0: 	/********************************/
etisserant@0: 	/* B 1.3.2 - Generic data types */
etisserant@0: 	/********************************/
etisserant@0: 	/* Strangely, the following symbols do not seem to be required! */
etisserant@0: 	/* But we include them so they become reserved words, and do not
etisserant@0: 	 * get passed up to bison as an identifier...
etisserant@0: 	 */
mario@82: ANY		return ANY;		/* Keyword (Data Type) */
mario@82: ANY_DERIVED	return ANY_DERIVED;	/* Keyword (Data Type) */
mario@82: ANY_ELEMENTARY	return ANY_ELEMENTARY;	/* Keyword (Data Type) */
mario@82: ANY_MAGNITUDE	return ANY_MAGNITUDE;	/* Keyword (Data Type) */
mario@82: ANY_NUM		return ANY_NUM;		/* Keyword (Data Type) */
mario@82: ANY_REAL	return ANY_REAL;	/* Keyword (Data Type) */
mario@82: ANY_INT		return ANY_INT;		/* Keyword (Data Type) */
mario@82: ANY_BIT		return ANY_BIT;		/* Keyword (Data Type) */
mario@82: ANY_STRING	return ANY_STRING;	/* Keyword (Data Type) */
mario@82: ANY_DATE	return ANY_DATE;	/* Keyword (Data Type) */
etisserant@0: 
etisserant@0: 
etisserant@0: 	/********************************/
etisserant@0: 	/* B 1.3.3 - Derived data types */
etisserant@0: 	/********************************/
mario@82: ":="		return ASSIGN;		/* Delimiter */
mario@82: ".."		return DOTDOT;		/* Delimiter */
mario@82: TYPE		return TYPE;		/* Keyword */
mario@82: END_TYPE	return END_TYPE;	/* Keyword */
mario@82: ARRAY		return ARRAY;		/* Keyword */
mario@82: OF		return OF;		/* Keyword */
mario@82: STRUCT		return STRUCT;		/* Keyword */
mario@82: END_STRUCT	return END_STRUCT;	/* Keyword */
etisserant@0: 
etisserant@0: 
etisserant@0: 	/*********************/
etisserant@0: 	/* B 1.4 - Variables */
etisserant@0: 	/*********************/
etisserant@0: 
etisserant@0: 	/******************************************/
etisserant@0: 	/* B 1.4.3 - Declaration & Initialisation */
etisserant@0: 	/******************************************/
mario@82: VAR_INPUT	return VAR_INPUT;	/* Keyword */
mario@82: VAR_OUTPUT	return VAR_OUTPUT;	/* Keyword */
mario@82: VAR_IN_OUT	return VAR_IN_OUT;	/* Keyword */
mario@82: VAR_EXTERNAL	return VAR_EXTERNAL;	/* Keyword */
mario@82: VAR_GLOBAL	return VAR_GLOBAL;	/* Keyword */
mario@82: END_VAR		return END_VAR;		/* Keyword */
mario@82: RETAIN		return RETAIN;		/* Keyword */
mario@82: NON_RETAIN	return NON_RETAIN;	/* Keyword */
mario@82: R_EDGE		return R_EDGE;		/* Keyword */
mario@82: F_EDGE		return F_EDGE;		/* Keyword */
mario@82: AT		return AT;		/* Keyword */
etisserant@0: 
etisserant@0: 
etisserant@0: 	/***********************/
etisserant@0: 	/* B 1.5.1 - Functions */
etisserant@0: 	/***********************/
mario@82: FUNCTION	return FUNCTION;	/* Keyword */
mario@82: END_FUNCTION	return END_FUNCTION;	/* Keyword */
mario@82: VAR		return VAR;		/* Keyword */
mario@82: CONSTANT	return CONSTANT;	/* Keyword */
etisserant@0: 
etisserant@0: 
etisserant@0: 	/*****************************/
etisserant@0: 	/* B 1.5.2 - Function Blocks */
etisserant@0: 	/*****************************/
mario@82: FUNCTION_BLOCK		return FUNCTION_BLOCK;		/* Keyword */
mario@82: END_FUNCTION_BLOCK	return END_FUNCTION_BLOCK;	/* Keyword */
mario@82: VAR_TEMP		return VAR_TEMP;		/* Keyword */
mario@82: VAR			return VAR;			/* Keyword */
mario@82: NON_RETAIN		return NON_RETAIN;		/* Keyword */
mario@82: END_VAR			return END_VAR;			/* Keyword */
etisserant@0: 
etisserant@0: 
etisserant@0: 	/**********************/
etisserant@0: 	/* B 1.5.3 - Programs */
etisserant@0: 	/**********************/
mario@82: PROGRAM		return PROGRAM;			/* Keyword */
mario@82: END_PROGRAM	return END_PROGRAM;		/* Keyword */
etisserant@0: 
etisserant@0: 
etisserant@0: 	/********************************************/
etisserant@0: 	/* B 1.6 Sequential Function Chart elements */
etisserant@0: 	/********************************************/
etisserant@0: 	/* NOTE: the following identifiers/tokens clash with the R and S IL operators, as well
etisserant@0: 	 * as other identifiers that may be used as variable names inside IL and ST programs.
etisserant@0: 	 * They will have to be handled when we include parsing of SFC... For now, simply
etisserant@0: 	 * ignore them!
etisserant@0: 	 */
etisserant@1: 	 
mario@82: ACTION		return ACTION;			/* Keyword */
mario@82: END_ACTION	return END_ACTION;		/* Keyword */
mario@82: 
mario@82: TRANSITION	return TRANSITION;		/* Keyword */
mario@82: END_TRANSITION	return END_TRANSITION;		/* Keyword */
mario@82: FROM		return FROM;			/* Keyword */
mario@82: TO		return TO;			/* Keyword */
mario@82: 
mario@82: INITIAL_STEP	return INITIAL_STEP;		/* Keyword */
mario@82: STEP		return STEP;			/* Keyword */
mario@82: END_STEP	return END_STEP;		/* Keyword */
etisserant@0: 
mario@74: 	/* PRIORITY is not a keyword, so we only return it when 
mario@74: 	 * it is explicitly required and we are not expecting any identifiers
mario@74: 	 * that could also use the same letter sequence (i.e. an identifier: priority)
mario@74: 	 */
mario@86: <sfc_priority_state>PRIORITY	return PRIORITY;
mario@74: 
mario@68: <sfc_qualifier_state>{
etisserant@0: L		return L;
etisserant@0: D		return D;
etisserant@0: SD		return SD;
etisserant@0: DS		return DS;
etisserant@0: SL		return SL;
etisserant@0: N		return N;
etisserant@0: P		return P;
Laurent@627: P0		return P0;
Laurent@627: P1		return P1;
etisserant@0: R		return R;
etisserant@0: S		return S;
etisserant@1: }
etisserant@0: 
etisserant@0: 
etisserant@0: 	/********************************/
etisserant@0: 	/* B 1.7 Configuration elements */
etisserant@0: 	/********************************/
mario@82: CONFIGURATION		return CONFIGURATION;		/* Keyword */
mario@82: END_CONFIGURATION	return END_CONFIGURATION;	/* Keyword */
mario@82: TASK			return TASK;			/* Keyword */
mario@82: RESOURCE		return RESOURCE;		/* Keyword */
mario@82: ON			return ON;			/* Keyword */
mario@82: END_RESOURCE		return END_RESOURCE;		/* Keyword */
mario@82: VAR_CONFIG		return VAR_CONFIG;		/* Keyword */
mario@82: VAR_ACCESS		return VAR_ACCESS;		/* Keyword */
mario@82: END_VAR			return END_VAR;			/* Keyword */
mario@82: WITH			return WITH;			/* Keyword */
mario@82: PROGRAM			return PROGRAM;			/* Keyword */
mario@82: RETAIN			return RETAIN;			/* Keyword */
mario@82: NON_RETAIN		return NON_RETAIN;		/* Keyword */
mario@82: READ_WRITE		return READ_WRITE;		/* Keyword */
mario@82: READ_ONLY		return READ_ONLY;		/* Keyword */
mario@74: 
mario@74: 	/* PRIORITY, SINGLE and INTERVAL are not keywords, so we only return them when 
mario@74: 	 * they are explicitly required and we are not expecting any identifiers
mario@74: 	 * that could also use the same letter sequence (i.e. an identifier: priority, ...)
mario@74: 	 */
mario@74: <task_init_state>{
etisserant@0: PRIORITY		return PRIORITY;
etisserant@0: SINGLE			return SINGLE;
etisserant@0: INTERVAL		return INTERVAL;
mario@74: }
etisserant@0: 
etisserant@0: 	/***********************************/
etisserant@0: 	/* B 2.1 Instructions and Operands */
etisserant@0: 	/***********************************/
lbessard@3: <il_state>\n		return EOL;
etisserant@0: 
etisserant@0: 
etisserant@0: 	/*******************/
etisserant@0: 	/* B 2.2 Operators */
etisserant@0: 	/*******************/
etisserant@0: 	/* NOTE: we can't have flex return the same token for
etisserant@0: 	 *       ANDN and &N, neither for AND and &, since
etisserant@0: 	 *       AND and ANDN are considered valid variable,
etisserant@0: 	 *       function or function block type names!
etisserant@0: 	 *       This means that the parser may decide that the
etisserant@0: 	 *       AND or ANDN strings found in the source code
etisserant@0: 	 *       are being used as variable names
etisserant@0: 	 *       and not as operators, and will therefore transform
etisserant@0: 	 *       these tokens into identifier tokens!
etisserant@0: 	 *       We can't have the parser thinking that the source
etisserant@0: 	 *       code contained the string AND (which may be interpreted
etisserant@0: 	 *       as a variable name) when in reality the source code
etisserant@0: 	 *       merely contained the character &, so we use two
etisserant@0: 	 *       different tokens for & and AND (and similarly
etisserant@0: 	 *       ANDN and &N)!
etisserant@0: 	 */
mario@68:  /* The following tokens clash with ST expression operators and Standard Functions */
mario@73:  /* They are also keywords! */
mario@82: AND		return AND;		/* Keyword */
mario@82: MOD		return MOD;		/* Keyword */
mario@82: OR		return OR;		/* Keyword */
mario@82: XOR		return XOR;		/* Keyword */
mario@82: NOT		return NOT;		/* Keyword */
mario@68: 
mario@68:  /* The following tokens clash with Standard Functions */
mario@82:  /* They are keywords because they are a function name */
mario@73: <il_state>{
mario@82: ADD		return ADD;		/* Keyword (Standard Function) */
mario@82: DIV		return DIV;		/* Keyword (Standard Function) */
mario@82: EQ		return EQ;		/* Keyword (Standard Function) */
mario@82: GE		return GE;		/* Keyword (Standard Function) */
mario@82: GT		return GT;		/* Keyword (Standard Function) */
mario@82: LE		return LE;		/* Keyword (Standard Function) */
mario@82: LT		return LT;		/* Keyword (Standard Function) */
mario@82: MUL		return MUL;		/* Keyword (Standard Function) */
mario@82: NE		return NE;		/* Keyword (Standard Function) */
mario@82: SUB		return SUB;		/* Keyword (Standard Function) */
mario@73: }
mario@68: 
mario@68:  /* The following tokens clash with SFC action qualifiers */
mario@82:  /* They are not keywords! */
mario@73: <il_state>{
mario@68: S		return S;
mario@68: R		return R;
mario@73: }
mario@68: 
mario@68:  /* The following tokens clash with ST expression operators */
mario@82: &		return AND2;		/* NOT a Delimiter! */
mario@68: 
mario@68:  /* The following tokens have no clashes */
mario@82:  /* They are not keywords! */
mario@73: <il_state>{
etisserant@0: LD		return LD;
etisserant@0: LDN		return LDN;
etisserant@0: ST		return ST;
etisserant@0: STN		return STN;
etisserant@0: S1		return S1;
etisserant@0: R1		return R1;
etisserant@0: CLK		return CLK;
etisserant@0: CU		return CU;
etisserant@0: CD		return CD;
etisserant@0: PV		return PV;
etisserant@0: IN		return IN;
etisserant@0: PT		return PT;
etisserant@0: ANDN		return ANDN;
etisserant@0: &N		return ANDN2;
etisserant@0: ORN		return ORN;
etisserant@0: XORN		return XORN;
etisserant@0: CAL		return CAL;
etisserant@0: CALC		return CALC;
etisserant@0: CALCN		return CALCN;
etisserant@0: RET		return RET;
etisserant@0: RETC		return RETC;
etisserant@0: RETCN		return RETCN;
etisserant@0: JMP		return JMP;
etisserant@0: JMPC		return JMPC;
etisserant@0: JMPCN		return JMPCN;
mario@73: }
etisserant@0: 
etisserant@0: 	/***********************/
etisserant@0: 	/* B 3.1 - Expressions */
etisserant@0: 	/***********************/
mario@82: "**"		return OPER_EXP;	/* NOT a Delimiter! */
mario@82: "<>"		return OPER_NE;		/* NOT a Delimiter! */
mario@82: ">="		return OPER_GE;		/* NOT a Delimiter! */
mario@82: "<="		return OPER_LE;		/* NOT a Delimiter! */
mario@82: &		return AND2;		/* NOT a Delimiter! */
mario@82: AND		return AND;		/* Keyword */
mario@82: XOR		return XOR;		/* Keyword */
mario@82: OR		return OR;		/* Keyword */
mario@82: NOT		return NOT;		/* Keyword */
mario@82: MOD		return MOD;		/* Keyword */
etisserant@0: 
etisserant@0: 
etisserant@0: 	/*****************************************/
etisserant@0: 	/* B 3.2.2 Subprogram Control Statements */
etisserant@0: 	/*****************************************/
mario@82: :=		return ASSIGN;		/* Delimiter */
mario@82: =>		return SENDTO;		/* Delimiter */
mario@82: RETURN		return RETURN;		/* Keyword */
etisserant@0: 
etisserant@0: 
etisserant@0: 	/********************************/
etisserant@0: 	/* B 3.2.3 Selection Statements */
etisserant@0: 	/********************************/
mario@82: IF		return IF;		/* Keyword */
mario@82: THEN		return THEN;		/* Keyword */
mario@82: ELSIF		return ELSIF;		/* Keyword */
mario@82: ELSE		return ELSE;		/* Keyword */
mario@82: END_IF		return END_IF;		/* Keyword */
mario@82: 
mario@82: CASE		return CASE;		/* Keyword */
mario@82: OF		return OF;		/* Keyword */
mario@82: ELSE		return ELSE;		/* Keyword */
mario@82: END_CASE	return END_CASE;	/* Keyword */
etisserant@0: 
etisserant@0: 
etisserant@0: 	/********************************/
etisserant@0: 	/* B 3.2.4 Iteration Statements */
etisserant@0: 	/********************************/
mario@82: FOR		return FOR;		/* Keyword */
mario@82: TO		return TO;		/* Keyword */
mario@82: BY		return BY;		/* Keyword */
mario@82: DO		return DO;		/* Keyword */
mario@82: END_FOR		return END_FOR;		/* Keyword */
mario@82: 
mario@82: WHILE		return WHILE;		/* Keyword */
mario@82: DO		return DO;		/* Keyword */
mario@82: END_WHILE	return END_WHILE;	/* Keyword */
mario@82: 
mario@82: REPEAT		return REPEAT;		/* Keyword */
mario@82: UNTIL		return UNTIL;		/* Keyword */
mario@82: END_REPEAT	return END_REPEAT;	/* Keyword */
mario@82: 
mario@82: EXIT		return EXIT;		/* Keyword */
etisserant@0: 
etisserant@0: 
msousa@257: 
etisserant@0: 
etisserant@0: 
etisserant@0: 
etisserant@0: 	/********************************************************/
etisserant@0: 	/********************************************************/
etisserant@0: 	/********************************************************/
etisserant@0: 	/*****                                              *****/
etisserant@0: 	/*****                                              *****/
etisserant@0: 	/*****  N O W    W O R K    W I T H    V A L U E S  *****/
etisserant@0: 	/*****                                              *****/
etisserant@0: 	/*****                                              *****/
etisserant@0: 	/********************************************************/
etisserant@0: 	/********************************************************/
etisserant@0: 	/********************************************************/
etisserant@0: 
etisserant@0: 
etisserant@0: 	/********************************************/
etisserant@0: 	/* B.1.4.1   Directly Represented Variables */
etisserant@0: 	/********************************************/
lbessard@175: {direct_variable}   {yylval.ID=strdup(yytext); return get_direct_variable_token(yytext);}
etisserant@0: 
etisserant@0: 
etisserant@0: 	/******************************************/
etisserant@0: 	/* B 1.4.3 - Declaration & Initialisation */
etisserant@0: 	/******************************************/
etisserant@0: {incompl_location}	{yylval.ID=strdup(yytext); return incompl_location_token;}
etisserant@0: 
etisserant@0: 
etisserant@0: 	/************************/
etisserant@0: 	/* B 1.2.3.1 - Duration */
etisserant@0: 	/************************/
etisserant@0: {fixed_point}		{yylval.ID=strdup(yytext); return fixed_point_token;}
msousa@547: {interval}		{/*fprintf(stderr, "entering time_literal_state ##%s##\n", yytext);*/ unput_and_mark('#'); yy_push_state(time_literal_state);}
msousa@547: {erroneous_interval}	{return erroneous_interval_token;}
msousa@547: 
msousa@547: <time_literal_state>{
msousa@547: {integer}d		{yylval.ID=strdup(yytext); yylval.ID[yyleng-1] = '\0'; return integer_d_token;}
msousa@547: {integer}h		{yylval.ID=strdup(yytext); yylval.ID[yyleng-1] = '\0'; return integer_h_token;}
msousa@547: {integer}m		{yylval.ID=strdup(yytext); yylval.ID[yyleng-1] = '\0'; return integer_m_token;}
msousa@547: {integer}s		{yylval.ID=strdup(yytext); yylval.ID[yyleng-1] = '\0'; return integer_s_token;}
msousa@547: {integer}ms		{yylval.ID=strdup(yytext); yylval.ID[yyleng-2] = '\0'; return integer_ms_token;}
msousa@547: {fixed_point}d		{yylval.ID=strdup(yytext); yylval.ID[yyleng-1] = '\0'; return fixed_point_d_token;}
msousa@547: {fixed_point}h		{yylval.ID=strdup(yytext); yylval.ID[yyleng-1] = '\0'; return fixed_point_h_token;}
msousa@547: {fixed_point}m		{yylval.ID=strdup(yytext); yylval.ID[yyleng-1] = '\0'; return fixed_point_m_token;}
msousa@547: {fixed_point}s		{yylval.ID=strdup(yytext); yylval.ID[yyleng-1] = '\0'; return fixed_point_s_token;}
msousa@547: {fixed_point}ms		{yylval.ID=strdup(yytext); yylval.ID[yyleng-2] = '\0'; return fixed_point_ms_token;}
msousa@547: 
msousa@547: _			/* do nothing - eat it up!*/
msousa@616: \#			{/*fprintf(stderr, "popping from time_literal_state (###)\n");*/ yy_pop_state(); return end_interval_token;}
msousa@616: .			{/*fprintf(stderr, "time_literal_state: found invalid character '%s'. Aborting!\n", yytext);*/ ERROR;}
msousa@547: \n			{ERROR;}
msousa@547: }
etisserant@0: 	/*******************************/
etisserant@0: 	/* B.1.2.2   Character Strings */
etisserant@0: 	/*******************************/
etisserant@0: {double_byte_character_string} {yylval.ID=strdup(yytext); return double_byte_character_string_token;}
etisserant@0: {single_byte_character_string} {yylval.ID=strdup(yytext); return single_byte_character_string_token;}
etisserant@0: 
etisserant@0: 
etisserant@0: 	/******************************/
etisserant@0: 	/* B.1.2.1   Numeric literals */
etisserant@0: 	/******************************/
etisserant@0: {integer}		{yylval.ID=strdup(yytext); return integer_token;}
etisserant@0: {real}			{yylval.ID=strdup(yytext); return real_token;}
etisserant@0: {binary_integer}	{yylval.ID=strdup(yytext); return binary_integer_token;}
etisserant@0: {octal_integer} 	{yylval.ID=strdup(yytext); return octal_integer_token;}
etisserant@0: {hex_integer} 		{yylval.ID=strdup(yytext); return hex_integer_token;}
etisserant@0: 
etisserant@0: 
etisserant@0: 	/*****************************************/
etisserant@0: 	/* B.1.1 Letters, digits and identifiers */
etisserant@0: 	/*****************************************/
mjsousa@866: <st_state>{identifier}/({st_whitespace_or_pragma_or_comment})"=>"	{yylval.ID=strdup(yytext); return sendto_identifier_token;}
mjsousa@866: <il_state>{identifier}/({il_whitespace_or_pragma_or_comment})"=>"	{yylval.ID=strdup(yytext); return sendto_identifier_token;}
etisserant@0: {identifier} 				{yylval.ID=strdup(yytext);
mario@75: 					 // printf("returning identifier...: %s, %d\n", yytext, get_identifier_token(yytext));
etisserant@0: 					 return get_identifier_token(yytext);}
etisserant@0: 
etisserant@0: 
etisserant@0: 
etisserant@0: 
etisserant@0: 
etisserant@0: 
etisserant@0: 	/************************************************/
etisserant@0: 	/************************************************/
etisserant@0: 	/************************************************/
etisserant@0: 	/*****                                      *****/
etisserant@0: 	/*****                                      *****/
etisserant@0: 	/*****   T H E    L E F T O V E R S . . .   *****/
etisserant@0: 	/*****                                      *****/
etisserant@0: 	/*****                                      *****/
etisserant@0: 	/************************************************/
etisserant@0: 	/************************************************/
etisserant@0: 	/************************************************/
etisserant@0: 
etisserant@0: 	/* do the single character tokens...
etisserant@0: 	 *
etisserant@0: 	 *  e.g.:  ':'  '('  ')'  '+'  '*'  ...
etisserant@0: 	 */
etisserant@0: .	{return yytext[0];}
etisserant@0: 
etisserant@0: 
etisserant@0: %%
etisserant@0: 
etisserant@0: 
msousa@757: /*************************/
msousa@757: /* Tracking Functions... */
msousa@757: /*************************/
msousa@757: 
mjsousa@880: #define MAX_LINE_LENGTH 1024
msousa@757: 
msousa@757: tracking_t *GetNewTracking(FILE* in_file) {
msousa@757:   tracking_t* new_env = new tracking_t;
msousa@757:   new_env->eof = 0;
msousa@757:   new_env->lineNumber = 0;
msousa@757:   new_env->currentChar = 0;
msousa@757:   new_env->lineLength = 0;
msousa@757:   new_env->currentTokenStart = 0;
mjsousa@879:   new_env->buffer = (char*)malloc(MAX_LINE_LENGTH);
msousa@757:   new_env->in_file = in_file;
msousa@757:   return new_env;
msousa@757: }
msousa@757: 
msousa@757: 
mjsousa@879: /* Release a tracking state obtained from GetNewTracking(): first its line
mjsousa@879:  * buffer (malloc'ed, hence free()), then the object itself (new'ed, hence delete).
mjsousa@879:  * Passing NULL is a no-op, mirroring free() and delete.
mjsousa@879:  */
mjsousa@879: void FreeTracking(tracking_t *tracking) {
mjsousa@879:   if (tracking == NULL)
mjsousa@879:     return;
mjsousa@879:   free(tracking->buffer);
mjsousa@879:   delete tracking;
mjsousa@879: }
mjsousa@879: 
mjsousa@879: 
msousa@757: /* GetNextChar: reads a character from input
msousa@757:  *
msousa@757:  * Hands the next character of the current input file to the caller, refilling
msousa@757:  * the line buffer of current_tracking with fgets() whenever it has been used up,
msousa@757:  * and counting the lines as they are read.
msousa@757:  *
msousa@757:  *   b         - receives the character in b[0]
msousa@757:  *   maxBuffer - capacity of b; not consulted, as exactly one character is stored
msousa@757:  *
msousa@757:  * Returns 1 when a (non NUL) character was stored in b[0], and 0 on end of file,
msousa@757:  * on a read error, or when the character read is a NUL byte.
msousa@757:  */
msousa@757: int GetNextChar(char *b, int maxBuffer) {
msousa@757:   char *p;
msousa@757:   
msousa@757:   if (  current_tracking->eof  )
msousa@757:     return 0;
msousa@757:   
msousa@757:   /* refill the line buffer until it holds at least one unread character */
msousa@757:   while (  current_tracking->currentChar >= current_tracking->lineLength  ) {
msousa@757:     current_tracking->currentChar = 0;
msousa@757:     current_tracking->currentTokenStart = 1;
msousa@757:     current_tracking->eof = false;
msousa@757:     
mjsousa@879:     p = fgets(current_tracking->buffer, MAX_LINE_LENGTH, current_tracking->in_file);
msousa@757:     if (  p == NULL  ) {
msousa@757:       if (  ferror(current_tracking->in_file)  )
msousa@757:         return 0;
msousa@757:       current_tracking->eof = true;
msousa@757:       return 0;
msousa@757:     }
msousa@757:     
msousa@757:     current_tracking->lineLength = strlen(current_tracking->buffer);
mjsousa@880:     
mjsousa@880:     /* only increment line number if the buffer was big enough to read the whole line!
mjsousa@880:      * NOTE: fgets() may hand back an empty string (a line starting with a NUL byte),
mjsousa@880:      *       in which case there is no last character to look at.
mjsousa@880:      */
mjsousa@880:     if (current_tracking->lineLength > 0) {
mjsousa@880:       char last_char = current_tracking->buffer[current_tracking->lineLength - 1];
mjsousa@880:       if (('\n' == last_char) || ('\r' == last_char))  // '\r' ---> CR, '\n'  ---> LF
mjsousa@880:         current_tracking->lineNumber++;
mjsousa@880:     }
msousa@757:   }
msousa@757:   
msousa@757:   b[0] = current_tracking->buffer[current_tracking->currentChar];
msousa@757:   if (b[0] == ' ' || b[0] == '\t')
msousa@757:     current_tracking->currentTokenStart++;
msousa@757:   current_tracking->currentChar++;
msousa@757: 
msousa@757:   return b[0]==0?0:1;
msousa@757: }
msousa@757: 
msousa@757: 
msousa@757: 
msousa@757: 
etisserant@0: /***********************************/
etisserant@0: /* Utility function definitions... */
etisserant@0: /***********************************/
etisserant@0: 
etisserant@0: /* Write the stack of files currently being included to stderr,
etisserant@0:  * starting with the most recently pushed entry. Each entry is shown
etisserant@0:  * as <filename>:<line number>. Nothing is printed if the stack is empty.
etisserant@0:  */
etisserant@0: void print_include_stack(void) {
etisserant@0:   int depth = include_stack_ptr;
etisserant@0: 
etisserant@0:   if (depth > 0)
etisserant@0:     fputs("in file ", stderr);
etisserant@0:   while (depth-- > 0)
lbessard@136:     fprintf (stderr, "included from file %s:%d\n", include_stack[depth].filename, include_stack[depth].env->lineNumber);
etisserant@0: }
etisserant@0: 
etisserant@0: 
msousa@756: 
msousa@756: /* Start reading from a new include file: the scanner state of the file being
msousa@756:  * processed is pushed onto include_stack, and flex is pointed at <filehandle>.
msousa@756:  * The program is terminated if MAX_INCLUDE_DEPTH would be exceeded.
msousa@756:  */
msousa@756: void handle_include_file_(FILE *filehandle, const char *filename) {
msousa@756:   if (MAX_INCLUDE_DEPTH <= include_stack_ptr) {
msousa@756:     fputs("Includes nested too deeply\n", stderr);
msousa@756:     exit( 1 );
msousa@756:   }
msousa@756: 
msousa@756:   /* remember where we were in the file currently being parsed... */
msousa@756:   include_stack[include_stack_ptr].filename     = current_filename;
msousa@756:   include_stack[include_stack_ptr].env          = current_tracking;
msousa@756:   include_stack[include_stack_ptr].buffer_state = YY_CURRENT_BUFFER;
msousa@756:   include_stack_ptr++;
msousa@756: 
msousa@756:   /* ...and start afresh on the new one */
msousa@756:   yyin             = filehandle;
msousa@756:   current_tracking = GetNewTracking(filehandle);
msousa@756:   current_filename = strdup(filename);
msousa@756: 
msousa@756:   /* switch input buffer to new file... */
msousa@756:   yy_switch_to_buffer(yy_create_buffer(yyin, YY_BUF_SIZE));
msousa@756: }
msousa@756: 
msousa@756: 
msousa@756: 
/* Insert the code (in <source_code>) into the source code we are parsing.
 * This is done by creating an artificial (temporary) file with that new source
 * code, and then 'including' the file.
 *
 * source_code: NUL terminated text to be parsed next.
 *
 * The program exits with EXIT_FAILURE if the temporary file cannot be created
 * or if the text cannot be completely written to it.
 */
void include_string_(const char *source_code) {
  FILE *tmp_file = tmpfile();
  
  if(tmp_file == NULL) {
    perror("Error creating temp file.");
    exit(EXIT_FAILURE);
  }

  /* A short write would make the lexer silently parse truncated source code,
   * so both the write and the flush of the stdio buffer are checked.
   */
  const size_t length = strlen(source_code);
  if ((fwrite(source_code, 1, length, tmp_file) != length) || (fflush(tmp_file) != 0)) {
    perror("Error writing to temp file.");
    exit(EXIT_FAILURE);
  }
  rewind(tmp_file);

  /* now parse the tmp file, by asking flex to handle it as if it had been included with the (*#include ... *) pragma... */
  handle_include_file_(tmp_file, "");
  /* do NOT close tmp_file here. It must only be closed when we finish reading from it! */
}
msousa@756: 
msousa@756: 
msousa@756: 
msousa@756: /* Open an include file, and set the internal state variables of lexical analyser to process a new include file */
msousa@756: void include_file(const char *filename) {
msousa@756:   FILE *filehandle = NULL;
msousa@756:   
msousa@756:   for (int i = 0; (INCLUDE_DIRECTORIES[i] != NULL) && (filehandle == NULL); i++) {
msousa@756:     char *full_name;
msousa@756:     full_name = strdup3(INCLUDE_DIRECTORIES[i], "/", filename);
msousa@756:     if (full_name == NULL) {
msousa@756:       fprintf(stderr, "Out of memory!\n");
msousa@756:       exit( 1 );
msousa@756:     }
msousa@756:     filehandle = fopen(full_name, "r");
msousa@756:     free(full_name);
msousa@756:   }
msousa@756: 
msousa@756:   if (NULL == filehandle) {
msousa@756:     fprintf(stderr, "Error opening included file %s\n", filename);
msousa@756:     exit( 1 );
msousa@756:   }
msousa@756: 
msousa@756:   /* now process the new file... */
msousa@756:   handle_include_file_(filehandle, filename);
msousa@756: }
msousa@756: 
msousa@756: 
msousa@756: 
msousa@756: 
msousa@756: 
/* Return all the text in the current token back to the input stream, except
 * the first n chars.
 *
 * NOTE: it seems that flex will not correctly count the line numbers if
 *       newlines are returned to the input stream, as these newlines get
 *       counted a second time when flex processes them again.
 *       An earlier version of this function compensated for that by
 *       decrementing current_tracking->lineNumber once for every '\n' found
 *       in yytext from position n onwards. That compensation is currently
 *       disabled; the function does nothing besides calling yyless().
 */
void unput_text(unsigned int n) {
  /* hand everything after the first n chars back to the input stream... */
  yyless(n);
}
etisserant@0: 
etisserant@0: 
msousa@547: /* return all the text in the current token back to the input stream, 
msousa@547:  * but first return to the stream an additional character to mark the end of the token. 
msousa@547:  */
msousa@547: void unput_and_mark(const char c) {
msousa@547:   char *yycopy = strdup( yytext ); /* unput() destroys yytext, so we copy it first */
msousa@547:   unput(c);
msousa@547:   for (int i = yyleng-1; i >= 0; i--)
msousa@547:     unput(yycopy[i]);
msousa@547: 
msousa@547:   free(yycopy);
msousa@547: }
msousa@547: 
msousa@547: 
msousa@547: 
/* Called by flex when it reaches the end-of-file.
 *
 * Return value, as used by flex:
 *   0 -> another input file has been opened, continue scanning
 *   1 -> stop scanning
 *
 * This implementation never opens another file, so it always returns 1.
 */
int yywrap(void)
{
  const int stop_scanning = 1;

  /* We reached the end of the input file, and there is nothing else to read. */
  return stop_scanning;
}
etisserant@0: 
etisserant@0: 
etisserant@0: 
msousa@757: /*******************************/
msousa@757: /* Public Interface for Bison. */
msousa@757: /*******************************/
msousa@757: 
msousa@757: /* The following functions will be called from inside bison code! */
msousa@757: 
/* Public entry point: simply forwards <source_code> to include_string_() */
void include_string(const char *source_code) {
  include_string_(source_code);
}
msousa@757: 
msousa@757: 
msousa@757: /* Tell flex which file to parse. This function will not imediately start parsing the file.
msousa@757:  * To parse the file, you then need to call yyparse()
msousa@757:  *
mjsousa@761:  * Returns NULL on error opening the file (and a valid errno), or 0 on success.
mjsousa@761:  * Caller must close the file!
mjsousa@761:  */
mjsousa@761: FILE *parse_file(const char *filename) {
msousa@757:   FILE *filehandle = NULL;
msousa@757: 
mjsousa@761:   if((filehandle = fopen(filename, "r")) != NULL) {
mjsousa@761:     yyin = filehandle;
mjsousa@761:     current_filename = strdup(filename);
mjsousa@761:     current_tracking = GetNewTracking(yyin);
mjsousa@761:   }
mjsousa@761:   return filehandle;
msousa@757: }
msousa@757: 
msousa@757: 
msousa@757: 
msousa@757: 
msousa@757: 
msousa@757: 
etisserant@0: /*************************************/
etisserant@0: /* Include a main() function to test */
etisserant@0: /* the token parsing by flex....     */
etisserant@0: /*************************************/
etisserant@0: #ifdef TEST_MAIN
etisserant@0: 
etisserant@0: #include "../util/symtable.hh"
etisserant@0: 
etisserant@0: yystype yylval;
etisserant@0: YYLTYPE yylloc;
etisserant@0: 
etisserant@0: 
mario@15: 
mario@15: 
/* Stub for the TEST_MAIN build: ignores the identifier and always returns 0. */
int get_identifier_token(const char *identifier_str) {
  (void)identifier_str;  /* deliberately unused */
  return 0;
}
/* Stub for the TEST_MAIN build: ignores the direct variable and always returns 0. */
int get_direct_variable_token(const char *direct_variable_str) {
  (void)direct_variable_str;  /* deliberately unused */
  return 0;
}
etisserant@0: 
etisserant@0: 
etisserant@0: int main(int argc, char **argv) {
etisserant@0: 
etisserant@0:   FILE *in_file;
etisserant@0:   int res;
lbessard@136: 	
etisserant@0:   if (argc == 1) {
etisserant@0:     /* Work as an interactive (command line) parser... */
etisserant@0:     while((res=yylex()))
etisserant@0:       fprintf(stderr, "(line %d)token: %d\n", yylineno, res);
etisserant@0:   } else {
etisserant@0:     /* Work as non-interactive (file) parser... */
etisserant@0:     if((in_file = fopen(argv[1], "r")) == NULL) {
etisserant@0:       char *errmsg = strdup2("Error opening main file ", argv[1]);
etisserant@0:       perror(errmsg);
etisserant@0:       free(errmsg);
etisserant@0:       return -1;
etisserant@0:     }
etisserant@0: 
etisserant@0:     /* parse the file... */
etisserant@0:     yyin = in_file;
etisserant@0:     current_filename = argv[1];
etisserant@0:     while(1) {
etisserant@0:       res=yylex();
etisserant@0:       fprintf(stderr, "(line %d)token: %d (%s)\n", yylineno, res, yylval.ID);
etisserant@0:     }
etisserant@0:   }
lbessard@136: 	
lbessard@136: 	return 0;
etisserant@0: 
etisserant@0: }
etisserant@0: #endif