etisserant@0: /*
msousa@264: * matiec - a compiler for the programming languages defined in IEC 61131-3
msousa@264: *
msousa@264: * Copyright (C) 2003-2011 Mario de Sousa (msousa@fe.up.pt)
msousa@264: *
msousa@264: * This program is free software: you can redistribute it and/or modify
msousa@264: * it under the terms of the GNU General Public License as published by
mjsousa@866: * the Free Software Foundation, either version 3 of the License, or
msousa@264: * (at your option) any later version.
msousa@264: *
msousa@264: * This program is distributed in the hope that it will be useful,
msousa@264: * but WITHOUT ANY WARRANTY; without even the implied warranty of
msousa@264: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
msousa@264: * GNU General Public License for more details.
msousa@264: *
msousa@264: * You should have received a copy of the GNU General Public License
msousa@264: * along with this program. If not, see <http://www.gnu.org/licenses/>.
msousa@264: *
etisserant@0: *
etisserant@0: * This code is made available on the understanding that it will not be
etisserant@0: * used in safety-critical situations without a full and competent review.
etisserant@0: */
etisserant@0:
etisserant@0: /*
msousa@264: * An IEC 61131-3 compiler.
etisserant@0: *
etisserant@0: * Based on the
etisserant@0: * FINAL DRAFT - IEC 61131-3, 2nd Ed. (2001-12-10)
etisserant@0: *
etisserant@0: */
etisserant@0:
etisserant@0: /*
etisserant@0: * Stage 1
etisserant@0: * =======
etisserant@0: *
etisserant@0: * This file contains the lexical tokens definitions, from which
etisserant@0: * the flex utility will generate a lexical parser function.
etisserant@0: */
etisserant@0:
etisserant@0:
etisserant@0:
etisserant@0:
etisserant@0: /*****************************/
etisserant@0: /* Lexical Parser Options... */
etisserant@0: /*****************************/
etisserant@0:
etisserant@0: /* The lexical analyser will never work in interactive mode,
etisserant@0: * i.e., it will only process programs saved to files, and never
etisserant@0: * programs being written inter-actively by the user.
etisserant@0: * This option saves the resulting parser from calling the
etisserant@0: * isatty() function, that seems to be generating some compile
etisserant@0: * errors under some (older?) versions of flex.
etisserant@0: */
etisserant@0: %option never-interactive
etisserant@0:
etisserant@0: /* Have the lexical analyser use a 'char *yytext' instead of an
etisserant@0: * array of char 'char yytext[??]' to store the lexical token.
etisserant@0: */
etisserant@0: %pointer
etisserant@0:
etisserant@0:
etisserant@0: /* Have the lexical analyser ignore the case of letters.
etisserant@0: * This will occur for all the tokens and keywords, but
etisserant@0: * the resulting text handed up to the syntax parser
etisserant@0: * will not be changed, and keep the original case
etisserant@0: * of the letters in the input file.
etisserant@0: */
etisserant@0: %option case-insensitive
etisserant@0:
etisserant@0: /* Have the generated lexical analyser keep track of the
etisserant@0: * line number it is currently analysing.
etisserant@0: * This is used to pass up to the syntax parser
etisserant@0: * the number of the line on which the current
etisserant@0: * token was found. It will enable the syntax parser
etisserant@0: * to generate more informative error messages...
etisserant@0: */
etisserant@0: %option yylineno
etisserant@0:
etisserant@0: /* required for the use of the yy_pop_state() and
etisserant@0: * yy_push_state() functions
etisserant@0: */
etisserant@0: %option stack
etisserant@0:
etisserant@0: /* The '%option stack' also requests the inclusion of
etisserant@0: * the yy_top_state(), however this function is not
etisserant@0: * currently being used. This means that the compiler
etisserant@0: * is complaining about the existence of this function.
etisserant@0: * The following option removes the yy_top_state()
etisserant@0: * function from the resulting c code, so the compiler
etisserant@0: * no longer complains.
etisserant@0: */
etisserant@0: %option noyy_top_state
etisserant@0:
msousa@547: /* We will be using unput() in our flex code, so we cannot set the following option!... */
msousa@547: /*
msousa@267: %option nounput
msousa@547: */
msousa@267:
andrej@1050: /* The '%option debug' makes the generated scanner run in
andrej@1050: * debug mode.
andrej@1050: %option debug
andrej@1050: */
andrej@1050:
etisserant@0: /**************************************************/
etisserant@0: /* External Variable and Function declarations... */
etisserant@0: /**************************************************/
etisserant@0:
etisserant@0:
etisserant@0: %{
etisserant@0: /* Define TEST_MAIN to include a main() function.
etisserant@0: * Useful for testing the parser generated by flex.
etisserant@0: */
etisserant@0: /*
etisserant@0: #define TEST_MAIN
etisserant@0: */
etisserant@0: /* If the lexical parser is compiled by itself, we need to define the following
etisserant@0: * constant to some string. Under normal circumstances DEFAULT_LIBDIR is set
etisserant@0: * in the syntax parser header file...
etisserant@0: */
etisserant@0: #ifdef TEST_MAIN
etisserant@40: #define DEFAULT_LIBDIR "just_testing"
etisserant@0: #endif
etisserant@0:
etisserant@0:
etisserant@0:
etisserant@0: /* Required for strdup() */
etisserant@0: #include <string.h>
etisserant@0:
etisserant@0: /* Required only for the declaration of abstract syntax classes
etisserant@0: * (class symbol_c; class token_c; class list_c;)
etisserant@0: * These will not be used in flex, but the token type union defined
Edouard@822: * in iec_bison.hh contains pointers to these classes, so we must include
etisserant@0: * it here.
etisserant@0: */
etisserant@0: #include "../absyntax/absyntax.hh"
etisserant@0:
mario@15:
Edouard@822: /* iec_bison.hh is generated by bison.
etisserant@0: * Contains the definition of the token constants, and the
etisserant@0: * token value type YYSTYPE (in our case, a 'const char *')
etisserant@0: */
Edouard@822: #include "iec_bison.hh"
mario@15: #include "stage1_2_priv.hh"
mario@15:
etisserant@0:
etisserant@0: /* Variable defined by the bison parser,
etisserant@0: * where the value of the tokens will be stored
etisserant@0: */
etisserant@0: extern YYSTYPE yylval;
etisserant@0:
etisserant@0: /* The name of the file currently being parsed...
etisserant@0: * Note that flex accesses and updates this global variable
msousa@757: * appropriately whenever it comes across a {#include "<filename>"} directive...
msousa@757: */
msousa@757: const char *current_filename = NULL;
msousa@757:
mario@15:
etisserant@0:
etisserant@0: /* Variable defined by the bison parser.
etisserant@0: * It must be initialised with the location
etisserant@0: * of the token being parsed.
etisserant@0: * This is only needed if we want to keep
etisserant@0: * track of the locations, in order to give
etisserant@0: * more meaningful error messages!
etisserant@0: */
conti@415: /*
conti@415: *extern YYLTYPE yylloc;
conti@415: */
lbessard@136: /* Tell flex how to refill its input buffer: ask GetNextChar() for more
lbessard@136: * input and hand the number of characters obtained back in 'result'.
lbessard@136: * The count is first held in a signed int: 'result' is a variable owned
lbessard@136: * by flex and may have an unsigned type, in which case testing 'result'
lbessard@136: * itself against zero could never catch a negative return value.
lbessard@136: * Anything that is not a positive count is reported to flex as YY_NULL.
lbessard@136: */
lbessard@136: #define YY_INPUT(buf,result,max_size) {\
lbessard@136: int yy_input_count = GetNextChar(buf, max_size);\
lbessard@136: result = (yy_input_count > 0) ? yy_input_count : YY_NULL;\
lbessard@136: }
lbessard@136:
msousa@287:
etisserant@0: /* Hook that flex executes for every action.
etisserant@0: * It copies the position of the text just matched into yylloc, so
etisserant@0: * that the location of the token is passed back to the bison parser,
etisserant@0: * and then marks the current character as the start of the next token.
etisserant@0: */
lbessard@136: #define YY_USER_ACTION {\
msousa@287: yylloc.first_file = current_filename; \
msousa@287: yylloc.last_file = current_filename; \
msousa@287: yylloc.first_line = current_tracking->lineNumber; \
msousa@287: yylloc.last_line = current_tracking->lineNumber; \
msousa@287: yylloc.first_column = current_tracking->currentTokenStart; \
msousa@287: yylloc.last_column = current_tracking->currentChar - 1; \
msousa@287: yylloc.first_order = current_order; \
msousa@287: yylloc.last_order = current_order; \
msousa@287: current_order++; \
msousa@287: current_tracking->currentTokenStart = current_tracking->currentChar; \
etisserant@0: }
etisserant@0:
mjsousa@879:
mjsousa@879:
etisserant@0: /* This lexical parser only works on ASCII based systems.
etisserant@0: * Refuse to compile anywhere a handful of sample characters
etisserant@0: * do not have their ASCII codes.
etisserant@0: */
etisserant@0: #if !(('a' == 0x61) && ('A' == 0x41) && ('z' == 0x7A) && ('Z' == 0x5A) && \
etisserant@0: ('0' == 0x30) && ('9' == 0x39) && ('(' == 0x28) && ('[' == 0x5B))
etisserant@0: #error This lexical analyser is not portable to a non ASCII based system.
etisserant@0: #endif
etisserant@0:
etisserant@0:
etisserant@0: /* Function only called from within flex, but defined
etisserant@0: * in iec.y!
lbessard@3: * We declare it here...
etisserant@0: *
etisserant@0: * Search for a symbol in either of the two symbol tables
etisserant@0: * and return the token id of the first symbol found.
etisserant@0: * Searches first in the variables, and only if not found
etisserant@0: * does it continue searching in the library elements
etisserant@0: */
etisserant@0: //token_id_t get_identifier_token(const char *identifier_str);
etisserant@0: int get_identifier_token(const char *identifier_str);
etisserant@0: %}
etisserant@0:
etisserant@0:
etisserant@0: /***************************************************/
etisserant@0: /* Forward Declaration of functions defined later. */
etisserant@0: /***************************************************/
etisserant@0:
etisserant@0: %{
etisserant@0: /* return all the text in the current token back to the input stream. */
etisserant@0: void unput_text(unsigned int n);
msousa@547: /* return all the text in the current token back to the input stream,
msousa@547: * but first return to the stream an additional character to mark the end of the token.
msousa@547: */
msousa@547: void unput_and_mark(const char c);
msousa@756:
msousa@756: void include_file(const char *include_filename);
msousa@757:
mjsousa@1016: /* The body_state tries to find a ';' before a END_PROGRAM, END_FUNCTION or END_FUNCTION_BLOCK or END_ACTION
mjsousa@1016: * To do so, it must ignore comments and pragmas. This means that we cannot do this in a single lex rule.
mjsousa@1016: * However, we must store any text we consume in every rule, so we can push it back into the buffer
mjsousa@1016: * once we have decided if we are parsing ST or IL code. The following functions manage that buffer used by
mjsousa@1016: * the body_state.
mjsousa@1016: */
mjsousa@1016: void append_bodystate_buffer(const char *yytext);
mjsousa@1016: void unput_bodystate_buffer(void);
mjsousa@1016: int isempty_bodystate_buffer(void);
mjsousa@1016:
msousa@757: int GetNextChar(char *b, int maxBuffer);
etisserant@0: %}
etisserant@0:
etisserant@0:
etisserant@0:
etisserant@0: /****************************/
etisserant@0: /* Lexical Parser States... */
etisserant@0: /****************************/
etisserant@0:
etisserant@0: /* NOTE: Our parser can parse st or il code, intermixed
etisserant@0: * within the same file.
etisserant@0: * With IL we come across the issue of the EOL (end of line) token.
etisserant@0: * ST, and the declaration parts of IL do not use this token!
etisserant@0: * If the lexical analyser were to issue this token during ST
etisserant@0: * language parsing, or during the declaration of data types,
etisserant@0: * function headers, etc. in IL, the syntax parser would crash.
etisserant@0: *
etisserant@0: * We can solve this issue using one of three methods:
etisserant@0: * (1) Augment all the syntax that does not accept the EOL
etisserant@0: * token to simply ignore it. This makes the syntax
etisserant@0: * definition (in iec.y) very cluttered!
etisserant@0: * (2) Let the lexical parser figure out which language
etisserant@0: * it is parsing, and decide whether or not to issue
etisserant@0: * the EOL token. This requires the lexical parser
etisserant@0: * to have knowledge of the syntax!, making for a poor
etisserant@0: * overall organisation of the code. It would also make it
etisserant@0: * very difficult to understand the lexical parser as it
etisserant@0: * would use several states, and a state machine to transition
etisserant@0: * between the states. The state transitions would be
etisserant@0: * intermingled with the lexical parser definition!
etisserant@0: * (3) Use a mixture of (1) and (2). The lexical analyser
etisserant@0: * merely distinguishes between function headers and function
etisserant@0: * bodies, but no longer makes a distinction between il and
etisserant@0: * st language bodies. When parsing a body, it will return
etisserant@0: * the EOL token. In other states '\n' will be ignored as
etisserant@0: * whitespace.
etisserant@0: * The ST language syntax has been augmented in the syntax
etisserant@0: * parser configuration to ignore any EOL tokens that it may
etisserant@0: * come across!
etisserant@0: * This option has both drawbacks of option (1) and (2), but
etisserant@0: * much less intensely.
etisserant@0: * The syntax that gets cluttered is limited to the ST statements
etisserant@0: * (which is rather limited, compared to the function headers and
etisserant@0: * data type declarations, etc...), while the state machine in
etisserant@0: * the lexical parser becomes very simple. All state transitions
etisserant@0: * can be handled within the lexical parser by itself, and can be
etisserant@0: * easily identified. Thus knowledge of the syntax required by
etisserant@0: * the lexical parser is very limited!
etisserant@0: *
etisserant@0: * Amazingly enough, I (Mario) got to implement option (3)
etisserant@0: * at first, requiring two basic states, decl and body.
etisserant@0: * The lexical parser will enter the body state when
etisserant@0: * it is parsing the body of a function/program/function block. The
etisserant@0: * state transition is done when we find a VAR_END that is not followed
etisserant@0: * by a VAR! This is the syntax knowledge that gets included in the
etisserant@0: * lexical analyser with this option!
etisserant@0: * Unfortunately, getting the st syntax parser to ignore EOL anywhere
etisserant@0: * where they might appear leads to conflicts. This is due to the fact
etisserant@0: * that the syntax parser uses the single look-ahead token to remove
etisserant@0: * possible conflicts. When we insert a possible EOL, the single
etisserant@0: * look ahead token becomes the EOL, which means the potential conflicts
etisserant@0: * could no longer be resolved.
etisserant@0: * Removing these conflicts would make the st syntax parser very convoluted,
etisserant@0: * and adding the extraneous EOL would make it very cluttered.
etisserant@0: * This option was therefore dropped in favour of another!
etisserant@0: *
etisserant@0: * I ended up implementing (2). Unfortunately the lexical analyser can
etisserant@0: * not easily distinguish between il and st code, since function
etisserant@0: * calls in il are very similar to function block calls in st.
etisserant@0: * We therefore use an extra 'body' state. When the lexical parser
etisserant@0: * finds that last END_VAR, it enters the body state. This state
etisserant@0: * must figure out what language is being parsed from the first few
mario@68: * tokens, and switch to the correct state (st, il or sfc) according to the
etisserant@0: * language. This means that we insert quite a bit of knowledge of the
etisserant@0: * syntax of the languages into the lexical parser. This is ugly, but it
etisserant@0: * works, and at least it is possible to keep all the state changes together
etisserant@0: * to make it easier to remove them later on if need be.
mario@68: * Once the language being parsed has been identified,
mario@68: * the body state returns any matched text back to the buffer with unput(),
mario@68: * to be later matched correctly by the appropriate language parser (st, il or sfc).
mario@68: *
mario@68: * Additionally, in sfc state it may further recursively enter the body state
mario@68: * once again. This is because an sfc body may contain ACTIONS, which are then
mario@68: * written in one of the three languages (ST, IL or SFC), so once again we need
mario@68: * to figure out which language the ACTION in the SFC was written in. We already
mario@68: * have all that done in the body state, so we recursively transition to the body
mario@68: * state once again.
mario@68: * Note that in this case, when coming out of the st/il state (whichever language
mario@68: * the action was written in) the sfc state will become active again. This is done by
mario@68: * pushing and popping the previously active state!
mario@68: *
mario@68: * The sfc_qualifier_state is required because when parsing actions within an
mario@68: * sfc, we will be expecting action qualifiers (N, P, R, S, DS, SD, ...). In order
mario@68: * for bison to work correctly, these qualifiers must be returned as tokens. However,
mario@68: * these tokens are not reserved keywords, which means it should be possible to
mario@68: * define variables/functions/FBs with any of these names (including
mario@68: * S and R which are special because they are also IL operators). So, when we are not
mario@68: * expecting any action qualifiers, flex does not return these tokens, and is free
mario@68: * to interpret them as previously defined variables/functions/... as the case may be.
mario@68: *
msousa@547: * The time_literal_state is required because TIME# literals are decomposed into
msousa@547: * portions, and we want to send these portions one by one to bison. Each portion will
msousa@547: * represent the value in days/hours/minutes/seconds/ms.
msousa@547: * Unfortunately, some of these portions may also be lexically analysed as an identifier. So,
msousa@547: * we need to disable lexical identification of identifiers while parsing TIME# literals!
msousa@547: * e.g.: TIME#55d_4h_56m
msousa@547: * We would like to return to bison the tokens 'TIME' '#' '55d' '_' '4h' '_' '56m'
msousa@547: * Unfortunately, flex will join '_' and '4h' to create a legal {identifier} '_4h',
msousa@547: * and return that identifier instead! So, we added this state!
msousa@547: *
mjsousa@952: * The ignore_pou_state state is only used when bison says it is doing the pre-parsing.
mjsousa@952: * During pre-parsing, the main state machine will only transition between
mjsousa@952: * INITIAL and ignore_pou_state, and from here back to INITIAL. All other
mjsousa@952: * transitions are inhibited. This inhibition is actually just enforced by making
mjsousa@952: * sure that the INITIAL ---> ignore_pou_state transition is tested before all other
mjsousa@952: * transitions coming out of INITIAL state. All other transitions are unaffected, as they
mjsousa@952: * never get a chance to be evaluated when bison is doing pre-parsing.
mjsousa@952: * Pre-parsing is a first quick scan through the whole input source code simply
mjsousa@952: * to determine the list of POUs and datatypes that will be defined in that
mjsousa@952: * code. Basically, the objective is to fill up the previously_declared_xxxxx
mjsousa@952: * maps, without processing the code itself. Once these maps have been filled up,
mjsousa@952: * bison will throw away the AST (abstract syntax tree) created up to that point,
mjsousa@952: * and scan through the same source code again, but this time creating a correct AST.
mjsousa@952: * This pre-scan allows the source code to reference POUs and datatypes that are
mjsousa@952: * only declared after they are used!
mjsousa@868: *
mjsousa@952: *
mjsousa@952: * Here is a main state machine...
mjsousa@952: *                                                                         --+
mjsousa@952: *                                                                           | these states are
mjsousa@952: *              +------------> get_pou_name_state ----> ignore_pou_state     | only active
mjsousa@952: *              |                                            |               | when bison is
mjsousa@952: *              |  ------------------------------------------+               | doing the
mjsousa@952: *              |  |                                                         | pre-parsing!!
mjsousa@952: *              |  v                                                       --+
mjsousa@868: *       +---> INITIAL <-------> config
mjsousa@868: *       |        \
mjsousa@868: *       |        V
mjsousa@868: *       |     header_state
mjsousa@868: *       |        |
mjsousa@868: *       |        V
mjsousa@868: *     vardecl_list_state <------> var_decl
mjsousa@868: *       ^        |
mjsousa@868: *       |        | [using push()]
mjsousa@868: *       |        |
mjsousa@868: *       |        V
mjsousa@868: *       |       body,
mjsousa@868: *       |        |
mjsousa@868: *       |        |
mjsousa@868: *       |   -------------------
mjsousa@868: *       |   |       |         |
mjsousa@868: *       |   v       v         v
mjsousa@868: *       |  st      il        sfc
mjsousa@868: *       |   |       |         | [using pop() when leaving st/il/sfc => goes to vardecl_list_state]
mjsousa@868: *       |   |       |         |
mjsousa@868: *       -----------------------
mjsousa@868: *
mjsousa@868: * NOTE:- When inside sfc, and an action or transition in ST/IL is found, then
mjsousa@868: * we also push() to the body state. This means that sometimes, when pop()ing
mjsousa@868: * from st and il, the state machine may return to the sfc state!
mjsousa@868: * - The transitions from sfc to body will be decided by bison, which will
mjsousa@868: * tell flex to do the transition by calling cmd_goto_body_state().
mjsousa@868: *
mjsousa@866: *
etisserant@0: * Possible state changes are:
mjsousa@952: * INITIAL -> goto(ignore_pou_state)
mjsousa@952: * (This transition state is only used when bison says it is doing the pre-parsing.)
mjsousa@952: * (This transition takes precedence over all other transitions!)
mjsousa@952: * (when a FUNCTION, FUNCTION_BLOCK, PROGRAM or CONFIGURATION is found)
mjsousa@952: *
mario@68: * INITIAL -> goto(config_state)
mario@68: * (when a CONFIGURATION is found)
mjsousa@866: *
mjsousa@866: * INITIAL -> goto(header_state)
mjsousa@866: * (when a FUNCTION, FUNCTION_BLOCK, or PROGRAM is found)
mjsousa@952: *
mjsousa@866: * header_state -> goto(vardecl_list_state)
mjsousa@866: * (When the first VAR token is found, i.e. at beginning of first VAR .. END_VAR declaration)
mjsousa@866: *
mjsousa@866: * vardecl_list_state -> push current state (vardecl_list_state), and goto(vardecl_state)
mjsousa@866: * (when a VAR token is found)
mjsousa@866: * vardecl_state -> pop() to (vardecl_list_state)
mjsousa@866: * (when a END_VAR token is found)
mjsousa@866: *
mjsousa@868: * vardecl_list_state -> push current state (vardecl_list_state), and goto(body_state)
mjsousa@866: * (when the last END_VAR is found!)
mjsousa@866: *
mjsousa@868: * body_state -> goto(sfc_state)
mario@68: * (when it figures out it is parsing sfc language)
mjsousa@868: * body_state -> goto(st_state)
mario@68: * (when it figures out it is parsing st language)
mjsousa@868: * body_state -> goto(il_state)
mario@68: * (when it figures out it is parsing il language)
mjsousa@868: * st_state -> pop() to vardecl_list_state
mario@68: * (when a END_FUNCTION, END_FUNCTION_BLOCK, END_PROGRAM,
mario@68: * END_ACTION or END_TRANSITION is found)
mjsousa@868: * il_state -> pop() to vardecl_list_state
mario@68: * (when a END_FUNCTION, END_FUNCTION_BLOCK, END_PROGRAM,
mario@68: * END_ACTION or END_TRANSITION is found)
mjsousa@868: * sfc_state -> pop() to vardecl_list_state
mario@68: * (when a END_FUNCTION, END_FUNCTION_BLOCK, or END_PROGRAM is found)
mjsousa@866: *
mjsousa@952: * ignore_pou_state -> goto(INITIAL)
mjsousa@952: * (when a END_FUNCTION, END_FUNCTION_BLOCK, END_PROGRAM or END_CONFIGURATION is found)
mjsousa@868: * vardecl_list_state -> goto(INITIAL)
mjsousa@952: * (when a END_FUNCTION, END_FUNCTION_BLOCK, or END_PROGRAM is found)
mjsousa@952: * config_state -> goto(INITIAL)
mjsousa@952: * (when a END_CONFIGURATION is found)
mjsousa@866: *
mjsousa@866: *
mjsousa@866: * sfc_state -> push current state(sfc_state); goto(body_state)
mario@68: * (when parsing an action. This transition is requested by bison)
mjsousa@866: * sfc_state -> push current state(sfc_state); goto(sfc_qualifier_state)
mario@68: * (when expecting an action qualifier. This transition is requested by bison)
mjsousa@866: * sfc_qualifier_state -> pop() to sfc_state
mario@68: * (when no longer expecting an action qualifier. This transition is requested by bison)
mjsousa@866: *
mario@74: * config_state -> push(config_state); goto(task_init_state)
mario@74: * (when parsing a task initialisation. This transition is requested by bison)
mario@74: * task_init_state -> pop()
mario@74: * (when no longer parsing task initialisation parameters. This transition is requested by bison)
mario@74: *
mjsousa@866: *
mjsousa@866: * There is another secondary state machine for parsing comments, another for file_includes,
mjsousa@866: * and yet another for time literals.
mario@74: */
mario@68:
mario@68:
mjsousa@952: /* Bison is in the pre-parsing stage, and we are parsing a POU. Ignore everything up to the end of the POU! */
mjsousa@952: %x ignore_pou_state
mjsousa@952: %x get_pou_name_state
mjsousa@952:
etisserant@0: /* we are parsing a configuration. */
lbessard@3: %s config_state
etisserant@0:
mario@74: /* Inside a configuration, we are parsing a task initialisation parameters */
mario@74: /* This means that PRIORITY, SINGLE and INTERVAL must be handled as
mario@74: * tokens, and not as possible identifiers. Note that the above words
mario@74: * are not keywords.
mario@74: */
mario@74: %s task_init_state
mario@74:
mjsousa@866: /* we are looking for the first VAR inside a function's, program's or function block's declaration */
mjsousa@868: /* This is not exclusive (%x) as we must be able to parse the identifier and data types of a function/FB */
mjsousa@866: %s header_state
mjsousa@866:
mjsousa@866: /* we are parsing a function, program or function block sequence of VAR..END_VAR declarations */
mjsousa@866: %x vardecl_list_state
mjsousa@866: /* a substate of the vardecl_list_state: we are inside a specific VAR .. END_VAR */
mjsousa@866: %s vardecl_state
etisserant@0:
mjsousa@868: /* we will be parsing a function body/action/transition. Whether il/st/sfc remains to be determined */
mario@68: %x body_state
etisserant@0:
etisserant@0: /* we are parsing il code -> flex must return the EOL tokens! */
lbessard@3: %s il_state
etisserant@0:
etisserant@0: /* we are parsing st code -> flex must not return the EOL tokens! */
lbessard@3: %s st_state
etisserant@0:
mario@68: /* we are parsing sfc code -> flex must not return the EOL tokens! */
lbessard@3: %s sfc_state
etisserant@0:
mario@68: /* we are parsing sfc code, and expecting an action qualifier. */
mario@68: %s sfc_qualifier_state
etisserant@0:
mario@86: /* we are parsing sfc code, and expecting the priority token. */
mario@86: %s sfc_priority_state
etisserant@0:
msousa@547: /* we are parsing a TIME# literal. We must not return any {identifier} tokens. */
msousa@547: %x time_literal_state
mario@75:
mjsousa@866: /* we are parsing a comment. */
mjsousa@866: %x comment_state
mjsousa@866:
mario@75:
etisserant@0: /*******************/
etisserant@0: /* File #include's */
etisserant@0: /*******************/
etisserant@0:
etisserant@0: /* We extend the IEC 61131-3 standard syntax to allow inclusion
etisserant@0: * of other files, using the IEC 61131-3 pragma directive...
etisserant@0: * The accepted syntax is:
etisserant@0: * {#include "<filename>"}
etisserant@0: */
etisserant@0:
etisserant@0: /* the "include" states are used for picking up the name of an include file */
etisserant@0: %x include_beg
etisserant@0: %x include_filename
etisserant@0: %x include_end
etisserant@0:
etisserant@0:
etisserant@0: file_include_pragma_filename [^\"]*
mjsousa@866: file_include_pragma_beg "{#include"{st_whitespace}\"
mjsousa@866: file_include_pragma_end \"{st_whitespace}"}"
etisserant@0: file_include_pragma {file_include_pragma_beg}{file_include_pragma_filename}{file_include_pragma_end}
etisserant@0:
etisserant@0:
etisserant@0: %{
mjsousa@879:
mjsousa@879: /* A counter to track the order by which each token is processed.
mjsousa@879: * NOTE: This counter is not exactly linear (i.e., it does not get incremented by 1 for each token).
mjsousa@879: * i.e., it may get incremented by more than one between two consecutive tokens.
mjsousa@879: * This is due to the fact that the counter gets incremented every 'user action' in flex,
mjsousa@879: * however not every user action will result in a token being passed to bison.
mjsousa@879: * Nevertheless this is still OK, as we are only interested in the relative
mjsousa@879: * ordering of tokens...
mjsousa@879: */
mjsousa@879: static long int current_order = 0;
mjsousa@879:
etisserant@0: typedef struct { /* position bookkeeping for one input stream; YY_USER_ACTION reads it through current_tracking */
msousa@757: int eof; /* NOTE(review): presumably non-zero once in_file is exhausted -- it is set outside this section, confirm */
msousa@757: int lineNumber; /* copied into yylloc.first_line and yylloc.last_line */
msousa@757: int currentChar; /* (currentChar - 1) is reported as yylloc.last_column */
msousa@757: int lineLength; /* NOTE(review): presumably the length of the text held in buffer -- confirm */
msousa@757: int currentTokenStart; /* reported as yylloc.first_column; reset to currentChar by every YY_USER_ACTION */
msousa@757: char *buffer; /* NOTE(review): presumably the text of the line being read -- confirm */
msousa@757: FILE *in_file; /* the stream this record belongs to */
msousa@757: } tracking_t;
msousa@757:
mjsousa@879: /* A forward declaration of a function defined at the end of this file. */
mjsousa@879: void FreeTracking(tracking_t *tracking);
mjsousa@879:
mjsousa@879:
mjsousa@879: #define MAX_INCLUDE_DEPTH 16 /* number of entries in include_stack[] */
mjsousa@879:
msousa@757: typedef struct { /* one entry of the include stack */
etisserant@0: YY_BUFFER_STATE buffer_state; /* flex input buffer handle kept with this entry */
msousa@757: tracking_t *env; /* position tracking record kept with this entry */
etisserant@0: const char *filename; /* file name kept with this entry */
etisserant@0: } include_stack_t;
etisserant@0:
msousa@757: tracking_t *current_tracking = NULL; /* the tracking record read and updated by YY_USER_ACTION */
etisserant@0: include_stack_t include_stack[MAX_INCLUDE_DEPTH];
etisserant@0: int include_stack_ptr = 0; /* NOTE(review): presumably the index of the next free include_stack[] slot -- confirm */
etisserant@0:
etisserant@0: const char *INCLUDE_DIRECTORIES[] = { /* NOTE(review): presumably the directories include_file() searches, in this order -- confirm */
etisserant@40: DEFAULT_LIBDIR,
etisserant@40: ".",
etisserant@40: "/lib",
etisserant@40: "/usr/lib",
etisserant@40: "/usr/lib/iec",
etisserant@0: NULL /* must end with NULL!! */
etisserant@0: };
etisserant@0: %}
etisserant@0:
etisserant@0:
etisserant@0:
etisserant@0: /*****************************/
etisserant@0: /* Preliminary constructs... */
etisserant@0: /*****************************/
etisserant@0:
mjsousa@866: /* PRAGMAS */
mjsousa@866: /* ======= */
msousa@267: /* In order to allow the declaration of POU prototypes (Function, FB, Program, ...),
msousa@267: * especially the prototypes of Functions and FBs defined in the standard
msousa@267: * (i.e. standard functions and FBs), we extend the IEC 61131-3 standard syntax
msousa@267: * with two pragmas to indicate that the code is to be parsed (going through the
msousa@267: * lexical, syntactical, and semantic analysers), but no code is to be generated.
msousa@267: *
msousa@267: * The accepted syntax is:
msousa@267: * {disable code generation}
msousa@267: * ... prototypes ...
msousa@267: * {enable code generation}
msousa@267: *
msousa@267: * When parsing these prototypes the abstract syntax tree will be populated as usual,
msousa@267: * allowing the semantic analyser to correctly analyse the semantics of calls to these
msousa@267: * functions/FBs. However, stage4 will simply ignore all IEC61131-3 code
msousa@267: * between the above two pragmas.
msousa@267: */
msousa@267:
msousa@267: disable_code_generation_pragma "{disable code generation}"
msousa@267: enable_code_generation_pragma "{enable code generation}"
msousa@267:
msousa@267:
msousa@267: /* Any other pragma... */
mjsousa@869: pragma ("{"[^}]*"}")|("{{"([^}]|"}"[^}])*"}}")
mjsousa@868:
mjsousa@868:
mjsousa@866:
mjsousa@866: /* COMMENTS */
mjsousa@866: /* ======== */
mjsousa@866:
mjsousa@866: /* In order to allow nested comments, comments are handled by a specific comment_state state */
mjsousa@866: /* Whenever a "(*" is found, we push the current state onto the stack, and enter a new instance of the comment_state state.
mjsousa@866: * Whenever a "*)" is found, we pop a state off the stack
mjsousa@866: */
mjsousa@866:
mjsousa@866: /* comments... */
mjsousa@866: comment_beg "(*"
mjsousa@866: comment_end "*)"
mjsousa@866:
mjsousa@866: /* However, bison has a shift/reduce conflict when parsing formal function/FB
mjsousa@866: * invocations with the 'NOT <variable_name> =>' syntax (which needs two look ahead
mjsousa@866: * tokens to be parsed correctly - and bison being LALR(1) only supports one).
mjsousa@866: * The current work around requires flex to completely parse the '<variable_name> =>'
mjsousa@866: * sequence. This sequence includes whitespace and/or comments between the
mjsousa@866: * <variable_name> and the "=>" token.
mjsousa@866: *
mjsousa@866: * This flex rule (sendto_identifier_token) uses the whitespace/comment as trailing context,
mjsousa@866: * which means we can not use the comment_state method of specifying/finding and ignoring
mjsousa@866: * comments.
mjsousa@866: *
mjsousa@866: * For this reason only, we must also define what a complete comment looks like, so
mjsousa@866: * it may be used in this rule. Since the rule uses the whitespace_or_comment
mjsousa@866: * construct as trailing context, this definition of comment must not use any
mjsousa@866: * trailing context either.
mjsousa@866: *
mjsousa@866: * Additionally, it is not possible to define nested comments in flex without the use of
mjsousa@866: * states, so for this particular location, we do NOT support nested comments.
mjsousa@866: */
etisserant@0: /* NOTE: this seemingly unnecessarily complex definition is required
etisserant@0: * to be able to eat up comments such as:
etisserant@0: * '(* Testing... ! ***** ******)'
etisserant@0: * without using the trailing context command in flex (/{context})
etisserant@0: * since {comment} itself will later be used with
etisserant@0: * trailing context ({comment}/{context})
etisserant@0: */
etisserant@0: not_asterisk [^*]
etisserant@0: not_close_parenthesis_nor_asterisk [^*)]
etisserant@0: asterisk "*"
mjsousa@866: comment_text ({not_asterisk})|(({asterisk}+){not_close_parenthesis_nor_asterisk})
etisserant@0: comment "(*"({comment_text}*)({asterisk}+)")"
etisserant@0:
etisserant@0:
mjsousa@866:
mjsousa@866: /* 3.1 Whitespace */
mjsousa@866: /* ============== */
etisserant@0: /*
mjsousa@866: * Whitespace is clearly defined (see IEC 61131-3 v2, section 2.1.4)
mjsousa@866: *
mjsousa@866: * Whitespace definition includes the newline character.
mjsousa@866: *
mjsousa@866: * However, the standard is inconsistent in that in IL the newline character
mjsousa@866: * is considered a token (EOL - end of line).
mjsousa@866: * In our implementation we therefore have two definitions of whitespace
mjsousa@866: * - one for ST, that includes the newline character
mjsousa@866: * - one for IL without the newline character.
mjsousa@866: * Additionally, when parsing IL, the newline character is treated as the EOL token.
mjsousa@866: * This requires the use of a state machine in the lexical parser that needs at least
mjsousa@866: * some knowledge of the syntax itself.
mjsousa@866: *
mjsousa@866: * NOTE: Our definition of whitespace will only work in ASCII!
mjsousa@866: *
etisserant@0: * NOTE: we cannot use
etisserant@0: * st_whitespace [:space:]*
etisserant@0: * since we use {st_whitespace} as trailing context. In our case
etisserant@0: * this would not constitute "dangerous trailing context", but the
etisserant@0: * lexical generator (i.e. flex) does not know this (since it does
etisserant@0: * not know which characters belong to the set [:space:]), and will
etisserant@0: * generate a "dangerous trailing context" warning!
etisserant@0: * We use this alternative just to stop the flex utility from
etisserant@0: * generating the invalid (in this case) warning...
etisserant@0: */
etisserant@0:
mjsousa@866: st_whitespace [ \f\n\r\t\v]*
mjsousa@866: il_whitespace [ \f\r\t\v]*
mjsousa@866:
mjsousa@866: st_whitespace_or_pragma_or_commentX ({st_whitespace})|({pragma})|({comment})
mjsousa@866: il_whitespace_or_pragma_or_commentX ({il_whitespace})|({pragma})|({comment})
mjsousa@866:
mjsousa@866: st_whitespace_or_pragma_or_comment {st_whitespace_or_pragma_or_commentX}*
mjsousa@866: il_whitespace_or_pragma_or_comment {il_whitespace_or_pragma_or_commentX}*
mjsousa@866:
mjsousa@866:
mjsousa@866:
mjsousa@866: qualified_identifier {identifier}(\.{identifier})+
etisserant@0:
etisserant@0:
etisserant@0:
etisserant@0: /*****************************************/
etisserant@0: /* B.1.1 Letters, digits and identifiers */
etisserant@0: /*****************************************/
etisserant@0: /* NOTE: The following definitions only work if the host computer
etisserant@0: * is using the ASCII mapping. E.g., with EBCDIC [A-Z]
etisserant@0: * contains non-alphabetic characters!
etisserant@0: * The correct way of doing it would be to use
etisserant@0: * the [:upper:] etc... definitions.
etisserant@0: *
etisserant@0: * Unfortunately, further on we need all printable
etisserant@0: * characters (i.e. [:print:]), but excluding '$'.
etisserant@0: * Flex does not allow sets to be composed by excluding
etisserant@0: * elements. Sets may only be constructed by adding new
etisserant@0: * elements, which means that we have to revert to
etisserant@0: * [\x20\x21\x23\x25\x26\x28-\x7E] for the definition
etisserant@0: * of the printable characters with the required exceptions.
etisserant@0: * The above also implies the use of ASCII, but now we have
etisserant@0: * no way to work around it!
etisserant@0: *
etisserant@0: * The conclusion is that our parser is limited to ASCII
etisserant@0: * based host computers!!
etisserant@0: */
etisserant@0: letter [A-Za-z]
etisserant@0: digit [0-9]
etisserant@0: octal_digit [0-7]
etisserant@0: hex_digit {digit}|[A-F]
etisserant@0: identifier ({letter}|(_({letter}|{digit})))((_?({letter}|{digit}))*)
etisserant@0:
etisserant@0: /*******************/
etisserant@0: /* B.1.2 Constants */
etisserant@0: /*******************/
etisserant@0:
etisserant@0: /******************************/
etisserant@0: /* B.1.2.1 Numeric literals */
etisserant@0: /******************************/
etisserant@0: integer {digit}((_?{digit})*)
msousa@547:
msousa@547: /* Some helper symbols for parsing TIME literals... */
msousa@547: integer_0_59 (0(_?))*([0-5](_?))?{digit}
msousa@547: integer_0_19 (0(_?))*([0-1](_?))?{digit}
msousa@547: integer_20_23 (0(_?))*2(_?)[0-3]
msousa@547: integer_0_23 {integer_0_19}|{integer_20_23}
msousa@547: integer_0_999 {digit}((_?{digit})?)((_?{digit})?)
msousa@547:
msousa@547:
etisserant@0: binary_integer 2#{bit}((_?{bit})*)
etisserant@0: bit [0-1]
etisserant@0: octal_integer 8#{octal_digit}((_?{octal_digit})*)
etisserant@0: hex_integer 16#{hex_digit}((_?{hex_digit})*)
etisserant@0: exponent [Ee]([+-]?){integer}
etisserant@0: /* The correct definition for real would be:
etisserant@0: * real {integer}\.{integer}({exponent}?)
etisserant@0: *
etisserant@0: * Unfortunately, the spec also defines fixed_point (B 1.2.3.1) as:
etisserant@0: * fixed_point {integer}\.{integer}
etisserant@0: *
etisserant@0: * This means that {integer}\.{integer} could be interpreted
etisserant@0: * as either a fixed_point or a real.
etisserant@0: * I have opted to interpret {integer}\.{integer} as a fixed_point.
etisserant@0: * In order to do this, the definition of real has been changed to:
etisserant@0: * real {integer}\.{integer}{exponent}
etisserant@0: *
etisserant@0: * This means that the syntax parser now needs to define a real to be
etisserant@0: * either a real_token or a fixed_point_token!
etisserant@0: */
etisserant@0: real {integer}\.{integer}{exponent}
etisserant@0:
etisserant@0:
etisserant@0: /*******************************/
etisserant@0: /* B.1.2.2 Character Strings */
etisserant@0: /*******************************/
etisserant@0: /*
etisserant@0: common_character_representation :=
etisserant@0: <any printable characters except '$', '"' or "'">
etisserant@0: |'$$'
etisserant@0: |'$L'|'$N'|'$P'|'$R'|'$T'
etisserant@0: |'$l'|'$n'|'$p'|'$r'|'$t'
etisserant@0:
etisserant@0: NOTE: $ = 0x24
etisserant@0: " = 0x22
etisserant@0: ' = 0x27
etisserant@0:
etisserant@0: printable chars in ASCII: 0x20-0x7E
etisserant@0: */
etisserant@0:
etisserant@0: esc_char_u $L|$N|$P|$R|$T
etisserant@0: esc_char_l $l|$n|$p|$r|$t
etisserant@0: esc_char $$|{esc_char_u}|{esc_char_l}
etisserant@0: double_byte_char (${hex_digit}{hex_digit}{hex_digit}{hex_digit})
etisserant@0: single_byte_char (${hex_digit}{hex_digit})
etisserant@0:
etisserant@0: /* WARNING:
etisserant@0: * This definition is only valid in ASCII...
etisserant@0: *
etisserant@0: * Flex includes the function print_char() that defines
etisserant@0: * all printable characters portably (i.e. whatever character
etisserant@0: * encoding is currently being used , ASCII, EBCDIC, etc...)
etisserant@0: * Unfortunately, we cannot generate the definition of
etisserant@0: * common_character_representation portably, since flex
etisserant@0: * does not allow definition of sets by subtracting
etisserant@0: * elements in one set from another set.
etisserant@0: * This means we must build up the definition of
etisserant@0: * common_character_representation using only set addition,
etisserant@0: * which leaves us with the only choice of defining the
etisserant@0: * characters non-portably...
etisserant@0: */
etisserant@0: common_character_representation [\x20\x21\x23\x25\x26\x28-\x7E]|{esc_char}
etisserant@0: double_byte_character_representation $\"|'|{double_byte_char}|{common_character_representation}
etisserant@0: single_byte_character_representation $'|\"|{single_byte_char}|{common_character_representation}
etisserant@0:
etisserant@0:
etisserant@0: double_byte_character_string \"({double_byte_character_representation}*)\"
etisserant@0: single_byte_character_string '({single_byte_character_representation}*)'
etisserant@0:
etisserant@0:
etisserant@0: /************************/
etisserant@0: /* B 1.2.3.1 - Duration */
etisserant@0: /************************/
etisserant@0: fixed_point {integer}\.{integer}
etisserant@0:
msousa@547:
msousa@547: /* NOTE: The IEC 61131-3 v2 standard has an incorrect formal syntax definition of duration,
msousa@547: * as its definition does not match the standard's text.
msousa@547: * IEC 61131-3 v3 (committee draft) seems to have this fixed, so we use that
msousa@547: * definition instead!
msousa@547: *
msousa@547: * duration::= ('T' | 'TIME') '#' ['+'|'-'] interval
msousa@547: * interval::= days | hours | minutes | seconds | milliseconds
msousa@547: * fixed_point ::= integer [ '.' integer]
msousa@547: * days ::= fixed_point 'd' | integer 'd' ['_'] [ hours ]
msousa@547: * hours ::= fixed_point 'h' | integer 'h' ['_'] [ minutes ]
msousa@547: * minutes ::= fixed_point 'm' | integer 'm' ['_'] [ seconds ]
msousa@547: * seconds ::= fixed_point 's' | integer 's' ['_'] [ milliseconds ]
msousa@547: * milliseconds ::= fixed_point 'ms'
msousa@547: *
msousa@547: *
msousa@547: * The original IEC 61131-3 v2 definition is:
msousa@547: * duration ::= ('T' | 'TIME') '#' ['-'] interval
msousa@547: * interval ::= days | hours | minutes | seconds | milliseconds
msousa@547: * fixed_point ::= integer [ '.' integer]
msousa@547: * days ::= fixed_point 'd' | integer 'd' ['_'] hours
msousa@547: * hours ::= fixed_point 'h' | integer 'h' ['_'] minutes
msousa@547: * minutes ::= fixed_point 'm' | integer 'm' ['_'] seconds
msousa@547: * seconds ::= fixed_point 's' | integer 's' ['_'] milliseconds
msousa@547: * milliseconds ::= fixed_point 'ms'
msousa@547: */
msousa@547:
msousa@547: interval_ms_X ({integer_0_999}(\.{integer})?)ms
msousa@686: interval_s_X {integer_0_59}s(_?{interval_ms_X})?|({integer_0_59}(\.{integer})?s)
msousa@686: interval_m_X {integer_0_59}m(_?{interval_s_X})?|({integer_0_59}(\.{integer})?m)
msousa@686: interval_h_X {integer_0_23}h(_?{interval_m_X})?|({integer_0_23}(\.{integer})?h)
msousa@547:
msousa@547: interval_ms {integer}ms|({fixed_point}ms)
msousa@547: interval_s {integer}s(_?{interval_ms_X})?|({fixed_point}s)
msousa@547: interval_m {integer}m(_?{interval_s_X})?|({fixed_point}m)
msousa@547: interval_h {integer}h(_?{interval_m_X})?|({fixed_point}h)
msousa@547: interval_d {integer}d(_?{interval_h_X})?|({fixed_point}d)
msousa@547:
msousa@547: interval {interval_ms}|{interval_s}|{interval_m}|{interval_h}|{interval_d}
msousa@547:
msousa@686:
msousa@547: /* to help provide nice error messages, we also parse an incorrect but plausible interval... */
msousa@547: /* NOTE that this erroneous interval will be parsed outside the time_literal_state, so must not
msousa@547: * be able to parse any other legal lexical construct (besides a legal interval, but that
msousa@547: * is OK as this rule will appear _after_ the rule to parse legal intervals!).
msousa@547: */
msousa@547: fixed_point_or_integer {fixed_point}|{integer}
msousa@547: erroneous_interval ({fixed_point_or_integer}d_?)?({fixed_point_or_integer}h_?)?({fixed_point_or_integer}m_?)?({fixed_point_or_integer}s_?)?({fixed_point_or_integer}ms)?
etisserant@0:
etisserant@0: /********************************************/
etisserant@0: /* B.1.4.1 Directly Represented Variables */
etisserant@0: /********************************************/
etisserant@0: /* The correct definition, if the standard were to be followed: '%', a location prefix, an optional size prefix, an integer, then zero or more '.'-separated integers. NOTE: the '.' must be escaped (\.), since an unescaped '.' matches any character except newline (which would make e.g. '%MW1+2' a single token). */
mario@11:
mario@11: location_prefix [IQM]
mario@11: size_prefix [XBWDL]
mario@11: direct_variable_standard %{location_prefix}({size_prefix}?){integer}((\.{integer})*)
mario@11:
etisserant@0:
etisserant@0: /* For the MatPLC, we will accept %<identifier>
etisserant@0: * as a direct variable, this being mapped onto the MatPLC point
etisserant@0: * named <identifier>
etisserant@0: */
etisserant@0: /* TODO: we should not restrict it to only the accepted syntax
etisserant@0: * of <identifier> as specified by the standard. MatPLC point names
etisserant@0: * have a more permissive syntax.
etisserant@0: *
etisserant@0: * e.g. "P__234"
etisserant@0: * Is a valid MatPLC point name, but not a valid <identifier> !!
etisserant@0: * The same happens with names such as "333", "349+23", etc...
etisserant@0: * How can we handle these more expressive names in our case?
etisserant@0: * Remember that some direct variable may remain anonymous, with
etisserant@0: * declarations such as:
etisserant@0: * VAR
etisserant@0: * AT %I3 : BYTE := 255;
etisserant@0: * END_VAR
mario@11: * in which case we are currently using "%I3" as the variable
mario@11: * name.
mario@11: */
msousa@547: /* direct_variable_matplc %{identifier} */
msousa@547: /* direct_variable {direct_variable_standard}|{direct_variable_matplc} */
msousa@547: direct_variable {direct_variable_standard}
etisserant@0:
etisserant@0: /******************************************/
etisserant@0: /* B 1.4.3 - Declaration & Initialisation */
etisserant@0: /******************************************/
etisserant@0: incompl_location %[IQM]\*
etisserant@0:
etisserant@0:
etisserant@0:
etisserant@0:
etisserant@0: %%
etisserant@0: /* fprintf(stderr, "flex: state %d\n", YY_START); */
etisserant@0:
etisserant@0: /*****************************************************/
etisserant@0: /*****************************************************/
etisserant@0: /*****************************************************/
etisserant@0: /***** *****/
etisserant@0: /***** *****/
etisserant@0: /***** F I R S T T H I N G S F I R S T *****/
etisserant@0: /***** *****/
etisserant@0: /***** *****/
etisserant@0: /*****************************************************/
etisserant@0: /*****************************************************/
etisserant@0: /*****************************************************/
etisserant@0:
mario@68: /***********************************************************/
mario@68: /* Handle requests sent by bison for flex to change state. */
mario@68: /***********************************************************/
mario@13: if (get_goto_body_state()) { /* a switch to body_state was requested */
mario@68: yy_push_state(body_state); /* enter body_state; the current state is saved on the state stack */
mario@13: rst_goto_body_state(); /* presumably clears the pending request - helper defined outside this view */
mario@6: }
lbessard@3:
mario@68: if (get_goto_sfc_qualifier_state()) { /* a switch to sfc_qualifier_state was requested */
mario@68: yy_push_state(sfc_qualifier_state);
mario@68: rst_goto_sfc_qualifier_state();
mario@68: }
mario@68:
mario@86: if (get_goto_sfc_priority_state()) { /* a switch to sfc_priority_state was requested */
mario@86: yy_push_state(sfc_priority_state);
mario@86: rst_goto_sfc_priority_state();
mario@86: }
mario@86:
mario@74: if (get_goto_task_init_state()) { /* a switch to task_init_state was requested */
mario@74: yy_push_state(task_init_state);
mario@74: rst_goto_task_init_state();
mario@74: }
mario@74:
mario@68: if (get_pop_state()) { /* checked last, i.e. after any push requested above */
mario@68: yy_pop_state(); /* return to the state saved on top of the state stack */
mario@68: rst_pop_state();
mario@68: }
mario@68:
mario@68: /***************************/
etisserant@0: /* Handle the pragmas! */
mario@68: /***************************/
etisserant@0:
etisserant@0: /* We start off by searching for the pragmas we handle in the lexical parser. */
etisserant@0: {file_include_pragma} unput_text(0); yy_push_state(include_beg);
etisserant@0:
msousa@267: /* Pragmas sent to syntax analyser (bison) */
mjsousa@1016: /* NOTE: In the vardecl_list_state we only process the pragmas between two consecutive VAR .. END_VAR blocks.
mjsousa@1016: * We do not process any pragmas trailing after the last END_VAR. We leave that to the body_state.
mjsousa@1016: * This is because the pragmas are stored in a statement_list or instruction_list (in bison),
mjsousa@1016: * but these lists must start with the special tokens start_IL_body_token/start_ST_body_token.
mjsousa@1016: * This means that these special tokens must be generated (by the body_state) before processing
mjsousa@1016: * the pragma => we cannot process the trailing pragmas in the vardecl_list_state state.
mjsousa@1016: */
mjsousa@1016: {disable_code_generation_pragma} return disable_code_generation_pragma_token;
mjsousa@1016: {enable_code_generation_pragma} return enable_code_generation_pragma_token;
mjsousa@1016: {disable_code_generation_pragma}/(VAR) return disable_code_generation_pragma_token;
mjsousa@1016: {enable_code_generation_pragma}/(VAR) return enable_code_generation_pragma_token;
mjsousa@1016: {disable_code_generation_pragma} append_bodystate_buffer(yytext); /* in body state we do not process any tokens, we simply store them for later processing! */
mjsousa@1016: {enable_code_generation_pragma} append_bodystate_buffer(yytext); /* in body state we do not process any tokens, we simply store them for later processing! */
etisserant@0: /* Any other pragma we find, we just pass it up to the syntax parser... */
mario@68: /* Note that the state is exclusive, so we have to include it here too. */
mjsousa@1016: {pragma} append_bodystate_buffer(yytext); /* in body state we do not process any tokens, we simply store them for later processing! */
etisserant@0: {pragma} {/* return the pragma without the enclosing '{' and '}' (or '{{' and '}}') */
mjsousa@868: int cut = (yytext[1]=='{' && yytext[strlen(yytext)-2]=='}')?2:1; /* 2 only for the '{{...}}' form; a '{...}' pragma whose text merely begins with an opening brace loses one brace per side */
Edouard@634: yytext[strlen(yytext)-cut] = '\0';
Edouard@634: yylval.ID=strdup(yytext+cut);
etisserant@0: return pragma_token;
etisserant@0: }
mjsousa@1016: {pragma}/(VAR) {/* return the pragma without the enclosing '{' and '}' (or '{{' and '}}') */
Laurent@701: int cut = (yytext[1]=='{' && yytext[strlen(yytext)-2]=='}')?2:1; /* 2 only for the '{{...}}' form; a '{...}' pragma whose text merely begins with an opening brace loses one brace per side */
mjsousa@866: yytext[strlen(yytext)-cut] = '\0';
Laurent@701: yylval.ID=strdup(yytext+cut);
etisserant@0: return pragma_token;
etisserant@0: }
etisserant@0:
etisserant@0:
etisserant@0: /*********************************/
etisserant@0: /* Handle the file includes! */
etisserant@0: /*********************************/
etisserant@0: {file_include_pragma_beg} BEGIN(include_filename);
etisserant@0:
etisserant@0: {file_include_pragma_filename} {
msousa@756: /* set the internal state variables of lexical analyser to process a new include file */
msousa@756: include_file(yytext);
etisserant@0: /* switch to whatever state was active before the include file */
etisserant@0: yy_pop_state();
etisserant@0: /* now process the new file... */
etisserant@0: }
etisserant@0:
etisserant@0:
mjsousa@761: <> { /* NOTE: Currently bison is incorrectly using END_OF_INPUT in many rules
mjsousa@761: * when checking for syntax errors in the input source code.
mjsousa@761: * This means that in reality flex will be asked to carry on reading the input
mjsousa@761: * even after it has reached the end of all (including the main) input files.
mjsousa@761: * In other words, we will be called to return more tokens, even after we have
mjsousa@761: * already returned an END_OF_INPUT token. In this case, we must carry on returning
mjsousa@761: * more END_OF_INPUT tokens.
mjsousa@761: *
mjsousa@761: * However, in the above case we will be asked to carry on reading more tokens
mjsousa@761: * from the main input file, after we have reached the end. For this to work
mjsousa@761: * correctly, we cannot close the main input file!
mjsousa@761: *
mjsousa@761: * This is why we WILL be called with include_stack_ptr == 0 multiple times,
mjsousa@761: * and why we must handle it as a special case
mjsousa@761: * that leaves the include_stack_ptr unchanged, and returns END_OF_INPUT once again.
mjsousa@761: *
mjsousa@761: * As a corollary, flex can never safely close the main input file, and we must ask
mjsousa@761: * bison to close it!
mario@76: */
mario@76: if (include_stack_ptr == 0) {
mjsousa@761: // fclose(yyin); // Must not do this!!
mjsousa@879: // FreeTracking(current_tracking); // Must not do this!!
mario@73: /* yyterminate() terminates the scanner and returns a 0 to the
mario@73: * scanner's caller, indicating "all done".
mario@73: *
mario@73: * Our syntax parser (written with bison) has the token
mario@73: * END_OF_INPUT associated to the value 0, so even though
mario@73: * we don't explicitly return the token END_OF_INPUT
mario@73: * calling yyterminate() is equivalent to doing that.
mario@73: */
etisserant@0: yyterminate();
msousa@737: } else {
mjsousa@761: fclose(yyin); /* end of an included file: release it ... */
mjsousa@879: FreeTracking(current_tracking);
lbessard@136: --include_stack_ptr; /* ... and step back to the entry saved for the including file */
etisserant@0: yy_delete_buffer(YY_CURRENT_BUFFER);
etisserant@0: yy_switch_to_buffer((include_stack[include_stack_ptr]).buffer_state);
lbessard@136: current_tracking = include_stack[include_stack_ptr].env;
etisserant@0: /* removing constness of char *. This is safe actually,
etisserant@0: * since the only real const char * that is stored on the stack is
etisserant@1: * the first one (i.e. the one that gets stored in include_stack[0]),
etisserant@0: * which is never free'd!
etisserant@0: */
msousa@286: /* NOTE: We do __NOT__ free the malloc()'d memory since
msousa@286: * pointers to this filename will be kept by many objects
msousa@286: * in the abstract syntax tree.
msousa@286: * This will later be used to provide correct error
msousa@286: * messages during semantic analysis (stage 3)
msousa@286: */
msousa@286: /* free((char *)current_filename); */
etisserant@0: current_filename = include_stack[include_stack_ptr].filename;
etisserant@0: yy_push_state(include_end); /* NOTE(review): presumably so that the remainder of the include pragma in the including file is consumed next - confirm against the include_end rules */
etisserant@0: }
etisserant@0: }
etisserant@0:
etisserant@0: {file_include_pragma_end} yy_pop_state();
msousa@756: /* handle the artificial file includes created by include_string(), which do not end with a '}' */
msousa@756: . unput_text(0); yy_pop_state();
etisserant@0:
etisserant@0:
etisserant@0: /*********************************/
etisserant@0: /* Handle all the state changes! */
etisserant@0: /*********************************/
etisserant@0:
mjsousa@866: /* INITIAL -> header_state */
etisserant@0: {
mjsousa@1016: FUNCTION{st_whitespace} if (get_preparse_state()) BEGIN(get_pou_name_state); else {BEGIN(header_state);/* printf("\nChanging to header_state\n"); */} return FUNCTION;
mjsousa@1016: FUNCTION_BLOCK{st_whitespace} if (get_preparse_state()) BEGIN(get_pou_name_state); else {BEGIN(header_state);/* printf("\nChanging to header_state\n"); */} return FUNCTION_BLOCK;
mjsousa@1016: PROGRAM{st_whitespace} if (get_preparse_state()) BEGIN(get_pou_name_state); else {BEGIN(header_state);/* printf("\nChanging to header_state\n"); */} return PROGRAM;
mjsousa@1016: CONFIGURATION{st_whitespace} if (get_preparse_state()) BEGIN(get_pou_name_state); else {BEGIN(config_state);/* printf("\nChanging to config_state\n"); */} return CONFIGURATION;
mjsousa@1016: }
mjsousa@1016:
mjsousa@1016: {
mjsousa@1016: {identifier} BEGIN(ignore_pou_state); yylval.ID=strdup(yytext); return identifier_token;
mjsousa@1016: . BEGIN(ignore_pou_state); unput_text(0);
mjsousa@1016: }
mjsousa@1016:
mjsousa@1016: {
mjsousa@1016: END_FUNCTION unput_text(0); BEGIN(INITIAL);
mjsousa@1016: END_FUNCTION_BLOCK unput_text(0); BEGIN(INITIAL);
mjsousa@1016: END_PROGRAM unput_text(0); BEGIN(INITIAL);
mjsousa@1016: END_CONFIGURATION unput_text(0); BEGIN(INITIAL);
mjsousa@1016: .|\n {}/* Ignore text inside POU! (including the '\n' character!)) */
mjsousa@1016: }
mjsousa@1016:
mjsousa@1016:
mjsousa@1016: /* header_state -> (vardecl_list_state) */
mjsousa@1016: /* NOTE: This transition assumes that all POUs with code (Function, FB, and Program) will always contain
mjsousa@1016: * at least one VAR_XXX block.
mjsousa@1016: * How about functions that do not declare variables, and go directly to the body_state???
etisserant@0: * - According to Section 2.5.1.3 (Function Declaration), item 2 in the list, a FUNCTION
etisserant@0: * must have at least one input argument, so a correct declaration will have at least
etisserant@0: * one VAR_INPUT ... END_VAR construct!
etisserant@0: * - According to Section 2.5.2.2 (Function Block Declaration), a FUNCTION_BLOCK
etisserant@0: * must have at least one input argument, so a correct declaration will have at least
etisserant@0: * one VAR_INPUT ... END_VAR construct!
etisserant@0: * - According to Section 2.5.3 (Programs), a PROGRAM must have at least one input
etisserant@0: * argument, so a correct declaration will have at least one VAR_INPUT ... END_VAR
etisserant@0: * construct!
etisserant@0: *
etisserant@0: * All the above means that we needn't worry about PROGRAMs, FUNCTIONs or
mario@68: * FUNCTION_BLOCKs that do not have at least one END_VAR before the body_state.
etisserant@0: * If the code has an error, and no END_VAR before the body, we will simply
mjsousa@1016: * continue in the current state, until the end of the FUNCTION, FUNCTION_BLOCK
etisserant@0: * or PROGRAM.
mjsousa@1016: *
mjsousa@1016: * WARNING: From 2016-05 (May 2016) onwards, matiec supports a non-standard option in which a Function
mjsousa@1016: * may be declared with no Input, Output or IN_OUT variables. This means that the above
mjsousa@1016: * assumption is no longer valid.
mjsousa@1016: * To make things simpler (i.e. so we do not need to change the transition conditions in the flex state machine),
mjsousa@1016: * when using this non-standard extension matiec requires that Functions must include at least one
mjsousa@1016: * VAR .. END_VAR block. This implies that the above assumption remains valid!
mjsousa@1016: * This limitation of requiring a VAR .. END_VAR block is not really very limiting, as a function
mjsousa@1016: * with no input and output parameters will probably need to do some 'work', and for that it will
mjsousa@1016: * probably need some local variables declared in a VAR .. END_VAR block.
mjsousa@1016: * Note however that in the extreme it might make sense to have a function with no variables whatsoever
mjsousa@1016: * (e.g.: a function that only calls other functions that all return VOID - another non standard extension!).
mjsousa@1016: * For now we do not consider this!!
etisserant@0: */
mjsousa@866: {
mjsousa@868: VAR | /* execute the next rule's action, i.e. fall-through! */
mjsousa@868: VAR_INPUT |
mjsousa@868: VAR_OUTPUT |
mjsousa@868: VAR_IN_OUT |
mjsousa@868: VAR_EXTERNAL |
mjsousa@868: VAR_GLOBAL |
mjsousa@868: VAR_TEMP |
mjsousa@868: VAR_CONFIG |
mjsousa@1016: VAR_ACCESS unput_text(0); /* printf("\nChanging to vardecl_list_state\n") */; BEGIN(vardecl_list_state);
mjsousa@868: }
mjsousa@868:
mjsousa@868:
mjsousa@868: /* vardecl_list_state -> (vardecl_state | body_state | INITIAL) */
mjsousa@866: {
mjsousa@868: VAR_INPUT | /* execute the next rule's action, i.e. fall-through! */
mjsousa@868: VAR_OUTPUT |
mjsousa@868: VAR_IN_OUT |
mjsousa@868: VAR_EXTERNAL |
mjsousa@868: VAR_GLOBAL |
mjsousa@868: VAR_TEMP |
mjsousa@868: VAR_CONFIG |
mjsousa@868: VAR_ACCESS |
mjsousa@866: VAR unput_text(0); yy_push_state(vardecl_state);
mjsousa@868:
mjsousa@1010: END_FUNCTION unput_text(0); BEGIN(INITIAL);
mjsousa@1010: END_FUNCTION_BLOCK unput_text(0); BEGIN(INITIAL);
mjsousa@1010: END_PROGRAM unput_text(0); BEGIN(INITIAL);
mjsousa@868:
mjsousa@1016: . unput_text(0); yy_push_state(body_state); //printf("\nChanging to body_state\n");/* anything else, just change to body_state! */
mjsousa@868: }
mjsousa@868:
mjsousa@868:
mjsousa@868: /* vardecl_list_state -> pop to $previous_state (vardecl_list_state) */
mjsousa@866: {
mjsousa@948: END_VAR yy_pop_state(); return END_VAR; /* pop back to vardecl_list_state */
mjsousa@866: }
mjsousa@866:
etisserant@0:
mjsousa@868: /* body_state -> (il_state | st_state | sfc_state) */
mario@68: {
mjsousa@1020: {st_whitespace} {/* In body state we do not process any tokens,
mjsousa@1020: * we simply store them for later processing!
mjsousa@1020: * NOTE: all whitespace in the beginning
mjsousa@1020: * of body_state must be removed so we can
mjsousa@1020: * detect ':=' in the beginning of TRANSITION
mjsousa@1020: * conditions preceded by whitespace.
mjsousa@1020: * => only add to bodystate_buffer when not in beginning.
mjsousa@1020: */
mjsousa@1020: if (!isempty_bodystate_buffer())
mjsousa@1020: append_bodystate_buffer(yytext);
mjsousa@1020: }
mjsousa@1016: /* 'INITIAL_STEP' always used in beginning of SFCs !! */
mjsousa@1016: INITIAL_STEP { if (isempty_bodystate_buffer()) {unput_text(0); BEGIN(sfc_state);}
mjsousa@1016: else {append_bodystate_buffer(yytext);}
mjsousa@1016: }
mjsousa@1016:
mjsousa@1016: /* ':=', at the very beginning of a 'body', occurs only in transitions and not Function, FB, or Program bodies! */
mjsousa@1016: := { if (isempty_bodystate_buffer()) {unput_text(0); BEGIN(st_state);} /* We do _not_ return a start_ST_body_token here, as bison does not expect it! */
mjsousa@1016: else {append_bodystate_buffer(yytext);}
mjsousa@1016: }
mjsousa@1016:
andrej@1031: /* check if ';' occurs before an END_FUNCTION, END_FUNCTION_BLOCK, END_PROGRAM, END_ACTION or END_TRANSITION. (If true => we are parsing ST; If false => parsing IL). */
mjsousa@1016: END_ACTION | /* execute the next rule's action, i.e. fall-through! */
mjsousa@1016: END_FUNCTION |
mjsousa@1016: END_FUNCTION_BLOCK |
andrej@1031: END_TRANSITION |
mjsousa@1016: END_PROGRAM { append_bodystate_buffer(yytext); unput_bodystate_buffer(); BEGIN(il_state); /*printf("returning start_IL_body_token\n");*/ return start_IL_body_token;}
mjsousa@1016: .|\n { append_bodystate_buffer(yytext);
mjsousa@1016: if (strcmp(yytext, ";") == 0)
mjsousa@1016: {unput_bodystate_buffer(); BEGIN(st_state); /*printf("returning start_ST_body_token\n");*/ return start_ST_body_token;}
mjsousa@1016: }
mjsousa@1016: /* The following rules are not really necessary. They just make compilation faster in case the ST Statement List starts with one of the following... */
mjsousa@1016: RETURN | /* execute the next rule's action, i.e. fall-through! */
mjsousa@1016: IF |
mjsousa@1016: CASE |
mjsousa@1016: FOR |
mjsousa@1016: WHILE |
mjsousa@1016: EXIT |
mjsousa@1016: REPEAT { if (isempty_bodystate_buffer()) {unput_text(0); BEGIN(st_state); return start_ST_body_token;}
mjsousa@1016: else {append_bodystate_buffer(yytext);}
mjsousa@1016: }
mjsousa@1016:
mario@68: } /* end of body_state lexical parser */
lbessard@3:
mjsousa@866:
mjsousa@868: /* (il_state | st_state) -> pop to $previous_state (vardecl_list_state or sfc_state) */
lbessard@3: {
lbessard@3: END_FUNCTION yy_pop_state(); unput_text(0);
lbessard@3: END_FUNCTION_BLOCK yy_pop_state(); unput_text(0);
lbessard@3: END_PROGRAM yy_pop_state(); unput_text(0);
lbessard@3: END_TRANSITION yy_pop_state(); unput_text(0);
mario@6: END_ACTION yy_pop_state(); unput_text(0);
lbessard@3: }
lbessard@3:
mjsousa@868: /* sfc_state -> pop to $previous_state (vardecl_list_state or sfc_state) */
lbessard@4: {
lbessard@4: END_FUNCTION yy_pop_state(); unput_text(0);
lbessard@4: END_FUNCTION_BLOCK yy_pop_state(); unput_text(0);
lbessard@4: END_PROGRAM yy_pop_state(); unput_text(0);
lbessard@4: }
lbessard@4:
etisserant@0: /* config -> INITIAL */
etisserant@0: END_CONFIGURATION BEGIN(INITIAL); return END_CONFIGURATION;
etisserant@0:
etisserant@0:
etisserant@0:
etisserant@0: /***************************************/
etisserant@0: /* Next is to remove all whitespace    */
etisserant@0: /***************************************/
etisserant@0: /* NOTE: pragmas are handled right at the beginning... */
etisserant@0:
mjsousa@866: /* The whitespace */
mjsousa@1016: {st_whitespace} /* Eat any whitespace */
mjsousa@866: {il_whitespace} /* Eat any whitespace */
mjsousa@1020: /* NOTE: Due to the need of having the following rule have higher priority,
mjsousa@1020: * the following rule was moved to an earlier position in this file.
mjsousa@1020: {st_whitespace} {...}
mjsousa@1020: */
mjsousa@866:
mjsousa@866: /* The comments */
mjsousa@952: {comment_beg} yy_push_state(comment_state);
mjsousa@867: {comment_beg} yy_push_state(comment_state);
mjsousa@866: {
mjsousa@867: {comment_beg} {if (get_opt_nested_comments()) yy_push_state(comment_state);}
mjsousa@867: {comment_end} yy_pop_state();
mjsousa@867: . /* Ignore text inside comment! */
mjsousa@867: \n /* Ignore text inside comment! */
mjsousa@866: }
msousa@267:
etisserant@0: /*****************************************/
etisserant@0: /* B.1.1 Letters, digits and identifiers */
etisserant@0: /*****************************************/
etisserant@0: /* NOTE: 'R1', 'IN', etc... are IL operators, and therefore tokens
etisserant@0: * On the other hand, the spec does not define them as keywords,
etisserant@0: * which means they may be re-used for variable names, etc...!
etisserant@0: * The syntax parser already caters for the possibility of these
etisserant@0: * tokens being used for variable names in their declarations.
etisserant@0: * When they are declared, they will be added to the variable symbol table!
etisserant@0: * Further appearances of these tokens must no longer be parsed
etisserant@0: * as R1_tokens etc..., but rather as variable_name_tokens!
etisserant@0: *
etisserant@0: * That is why the first thing we do with identifiers, even before
etisserant@0: * checking whether they may be a 'keyword', is to check whether
etisserant@0: * they have been previously declared as a variable name,
etisserant@0: *
mario@13: * However, we have a dilemma! Should we here also check for
mario@13: * prev_declared_derived_function_name_token?
mario@13: * If we do, then the 'MOD' default library function (defined in
mario@13: * the standard) will always be returned as a function name, and
mario@13: * it will therefore not be possible to use it as an operator as
mario@13: * in the following ST expression 'X := Y MOD Z;' !
mario@13: * If we don't, then it will not even be possible to use 'MOD'
mario@13: * as a function as in 'X := MOD(Y, Z);'
mario@13: * We solve this by NOT testing for function names here, and
mario@13: * handling this function and keyword clash in bison!
etisserant@0: */
mjsousa@1016: /* NOTE: The following code has been commented out as most users do not want matiec
mjsousa@1016: * to allow the use of 'R1', 'IN' ... IL operators as identifiers,
mjsousa@1016: * even though a literal reading of the standard allows this.
mjsousa@1016: * We could add this as a command line option, but it is not yet done.
mjsousa@1016: * For now we just comment out the code, but leave the commented code
mjsousa@1016: * in so we can re-activate it quickly (without having to go through old commits
mjsousa@1016: * in the mercurial repository to figure out the missing code)!
mjsousa@1016: */
mario@83: /*
etisserant@0: {identifier} {int token = get_identifier_token(yytext);
mario@81: // fprintf(stderr, "flex: analysing identifier '%s'...", yytext);
etisserant@0: if ((token == prev_declared_variable_name_token) ||
mario@13: // (token == prev_declared_derived_function_name_token) || // DO NOT add this condition!
etisserant@0: (token == prev_declared_fb_name_token)) {
mario@83: // if (token != identifier_token)
mario@83: // * NOTE: if we replace the above uncommented conditions with
mario@13: * the simple test of (token != identifier_token), then
mario@13: * 'MOD' et al must be removed from the
mario@13: * library_symbol_table as a default function name!
mario@83: * //
etisserant@0: yylval.ID=strdup(yytext);
mario@81: // fprintf(stderr, "returning token %d\n", token);
etisserant@0: return token;
etisserant@0: }
mario@83: // otherwise, leave it for the other lexical parser rules...
mario@81: // fprintf(stderr, "rejecting\n");
etisserant@0: REJECT;
etisserant@0: }
mario@83: */
etisserant@0:
etisserant@0: /******************************************************/
etisserant@0: /******************************************************/
etisserant@0: /******************************************************/
etisserant@0: /***** *****/
etisserant@0: /***** *****/
etisserant@0: /***** N O W D O T H E K E Y W O R D S *****/
etisserant@0: /***** *****/
etisserant@0: /***** *****/
etisserant@0: /******************************************************/
etisserant@0: /******************************************************/
etisserant@0: /******************************************************/
etisserant@0:
etisserant@0:
mjsousa@934: REF {if (get_opt_ref_standard_extensions()) return REF; else{REJECT;}} /* Keyword in IEC 61131-3 v3 */
mjsousa@934: DREF {if (get_opt_ref_standard_extensions()) return DREF; else{REJECT;}} /* Keyword in IEC 61131-3 v3 */
mjsousa@934: REF_TO {if (get_opt_ref_standard_extensions()) return REF_TO; else{REJECT;}} /* Keyword in IEC 61131-3 v3 */
mjsousa@934: NULL {if (get_opt_ref_standard_extensions()) return NULL_token; else{REJECT;}} /* Keyword in IEC 61131-3 v3 */
mjsousa@873:
mario@82: EN return EN; /* Keyword */
mario@82: ENO return ENO; /* Keyword */
etisserant@0:
etisserant@0:
etisserant@0: /******************************/
etisserant@0: /* B 1.2.1 - Numeric Literals */
etisserant@0: /******************************/
mario@82: TRUE return TRUE; /* Keyword */
msousa@257: BOOL#1 return boolean_true_literal_token;
msousa@257: BOOL#TRUE return boolean_true_literal_token;
msousa@257: SAFEBOOL#1 {if (get_opt_safe_extensions()) {return safeboolean_true_literal_token;} else{REJECT;}} /* Keyword (Data Type) */
msousa@257: SAFEBOOL#TRUE {if (get_opt_safe_extensions()) {return safeboolean_true_literal_token;} else{REJECT;}} /* Keyword (Data Type) */
msousa@257:
mario@82: FALSE return FALSE; /* Keyword */
msousa@257: BOOL#0 return boolean_false_literal_token;
msousa@257: BOOL#FALSE return boolean_false_literal_token;
msousa@257: SAFEBOOL#0 {if (get_opt_safe_extensions()) {return safeboolean_false_literal_token;} else{REJECT;}} /* Keyword (Data Type) */
msousa@257: SAFEBOOL#FALSE {if (get_opt_safe_extensions()) {return safeboolean_false_literal_token;} else{REJECT;}} /* Keyword (Data Type) */
etisserant@0:
etisserant@0:
etisserant@0: /************************/
etisserant@0: /* B 1.2.3.1 - Duration */
etisserant@0: /************************/
mario@82: t# return T_SHARP; /* Delimiter */
mario@82: T# return T_SHARP; /* Delimiter */
mario@82: TIME return TIME; /* Keyword (Data Type) */
etisserant@0:
etisserant@0:
etisserant@0: /************************************/
etisserant@0: /* B 1.2.3.2 - Time of day and Date */
etisserant@0: /************************************/
mario@82: TIME_OF_DAY return TIME_OF_DAY; /* Keyword (Data Type) */
mario@82: TOD return TIME_OF_DAY; /* Keyword (Data Type) */
mario@82: DATE return DATE; /* Keyword (Data Type) */
mario@82: d# return D_SHARP; /* Delimiter */
mario@82: D# return D_SHARP; /* Delimiter */
mario@82: DATE_AND_TIME return DATE_AND_TIME; /* Keyword (Data Type) */
mario@82: DT return DATE_AND_TIME; /* Keyword (Data Type) */
etisserant@0:
etisserant@0:
etisserant@0: /***********************************/
etisserant@0: /* B 1.3.1 - Elementary Data Types */
etisserant@0: /***********************************/
msousa@257: BOOL return BOOL; /* Keyword (Data Type) */
msousa@257:
mario@82: BYTE return BYTE; /* Keyword (Data Type) */
mario@82: WORD return WORD; /* Keyword (Data Type) */
mario@82: DWORD return DWORD; /* Keyword (Data Type) */
mario@82: LWORD return LWORD; /* Keyword (Data Type) */
etisserant@0:
msousa@257: SINT return SINT; /* Keyword (Data Type) */
msousa@257: INT return INT; /* Keyword (Data Type) */
msousa@257: DINT return DINT; /* Keyword (Data Type) */
msousa@257: LINT return LINT; /* Keyword (Data Type) */
msousa@257:
msousa@257: USINT return USINT; /* Keyword (Data Type) */
msousa@257: UINT return UINT; /* Keyword (Data Type) */
msousa@257: UDINT return UDINT; /* Keyword (Data Type) */
msousa@257: ULINT return ULINT; /* Keyword (Data Type) */
msousa@257:
msousa@257: REAL return REAL; /* Keyword (Data Type) */
msousa@257: LREAL return LREAL; /* Keyword (Data Type) */
msousa@257:
msousa@257: WSTRING return WSTRING; /* Keyword (Data Type) */
msousa@257: STRING return STRING; /* Keyword (Data Type) */
msousa@257:
msousa@257: TIME return TIME; /* Keyword (Data Type) */
msousa@257: DATE return DATE; /* Keyword (Data Type) */
msousa@257: DT return DT; /* Keyword (Data Type) */
msousa@257: TOD return TOD; /* Keyword (Data Type) */
msousa@257: DATE_AND_TIME return DATE_AND_TIME; /* Keyword (Data Type) */
msousa@257: TIME_OF_DAY return TIME_OF_DAY; /* Keyword (Data Type) */
msousa@257:
mjsousa@1014: /* A non-standard extension! */
mjsousa@1014: VOID {if (runtime_options.allow_void_datatype) {return VOID;} else {REJECT;}}
mjsousa@1014:
mjsousa@1014:
msousa@257: /*****************************************************************/
msousa@257: /* Keywords defined in "Safety Software Technical Specification" */
msousa@257: /*****************************************************************/
msousa@257: /*
msousa@257: * NOTE: The following keywords are defined in
msousa@257: * "Safety Software Technical Specification,
msousa@257: * Part 1: Concepts and Function Blocks,
msousa@257: * Version 1.0 – Official Release"
msousa@257: * written by PLCopen - Technical Committee 5
msousa@257: *
msousa@257: * We only support these extensions and keywords
msousa@257: * if the appropriate command line option is given.
msousa@257: */
msousa@257: SAFEBOOL {if (get_opt_safe_extensions()) {return SAFEBOOL;} else {REJECT;}}
msousa@257:
msousa@257: SAFEBYTE {if (get_opt_safe_extensions()) {return SAFEBYTE;} else {REJECT;}}
msousa@257: SAFEWORD {if (get_opt_safe_extensions()) {return SAFEWORD;} else {REJECT;}}
msousa@257: SAFEDWORD {if (get_opt_safe_extensions()) {return SAFEDWORD;} else{REJECT;}}
msousa@257: SAFELWORD {if (get_opt_safe_extensions()) {return SAFELWORD;} else{REJECT;}}
msousa@257:
msousa@257: SAFEREAL {if (get_opt_safe_extensions()) {return SAFEREAL;} else{REJECT;}} /* Keyword (Data Type) */
msousa@257: SAFELREAL {if (get_opt_safe_extensions()) {return SAFELREAL;} else{REJECT;}}
msousa@257:
msousa@257: SAFESINT {if (get_opt_safe_extensions()) {return SAFESINT;} else{REJECT;}}
msousa@257: SAFEINT {if (get_opt_safe_extensions()) {return SAFEINT;} else{REJECT;}}
msousa@257: SAFEDINT {if (get_opt_safe_extensions()) {return SAFEDINT;} else{REJECT;}}
msousa@257: SAFELINT {if (get_opt_safe_extensions()) {return SAFELINT;} else{REJECT;}}
msousa@257:
msousa@257: SAFEUSINT {if (get_opt_safe_extensions()) {return SAFEUSINT;} else{REJECT;}}
msousa@257: SAFEUINT {if (get_opt_safe_extensions()) {return SAFEUINT;} else{REJECT;}}
msousa@257: SAFEUDINT {if (get_opt_safe_extensions()) {return SAFEUDINT;} else{REJECT;}}
msousa@257: SAFEULINT {if (get_opt_safe_extensions()) {return SAFEULINT;} else{REJECT;}}
msousa@257:
msousa@257: /* SAFESTRING and SAFEWSTRING are not yet supported, i.e. checked correctly, in the semantic analyser (stage 3) */
msousa@257: /* so it is best not to support them at all... */
msousa@257: /*
msousa@257: SAFEWSTRING {if (get_opt_safe_extensions()) {return SAFEWSTRING;} else{REJECT;}}
msousa@257: SAFESTRING {if (get_opt_safe_extensions()) {return SAFESTRING;} else{REJECT;}}
msousa@257: */
msousa@257:
msousa@257: SAFETIME {if (get_opt_safe_extensions()) {return SAFETIME;} else{REJECT;}}
msousa@257: SAFEDATE {if (get_opt_safe_extensions()) {return SAFEDATE;} else{REJECT;}}
msousa@257: SAFEDT {if (get_opt_safe_extensions()) {return SAFEDT;} else{REJECT;}}
msousa@257: SAFETOD {if (get_opt_safe_extensions()) {return SAFETOD;} else{REJECT;}}
msousa@257: SAFEDATE_AND_TIME {if (get_opt_safe_extensions()) {return SAFEDATE_AND_TIME;} else{REJECT;}}
msousa@257: SAFETIME_OF_DAY {if (get_opt_safe_extensions()) {return SAFETIME_OF_DAY;} else{REJECT;}}
etisserant@0:
etisserant@0: /********************************/
etisserant@0: /* B 1.3.2 - Generic data types */
etisserant@0: /********************************/
etisserant@0: /* Strangely, the following symbols do not seem to be required! */
etisserant@0: /* But we include them so they become reserved words, and do not
etisserant@0: * get passed up to bison as an identifier...
etisserant@0: */
mario@82: ANY return ANY; /* Keyword (Data Type) */
mario@82: ANY_DERIVED return ANY_DERIVED; /* Keyword (Data Type) */
mario@82: ANY_ELEMENTARY return ANY_ELEMENTARY; /* Keyword (Data Type) */
mario@82: ANY_MAGNITUDE return ANY_MAGNITUDE; /* Keyword (Data Type) */
mario@82: ANY_NUM return ANY_NUM; /* Keyword (Data Type) */
mario@82: ANY_REAL return ANY_REAL; /* Keyword (Data Type) */
mario@82: ANY_INT return ANY_INT; /* Keyword (Data Type) */
mario@82: ANY_BIT return ANY_BIT; /* Keyword (Data Type) */
mario@82: ANY_STRING return ANY_STRING; /* Keyword (Data Type) */
mario@82: ANY_DATE return ANY_DATE; /* Keyword (Data Type) */
etisserant@0:
etisserant@0:
etisserant@0: /********************************/
etisserant@0: /* B 1.3.3 - Derived data types */
etisserant@0: /********************************/
mario@82: ":=" return ASSIGN; /* Delimiter */
mario@82: ".." return DOTDOT; /* Delimiter */
mario@82: TYPE return TYPE; /* Keyword */
mario@82: END_TYPE return END_TYPE; /* Keyword */
mario@82: ARRAY return ARRAY; /* Keyword */
mario@82: OF return OF; /* Keyword */
mario@82: STRUCT return STRUCT; /* Keyword */
mario@82: END_STRUCT return END_STRUCT; /* Keyword */
etisserant@0:
etisserant@0:
etisserant@0: /*********************/
etisserant@0: /* B 1.4 - Variables */
etisserant@0: /*********************/
etisserant@0:
etisserant@0: /******************************************/
etisserant@0: /* B 1.4.3 - Declaration & Initialisation */
etisserant@0: /******************************************/
mario@82: VAR_INPUT return VAR_INPUT; /* Keyword */
mario@82: VAR_OUTPUT return VAR_OUTPUT; /* Keyword */
mario@82: VAR_IN_OUT return VAR_IN_OUT; /* Keyword */
mario@82: VAR_EXTERNAL return VAR_EXTERNAL; /* Keyword */
mario@82: VAR_GLOBAL return VAR_GLOBAL; /* Keyword */
mario@82: END_VAR return END_VAR; /* Keyword */
mario@82: RETAIN return RETAIN; /* Keyword */
mario@82: NON_RETAIN return NON_RETAIN; /* Keyword */
mario@82: R_EDGE return R_EDGE; /* Keyword */
mario@82: F_EDGE return F_EDGE; /* Keyword */
mario@82: AT return AT; /* Keyword */
etisserant@0:
etisserant@0:
etisserant@0: /***********************/
etisserant@0: /* B 1.5.1 - Functions */
etisserant@0: /***********************/
mjsousa@1010: /* Note: The following END_FUNCTION rule includes a BEGIN(INITIAL); command.
mjsousa@1016: * This is necessary in case the input program being parsed has syntax errors that force
mjsousa@1010: * flex's main state machine to never change to the il_state or the st_state
mjsousa@1010: * after changing to the body_state.
mjsousa@1010: * This BEGIN(INITIAL) command forces the flex state machine to re-synchronise with
mjsousa@1010: * the input stream even in the presence of buggy code!
mjsousa@1010: */
mjsousa@1010: FUNCTION return FUNCTION; /* Keyword */
mjsousa@1010: END_FUNCTION BEGIN(INITIAL); return END_FUNCTION; /* Keyword */ /* see Note above */
mjsousa@1010: VAR return VAR; /* Keyword */
mjsousa@1010: CONSTANT return CONSTANT; /* Keyword */
etisserant@0:
etisserant@0:
etisserant@0: /*****************************/
etisserant@0: /* B 1.5.2 - Function Blocks */
etisserant@0: /*****************************/
mjsousa@1010: /* Note: The following END_FUNCTION_BLOCK rule includes a BEGIN(INITIAL); command.
mjsousa@1016: * This is necessary in case the input program being parsed has syntax errors that force
mjsousa@1010: * flex's main state machine to never change to the il_state or the st_state
mjsousa@1010: * after changing to the body_state.
mjsousa@1010: * This BEGIN(INITIAL) command forces the flex state machine to re-synchronise with
mjsousa@1010: * the input stream even in the presence of buggy code!
mjsousa@1010: */
mjsousa@1010: FUNCTION_BLOCK return FUNCTION_BLOCK; /* Keyword */
mjsousa@1010: END_FUNCTION_BLOCK BEGIN(INITIAL); return END_FUNCTION_BLOCK; /* Keyword */ /* see Note above */
mjsousa@1010: VAR_TEMP return VAR_TEMP; /* Keyword */
mjsousa@1010: VAR return VAR; /* Keyword */
mjsousa@1010: NON_RETAIN return NON_RETAIN; /* Keyword */
mjsousa@1010: END_VAR return END_VAR; /* Keyword */
etisserant@0:
etisserant@0:
etisserant@0: /**********************/
etisserant@0: /* B 1.5.3 - Programs */
etisserant@0: /**********************/
mjsousa@1010: /* Note: The following END_PROGRAM rule includes a BEGIN(INITIAL); command.
mjsousa@1016: * This is necessary in case the input program being parsed has syntax errors that force
mjsousa@1010: * flex's main state machine to never change to the il_state or the st_state
mjsousa@1010: * after changing to the body_state.
mjsousa@1010: * This BEGIN(INITIAL) command forces the flex state machine to re-synchronise with
mjsousa@1010: * the input stream even in the presence of buggy code!
mjsousa@1010: */
mjsousa@1010: PROGRAM return PROGRAM; /* Keyword */
mjsousa@1010: END_PROGRAM BEGIN(INITIAL); return END_PROGRAM; /* Keyword */ /* see Note above */
etisserant@0:
etisserant@0:
etisserant@0: /********************************************/
etisserant@0: /* B 1.6 Sequential Function Chart elements */
etisserant@0: /********************************************/
etisserant@0: /* NOTE: the following identifiers/tokens clash with the R and S IL operators, as well
etisserant@0: * as other identifiers that may be used as variable names inside IL and ST programs.
etisserant@0: * They will have to be handled when we include parsing of SFC... For now, simply
etisserant@0: * ignore them!
etisserant@0: */
etisserant@1:
mario@82: ACTION return ACTION; /* Keyword */
mario@82: END_ACTION return END_ACTION; /* Keyword */
mario@82:
mario@82: TRANSITION return TRANSITION; /* Keyword */
mario@82: END_TRANSITION return END_TRANSITION; /* Keyword */
mario@82: FROM return FROM; /* Keyword */
mario@82: TO return TO; /* Keyword */
mario@82:
mario@82: INITIAL_STEP return INITIAL_STEP; /* Keyword */
mario@82: STEP return STEP; /* Keyword */
mario@82: END_STEP return END_STEP; /* Keyword */
etisserant@0:
mario@74: /* PRIORITY is not a keyword, so we only return it when
mario@74: * it is explicitly required and we are not expecting any identifiers
mario@74: * that could also use the same letter sequence (i.e. an identifier: priority)
mario@74: */
mario@86: PRIORITY return PRIORITY;
mario@74:
mario@68: {
etisserant@0: L return L;
etisserant@0: D return D;
etisserant@0: SD return SD;
etisserant@0: DS return DS;
etisserant@0: SL return SL;
etisserant@0: N return N;
etisserant@0: P return P;
Laurent@627: P0 return P0;
Laurent@627: P1 return P1;
etisserant@0: R return R;
etisserant@0: S return S;
etisserant@1: }
etisserant@0:
etisserant@0:
etisserant@0: /********************************/
etisserant@0: /* B 1.7 Configuration elements */
etisserant@0: /********************************/
mjsousa@1010: /* Note: The following END_CONFIGURATION rule will never get to be used, as we have
mjsousa@1010: * another identical rule above (closer to the rules handling the transitions
mjsousa@1010: * of the main state machine) that will always execute before this one.
mjsousa@1010: * Note: The following END_CONFIGURATION rule includes a BEGIN(INITIAL); command.
mjsousa@1010: * This is not strictly necessary, but I place it here so it follows the same
mjsousa@1010: * pattern used in END_FUNCTION, END_PROGRAM, and END_FUNCTION_BLOCK
mjsousa@1010: */
mjsousa@1010: CONFIGURATION return CONFIGURATION; /* Keyword */
mjsousa@1010: END_CONFIGURATION BEGIN(INITIAL); return END_CONFIGURATION; /* Keyword */ /* see 2 Notes above! */
mjsousa@1010: TASK return TASK; /* Keyword */
mjsousa@1010: RESOURCE return RESOURCE; /* Keyword */
mjsousa@1010: ON return ON; /* Keyword */
mjsousa@1010: END_RESOURCE return END_RESOURCE; /* Keyword */
mjsousa@1010: VAR_CONFIG return VAR_CONFIG; /* Keyword */
mjsousa@1010: VAR_ACCESS return VAR_ACCESS; /* Keyword */
mjsousa@1010: END_VAR return END_VAR; /* Keyword */
mjsousa@1010: WITH return WITH; /* Keyword */
mjsousa@1010: PROGRAM return PROGRAM; /* Keyword */
mjsousa@1010: RETAIN return RETAIN; /* Keyword */
mjsousa@1010: NON_RETAIN return NON_RETAIN; /* Keyword */
mjsousa@1010: READ_WRITE return READ_WRITE; /* Keyword */
mjsousa@1010: READ_ONLY return READ_ONLY; /* Keyword */
mario@74:
mario@74: /* PRIORITY, SINGLE and INTERVAL are not keywords, so we only return them when
mario@74: * it is explicitly required and we are not expecting any identifiers
mario@74: * that could also use the same letter sequence (i.e. an identifier: priority, ...)
mario@74: */
mario@74: {
etisserant@0: PRIORITY return PRIORITY;
etisserant@0: SINGLE return SINGLE;
etisserant@0: INTERVAL return INTERVAL;
mario@74: }
etisserant@0:
etisserant@0: /***********************************/
etisserant@0: /* B 2.1 Instructions and Operands */
etisserant@0: /***********************************/
lbessard@3: \n return EOL;
etisserant@0:
etisserant@0:
etisserant@0: /*******************/
etisserant@0: /* B 2.2 Operators */
etisserant@0: /*******************/
etisserant@0: /* NOTE: we can't have flex return the same token for
etisserant@0: * ANDN and &N, neither for AND and &, since
etisserant@0: * AND and ANDN are considered valid variable
etisserant@0: * function or functionblock type names!
etisserant@0: * This means that the parser may decide that the
etisserant@0: * AND or ANDN strings found in the source code
etisserant@0: * are being used as variable names
etisserant@0: * and not as operators, and will therefore transform
etisserant@0: * these tokens into identifier tokens!
etisserant@0: * We can't have the parser thinking that the source
etisserant@0: * code contained the string AND (which may be interpreted
etisserant@0: * as a variable name) when in reality the source code
etisserant@0: * merely contained the character &, so we use two
etisserant@0: * different tokens for & and AND (and similarly
etisserant@0: * ANDN and &N)!
etisserant@0: */
mario@68: /* The following tokens clash with ST expression operators and Standard Functions */
mario@73: /* They are also keywords! */
mario@82: AND return AND; /* Keyword */
mario@82: MOD return MOD; /* Keyword */
mario@82: OR return OR; /* Keyword */
mario@82: XOR return XOR; /* Keyword */
mario@82: NOT return NOT; /* Keyword */
mario@68:
mario@68: /* The following tokens clash with Standard Functions */
mario@82: /* They are keywords because they are a function name */
mario@73: {
mario@82: ADD return ADD; /* Keyword (Standard Function) */
mario@82: DIV return DIV; /* Keyword (Standard Function) */
mario@82: EQ return EQ; /* Keyword (Standard Function) */
mario@82: GE return GE; /* Keyword (Standard Function) */
mario@82: GT return GT; /* Keyword (Standard Function) */
mario@82: LE return LE; /* Keyword (Standard Function) */
mario@82: LT return LT; /* Keyword (Standard Function) */
mario@82: MUL return MUL; /* Keyword (Standard Function) */
mario@82: NE return NE; /* Keyword (Standard Function) */
mario@82: SUB return SUB; /* Keyword (Standard Function) */
mario@73: }
mario@68:
mario@68: /* The following tokens clash with SFC action qualifiers */
mario@82: /* They are not keywords! */
mario@73: {
mario@68: S return S;
mario@68: R return R;
mario@73: }
mario@68:
mario@68: /* The following tokens clash with ST expression operators */
mario@82: & return AND2; /* NOT a Delimiter! */
mario@68:
mario@68: /* The following tokens have no clashes */
mario@82: /* They are not keywords! */
mario@73: {
etisserant@0: LD return LD;
etisserant@0: LDN return LDN;
etisserant@0: ST return ST;
etisserant@0: STN return STN;
etisserant@0: S1 return S1;
etisserant@0: R1 return R1;
etisserant@0: CLK return CLK;
etisserant@0: CU return CU;
etisserant@0: CD return CD;
etisserant@0: PV return PV;
etisserant@0: IN return IN;
etisserant@0: PT return PT;
etisserant@0: ANDN return ANDN;
etisserant@0: &N return ANDN2;
etisserant@0: ORN return ORN;
etisserant@0: XORN return XORN;
etisserant@0: CAL return CAL;
etisserant@0: CALC return CALC;
etisserant@0: CALCN return CALCN;
etisserant@0: RET return RET;
etisserant@0: RETC return RETC;
etisserant@0: RETCN return RETCN;
etisserant@0: JMP return JMP;
etisserant@0: JMPC return JMPC;
etisserant@0: JMPCN return JMPCN;
mario@73: }
etisserant@0:
etisserant@0: /***********************/
etisserant@0: /* B 3.1 - Expressions */
etisserant@0: /***********************/
mario@82: "**" return OPER_EXP; /* NOT a Delimiter! */
mario@82: "<>" return OPER_NE; /* NOT a Delimiter! */
mario@82: ">=" return OPER_GE; /* NOT a Delimiter! */
mario@82: "<=" return OPER_LE; /* NOT a Delimiter! */
mario@82: & return AND2; /* NOT a Delimiter! */
mario@82: AND return AND; /* Keyword */
mario@82: XOR return XOR; /* Keyword */
mario@82: OR return OR; /* Keyword */
mario@82: NOT return NOT; /* Keyword */
mario@82: MOD return MOD; /* Keyword */
etisserant@0:
etisserant@0:
etisserant@0: /*****************************************/
etisserant@0: /* B 3.2.2 Subprogram Control Statements */
etisserant@0: /*****************************************/
mario@82: := return ASSIGN; /* Delimiter */
mario@82: => return SENDTO; /* Delimiter */
mario@82: RETURN return RETURN; /* Keyword */
etisserant@0:
etisserant@0:
etisserant@0: /********************************/
etisserant@0: /* B 3.2.3 Selection Statements */
etisserant@0: /********************************/
mario@82: IF return IF; /* Keyword */
mario@82: THEN return THEN; /* Keyword */
mario@82: ELSIF return ELSIF; /* Keyword */
mario@82: ELSE return ELSE; /* Keyword */
mario@82: END_IF return END_IF; /* Keyword */
mario@82:
mario@82: CASE return CASE; /* Keyword */
mario@82: OF return OF; /* Keyword */
mario@82: ELSE return ELSE; /* Keyword */
mario@82: END_CASE return END_CASE; /* Keyword */
etisserant@0:
etisserant@0:
etisserant@0: /********************************/
etisserant@0: /* B 3.2.4 Iteration Statements */
etisserant@0: /********************************/
mario@82: FOR return FOR; /* Keyword */
mario@82: TO return TO; /* Keyword */
mario@82: BY return BY; /* Keyword */
mario@82: DO return DO; /* Keyword */
mario@82: END_FOR return END_FOR; /* Keyword */
mario@82:
mario@82: WHILE return WHILE; /* Keyword */
mario@82: DO return DO; /* Keyword */
mario@82: END_WHILE return END_WHILE; /* Keyword */
mario@82:
mario@82: REPEAT return REPEAT; /* Keyword */
mario@82: UNTIL return UNTIL; /* Keyword */
mario@82: END_REPEAT return END_REPEAT; /* Keyword */
mario@82:
mario@82: EXIT return EXIT; /* Keyword */
etisserant@0:
etisserant@0:
msousa@257:
etisserant@0:
etisserant@0:
etisserant@0:
etisserant@0: /********************************************************/
etisserant@0: /********************************************************/
etisserant@0: /********************************************************/
etisserant@0: /***** *****/
etisserant@0: /***** *****/
etisserant@0: /***** N O W W O R K W I T H V A L U E S *****/
etisserant@0: /***** *****/
etisserant@0: /***** *****/
etisserant@0: /********************************************************/
etisserant@0: /********************************************************/
etisserant@0: /********************************************************/
etisserant@0:
etisserant@0:
etisserant@0: /********************************************/
etisserant@0: /* B.1.4.1 Directly Represented Variables */
etisserant@0: /********************************************/
lbessard@175: {direct_variable} {yylval.ID=strdup(yytext); return get_direct_variable_token(yytext);}
etisserant@0:
etisserant@0:
etisserant@0: /******************************************/
etisserant@0: /* B 1.4.3 - Declaration & Initialisation */
etisserant@0: /******************************************/
etisserant@0: {incompl_location} {yylval.ID=strdup(yytext); return incompl_location_token;}
etisserant@0:
etisserant@0:
etisserant@0: /************************/
etisserant@0: /* B 1.2.3.1 - Duration */
etisserant@0: /************************/
etisserant@0: {fixed_point} {yylval.ID=strdup(yytext); return fixed_point_token;}
msousa@547: {interval} {/*fprintf(stderr, "entering time_literal_state ##%s##\n", yytext);*/ unput_and_mark('#'); yy_push_state(time_literal_state);}
msousa@547: {erroneous_interval} {return erroneous_interval_token;}
msousa@547:
msousa@547: {
msousa@547: {integer}d {yylval.ID=strdup(yytext); yylval.ID[yyleng-1] = '\0'; return integer_d_token;}
msousa@547: {integer}h {yylval.ID=strdup(yytext); yylval.ID[yyleng-1] = '\0'; return integer_h_token;}
msousa@547: {integer}m {yylval.ID=strdup(yytext); yylval.ID[yyleng-1] = '\0'; return integer_m_token;}
msousa@547: {integer}s {yylval.ID=strdup(yytext); yylval.ID[yyleng-1] = '\0'; return integer_s_token;}
msousa@547: {integer}ms {yylval.ID=strdup(yytext); yylval.ID[yyleng-2] = '\0'; return integer_ms_token;}
msousa@547: {fixed_point}d {yylval.ID=strdup(yytext); yylval.ID[yyleng-1] = '\0'; return fixed_point_d_token;}
msousa@547: {fixed_point}h {yylval.ID=strdup(yytext); yylval.ID[yyleng-1] = '\0'; return fixed_point_h_token;}
msousa@547: {fixed_point}m {yylval.ID=strdup(yytext); yylval.ID[yyleng-1] = '\0'; return fixed_point_m_token;}
msousa@547: {fixed_point}s {yylval.ID=strdup(yytext); yylval.ID[yyleng-1] = '\0'; return fixed_point_s_token;}
msousa@547: {fixed_point}ms {yylval.ID=strdup(yytext); yylval.ID[yyleng-2] = '\0'; return fixed_point_ms_token;}
msousa@547:
msousa@547: _ /* do nothing - eat it up!*/
msousa@616: \# {/*fprintf(stderr, "popping from time_literal_state (###)\n");*/ yy_pop_state(); return end_interval_token;}
msousa@616: . {/*fprintf(stderr, "time_literal_state: found invalid character '%s'. Aborting!\n", yytext);*/ ERROR;}
msousa@547: \n {ERROR;}
msousa@547: }
etisserant@0: /*******************************/
etisserant@0: /* B.1.2.2 Character Strings */
etisserant@0: /*******************************/
etisserant@0: {double_byte_character_string} {yylval.ID=strdup(yytext); return double_byte_character_string_token;}
etisserant@0: {single_byte_character_string} {yylval.ID=strdup(yytext); return single_byte_character_string_token;}
etisserant@0:
etisserant@0:
etisserant@0: /******************************/
etisserant@0: /* B.1.2.1 Numeric literals */
etisserant@0: /******************************/
etisserant@0: {integer} {yylval.ID=strdup(yytext); return integer_token;}
etisserant@0: {real} {yylval.ID=strdup(yytext); return real_token;}
etisserant@0: {binary_integer} {yylval.ID=strdup(yytext); return binary_integer_token;}
etisserant@0: {octal_integer} {yylval.ID=strdup(yytext); return octal_integer_token;}
etisserant@0: {hex_integer} {yylval.ID=strdup(yytext); return hex_integer_token;}
etisserant@0:
etisserant@0:
etisserant@0: /*****************************************/
etisserant@0: /* B.1.1 Letters, digits and identifiers */
etisserant@0: /*****************************************/
mjsousa@866: {identifier}/({st_whitespace_or_pragma_or_comment})"=>" {yylval.ID=strdup(yytext); return sendto_identifier_token;}
mjsousa@866: {identifier}/({il_whitespace_or_pragma_or_comment})"=>" {yylval.ID=strdup(yytext); return sendto_identifier_token;}
etisserant@0: {identifier} {yylval.ID=strdup(yytext);
mario@75: // printf("returning identifier...: %s, %d\n", yytext, get_identifier_token(yytext));
etisserant@0: return get_identifier_token(yytext);}
etisserant@0:
etisserant@0:
etisserant@0:
etisserant@0:
etisserant@0:
etisserant@0:
etisserant@0: /************************************************/
etisserant@0: /************************************************/
etisserant@0: /************************************************/
etisserant@0: /***** *****/
etisserant@0: /***** *****/
etisserant@0: /***** T H E L E F T O V E R S . . . *****/
etisserant@0: /***** *****/
etisserant@0: /***** *****/
etisserant@0: /************************************************/
etisserant@0: /************************************************/
etisserant@0: /************************************************/
etisserant@0:
etisserant@0: /* do the single character tokens...
etisserant@0: *
etisserant@0: * e.g.: ':' '(' ')' '+' '*' ...
etisserant@0: */
etisserant@0: . {return yytext[0];}
etisserant@0:
etisserant@0:
etisserant@0: %%
etisserant@0:
etisserant@0:
msousa@757: /*************************/
msousa@757: /* Tracking Functions... */
msousa@757: /*************************/
msousa@757:
mjsousa@880: #define MAX_LINE_LENGTH 1024
msousa@757:
msousa@757: tracking_t *GetNewTracking(FILE* in_file) {
msousa@757: tracking_t* new_env = new tracking_t;
msousa@757: new_env->eof = 0;
msousa@757: new_env->lineNumber = 0;
msousa@757: new_env->currentChar = 0;
msousa@757: new_env->lineLength = 0;
msousa@757: new_env->currentTokenStart = 0;
mjsousa@879: new_env->buffer = (char*)malloc(MAX_LINE_LENGTH);
msousa@757: new_env->in_file = in_file;
msousa@757: return new_env;
msousa@757: }
msousa@757:
msousa@757:
mjsousa@879: void FreeTracking(tracking_t *tracking) {
mjsousa@879: free(tracking->buffer);
mjsousa@879: delete tracking;
mjsousa@879: }
mjsousa@879:
mjsousa@879:
msousa@757: /* GetNextChar: reads a character from input */
msousa@757: int GetNextChar(char *b, int maxBuffer) {
msousa@757: char *p;
msousa@757:
msousa@757: if ( current_tracking->eof )
msousa@757: return 0;
msousa@757:
msousa@757: while ( current_tracking->currentChar >= current_tracking->lineLength ) {
msousa@757: current_tracking->currentChar = 0;
msousa@757: current_tracking->currentTokenStart = 1;
msousa@757: current_tracking->eof = false;
msousa@757:
mjsousa@879: p = fgets(current_tracking->buffer, MAX_LINE_LENGTH, current_tracking->in_file);
msousa@757: if ( p == NULL ) {
msousa@757: if ( ferror(current_tracking->in_file) )
msousa@757: return 0;
msousa@757: current_tracking->eof = true;
msousa@757: return 0;
msousa@757: }
msousa@757:
msousa@757: current_tracking->lineLength = strlen(current_tracking->buffer);
mjsousa@880:
mjsousa@880: /* only increment line number if the buffer was big enough to read the whole line! */
mjsousa@880: char last_char = current_tracking->buffer[current_tracking->lineLength - 1];
mjsousa@880: if (('\n' == last_char) || ('\r' == last_char)) // '\r' ---> CR, '\n' ---> LF
mjsousa@880: current_tracking->lineNumber++;
msousa@757: }
msousa@757:
msousa@757: b[0] = current_tracking->buffer[current_tracking->currentChar];
msousa@757: if (b[0] == ' ' || b[0] == '\t')
msousa@757: current_tracking->currentTokenStart++;
msousa@757: current_tracking->currentChar++;
msousa@757:
msousa@757: return b[0]==0?0:1;
msousa@757: }
msousa@757:
msousa@757:
msousa@757:
msousa@757:
etisserant@0: /***********************************/
etisserant@0: /* Utility function definitions... */
etisserant@0: /***********************************/
etisserant@0:
etisserant@0: /* print the include file stack to stderr... */
etisserant@0: void print_include_stack(void) {
etisserant@0: int i;
etisserant@0:
etisserant@0: if ((include_stack_ptr - 1) >= 0)
etisserant@0: fprintf (stderr, "in file ");
etisserant@0: for (i = include_stack_ptr - 1; i >= 0; i--)
lbessard@136: fprintf (stderr, "included from file %s:%d\n", include_stack[i].filename, include_stack[i].env->lineNumber);
etisserant@0: }
etisserant@0:
etisserant@0:
msousa@756:
msousa@756: /* set the internal state variables of lexical analyser to process a new include file */
msousa@756: void handle_include_file_(FILE *filehandle, const char *filename) {
msousa@756: if (include_stack_ptr >= MAX_INCLUDE_DEPTH) {
msousa@756: fprintf(stderr, "Includes nested too deeply\n");
msousa@756: exit( 1 );
msousa@756: }
msousa@756:
msousa@756: yyin = filehandle;
msousa@756:
msousa@756: include_stack[include_stack_ptr].buffer_state = YY_CURRENT_BUFFER;
msousa@756: include_stack[include_stack_ptr].env = current_tracking;
msousa@756: include_stack[include_stack_ptr].filename = current_filename;
msousa@756:
msousa@756: current_filename = strdup(filename);
msousa@756: current_tracking = GetNewTracking(yyin);
msousa@756: include_stack_ptr++;
msousa@756:
msousa@756: /* switch input buffer to new file... */
msousa@756: yy_switch_to_buffer(yy_create_buffer(yyin, YY_BUF_SIZE));
msousa@756: }
msousa@756:
msousa@756:
msousa@756:
msousa@756: /* insert the code (in ) into the source code we are parsing.
msousa@756: * This is done by creating an artificial file with that new source code, and then 'including' the file
msousa@756: */
msousa@757: void include_string_(const char *source_code) {
msousa@756: FILE *tmp_file = tmpfile();
msousa@756:
msousa@756: if(tmp_file == NULL) {
msousa@756: perror("Error creating temp file.");
msousa@756: exit(EXIT_FAILURE);
msousa@756: }
msousa@756:
msousa@756: fwrite((void *)source_code, 1, strlen(source_code), tmp_file);
msousa@756: rewind(tmp_file);
msousa@756:
msousa@756: /* now parse the tmp file, by asking flex to handle it as if it had been included with the (*#include ... *) pragma... */
msousa@756: handle_include_file_(tmp_file, "");
msousa@756: //fclose(tmp_file); /* do NOT close file. It must only be closed when we finish reading from it! */
msousa@756: }
msousa@756:
msousa@756:
msousa@756:
msousa@756: /* Open an include file, and set the internal state variables of lexical analyser to process a new include file */
msousa@756: void include_file(const char *filename) {
msousa@756: FILE *filehandle = NULL;
msousa@756:
msousa@756: for (int i = 0; (INCLUDE_DIRECTORIES[i] != NULL) && (filehandle == NULL); i++) {
msousa@756: char *full_name;
msousa@756: full_name = strdup3(INCLUDE_DIRECTORIES[i], "/", filename);
msousa@756: if (full_name == NULL) {
msousa@756: fprintf(stderr, "Out of memory!\n");
msousa@756: exit( 1 );
msousa@756: }
msousa@756: filehandle = fopen(full_name, "r");
msousa@756: free(full_name);
msousa@756: }
msousa@756:
msousa@756: if (NULL == filehandle) {
msousa@756: fprintf(stderr, "Error opening included file %s\n", filename);
msousa@756: exit( 1 );
msousa@756: }
msousa@756:
msousa@756: /* now process the new file... */
msousa@756: handle_include_file_(filehandle, filename);
msousa@756: }
msousa@756:
msousa@756:
msousa@756:
msousa@756:
msousa@756:
etisserant@0: /* return all the text in the current token back to the input stream, except the first n chars. */
etisserant@0: void unput_text(unsigned int n) {
etisserant@0: /* it seems that flex has a bug in that it will not correctly count the line numbers
etisserant@0: * if we return newlines back to the input stream. These newlines will be re-counted
etisserant@0: * a second time when they are processed again by flex.
etisserant@0: * We therefore determine how many newlines are in the text we are returning,
etisserant@0: * and decrement the line counter acordingly...
etisserant@0: */
mjsousa@879: /*
mjsousa@879: unsigned int i;
lbessard@136:
etisserant@0: for (i = n; i < strlen(yytext); i++)
etisserant@0: if (yytext[i] == '\n')
mjsousa@879: current_tracking->lineNumber--;
mjsousa@879: */
etisserant@0: /* now return all the text back to the input stream... */
etisserant@0: yyless(n);
etisserant@0: }
etisserant@0:
etisserant@0:
msousa@547: /* return all the text in the current token back to the input stream,
msousa@547: * but first return to the stream an additional character to mark the end of the token.
msousa@547: */
msousa@547: void unput_and_mark(const char c) {
msousa@547: char *yycopy = strdup( yytext ); /* unput() destroys yytext, so we copy it first */
msousa@547: unput(c);
msousa@547: for (int i = yyleng-1; i >= 0; i--)
msousa@547: unput(yycopy[i]);
msousa@547:
msousa@547: free(yycopy);
msousa@547: }
msousa@547:
msousa@547:
msousa@547:
mjsousa@1016: /* The body_state tries to find a ';' before a END_PROGRAM, END_FUNCTION or END_FUNCTION_BLOCK or END_ACTION
mjsousa@1016: * To do so, it must ignore comments and pragmas. This means that we cannot do this in a signle lex rule.
mjsousa@1016: * However, we must store any text we consume in every rule, so we can push it back into the buffer
mjsousa@1016: * once we have decided if we are parsing ST or IL code. The following functions manage that buffer used by
mjsousa@1016: * the body_state.
mjsousa@1016: */
mjsousa@1016: /* The buffer used by the body_state state */
mjsousa@1016: char *bodystate_buffer = NULL;
mjsousa@1016:
mjsousa@1016: /* append text to bodystate_buffer */
mjsousa@1016: void append_bodystate_buffer(const char *text) {
mjsousa@1020: //printf("<<>> %d <%s><%s>\n", bodystate_buffer, text, (NULL != bodystate_buffer)?bodystate_buffer:"NULL");
mjsousa@1016: long int old_len = 0;
mjsousa@1016: if (NULL != bodystate_buffer) old_len = strlen(bodystate_buffer);
mjsousa@1016: bodystate_buffer = (char *)realloc(bodystate_buffer, old_len + strlen(text) + 1);
mjsousa@1016: if (NULL == bodystate_buffer) ERROR;
mjsousa@1016: strcpy(bodystate_buffer + old_len, text);
mjsousa@1016: //printf("=<%s> %d %d\n", (NULL != bodystate_buffer)?bodystate_buffer:NULL, old_len + strlen(text) + 1, bodystate_buffer);
mjsousa@1016: }
mjsousa@1016:
mjsousa@1016: /* Return all data in bodystate_buffer back to flex, and empty bodystate_buffer. */
mjsousa@1016: void unput_bodystate_buffer(void) {
mjsousa@1016: if (NULL == bodystate_buffer) ERROR;
mjsousa@1016: //printf("<<>>\n%s\n", bodystate_buffer);
mjsousa@1016:
mjsousa@1016: for (long int i = strlen(bodystate_buffer)-1; i >= 0; i--)
mjsousa@1016: unput(bodystate_buffer[i]);
mjsousa@1016:
mjsousa@1016: free(bodystate_buffer);
mjsousa@1016: bodystate_buffer = NULL;
mjsousa@1016: }
mjsousa@1016:
mjsousa@1016:
mjsousa@1016: /* Return true if bodystate_buffer is empty */
mjsousa@1016: int isempty_bodystate_buffer(void) {
mjsousa@1016: return (NULL == bodystate_buffer);
mjsousa@1016: }
mjsousa@1016:
mjsousa@1016:
mjsousa@1016:
mjsousa@1016:
/* Called by flex when it reaches the end-of-file.
 *
 * Returning 0 would tell flex that a new input file has been set up and that
 * scanning should go on; returning 1 tells it to stop.
 * No further file is ever set up here, so the answer is always 1.
 */
int yywrap(void)
{
  const int stop_scanning = 1;

  return stop_scanning;
}
etisserant@0:
etisserant@0:
etisserant@0:
msousa@757: /*******************************/
msousa@757: /* Public Interface for Bison. */
msousa@757: /*******************************/
msousa@757:
msousa@757: /* The following functions will be called from inside bison code! */
msousa@757:
msousa@757: void include_string(const char *source_code) {include_string_(source_code);}
msousa@757:
msousa@757:
msousa@757: /* Tell flex which file to parse. This function will not imediately start parsing the file.
msousa@757: * To parse the file, you then need to call yyparse()
msousa@757: *
mjsousa@761: * Returns NULL on error opening the file (and a valid errno), or 0 on success.
mjsousa@761: * Caller must close the file!
mjsousa@761: */
mjsousa@761: FILE *parse_file(const char *filename) {
msousa@757: FILE *filehandle = NULL;
msousa@757:
mjsousa@761: if((filehandle = fopen(filename, "r")) != NULL) {
mjsousa@761: yyin = filehandle;
mjsousa@761: current_filename = strdup(filename);
mjsousa@761: current_tracking = GetNewTracking(yyin);
mjsousa@761: }
mjsousa@761: return filehandle;
msousa@757: }
msousa@757:
msousa@757:
msousa@757:
msousa@757:
msousa@757:
msousa@757:
etisserant@0: /*************************************/
etisserant@0: /* Include a main() function to test */
etisserant@0: /* the token parsing by flex.... */
etisserant@0: /*************************************/
etisserant@0: #ifdef TEST_MAIN
etisserant@0:
etisserant@0: #include "../util/symtable.hh"
etisserant@0:
/* Globals defined here only for the TEST_MAIN build.
 * NOTE(review): these look like stand-ins for the variables normally defined by
 * the bison generated parser, so that the lexer can be linked on its own -- confirm.
 */
etisserant@0: yystype yylval;
etisserant@0: YYLTYPE yylloc;
etisserant@0:
etisserant@0:
mario@15:
mario@15:
/* Stub used by the stand-alone lexer test: every identifier gets token value 0. */
int get_identifier_token(const char *identifier_str) {
  (void)identifier_str;  /* the name is deliberately ignored */
  return 0;
}
/* Stub used by the stand-alone lexer test: every direct variable gets token value 0. */
int get_direct_variable_token(const char *direct_variable_str) {
  (void)direct_variable_str;  /* the name is deliberately ignored */
  return 0;
}
etisserant@0:
etisserant@0:
etisserant@0: int main(int argc, char **argv) {
etisserant@0:
etisserant@0: FILE *in_file;
etisserant@0: int res;
lbessard@136:
etisserant@0: if (argc == 1) {
etisserant@0: /* Work as an interactive (command line) parser... */
etisserant@0: while((res=yylex()))
etisserant@0: fprintf(stderr, "(line %d)token: %d\n", yylineno, res);
etisserant@0: } else {
etisserant@0: /* Work as non-interactive (file) parser... */
etisserant@0: if((in_file = fopen(argv[1], "r")) == NULL) {
etisserant@0: char *errmsg = strdup2("Error opening main file ", argv[1]);
etisserant@0: perror(errmsg);
etisserant@0: free(errmsg);
etisserant@0: return -1;
etisserant@0: }
etisserant@0:
etisserant@0: /* parse the file... */
etisserant@0: yyin = in_file;
etisserant@0: current_filename = argv[1];
etisserant@0: while(1) {
etisserant@0: res=yylex();
etisserant@0: fprintf(stderr, "(line %d)token: %d (%s)\n", yylineno, res, yylval.ID);
etisserant@0: }
etisserant@0: }
lbessard@136:
lbessard@136: return 0;
etisserant@0:
etisserant@0: }
etisserant@0: #endif