etisserant@0: /* msousa@264: * matiec - a compiler for the programming languages defined in IEC 61131-3 msousa@264: * msousa@264: * Copyright (C) 2003-2011 Mario de Sousa (msousa@fe.up.pt) msousa@264: * msousa@264: * This program is free software: you can redistribute it and/or modify msousa@264: * it under the terms of the GNU General Public License as published by mjsousa@866: * the Free Software Foundation, either version 3 of the License, or msousa@264: * (at your option) any later version. msousa@264: * msousa@264: * This program is distributed in the hope that it will be useful, msousa@264: * but WITHOUT ANY WARRANTY; without even the implied warranty of msousa@264: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the msousa@264: * GNU General Public License for more details. msousa@264: * msousa@264: * You should have received a copy of the GNU General Public License msousa@264: * along with this program. If not, see <http://www.gnu.org/licenses/>. msousa@264: * etisserant@0: * etisserant@0: * This code is made available on the understanding that it will not be etisserant@0: * used in safety-critical situations without a full and competent review. etisserant@0: */ etisserant@0: etisserant@0: /* msousa@264: * An IEC 61131-3 compiler. etisserant@0: * etisserant@0: * Based on the etisserant@0: * FINAL DRAFT - IEC 61131-3, 2nd Ed. (2001-12-10) etisserant@0: * etisserant@0: */ etisserant@0: etisserant@0: /* etisserant@0: * Stage 1 etisserant@0: * ======= etisserant@0: * etisserant@0: * This file contains the lexical tokens definitions, from which etisserant@0: * the flex utility will generate a lexical parser function. etisserant@0: */ etisserant@0: etisserant@0: etisserant@0: etisserant@0: etisserant@0: /*****************************/ etisserant@0: /* Lexical Parser Options... 
*/ etisserant@0: /*****************************/ etisserant@0: etisserant@0: /* The lexical analyser will never work in interactive mode, etisserant@0: * i.e., it will only process programs saved to files, and never etisserant@0: * programs being written inter-actively by the user. etisserant@0: * This option saves the resulting parser from calling the etisserant@0: * isatty() function, that seems to be generating some compile etisserant@0: * errors under some (older?) versions of flex. etisserant@0: */ etisserant@0: %option never-interactive etisserant@0: etisserant@0: /* Have the lexical analyser use a 'char *yytext' instead of an etisserant@0: * array of char 'char yytext[??]' to store the lexical token. etisserant@0: */ etisserant@0: %pointer etisserant@0: etisserant@0: etisserant@0: /* Have the lexical analyser ignore the case of letters. etisserant@0: * This will occur for all the tokens and keywords, but etisserant@0: * the resulting text handed up to the syntax parser etisserant@0: * will not be changed, and keep the original case etisserant@0: * of the letters in the input file. etisserant@0: */ etisserant@0: %option case-insensitive etisserant@0: etisserant@0: /* Have the generated lexical analyser keep track of the etisserant@0: * line number it is currently analysing. etisserant@0: * This is used to pass up to the syntax parser etisserant@0: * the number of the line on which the current etisserant@0: * token was found. It will enable the syntax parser etisserant@0: * to generate more informative error messages... etisserant@0: */ etisserant@0: %option yylineno etisserant@0: etisserant@0: /* required for the use of the yy_pop_state() and etisserant@0: * yy_push_state() functions etisserant@0: */ etisserant@0: %option stack etisserant@0: etisserant@0: /* The '%option stack' also requests the inclusion of etisserant@0: * the yy_top_state(), however this function is not etisserant@0: * currently being used. 
This means that the compiler etisserant@0: * is complaining about the existence of this function. etisserant@0: * The following option removes the yy_top_state() etisserant@0: * function from the resulting c code, so the compiler etisserant@0: * no longer complains. etisserant@0: */ etisserant@0: %option noyy_top_state etisserant@0: msousa@547: /* We will be using unput() in our flex code, so we cannot set the following option!... */ msousa@547: /* msousa@267: %option nounput msousa@547: */ msousa@267: andrej@1050: /* The '%option debug' makes the generated scanner run in andrej@1050: * debug mode. andrej@1050: %option debug andrej@1050: */ andrej@1050: etisserant@0: /**************************************************/ etisserant@0: /* External Variable and Function declarations... */ etisserant@0: /**************************************************/ etisserant@0: etisserant@0: etisserant@0: %{ etisserant@0: /* Define TEST_MAIN to include a main() function. etisserant@0: * Useful for testing the parser generated by flex. etisserant@0: */ etisserant@0: /* etisserant@0: #define TEST_MAIN etisserant@0: */ etisserant@0: /* If lexical parser is compiled by itself, we need to define the following etisserant@0: * constant to some string. Under normal circumstances DEFAULT_LIBDIR is set etisserant@0: * in the syntax parser header file... etisserant@0: */ etisserant@0: #ifdef TEST_MAIN etisserant@40: #define DEFAULT_LIBDIR "just_testing" etisserant@0: #endif etisserant@0: etisserant@0: etisserant@0: etisserant@0: /* Required for strdup() */ etisserant@0: #include <string.h> etisserant@0: etisserant@0: /* Required only for the declaration of abstract syntax classes etisserant@0: * (class symbol_c; class token_c; class list_c;) etisserant@0: * These will not be used in flex, but the token type union defined Edouard@822: * in iec_bison.hh contains pointers to these classes, so we must include etisserant@0: * it here. 
etisserant@0: */ etisserant@0: #include "../absyntax/absyntax.hh" etisserant@0: mario@15: Edouard@822: /* iec_bison.hh is generated by bison. etisserant@0: * Contains the definition of the token constants, and the etisserant@0: * token value type YYSTYPE (in our case, a 'const char *') etisserant@0: */ Edouard@822: #include "iec_bison.hh" mario@15: #include "stage1_2_priv.hh" mario@15: etisserant@0: etisserant@0: /* Variable defined by the bison parser, etisserant@0: * where the value of the tokens will be stored etisserant@0: */ etisserant@0: extern YYSTYPE yylval; etisserant@0: etisserant@0: /* The name of the file currently being parsed... etisserant@0: * Note that flex accesses and updates this global variable msousa@757: * appropriately whenever it comes across an {#include "<filename>"} pragma directive... msousa@757: */ msousa@757: const char *current_filename = NULL; msousa@757: mario@15: etisserant@0: etisserant@0: /* Variable defined by the bison parser. etisserant@0: * It must be initialised with the location etisserant@0: * of the token being parsed. etisserant@0: * This is only needed if we want to keep etisserant@0: * track of the locations, in order to give etisserant@0: * more meaningful error messages! etisserant@0: */ conti@415: /* conti@415: *extern YYLTYPE yylloc; conti@415: */ lbessard@136: #define YY_INPUT(buf,result,max_size) {\ lbessard@136: result = GetNextChar(buf, max_size);\ lbessard@136: if ( result <= 0 )\ lbessard@136: result = YY_NULL;\ lbessard@136: } lbessard@136: msousa@287: etisserant@0: /* Macro that is executed for every action. etisserant@0: * We use it to pass the location of the token etisserant@0: * back to the bison parser... 
etisserant@0: */ lbessard@136: #define YY_USER_ACTION {\ msousa@287: yylloc.first_line = current_tracking->lineNumber; \ msousa@287: yylloc.first_column = current_tracking->currentTokenStart; \ msousa@287: yylloc.first_file = current_filename; \ msousa@287: yylloc.first_order = current_order; \ msousa@287: yylloc.last_line = current_tracking->lineNumber; \ msousa@287: yylloc.last_column = current_tracking->currentChar - 1; \ msousa@287: yylloc.last_file = current_filename; \ msousa@287: yylloc.last_order = current_order; \ msousa@287: current_tracking->currentTokenStart = current_tracking->currentChar; \ msousa@287: current_order++; \ etisserant@0: } etisserant@0: mjsousa@879: mjsousa@879: /* NOTE: YY_USER_ACTION sets first_line and last_line from the same lineNumber, and increments current_order once per executed action. */ etisserant@0: /* Since this lexical parser we defined only works in ASCII based etisserant@0: * systems, we might as well make sure it is being compiled on etisserant@0: * one... etisserant@0: * Let's check a few random characters... etisserant@0: */ etisserant@0: #if (('a' != 0x61) || ('A' != 0x41) || ('z' != 0x7A) || ('Z' != 0x5A) || \ etisserant@0: ('0' != 0x30) || ('9' != 0x39) || ('(' != 0x28) || ('[' != 0x5B)) etisserant@0: #error This lexical analyser is not portable to a non ASCII based system. etisserant@0: #endif etisserant@0: etisserant@0: etisserant@0: /* Function only called from within flex, but defined etisserant@0: * in iec.y! lbessard@3: * We declare it here... etisserant@0: * etisserant@0: * Search for a symbol in either of the two symbol tables etisserant@0: * and return the token id of the first symbol found. 
etisserant@0: * Searches first in the variables, and only if not found etisserant@0: * does it continue searching in the library elements etisserant@0: */ etisserant@0: //token_id_t get_identifier_token(const char *identifier_str); etisserant@0: int get_identifier_token(const char *identifier_str); etisserant@0: %} etisserant@0: etisserant@0: etisserant@0: /***************************************************/ etisserant@0: /* Forward Declaration of functions defined later. */ etisserant@0: /***************************************************/ etisserant@0: etisserant@0: %{ etisserant@0: /* return all the text in the current token back to the input stream. */ etisserant@0: void unput_text(unsigned int n); msousa@547: /* return all the text in the current token back to the input stream, msousa@547: * but first return to the stream an additional character to mark the end of the token. msousa@547: */ msousa@547: void unput_and_mark(const char c); msousa@756: msousa@756: void include_file(const char *include_filename); msousa@757: mjsousa@1016: /* The body_state tries to find a ';' before a END_PROGRAM, END_FUNCTION or END_FUNCTION_BLOCK or END_ACTION mjsousa@1016: * To do so, it must ignore comments and pragmas. This means that we cannot do this in a single lex rule. mjsousa@1016: * However, we must store any text we consume in every rule, so we can push it back into the buffer mjsousa@1016: * once we have decided if we are parsing ST or IL code. The following functions manage that buffer used by mjsousa@1016: * the body_state. mjsousa@1016: */ mjsousa@1016: void append_bodystate_buffer(const char *yytext); mjsousa@1016: void unput_bodystate_buffer(void); mjsousa@1016: int isempty_bodystate_buffer(void); mjsousa@1016: msousa@757: int GetNextChar(char *b, int maxBuffer); etisserant@0: %} etisserant@0: etisserant@0: etisserant@0: etisserant@0: /****************************/ etisserant@0: /* Lexical Parser States... 
*/ etisserant@0: /****************************/ etisserant@0: etisserant@0: /* NOTE: Our parser can parse st or il code, intermixed etisserant@0: * within the same file. etisserant@0: * With IL we come across the issue of the EOL (end of line) token. etisserant@0: * ST, and the declaration parts of IL do not use this token! etisserant@0: * If the lexical analyser were to issue this token during ST etisserant@0: * language parsing, or during the declaration of data types, etisserant@0: * function headers, etc. in IL, the syntax parser would crash. etisserant@0: * etisserant@0: * We can solve this issue using one of three methods: etisserant@0: * (1) Augment all the syntax that does not accept the EOL etisserant@0: * token to simply ignore it. This makes the syntax etisserant@0: * definition (in iec.y) very cluttered! etisserant@0: * (2) Let the lexical parser figure out which language etisserant@0: * it is parsing, and decide whether or not to issue etisserant@0: * the EOL token. This requires the lexical parser etisserant@0: * to have knowledge of the syntax!, making for a poor etisserant@0: * overall organisation of the code. It would also make it etisserant@0: * very difficult to understand the lexical parser as it etisserant@0: * would use several states, and a state machine to transition etisserant@0: * between the states. The state transitions would be etisserant@0: * intermingled with the lexical parser definition! etisserant@0: * (3) Use a mixture of (1) and (2). The lexical analyser etisserant@0: * merely distinguishes between function headers and function etisserant@0: * bodies, but no longer makes a distinction between il and etisserant@0: * st language bodies. When parsing a body, it will return etisserant@0: * the EOL token. In other states '\n' will be ignored as etisserant@0: * whitespace. 
etisserant@0: * The ST language syntax has been augmented in the syntax etisserant@0: * parser configuration to ignore any EOL tokens that it may etisserant@0: * come across! etisserant@0: * This option has both drawbacks of option (1) and (2), but etisserant@0: * much less intensely. etisserant@0: * The syntax that gets cluttered is limited to the ST statements etisserant@0: * (which is rather limited, compared to the function headers and etisserant@0: * data type declarations, etc...), while the state machine in etisserant@0: * the lexical parser becomes very simple. All state transitions etisserant@0: * can be handled within the lexical parser by itself, and can be etisserant@0: * easily identified. Thus knowledge of the syntax required by etisserant@0: * the lexical parser is very limited! etisserant@0: * etisserant@0: * Amazingly enough, I (Mario) got to implement option (3) etisserant@0: * at first, requiring two basic states, decl and body. etisserant@0: * The lexical parser will enter the body state when etisserant@0: * it is parsing the body of a function/program/function block. The etisserant@0: * state transition is done when we find a VAR_END that is not followed etisserant@0: * by a VAR! This is the syntax knowledge that gets included in the etisserant@0: * lexical analyser with this option! etisserant@0: * Unfortunately, getting the st syntax parser to ignore EOL anywhere etisserant@0: * where they might appear leads to conflicts. This is due to the fact etisserant@0: * that the syntax parser uses the single look-ahead token to remove etisserant@0: * possible conflicts. When we insert a possible EOL, the single etisserant@0: * look ahead token becomes the EOL, which means the potential conflicts etisserant@0: * could no longer be resolved. etisserant@0: * Removing these conflicts would make the st syntax parser very convoluted, etisserant@0: * and adding the extraneous EOL would make it very cluttered. 
etisserant@0: * This option was therefore dropped in favour of another! etisserant@0: * etisserant@0: * I ended up implementing (2). Unfortunately the lexical analyser can etisserant@0: * not easily distinguish between il and st code, since function etisserant@0: * calls in il are very similar to function block calls in st. etisserant@0: * We therefore use an extra 'body' state. When the lexical parser etisserant@0: * finds the last END_VAR, it enters the body state. This state etisserant@0: * must figure out what language is being parsed from the first few mario@68: * tokens, and switch to the correct state (st, il or sfc) according to the etisserant@0: * language. This means that we insert quite a bit of knowledge of the etisserant@0: * syntax of the languages into the lexical parser. This is ugly, but it etisserant@0: * works, and at least it is possible to keep all the state changes together etisserant@0: * to make it easier to remove them later on if need be. mario@68: * Once the language being parsed has been identified, mario@68: * the body state returns any matched text back to the buffer with unput(), mario@68: * to be later matched correctly by the appropriate language parser (st, il or sfc). mario@68: * mario@68: * Additionally, in sfc state it may further recursively enter the body state mario@68: * once again. This is because an sfc body may contain ACTIONS, which are then mario@68: * written in one of the three languages (ST, IL or SFC), so once again we need mario@68: * to figure out which language the ACTION in the SFC was written in. We already mario@68: * have all that done in the body state, so we recursively transition to the body mario@68: * state once again. mario@68: * Note that in this case, when coming out of the st/il state (whichever language mario@68: * the action was written in) the sfc state will become active again. This is done by mario@68: * pushing and popping the previously active state! 
mario@68: * mario@68: * The sfc_qualifier_state is required because when parsing actions within an mario@68: * sfc, we will be expecting action qualifiers (N, P, R, S, DS, SD, ...). In order mario@68: * for bison to work correctly, these qualifiers must be returned as tokens. However, mario@68: * these tokens are not reserved keywords, which means it should be possible to mario@68: * define variables/functions/FBs with any of these names (including mario@68: * S and R which are special because they are also IL operators). So, when we are not mario@68: * expecting any action qualifiers, flex does not return these tokens, and is free mario@68: * to interpret them as previously defined variables/functions/... as the case may be. mario@68: * msousa@547: * The time_literal_state is required because TIME# literals are decomposed into msousa@547: * portions, and we want to send these portions one by one to bison. Each portion will msousa@547: * represent the value in days/hours/minutes/seconds/ms. msousa@547: * Unfortunately, some of these portions may also be lexically analysed as an identifier. So, msousa@547: * we need to disable lexical identification of identifiers while parsing TIME# literals! msousa@547: * e.g.: TIME#55d_4h_56m msousa@547: * We would like to return to bison the tokens 'TIME' '#' '55d' '_' '4h' '_' '56m' msousa@547: * Unfortunately, flex will join '_' and '4h' to create a legal {identifier} '_4h', msousa@547: * and return that identifier instead! So, we added this state! msousa@547: * mjsousa@952: * The ignore_pou_state state is only used when bison says it is doing the pre-parsing. mjsousa@952: * During pre-parsing, the main state machine will only transition between mjsousa@952: * INITIAL and ignore_pou_state, and from here back to INITIAL. All other mjsousa@952: * transitions are inhibited. 
This inhibition is actually just enforced by making mjsousa@952: * sure that the INITIAL ---> ignore_pou_state transition is tested before all other mjsousa@952: * transitions coming out of INITIAL state. All other transitions are unaffected, as they mjsousa@952: * never get a chance to be evaluated when bison is doing pre-parsing. mjsousa@952: * Pre-parsing is a first quick scan through the whole input source code simply mjsousa@952: * to determine the list of POUs and datatypes that will be defined in that mjsousa@952: * code. Basically, the objective is to fill up the previously_declared_xxxxx mjsousa@952: * maps, without processing the code itself. Once these maps have been filled up, mjsousa@952: * bison will throw away the AST (abstract syntax tree) created up to that point, mjsousa@952: * and scan through the same source code again, but this time creating a correct AST. mjsousa@952: * This pre-scan allows the source code to reference POUs and datatypes that are mjsousa@952: * only declared after they are used! mjsousa@868: * mjsousa@952: * mjsousa@952: * Here is a main state machine... mjsousa@952: * --+ mjsousa@952: * | these states are mjsousa@952: * +------------> get_pou_name_state ----> ignore_pou_state | only active mjsousa@952: * | | | when bison is mjsousa@952: * | ------------------------------------------+ | doing the mjsousa@952: * | | | pre-parsing!! 
mjsousa@952: * | v --+ mjsousa@868: * +---> INITIAL <-------> config mjsousa@868: * | \ mjsousa@868: * | V mjsousa@868: * | header_state mjsousa@868: * | | mjsousa@868: * | V mjsousa@868: * vardecl_list_state <------> var_decl mjsousa@868: * ^ | mjsousa@868: * | | [using push()] mjsousa@868: * | | mjsousa@868: * | V mjsousa@868: * | body, mjsousa@868: * | | mjsousa@868: * | | mjsousa@868: * | ------------------- mjsousa@868: * | | | | mjsousa@868: * | v v v mjsousa@868: * | st il sfc mjsousa@868: * | | | | [using pop() when leaving st/il/sfc => goes to vardecl_list_state] mjsousa@868: * | | | | mjsousa@868: * ----------------------- mjsousa@868: * mjsousa@868: * NOTE:- When inside sfc, and an action or transition in ST/IL is found, then mjsousa@868: * we also push() to the body state. This means that sometimes, when pop()ing mjsousa@868: * from st and il, the state machine may return to the sfc state! mjsousa@868: * - The transitions from sfc to body will be decided by bison, which will mjsousa@868: * tell flex to do the transition by calling cmd_goto_body_state(). mjsousa@868: * mjsousa@866: * etisserant@0: * Possible state changes are: mjsousa@952: * INITIAL -> goto(ignore_pou_state) mjsousa@952: * (This transition state is only used when bison says it is doing the pre-parsing.) mjsousa@952: * (This transition takes precedence over all other transitions!) mjsousa@952: * (when a FUNCTION, FUNCTION_BLOCK, PROGRAM or CONFIGURATION is found) mjsousa@952: * mario@68: * INITIAL -> goto(config_state) mario@68: * (when a CONFIGURATION is found) mjsousa@866: * mjsousa@866: * INITIAL -> goto(header_state) mjsousa@866: * (when a FUNCTION, FUNCTION_BLOCK, or PROGRAM is found) mjsousa@952: * mjsousa@866: * header_state -> goto(vardecl_list_state) mjsousa@866: * (When the first VAR token is found, i.e. at beginning of first VAR .. 
END_VAR declaration) mjsousa@866: * mjsousa@866: * vardecl_list_state -> push current state (vardecl_list_state), and goto(vardecl_state) mjsousa@866: * (when a VAR token is found) mjsousa@866: * vardecl_state -> pop() to (vardecl_list_state) mjsousa@866: * (when a END_VAR token is found) mjsousa@866: * mjsousa@868: * vardecl_list_state -> push current state (vardecl_list_state), and goto(body_state) mjsousa@866: * (when the last END_VAR is found!) mjsousa@866: * mjsousa@868: * body_state -> goto(sfc_state) mario@68: * (when it figures out it is parsing sfc language) mjsousa@868: * body_state -> goto(st_state) mario@68: * (when it figures out it is parsing st language) mjsousa@868: * body_state -> goto(il_state) mario@68: * (when it figures out it is parsing il language) mjsousa@868: * st_state -> pop() to vardecl_list_state mario@68: * (when a END_FUNCTION, END_FUNCTION_BLOCK, END_PROGRAM, mario@68: * END_ACTION or END_TRANSITION is found) mjsousa@868: * il_state -> pop() to vardecl_list_state mario@68: * (when a END_FUNCTION, END_FUNCTION_BLOCK, END_PROGRAM, mario@68: * END_ACTION or END_TRANSITION is found) mjsousa@868: * sfc_state -> pop() to vardecl_list_state mario@68: * (when a END_FUNCTION, END_FUNCTION_BLOCK, or END_PROGRAM is found) mjsousa@866: * mjsousa@952: * ignore_pou_state -> goto(INITIAL) mjsousa@952: * (when a END_FUNCTION, END_FUNCTION_BLOCK, END_PROGRAM or END_CONFIGURATION is found) mjsousa@868: * vardecl_list_state -> goto(INITIAL) mjsousa@952: * (when a END_FUNCTION, END_FUNCTION_BLOCK, or END_PROGRAM is found) mjsousa@952: * config_state -> goto(INITIAL) mjsousa@952: * (when a END_CONFIGURATION is found) mjsousa@866: * mjsousa@866: * mjsousa@866: * sfc_state -> push current state(sfc_state); goto(body_state) mario@68: * (when parsing an action. This transition is requested by bison) mjsousa@866: * sfc_state -> push current state(sfc_state); goto(sfc_qualifier_state) mario@68: * (when expecting an action qualifier. 
This transition is requested by bison) mjsousa@866: * sfc_qualifier_state -> pop() to sfc_state mario@68: * (when no longer expecting an action qualifier. This transition is requested by bison) mjsousa@866: * mario@74: * config_state -> push(config_state); goto(task_init_state) mario@74: * (when parsing a task initialisation. This transition is requested by bison) mario@74: * task_init_state -> pop() mario@74: * (when no longer parsing task initialisation parameters. This transition is requested by bison) mario@74: * mjsousa@866: * mjsousa@866: * There is another secondary state machine for parsing comments, another for file_includes, mjsousa@866: * and yet another for time literals. mario@74: */ mario@68: mario@68: mjsousa@952: /* Bison is in the pre-parsing stage, and we are parsing a POU. Ignore everything up to the end of the POU! */ mjsousa@952: %x ignore_pou_state mjsousa@952: %x get_pou_name_state mjsousa@952: etisserant@0: /* we are parsing a configuration. */ lbessard@3: %s config_state etisserant@0: mario@74: /* Inside a configuration, we are parsing task initialisation parameters */ mario@74: /* This means that PRIORITY, SINGLE and INTERVAL must be handled as mario@74: * tokens, and not as possible identifiers. Note that the above words mario@74: * are not keywords. mario@74: */ mario@74: %s task_init_state mario@74: mjsousa@866: /* we are looking for the first VAR inside a function's, program's or function block's declaration */ mjsousa@868: /* This is not exclusive (%x) as we must be able to parse the identifier and data types of a function/FB */ mjsousa@866: %s header_state mjsousa@866: mjsousa@866: /* we are parsing a function, program or function block sequence of VAR..END_VAR declarations */ mjsousa@866: %x vardecl_list_state mjsousa@866: /* a substate of the vardecl_list_state: we are inside a specific VAR .. END_VAR */ mjsousa@866: %s vardecl_state etisserant@0: mjsousa@868: /* we will be parsing a function body/action/transition. 
Whether il/st/sfc remains to be determined */ mario@68: %x body_state etisserant@0: etisserant@0: /* we are parsing il code -> flex must return the EOL tokens! */ lbessard@3: %s il_state etisserant@0: etisserant@0: /* we are parsing st code -> flex must not return the EOL tokens! */ lbessard@3: %s st_state etisserant@0: mario@68: /* we are parsing sfc code -> flex must not return the EOL tokens! */ lbessard@3: %s sfc_state etisserant@0: mario@68: /* we are parsing sfc code, and expecting an action qualifier. */ mario@68: %s sfc_qualifier_state etisserant@0: mario@86: /* we are parsing sfc code, and expecting the priority token. */ mario@86: %s sfc_priority_state etisserant@0: msousa@547: /* we are parsing a TIME# literal. We must not return any {identifier} tokens. */ msousa@547: %x time_literal_state mario@75: mjsousa@866: /* we are parsing a comment. */ mjsousa@866: %x comment_state mjsousa@866: mario@75: etisserant@0: /*******************/ etisserant@0: /* File #include's */ etisserant@0: /*******************/ etisserant@0: etisserant@0: /* We extend the IEC 61131-3 standard syntax to allow inclusion etisserant@0: * of other files, using the IEC 61131-3 pragma directive... etisserant@0: * The accepted syntax is: etisserant@0: * {#include "<filename>"} etisserant@0: * Optional whitespace is accepted between '#include' and the opening quote, etisserant@0: * and between the closing quote and the final '}'. etisserant@0: */ etisserant@0: etisserant@0: /* the "include" states are used for picking up the name of an include file */ etisserant@0: %x include_beg etisserant@0: %x include_filename etisserant@0: %x include_end etisserant@0: etisserant@0: etisserant@0: file_include_pragma_filename [^\"]* mjsousa@866: file_include_pragma_beg "{#include"{st_whitespace}\" mjsousa@866: file_include_pragma_end \"{st_whitespace}"}" etisserant@0: file_include_pragma {file_include_pragma_beg}{file_include_pragma_filename}{file_include_pragma_end} etisserant@0: etisserant@0: etisserant@0: %{ mjsousa@879: mjsousa@879: /* A counter to track the order by which each token is processed. 
mjsousa@879: * NOTE: This counter is not exactly linear (i.e., it does not get incremented by 1 for each token). mjsousa@879: * i.e.. it may get incremented by more than one between two consecutive tokens. mjsousa@879: * This is due to the fact that the counter gets incremented every 'user action' in flex, mjsousa@879: * however not every user action will result in a token being passed to bison. mjsousa@879: * Nevertheless this is still OK, as we are only interested in the relative mjsousa@879: * ordering of tokens... mjsousa@879: */ mjsousa@879: static long int current_order = 0; mjsousa@879: /* Input position tracking state. lineNumber, currentChar and currentTokenStart are read by YY_USER_ACTION to fill in yylloc. */ etisserant@0: typedef struct { msousa@757: int eof; msousa@757: int lineNumber; msousa@757: int currentChar; msousa@757: int lineLength; msousa@757: int currentTokenStart; msousa@757: char *buffer; msousa@757: FILE *in_file; msousa@757: } tracking_t; msousa@757: mjsousa@879: /* A forward declaration of a function defined at the end of this file. */ mjsousa@879: void FreeTracking(tracking_t *tracking); mjsousa@879: mjsousa@879: /* Number of entries in the include_stack[] array declared below. */ mjsousa@879: #define MAX_INCLUDE_DEPTH 16 mjsousa@879: msousa@757: typedef struct { etisserant@0: YY_BUFFER_STATE buffer_state; msousa@757: tracking_t *env; etisserant@0: const char *filename; etisserant@0: } include_stack_t; etisserant@0: msousa@757: tracking_t *current_tracking = NULL; etisserant@0: include_stack_t include_stack[MAX_INCLUDE_DEPTH]; etisserant@0: int include_stack_ptr = 0; etisserant@0: etisserant@0: const char *INCLUDE_DIRECTORIES[] = { etisserant@40: DEFAULT_LIBDIR, etisserant@40: ".", etisserant@40: "/lib", etisserant@40: "/usr/lib", etisserant@40: "/usr/lib/iec", etisserant@0: NULL /* must end with NULL!! */ etisserant@0: }; etisserant@0: %} etisserant@0: etisserant@0: etisserant@0: etisserant@0: /*****************************/ etisserant@0: /* Preliminary constructs... 
*/ etisserant@0: /*****************************/ etisserant@0: mjsousa@866: /* PRAGMAS */ mjsousa@866: /* ======= */ msousa@267: /* In order to allow the declaration of POU prototypes (Function, FB, Program, ...), msousa@267: * especially the prototypes of Functions and FBs defined in the standard msousa@267: * (i.e. standard functions and FBs), we extend the IEC 61131-3 standard syntax msousa@267: * with two pragmas to indicate that the code is to be parsed (going through the msousa@267: * lexical, syntactical, and semantic analysers), but no code is to be generated. msousa@267: * msousa@267: * The accepted syntax is: msousa@267: * {disable code generation} msousa@267: * ... prototypes ... msousa@267: * {enable code generation} msousa@267: * msousa@267: * When parsing these prototypes the abstract syntax tree will be populated as usual, msousa@267: * allowing the semantic analyser to correctly analyse the semantics of calls to these msousa@267: * functions/FBs. However, stage4 will simply ignore all IEC61131-3 code msousa@267: * between the above two pragmas. msousa@267: */ msousa@267: msousa@267: disable_code_generation_pragma "{disable code generation}" msousa@267: enable_code_generation_pragma "{enable code generation}" msousa@267: msousa@267: msousa@267: /* Any other pragma: either {...} containing no closing brace, or {{...}} in which a single closing brace may appear as long as it is not immediately followed by another one. */ mjsousa@869: pragma ("{"[^}]*"}")|("{{"([^}]|"}"[^}])*"}}") mjsousa@868: mjsousa@868: mjsousa@866: mjsousa@866: /* COMMENTS */ mjsousa@866: /* ======== */ mjsousa@866: mjsousa@866: /* In order to allow nested comments, comments are handled by a specific comment_state state */ mjsousa@866: /* Whenever a "(*" is found, we push the current state onto the stack, and enter a new instance of the comment_state state. mjsousa@866: * Whenever a "*)" is found, we pop a state off the stack mjsousa@866: */ mjsousa@866: mjsousa@866: /* comments... 
*/ mjsousa@866: comment_beg "(*" mjsousa@866: comment_end "*)" mjsousa@866: mjsousa@866: /* However, bison has a shift/reduce conflict when parsing formal function/FB mjsousa@866: * invocations with the 'NOT <variable_name> =>' syntax (which needs two look ahead mjsousa@866: * tokens to be parsed correctly - and bison being LALR(1) only supports one). mjsousa@866: * The current work around requires flex to completely parse the '<variable_name> =>' mjsousa@866: * sequence. This sequence includes whitespace and/or comments between the mjsousa@866: * <variable_name> and the "=>" token. mjsousa@866: * mjsousa@866: * This flex rule (sendto_identifier_token) uses the whitespace/comment as trailing context, mjsousa@866: * which means we can not use the comment_state method of specifying/finding and ignoring mjsousa@866: * comments. mjsousa@866: * mjsousa@866: * For this reason only, we must also define what a complete comment looks like, so mjsousa@866: * it may be used in this rule. Since the rule uses the whitespace_or_comment mjsousa@866: * construct as trailing context, this definition of comment must not use any mjsousa@866: * trailing context either. mjsousa@866: * mjsousa@866: * Additionally, it is not possible to define nested comments in flex without the use of mjsousa@866: * states, so for this particular location, we do NOT support nested comments. mjsousa@866: */ etisserant@0: /* NOTE: this seemingly unnecessarily complex definition is required etisserant@0: * to be able to eat up comments such as: etisserant@0: * '(* Testing... ! 
***** ******)' etisserant@0: * without using the trailing context command in flex (/{context}) etisserant@0: * since {comment} itself will later be used with etisserant@0: * trailing context ({comment}/{context}) etisserant@0: */ etisserant@0: not_asterisk [^*] etisserant@0: not_close_parenthesis_nor_asterisk [^*)] etisserant@0: asterisk "*" mjsousa@866: comment_text ({not_asterisk})|(({asterisk}+){not_close_parenthesis_nor_asterisk}) etisserant@0: comment "(*"({comment_text}*)({asterisk}+)")" etisserant@0: etisserant@0: mjsousa@866: mjsousa@866: /* 3.1 Whitespace */ mjsousa@866: /* ============== */ etisserant@0: /* mjsousa@866: * Whitespace is clearly defined (see IEC 61131-3 v2, section 2.1.4) mjsousa@866: * mjsousa@866: * Whitespace definition includes the newline character. mjsousa@866: * mjsousa@866: * However, the standard is inconsistent in that in IL the newline character mjsousa@866: * is considered a token (EOL - end of line). mjsousa@866: * In our implementation we therefore have two definitions of whitespace mjsousa@866: * - one for ST, that includes the newline character mjsousa@866: * - one for IL without the newline character. mjsousa@866: * Additionally, when parsing IL, the newline character is treated as the EOL token. mjsousa@866: * This requires the use of a state machine in the lexical parser that needs at least mjsousa@866: * some knowledge of the syntax itself. mjsousa@866: * mjsousa@866: * NOTE: Our definition of whitespace will only work in ASCII! mjsousa@866: * etisserant@0: * NOTE: we cannot use etisserant@0: * st_whitespace [:space:]* etisserant@0: * since we use {st_whitespace} as trailing context. In our case etisserant@0: * this would not constitute "dangerous trailing context", but the etisserant@0: * lexical generator (i.e. flex) does not know this (since it does etisserant@0: * not know which characters belong to the set [:space:]), and will etisserant@0: * generate a "dangerous trailing context" warning! 
etisserant@0: * We use this alternative just to stop the flex utility from etisserant@0: * generating the invalid (in this case) warning... etisserant@0: */ etisserant@0: mjsousa@866: st_whitespace [ \f\n\r\t\v]* mjsousa@866: il_whitespace [ \f\r\t\v]* mjsousa@866: mjsousa@866: st_whitespace_or_pragma_or_commentX ({st_whitespace})|({pragma})|({comment}) mjsousa@866: il_whitespace_or_pragma_or_commentX ({il_whitespace})|({pragma})|({comment}) mjsousa@866: mjsousa@866: st_whitespace_or_pragma_or_comment {st_whitespace_or_pragma_or_commentX}* mjsousa@866: il_whitespace_or_pragma_or_comment {il_whitespace_or_pragma_or_commentX}* mjsousa@866: mjsousa@866: mjsousa@866: mjsousa@866: qualified_identifier {identifier}(\.{identifier})+ etisserant@0: etisserant@0: etisserant@0: etisserant@0: /*****************************************/ etisserant@0: /* B.1.1 Letters, digits and identifiers */ etisserant@0: /*****************************************/ etisserant@0: /* NOTE: The following definitions only work if the host computer etisserant@0: * is using the ASCII mapping. E.g., with EBCDIC [A-Z] etisserant@0: * contains non-alphabetic characters! etisserant@0: * The correct way of doing it would be to use etisserant@0: * the [:upper:] etc... definitions. etisserant@0: * etisserant@0: * Unfortunately, further on we need all printable etisserant@0: * characters (i.e. [:print:]), but excluding '$'. etisserant@0: * Flex does not allow sets to be composed by excluding etisserant@0: * elements. Sets may only be constructed by adding new etisserant@0: * elements, which means that we have to revert to etisserant@0: * [\x20\x21\x23\x25\x26\x28-\x7E] for the definition etisserant@0: * of the printable characters with the required exceptions. etisserant@0: * The above also implies the use of ASCII, but now we have etisserant@0: * no way to work around it! etisserant@0: * etisserant@0: * The conclusion is that our parser is limited to ASCII etisserant@0: * based host computers!! 
etisserant@0: */ etisserant@0: letter [A-Za-z] etisserant@0: digit [0-9] etisserant@0: octal_digit [0-7] etisserant@0: hex_digit {digit}|[A-F] etisserant@0: identifier ({letter}|(_({letter}|{digit})))((_?({letter}|{digit}))*) etisserant@0: etisserant@0: /*******************/ etisserant@0: /* B.1.2 Constants */ etisserant@0: /*******************/ etisserant@0: etisserant@0: /******************************/ etisserant@0: /* B.1.2.1 Numeric literals */ etisserant@0: /******************************/ etisserant@0: integer {digit}((_?{digit})*) msousa@547: msousa@547: /* Some helper symbols for parsing TIME literals... */ msousa@547: integer_0_59 (0(_?))*([0-5](_?))?{digit} msousa@547: integer_0_19 (0(_?))*([0-1](_?))?{digit} msousa@547: integer_20_23 (0(_?))*2(_?)[0-3] msousa@547: integer_0_23 {integer_0_19}|{integer_20_23} msousa@547: integer_0_999 {digit}((_?{digit})?)((_?{digit})?) msousa@547: msousa@547: etisserant@0: binary_integer 2#{bit}((_?{bit})*) etisserant@0: bit [0-1] etisserant@0: octal_integer 8#{octal_digit}((_?{octal_digit})*) etisserant@0: hex_integer 16#{hex_digit}((_?{hex_digit})*) etisserant@0: exponent [Ee]([+-]?){integer} etisserant@0: /* The correct definition for real would be: etisserant@0: * real {integer}\.{integer}({exponent}?) etisserant@0: * etisserant@0: * Unfortunately, the spec also defines fixed_point (B 1.2.3.1) as: etisserant@0: * fixed_point {integer}\.{integer} etisserant@0: * etisserant@0: * This means that {integer}\.{integer} could be interpreted etisserant@0: * as either a fixed_point or a real. etisserant@0: * I have opted to interpret {integer}\.{integer} as a fixed_point. etisserant@0: * In order to do this, the definition of real has been changed to: etisserant@0: * real {integer}\.{integer}{exponent} etisserant@0: * etisserant@0: * This means that the syntax parser now needs to define a real to be etisserant@0: * either a real_token or a fixed_point_token! 
etisserant@0: */ etisserant@0: real {integer}\.{integer}{exponent} etisserant@0: etisserant@0: etisserant@0: /*******************************/ etisserant@0: /* B.1.2.2 Character Strings */ etisserant@0: /*******************************/ etisserant@0: /* etisserant@0: common_character_representation := etisserant@0: etisserant@0: |'$$' etisserant@0: |'$L'|'$N'|'$P'|'$R'|'$T' etisserant@0: |'$l'|'$n'|'$p'|'$r'|'$t' etisserant@0: etisserant@0: NOTE: $ = 0x24 etisserant@0: " = 0x22 etisserant@0: ' = 0x27 etisserant@0: etisserant@0: printable chars in ASCII: 0x20-0x7E etisserant@0: */ etisserant@0: etisserant@0: esc_char_u $L|$N|$P|$R|$T etisserant@0: esc_char_l $l|$n|$p|$r|$t etisserant@0: esc_char $$|{esc_char_u}|{esc_char_l} etisserant@0: double_byte_char (${hex_digit}{hex_digit}{hex_digit}{hex_digit}) etisserant@0: single_byte_char (${hex_digit}{hex_digit}) etisserant@0: etisserant@0: /* WARNING: etisserant@0: * This definition is only valid in ASCII... etisserant@0: * etisserant@0: * Flex includes the function print_char() that defines etisserant@0: * all printable characters portably (i.e. whatever character etisserant@0: * encoding is currently being used , ASCII, EBCDIC, etc...) etisserant@0: * Unfortunately, we cannot generate the definition of etisserant@0: * common_character_representation portably, since flex etisserant@0: * does not allow definition of sets by subtracting etisserant@0: * elements in one set from another set. etisserant@0: * This means we must build up the defintion of etisserant@0: * common_character_representation using only set addition, etisserant@0: * which leaves us with the only choice of defining the etisserant@0: * characters non-portably... 
etisserant@0: */ etisserant@0: common_character_representation [\x20\x21\x23\x25\x26\x28-\x7E]|{esc_char} etisserant@0: double_byte_character_representation $\"|'|{double_byte_char}|{common_character_representation} etisserant@0: single_byte_character_representation $'|\"|{single_byte_char}|{common_character_representation} etisserant@0: etisserant@0: etisserant@0: double_byte_character_string \"({double_byte_character_representation}*)\" etisserant@0: single_byte_character_string '({single_byte_character_representation}*)' etisserant@0: etisserant@0: etisserant@0: /************************/ etisserant@0: /* B 1.2.3.1 - Duration */ etisserant@0: /************************/ etisserant@0: fixed_point {integer}\.{integer} etisserant@0: msousa@547: msousa@547: /* NOTE: The IEC 61131-3 v2 standard has an incorrect formal syntax definition of duration, msousa@547: * as its definition does not match the standard's text. msousa@547: * IEC 61131-3 v3 (committee draft) seems to have this fixed, so we use that msousa@547: * definition instead! msousa@547: * msousa@547: * duration::= ('T' | 'TIME') '#' ['+'|'-'] interval msousa@547: * interval::= days | hours | minutes | seconds | milliseconds msousa@547: * fixed_point ::= integer [ '.' integer] msousa@547: * days ::= fixed_point 'd' | integer 'd' ['_'] [ hours ] msousa@547: * hours ::= fixed_point 'h' | integer 'h' ['_'] [ minutes ] msousa@547: * minutes ::= fixed_point 'm' | integer 'm' ['_'] [ seconds ] msousa@547: * seconds ::= fixed_point 's' | integer 's' ['_'] [ milliseconds ] msousa@547: * milliseconds ::= fixed_point 'ms' msousa@547: * msousa@547: * msousa@547: * The original IEC 61131-3 v2 definition is: msousa@547: * duration ::= ('T' | 'TIME') '#' ['-'] interval msousa@547: * interval ::= days | hours | minutes | seconds | milliseconds msousa@547: * fixed_point ::= integer [ '.' 
integer] msousa@547: * days ::= fixed_point 'd' | integer 'd' ['_'] hours msousa@547: * hours ::= fixed_point 'h' | integer 'h' ['_'] minutes msousa@547: * minutes ::= fixed_point 'm' | integer 'm' ['_'] seconds msousa@547: * seconds ::= fixed_point 's' | integer 's' ['_'] milliseconds msousa@547: * milliseconds ::= fixed_point 'ms' msousa@547: */ msousa@547: msousa@547: interval_ms_X ({integer_0_999}(\.{integer})?)ms msousa@686: interval_s_X {integer_0_59}s(_?{interval_ms_X})?|({integer_0_59}(\.{integer})?s) msousa@686: interval_m_X {integer_0_59}m(_?{interval_s_X})?|({integer_0_59}(\.{integer})?m) msousa@686: interval_h_X {integer_0_23}h(_?{interval_m_X})?|({integer_0_23}(\.{integer})?h) msousa@547: msousa@547: interval_ms {integer}ms|({fixed_point}ms) msousa@547: interval_s {integer}s(_?{interval_ms_X})?|({fixed_point}s) msousa@547: interval_m {integer}m(_?{interval_s_X})?|({fixed_point}m) msousa@547: interval_h {integer}h(_?{interval_m_X})?|({fixed_point}h) msousa@547: interval_d {integer}d(_?{interval_h_X})?|({fixed_point}d) msousa@547: msousa@547: interval {interval_ms}|{interval_s}|{interval_m}|{interval_h}|{interval_d} msousa@547: msousa@686: msousa@547: /* to help provide nice error messages, we also parse an incorrect but plausible interval... */ msousa@547: /* NOTE that this erroneous interval will be parsed outside the time_literal_state, so must not msousa@547: * be able to parse any other legal lexcial construct (besides a legal interval, but that msousa@547: * is OK as this rule will appear _after_ the rule to parse legal intervals!). msousa@547: */ msousa@547: fixed_point_or_integer {fixed_point}|{integer} msousa@547: erroneous_interval ({fixed_point_or_integer}d_?)?({fixed_point_or_integer}h_?)?({fixed_point_or_integer}m_?)?({fixed_point_or_integer}s_?)?({fixed_point_or_integer}ms)? 
etisserant@0: etisserant@0: /********************************************/ etisserant@0: /* B.1.4.1 Directly Represented Variables */ etisserant@0: /********************************************/ etisserant@0: /* The correct definition, if the standard were to be followed... NOTE: the '.' separating the integers must be escaped, as an unescaped '.' matches any character except newline (e.g. '%MW1+2' would be accepted as a single direct variable). */ mario@11: mario@11: location_prefix [IQM] mario@11: size_prefix [XBWDL] mario@11: direct_variable_standard %{location_prefix}({size_prefix}?){integer}((\.{integer})*) mario@11: etisserant@0: etisserant@0: /* For the MatPLC, we will accept %identifier etisserant@0: * as a direct variable, this being mapped onto the MatPLC point etisserant@0: * named identifier etisserant@0: */ etisserant@0: /* TODO: we should not restrict it to only the accepted syntax etisserant@0: * of identifier as specified by the standard. MatPLC point names etisserant@0: * have a more permissive syntax. etisserant@0: * etisserant@0: * e.g. "P__234" etisserant@0: * Is a valid MatPLC point name, but not a valid identifier !! etisserant@0: * The same happens with names such as "333", "349+23", etc... etisserant@0: * How can we handle these more expressive names in our case? etisserant@0: * Remember that some direct variable may remain anonymous, with etisserant@0: * declarations such as: etisserant@0: * VAR etisserant@0: * AT %I3 : BYTE := 255; etisserant@0: * END_VAR mario@11: * in which case we are currently using "%I3" as the variable mario@11: * name. 
mario@11: */ msousa@547: /* direct_variable_matplc %{identifier} */ msousa@547: /* direct_variable {direct_variable_standard}|{direct_variable_matplc} */ msousa@547: direct_variable {direct_variable_standard} etisserant@0: etisserant@0: /******************************************/ etisserant@0: /* B 1.4.3 - Declaration & Initialisation */ etisserant@0: /******************************************/ etisserant@0: incompl_location %[IQM]\* etisserant@0: etisserant@0: etisserant@0: etisserant@0: etisserant@0: %% etisserant@0: /* fprintf(stderr, "flex: state %d\n", YY_START); */ etisserant@0: etisserant@0: /*****************************************************/ etisserant@0: /*****************************************************/ etisserant@0: /*****************************************************/ etisserant@0: /***** *****/ etisserant@0: /***** *****/ etisserant@0: /***** F I R S T T H I N G S F I R S T *****/ etisserant@0: /***** *****/ etisserant@0: /***** *****/ etisserant@0: /*****************************************************/ etisserant@0: /*****************************************************/ etisserant@0: /*****************************************************/ etisserant@0: mario@68: /***********************************************************/ mario@68: /* Handle requests sent by bison for flex to change state. 
*/ mario@68: /***********************************************************/ mario@13: if (get_goto_body_state()) { mario@68: yy_push_state(body_state); mario@13: rst_goto_body_state(); mario@6: } lbessard@3: mario@68: if (get_goto_sfc_qualifier_state()) { mario@68: yy_push_state(sfc_qualifier_state); mario@68: rst_goto_sfc_qualifier_state(); mario@68: } mario@68: mario@86: if (get_goto_sfc_priority_state()) { mario@86: yy_push_state(sfc_priority_state); mario@86: rst_goto_sfc_priority_state(); mario@86: } mario@86: mario@74: if (get_goto_task_init_state()) { mario@74: yy_push_state(task_init_state); mario@74: rst_goto_task_init_state(); mario@74: } mario@74: mario@68: if (get_pop_state()) { mario@68: yy_pop_state(); mario@68: rst_pop_state(); mario@68: } mario@68: mario@68: /***************************/ etisserant@0: /* Handle the pragmas! */ mario@68: /***************************/ etisserant@0: etisserant@0: /* We start off by searching for the pragmas we handle in the lexical parser. */ etisserant@0: {file_include_pragma} unput_text(0); yy_push_state(include_beg); etisserant@0: msousa@267: /* Pragmas sent to syntax analyser (bison) */ mjsousa@1016: /* NOTE: In the vardecl_list_state we only process the pragmas between two consecutive VAR .. END_VAR blocks. mjsousa@1016: * We do not process any pragmas trailing after the last END_VAR. We leave that to the body_state. mjsousa@1016: * This is because the pragmas are stored in a statement_list or instruction_list (in bison), mjsousa@1016: * but these lists must start with the special tokens start_IL_body_token/start_ST_body_token. mjsousa@1016: * This means that these special tokens must be generated (by the body_state) before processing mjsousa@1016: * the pragme => we cannot process the trailing pragmas in the vardecl_list_state state. 
mjsousa@1016: */ mjsousa@1016: {disable_code_generation_pragma} return disable_code_generation_pragma_token; mjsousa@1016: {enable_code_generation_pragma} return enable_code_generation_pragma_token; mjsousa@1016: {disable_code_generation_pragma}/(VAR) return disable_code_generation_pragma_token; mjsousa@1016: {enable_code_generation_pragma}/(VAR) return enable_code_generation_pragma_token; mjsousa@1016: {disable_code_generation_pragma} append_bodystate_buffer(yytext); /* in body state we do not process any tokens, we simply store them for later processing! */ mjsousa@1016: {enable_code_generation_pragma} append_bodystate_buffer(yytext); /* in body state we do not process any tokens, we simply store them for later processing! */ etisserant@0: /* Any other pragma we find, we just pass it up to the syntax parser... */ mario@68: /* Note that the state is exclusive, so we have to include it here too. */ mjsousa@1016: {pragma} append_bodystate_buffer(yytext); /* in body state we do not process any tokens, we simply store them for later processing! */ etisserant@0: {pragma} {/* return the pragmma without the enclosing '{' and '}' */ mjsousa@868: int cut = yytext[1]=='{'?2:1; Edouard@634: yytext[strlen(yytext)-cut] = '\0'; Edouard@634: yylval.ID=strdup(yytext+cut); etisserant@0: return pragma_token; etisserant@0: } mjsousa@1016: {pragma}/(VAR) {/* return the pragmma without the enclosing '{' and '}' */ Laurent@701: int cut = yytext[1]=='{'?2:1; mjsousa@866: yytext[strlen(yytext)-cut] = '\0'; Laurent@701: yylval.ID=strdup(yytext+cut); etisserant@0: return pragma_token; etisserant@0: } etisserant@0: etisserant@0: etisserant@0: /*********************************/ etisserant@0: /* Handle the file includes! 
*/ etisserant@0: /*********************************/ etisserant@0: {file_include_pragma_beg} BEGIN(include_filename); etisserant@0: etisserant@0: {file_include_pragma_filename} { msousa@756: /* set the internal state variables of lexical analyser to process a new include file */ msousa@756: include_file(yytext); etisserant@0: /* switch to whatever state was active before the include file */ etisserant@0: yy_pop_state(); etisserant@0: /* now process the new file... */ etisserant@0: } etisserant@0: etisserant@0: mjsousa@761: <> { /* NOTE: Currently bison is incorrectly using END_OF_INPUT in many rules mjsousa@761: * when checking for syntax errors in the input source code. mjsousa@761: * This means that in reality flex will be asked to carry on reading the input mjsousa@761: * even after it has reached the end of all (including the main) input files. mjsousa@761: * In other owrds, we will be called to return more tokens, even after we have mjsousa@761: * already returned an END_OF_INPUT token. In this case, we must carry on returning mjsousa@761: * more END_OF_INPUT tokens. mjsousa@761: * mjsousa@761: * However, in the above case we will be asked to carry on reading more tokens mjsousa@761: * from the main input file, after we have reached the end. For this to work mjsousa@761: * correctly, we cannot close the main input file! mjsousa@761: * mjsousa@761: * This is why we WILL be called with include_stack_ptr == 0 multiple times, mjsousa@761: * and why we must handle it as a special case mjsousa@761: * that leaves the include_stack_ptr unchanged, and returns END_OF_INPUT once again. mjsousa@761: * mjsousa@761: * As a corollory, flex can never safely close the main input file, and we must ask mjsousa@761: * bison to close it! mario@76: */ mario@76: if (include_stack_ptr == 0) { mjsousa@761: // fclose(yyin); // Must not do this!! mjsousa@879: // FreeTracking(current_tracking); // Must not do this!! 
mario@73: /* yyterminate() terminates the scanner and returns a 0 to the mario@73: * scanner's caller, indicating "all done". mario@73: * mario@73: * Our syntax parser (written with bison) has the token mario@73: * END_OF_INPUT associated to the value 0, so even though mario@73: * we don't explicitly return the token END_OF_INPUT mario@73: * calling yyterminate() is equivalent to doing that. mario@73: */ etisserant@0: yyterminate(); msousa@737: } else { mjsousa@761: fclose(yyin); mjsousa@879: FreeTracking(current_tracking); lbessard@136: --include_stack_ptr; etisserant@0: yy_delete_buffer(YY_CURRENT_BUFFER); etisserant@0: yy_switch_to_buffer((include_stack[include_stack_ptr]).buffer_state); lbessard@136: current_tracking = include_stack[include_stack_ptr].env; etisserant@0: /* removing constness of char *. This is safe actually, etisserant@0: * since the only real const char * that is stored on the stack is etisserant@1: * the first one (i.e. the one that gets stored in include_stack[0], etisserant@0: * which is never free'd! etisserant@0: */ msousa@286: /* NOTE: We do __NOT__ free the malloc()'d memory since msousa@286: * pointers to this filename will be kept by many objects msousa@286: * in the abstract syntax tree. msousa@286: * This will later be used to provide correct error msousa@286: * messages during semantic analysis (stage 3) msousa@286: */ msousa@286: /* free((char *)current_filename); */ etisserant@0: current_filename = include_stack[include_stack_ptr].filename; etisserant@0: yy_push_state(include_end); etisserant@0: } etisserant@0: } etisserant@0: etisserant@0: {file_include_pragma_end} yy_pop_state(); msousa@756: /* handle the artificial file includes created by include_string(), which do not end with a '}' */ msousa@756: . unput_text(0); yy_pop_state(); etisserant@0: etisserant@0: etisserant@0: /*********************************/ etisserant@0: /* Handle all the state changes! 
*/ etisserant@0: /*********************************/ etisserant@0: mjsousa@866: /* INITIAL -> header_state */ etisserant@0: { mjsousa@1016: FUNCTION{st_whitespace} if (get_preparse_state()) BEGIN(get_pou_name_state); else {BEGIN(header_state);/* printf("\nChanging to header_state\n"); */} return FUNCTION; mjsousa@1016: FUNCTION_BLOCK{st_whitespace} if (get_preparse_state()) BEGIN(get_pou_name_state); else {BEGIN(header_state);/* printf("\nChanging to header_state\n"); */} return FUNCTION_BLOCK; mjsousa@1016: PROGRAM{st_whitespace} if (get_preparse_state()) BEGIN(get_pou_name_state); else {BEGIN(header_state);/* printf("\nChanging to header_state\n"); */} return PROGRAM; mjsousa@1016: CONFIGURATION{st_whitespace} if (get_preparse_state()) BEGIN(get_pou_name_state); else {BEGIN(config_state);/* printf("\nChanging to config_state\n"); */} return CONFIGURATION; mjsousa@1016: } mjsousa@1016: mjsousa@1016: { mjsousa@1016: {identifier} BEGIN(ignore_pou_state); yylval.ID=strdup(yytext); return identifier_token; mjsousa@1016: . BEGIN(ignore_pou_state); unput_text(0); mjsousa@1016: } mjsousa@1016: mjsousa@1016: { mjsousa@1016: END_FUNCTION unput_text(0); BEGIN(INITIAL); mjsousa@1016: END_FUNCTION_BLOCK unput_text(0); BEGIN(INITIAL); mjsousa@1016: END_PROGRAM unput_text(0); BEGIN(INITIAL); mjsousa@1016: END_CONFIGURATION unput_text(0); BEGIN(INITIAL); mjsousa@1016: .|\n {}/* Ignore text inside POU! (including the '\n' character!)) */ mjsousa@1016: } mjsousa@1016: mjsousa@1016: mjsousa@1016: /* header_state -> (vardecl_list_state) */ mjsousa@1016: /* NOTE: This transition assumes that all POUs with code (Function, FB, and Program) will always contain mjsousa@1016: * at least one VAR_XXX block. mjsousa@1016: * How about functions that do not declare variables, and go directly to the body_state??? 
etisserant@0: * - According to Section 2.5.1.3 (Function Declaration), item 2 in the list, a FUNCTION etisserant@0: * must have at least one input argument, so a correct declaration will have at least etisserant@0: * one VAR_INPUT ... END_VAR construct! etisserant@0: * - According to Section 2.5.2.2 (Function Block Declaration), a FUNCTION_BLOCK etisserant@0: * must have at least one input argument, so a correct declaration will have at least etisserant@0: * one VAR_INPUT ... END_VAR construct! etisserant@0: * - According to Section 2.5.3 (Programs), a PROGRAM must have at least one input etisserant@0: * argument, so a correct declaration will have at least one VAR_INPUT ... END_VAR etisserant@0: * construct! etisserant@0: * etisserant@0: * All the above means that we needn't worry about PROGRAMs, FUNCTIONs or mario@68: * FUNCTION_BLOCKs that do not have at least one END_VAR before the body_state. etisserant@0: * If the code has an error, and no END_VAR before the body, we will simply mjsousa@1016: * continue in the current state, until the end of the FUNCTION, FUNCTION_BLOCK etisserant@0: * or PROGRAM. mjsousa@1016: * mjsousa@1016: * WARNING: From 2016-05 (May 2016) onwards, matiec supports a non-standard option in which a Function mjsousa@1016: * may be declared with no Input, Output or IN_OUT variables. This means that the above mjsousa@1016: * assumption is no longer valid. mjsousa@1016: * To make things simpler (i.e. so we do not need to change the transition conditions in the flex state machine), mjsousa@1016: * when using this non-standard extension matiec requires that Functions must include at least one mjsousa@1016: * VAR .. END_VAR block. This implies that the above assumption remains valid! mjsousa@1016: * This limitation of requiring a VAR .. 
END_VAR block is not really very limiting, as a function mjsousa@1016: * with no input and output parameters will probably need to do some 'work', and for that it will mjsousa@1016: * probably need some local variables declared in a VAR .. END_VAR block. mjsousa@1016: * Note however that in the extreme it might make sense to have a function with no variables whatsoever mjsousa@1016: * (e.g.: a function that only calls other functions that all return VOID - another non standard extension!). mjsousa@1016: * For now we do not consider this!! etisserant@0: */ mjsousa@866: { mjsousa@868: VAR | /* execute the next rule's action, i.e. fall-through! */ mjsousa@868: VAR_INPUT | mjsousa@868: VAR_OUTPUT | mjsousa@868: VAR_IN_OUT | mjsousa@868: VAR_EXTERNAL | mjsousa@868: VAR_GLOBAL | mjsousa@868: VAR_TEMP | mjsousa@868: VAR_CONFIG | mjsousa@1016: VAR_ACCESS unput_text(0); /* printf("\nChanging to vardecl_list_state\n") */; BEGIN(vardecl_list_state); mjsousa@868: } mjsousa@868: mjsousa@868: mjsousa@868: /* vardecl_list_state -> (vardecl_state | body_state | INITIAL) */ mjsousa@866: { mjsousa@868: VAR_INPUT | /* execute the next rule's action, i.e. fall-through! */ mjsousa@868: VAR_OUTPUT | mjsousa@868: VAR_IN_OUT | mjsousa@868: VAR_EXTERNAL | mjsousa@868: VAR_GLOBAL | mjsousa@868: VAR_TEMP | mjsousa@868: VAR_CONFIG | mjsousa@868: VAR_ACCESS | mjsousa@866: VAR unput_text(0); yy_push_state(vardecl_state); mjsousa@868: mjsousa@1010: END_FUNCTION unput_text(0); BEGIN(INITIAL); mjsousa@1010: END_FUNCTION_BLOCK unput_text(0); BEGIN(INITIAL); mjsousa@1010: END_PROGRAM unput_text(0); BEGIN(INITIAL); mjsousa@868: mjsousa@1016: . unput_text(0); yy_push_state(body_state); //printf("\nChanging to body_state\n");/* anything else, just change to body_state! 
*/ mjsousa@868: } mjsousa@868: mjsousa@868: mjsousa@868: /* vardecl_state -> pop to $previous_state (vardecl_list_state) */ mjsousa@866: { mjsousa@948: END_VAR yy_pop_state(); return END_VAR; /* pop back to vardecl_list_state */ mjsousa@866: } mjsousa@866: etisserant@0: mjsousa@868: /* body_state -> (il_state | st_state | sfc_state) */ mario@68: { mjsousa@1020: {st_whitespace} {/* In body state we do not process any tokens, mjsousa@1020: * we simply store them for later processing! mjsousa@1020: * NOTE: all whitespace in the beginning mjsousa@1020: * of body_state must be removed so we can mjsousa@1020: * detect ':=' in the beginning of TRANSITION mjsousa@1020: * conditions preceded by whitespace. mjsousa@1020: * => only add to bodystate_buffer when not in beginning. mjsousa@1020: */ mjsousa@1020: if (!isempty_bodystate_buffer()) mjsousa@1020: append_bodystate_buffer(yytext); mjsousa@1020: } mjsousa@1016: /* 'INITIAL_STEP' always used in beginning of SFCs !! */ mjsousa@1016: INITIAL_STEP { if (isempty_bodystate_buffer()) {unput_text(0); BEGIN(sfc_state);} mjsousa@1016: else {append_bodystate_buffer(yytext);} mjsousa@1016: } mjsousa@1016: mjsousa@1016: /* ':=', at the very beginning of a 'body', occurs only in transitions and not Function, FB, or Program bodies! */ mjsousa@1016: := { if (isempty_bodystate_buffer()) {unput_text(0); BEGIN(st_state);} /* We do _not_ return a start_ST_body_token here, as bison does not expect it! */ mjsousa@1016: else {append_bodystate_buffer(yytext);} mjsousa@1016: } mjsousa@1016: mjsousa@1016: andrej@1031: /* check if ';' occurs before an END_FUNCTION, END_FUNCTION_BLOCK, END_PROGRAM, END_ACTION or END_TRANSITION. (If true => we are parsing ST; If false => parsing IL). */ mjsousa@1016: END_ACTION | /* execute the next rule's action, i.e. fall-through! 
*/ mjsousa@1016: END_FUNCTION | mjsousa@1016: END_FUNCTION_BLOCK | andrej@1031: END_TRANSITION | mjsousa@1016: END_PROGRAM { append_bodystate_buffer(yytext); unput_bodystate_buffer(); BEGIN(il_state); /*printf("returning start_IL_body_token\n");*/ return start_IL_body_token;} mjsousa@1016: .|\n { append_bodystate_buffer(yytext); mjsousa@1016: if (strcmp(yytext, ";") == 0) mjsousa@1016: {unput_bodystate_buffer(); BEGIN(st_state); /*printf("returning start_ST_body_token\n");*/ return start_ST_body_token;} mjsousa@1016: } mjsousa@1016: /* The following rules are not really necessary. They just make compilation faster in case the ST Statement List starts with one fot he following... */ mjsousa@1016: RETURN | /* execute the next rule's action, i.e. fall-through! */ mjsousa@1016: IF | mjsousa@1016: CASE | mjsousa@1016: FOR | mjsousa@1016: WHILE | mjsousa@1016: EXIT | mjsousa@1016: REPEAT { if (isempty_bodystate_buffer()) {unput_text(0); BEGIN(st_state); return start_ST_body_token;} mjsousa@1016: else {append_bodystate_buffer(yytext);} mjsousa@1016: } mjsousa@1016: mario@68: } /* end of body_state lexical parser */ lbessard@3: mjsousa@866: mjsousa@868: /* (il_state | st_state) -> pop to $previous_state (vardecl_list_state or sfc_state) */ lbessard@3: { lbessard@3: END_FUNCTION yy_pop_state(); unput_text(0); lbessard@3: END_FUNCTION_BLOCK yy_pop_state(); unput_text(0); lbessard@3: END_PROGRAM yy_pop_state(); unput_text(0); lbessard@3: END_TRANSITION yy_pop_state(); unput_text(0); mario@6: END_ACTION yy_pop_state(); unput_text(0); lbessard@3: } lbessard@3: mjsousa@868: /* sfc_state -> pop to $previous_state (vardecl_list_state or sfc_state) */ lbessard@4: { lbessard@4: END_FUNCTION yy_pop_state(); unput_text(0); lbessard@4: END_FUNCTION_BLOCK yy_pop_state(); unput_text(0); lbessard@4: END_PROGRAM yy_pop_state(); unput_text(0); lbessard@4: } lbessard@4: etisserant@0: /* config -> INITIAL */ etisserant@0: END_CONFIGURATION BEGIN(INITIAL); return END_CONFIGURATION; 
etisserant@0: etisserant@0: etisserant@0: etisserant@0: /***************************************/ etisserant@0: /* Next is to to remove all whitespace */ etisserant@0: /***************************************/ etisserant@0: /* NOTE: pragmas are handled right at the beginning... */ etisserant@0: mjsousa@866: /* The whitespace */ mjsousa@1016: {st_whitespace} /* Eat any whitespace */ mjsousa@866: {il_whitespace} /* Eat any whitespace */ mjsousa@1020: /* NOTE: Due to the need of having the following rule have higher priority, mjsousa@1020: * the following rule was moved to an earlier position in this file. mjsousa@1020: {st_whitespace} {...} mjsousa@1020: */ mjsousa@866: mjsousa@866: /* The comments */ mjsousa@952: {comment_beg} yy_push_state(comment_state); mjsousa@867: {comment_beg} yy_push_state(comment_state); mjsousa@866: { mjsousa@867: {comment_beg} {if (get_opt_nested_comments()) yy_push_state(comment_state);} mjsousa@867: {comment_end} yy_pop_state(); mjsousa@867: . /* Ignore text inside comment! */ mjsousa@867: \n /* Ignore text inside comment! */ mjsousa@866: } msousa@267: etisserant@0: /*****************************************/ etisserant@0: /* B.1.1 Letters, digits and identifiers */ etisserant@0: /*****************************************/ etisserant@0: /* NOTE: 'R1', 'IN', etc... are IL operators, and therefore tokens etisserant@0: * On the other hand, the spec does not define them as keywords, etisserant@0: * which means they may be re-used for variable names, etc...! etisserant@0: * The syntax parser already caters for the possibility of these etisserant@0: * tokens being used for variable names in their declarations. etisserant@0: * When they are declared, they will be added to the variable symbol table! etisserant@0: * Further appearances of these tokens must no longer be parsed etisserant@0: * as R1_tokens etc..., but rather as variable_name_tokens! 
etisserant@0: * etisserant@0: * That is why the first thing we do with identifiers, even before etisserant@0: * checking whether they may be a 'keyword', is to check whether etisserant@0: * they have been previously declared as a variable name. etisserant@0: * mario@13: * However, we have a dilemma! Should we here also check for mario@13: * prev_declared_derived_function_name_token? mario@13: * If we do, then the 'MOD' default library function (defined in mario@13: * the standard) will always be returned as a function name, and mario@13: * it will therefore not be possible to use it as an operator as mario@13: * in the following ST expression 'X := Y MOD Z;' ! mario@13: * If we don't, then it will not even be possible to use 'MOD' mario@13: * as a function as in 'X := MOD(Y, Z);' mario@13: * We solve this by NOT testing for function names here, and mario@13: * handling this function and keyword clash in bison! etisserant@0: */ mjsousa@1016: /* NOTE: The following code has been commented out as most users do not want matiec mjsousa@1016: * to allow the use of 'R1', 'IN' ... IL operators as identifiers, mjsousa@1016: * even though a literal reading of the standard allows this. mjsousa@1016: * We could add this as a command line option, but it is not yet done. mjsousa@1016: * For now we just comment out the code, but leave the commented code mjsousa@1016: * in so we can re-activate it quickly (without having to go through old commits mjsousa@1016: * in the mercurial repository to figure out the missing code)! mjsousa@1016: */ mario@83: /* etisserant@0: {identifier} {int token = get_identifier_token(yytext); mario@81: // fprintf(stderr, "flex: analysing identifier '%s'...", yytext); etisserant@0: if ((token == prev_declared_variable_name_token) || mario@13: // (token == prev_declared_derived_function_name_token) || // DO NOT add this condition! 
etisserant@0: (token == prev_declared_fb_name_token)) { mario@83: // if (token != identifier_token) mario@83: // * NOTE: if we replace the above uncommented conditions with mario@13: * the simple test of (token != identifier_token), then mario@13: * 'MOD' et al must be removed from the mario@13: * library_symbol_table as a default function name! mario@83: * // etisserant@0: yylval.ID=strdup(yytext); mario@81: // fprintf(stderr, "returning token %d\n", token); etisserant@0: return token; etisserant@0: } mario@83: // otherwise, leave it for the other lexical parser rules... mario@81: // fprintf(stderr, "rejecting\n"); etisserant@0: REJECT; etisserant@0: } mario@83: */ etisserant@0: etisserant@0: /******************************************************/ etisserant@0: /******************************************************/ etisserant@0: /******************************************************/ etisserant@0: /***** *****/ etisserant@0: /***** *****/ etisserant@0: /***** N O W D O T H E K E Y W O R D S *****/ etisserant@0: /***** *****/ etisserant@0: /***** *****/ etisserant@0: /******************************************************/ etisserant@0: /******************************************************/ etisserant@0: /******************************************************/ etisserant@0: etisserant@0: mjsousa@934: REF {if (get_opt_ref_standard_extensions()) return REF; else{REJECT;}} /* Keyword in IEC 61131-3 v3 */ mjsousa@934: DREF {if (get_opt_ref_standard_extensions()) return DREF; else{REJECT;}} /* Keyword in IEC 61131-3 v3 */ mjsousa@934: REF_TO {if (get_opt_ref_standard_extensions()) return REF_TO; else{REJECT;}} /* Keyword in IEC 61131-3 v3 */ mjsousa@934: NULL {if (get_opt_ref_standard_extensions()) return NULL_token; else{REJECT;}} /* Keyword in IEC 61131-3 v3 */ mjsousa@873: mario@82: EN return EN; /* Keyword */ mario@82: ENO return ENO; /* Keyword */ etisserant@0: etisserant@0: etisserant@0: /******************************/ etisserant@0: /* B 1.2.1 - Numeric 
Literals */ etisserant@0: /******************************/ mario@82: TRUE return TRUE; /* Keyword */ msousa@257: BOOL#1 return boolean_true_literal_token; msousa@257: BOOL#TRUE return boolean_true_literal_token; msousa@257: SAFEBOOL#1 {if (get_opt_safe_extensions()) {return safeboolean_true_literal_token;} else{REJECT;}} /* Keyword (Data Type) */ msousa@257: SAFEBOOL#TRUE {if (get_opt_safe_extensions()) {return safeboolean_true_literal_token;} else{REJECT;}} /* Keyword (Data Type) */ msousa@257: mario@82: FALSE return FALSE; /* Keyword */ msousa@257: BOOL#0 return boolean_false_literal_token; msousa@257: BOOL#FALSE return boolean_false_literal_token; msousa@257: SAFEBOOL#0 {if (get_opt_safe_extensions()) {return safeboolean_false_literal_token;} else{REJECT;}} /* Keyword (Data Type) */ msousa@257: SAFEBOOL#FALSE {if (get_opt_safe_extensions()) {return safeboolean_false_literal_token;} else{REJECT;}} /* Keyword (Data Type) */ etisserant@0: etisserant@0: etisserant@0: /************************/ etisserant@0: /* B 1.2.3.1 - Duration */ etisserant@0: /************************/ mario@82: t# return T_SHARP; /* Delimiter */ mario@82: T# return T_SHARP; /* Delimiter */ mario@82: TIME return TIME; /* Keyword (Data Type) */ etisserant@0: etisserant@0: etisserant@0: /************************************/ etisserant@0: /* B 1.2.3.2 - Time of day and Date */ etisserant@0: /************************************/ mario@82: TIME_OF_DAY return TIME_OF_DAY; /* Keyword (Data Type) */ mario@82: TOD return TIME_OF_DAY; /* Keyword (Data Type) */ mario@82: DATE return DATE; /* Keyword (Data Type) */ mario@82: d# return D_SHARP; /* Delimiter */ mario@82: D# return D_SHARP; /* Delimiter */ mario@82: DATE_AND_TIME return DATE_AND_TIME; /* Keyword (Data Type) */ mario@82: DT return DATE_AND_TIME; /* Keyword (Data Type) */ etisserant@0: etisserant@0: etisserant@0: /***********************************/ etisserant@0: /* B 1.3.1 - Elementary Data Types */ etisserant@0: 
/***********************************/ msousa@257: BOOL return BOOL; /* Keyword (Data Type) */ msousa@257: mario@82: BYTE return BYTE; /* Keyword (Data Type) */ mario@82: WORD return WORD; /* Keyword (Data Type) */ mario@82: DWORD return DWORD; /* Keyword (Data Type) */ mario@82: LWORD return LWORD; /* Keyword (Data Type) */ etisserant@0: msousa@257: SINT return SINT; /* Keyword (Data Type) */ msousa@257: INT return INT; /* Keyword (Data Type) */ msousa@257: DINT return DINT; /* Keyword (Data Type) */ msousa@257: LINT return LINT; /* Keyword (Data Type) */ msousa@257: msousa@257: USINT return USINT; /* Keyword (Data Type) */ msousa@257: UINT return UINT; /* Keyword (Data Type) */ msousa@257: UDINT return UDINT; /* Keyword (Data Type) */ msousa@257: ULINT return ULINT; /* Keyword (Data Type) */ msousa@257: msousa@257: REAL return REAL; /* Keyword (Data Type) */ msousa@257: LREAL return LREAL; /* Keyword (Data Type) */ msousa@257: msousa@257: WSTRING return WSTRING; /* Keyword (Data Type) */ msousa@257: STRING return STRING; /* Keyword (Data Type) */ msousa@257: msousa@257: TIME return TIME; /* Keyword (Data Type) */ msousa@257: DATE return DATE; /* Keyword (Data Type) */ msousa@257: DT return DT; /* Keyword (Data Type) */ msousa@257: TOD return TOD; /* Keyword (Data Type) */ msousa@257: DATE_AND_TIME return DATE_AND_TIME; /* Keyword (Data Type) */ msousa@257: TIME_OF_DAY return TIME_OF_DAY; /* Keyword (Data Type) */ msousa@257: mjsousa@1014: /* A non-standard extension! 
*/ mjsousa@1014: VOID {if (runtime_options.allow_void_datatype) {return VOID;} else {REJECT;}} mjsousa@1014: mjsousa@1014: msousa@257: /*****************************************************************/ msousa@257: /* Keywords defined in "Safety Software Technical Specification" */ msousa@257: /*****************************************************************/ msousa@257: /* msousa@257: * NOTE: The following keywords are defined in msousa@257: * "Safety Software Technical Specification, msousa@257: * Part 1: Concepts and Function Blocks, msousa@257: * Version 1.0 – Official Release" msousa@257: * written by PLCopen - Technical Committee 5 msousa@257: * msousa@257: * We only support these extensions and keywords msousa@257: * if the appropriate command line option is given. msousa@257: * When the option is not given the rule REJECTs, so the text msousa@257: * is left for the remaining rules to match. msousa@257: * Each SAFExxx keyword must return the token of the same name. msousa@257: */ msousa@257: SAFEBOOL {if (get_opt_safe_extensions()) {return SAFEBOOL;} else {REJECT;}} msousa@257: msousa@257: SAFEBYTE {if (get_opt_safe_extensions()) {return SAFEBYTE;} else {REJECT;}} msousa@257: SAFEWORD {if (get_opt_safe_extensions()) {return SAFEWORD;} else {REJECT;}} msousa@257: SAFEDWORD {if (get_opt_safe_extensions()) {return SAFEDWORD;} else{REJECT;}} msousa@257: SAFELWORD {if (get_opt_safe_extensions()) {return SAFELWORD;} else{REJECT;}} msousa@257: msousa@257: SAFEREAL {if (get_opt_safe_extensions()) {return SAFEREAL;} else{REJECT;}} msousa@257: SAFELREAL {if (get_opt_safe_extensions()) {return SAFELREAL;} else{REJECT;}} msousa@257: msousa@257: SAFESINT {if (get_opt_safe_extensions()) {return SAFESINT;} else{REJECT;}} msousa@257: SAFEINT {if (get_opt_safe_extensions()) {return SAFEINT;} else{REJECT;}} msousa@257: SAFEDINT {if (get_opt_safe_extensions()) {return SAFEDINT;} else{REJECT;}} msousa@257: SAFELINT {if (get_opt_safe_extensions()) {return SAFELINT;} else{REJECT;}} msousa@257: msousa@257: SAFEUSINT {if (get_opt_safe_extensions()) {return SAFEUSINT;} else{REJECT;}} msousa@257: SAFEUINT {if (get_opt_safe_extensions()) {return SAFEUINT;} else{REJECT;}} msousa@257: 
SAFEUDINT {if (get_opt_safe_extensions()) {return SAFEUDINT;} else{REJECT;}} msousa@257: SAFEULINT {if (get_opt_safe_extensions()) {return SAFEULINT;} else{REJECT;}} msousa@257: msousa@257: /* SAFESTRING and SAFEWSTRING are not yet supported, i.e. checked correctly, in the semantic analyser (stage 3) */ msousa@257: /* so it is best not to support them at all... */ msousa@257: /* msousa@257: SAFEWSTRING {if (get_opt_safe_extensions()) {return SAFEWSTRING;} else{REJECT;}} msousa@257: SAFESTRING {if (get_opt_safe_extensions()) {return SAFESTRING;} else{REJECT;}} msousa@257: */ msousa@257: msousa@257: SAFETIME {if (get_opt_safe_extensions()) {return SAFETIME;} else{REJECT;}} msousa@257: SAFEDATE {if (get_opt_safe_extensions()) {return SAFEDATE;} else{REJECT;}} msousa@257: SAFEDT {if (get_opt_safe_extensions()) {return SAFEDT;} else{REJECT;}} msousa@257: SAFETOD {if (get_opt_safe_extensions()) {return SAFETOD;} else{REJECT;}} msousa@257: SAFEDATE_AND_TIME {if (get_opt_safe_extensions()) {return SAFEDATE_AND_TIME;} else{REJECT;}} msousa@257: SAFETIME_OF_DAY {if (get_opt_safe_extensions()) {return SAFETIME_OF_DAY;} else{REJECT;}} etisserant@0: etisserant@0: /********************************/ etisserant@0: /* B 1.3.2 - Generic data types */ etisserant@0: /********************************/ etisserant@0: /* Strangely, the following symbols do not seem to be required! */ etisserant@0: /* But we include them so they become reserved words, and do not etisserant@0: * get passed up to bison as an identifier... 
etisserant@0: */ mario@82: ANY return ANY; /* Keyword (Data Type) */ mario@82: ANY_DERIVED return ANY_DERIVED; /* Keyword (Data Type) */ mario@82: ANY_ELEMENTARY return ANY_ELEMENTARY; /* Keyword (Data Type) */ mario@82: ANY_MAGNITUDE return ANY_MAGNITUDE; /* Keyword (Data Type) */ mario@82: ANY_NUM return ANY_NUM; /* Keyword (Data Type) */ mario@82: ANY_REAL return ANY_REAL; /* Keyword (Data Type) */ mario@82: ANY_INT return ANY_INT; /* Keyword (Data Type) */ mario@82: ANY_BIT return ANY_BIT; /* Keyword (Data Type) */ mario@82: ANY_STRING return ANY_STRING; /* Keyword (Data Type) */ mario@82: ANY_DATE return ANY_DATE; /* Keyword (Data Type) */ etisserant@0: etisserant@0: etisserant@0: /********************************/ etisserant@0: /* B 1.3.3 - Derived data types */ etisserant@0: /********************************/ mario@82: ":=" return ASSIGN; /* Delimiter */ mario@82: ".." return DOTDOT; /* Delimiter */ mario@82: TYPE return TYPE; /* Keyword */ mario@82: END_TYPE return END_TYPE; /* Keyword */ mario@82: ARRAY return ARRAY; /* Keyword */ mario@82: OF return OF; /* Keyword */ mario@82: STRUCT return STRUCT; /* Keyword */ mario@82: END_STRUCT return END_STRUCT; /* Keyword */ etisserant@0: etisserant@0: etisserant@0: /*********************/ etisserant@0: /* B 1.4 - Variables */ etisserant@0: /*********************/ etisserant@0: etisserant@0: /******************************************/ etisserant@0: /* B 1.4.3 - Declaration & Initialisation */ etisserant@0: /******************************************/ mario@82: VAR_INPUT return VAR_INPUT; /* Keyword */ mario@82: VAR_OUTPUT return VAR_OUTPUT; /* Keyword */ mario@82: VAR_IN_OUT return VAR_IN_OUT; /* Keyword */ mario@82: VAR_EXTERNAL return VAR_EXTERNAL; /* Keyword */ mario@82: VAR_GLOBAL return VAR_GLOBAL; /* Keyword */ mario@82: END_VAR return END_VAR; /* Keyword */ mario@82: RETAIN return RETAIN; /* Keyword */ mario@82: NON_RETAIN return NON_RETAIN; /* Keyword */ mario@82: R_EDGE return R_EDGE; /* Keyword */ 
mario@82: F_EDGE return F_EDGE; /* Keyword */ mario@82: AT return AT; /* Keyword */ etisserant@0: etisserant@0: etisserant@0: /***********************/ etisserant@0: /* B 1.5.1 - Functions */ etisserant@0: /***********************/ mjsousa@1010: /* Note: The following END_FUNCTION rule includes a BEGIN(INITIAL); command. mjsousa@1016: * This is necessary in case the input program being parsed has syntax errors that force mjsousa@1010: * flex's main state machine to never change to the il_state or the st_state mjsousa@1010: * after changing to the body_state. mjsousa@1010: * This BEGIN(INITIAL) command forces the flex state machine to re-synchronise with mjsousa@1010: * the input stream even in the presence of buggy code! mjsousa@1010: */ mjsousa@1010: FUNCTION return FUNCTION; /* Keyword */ mjsousa@1010: END_FUNCTION BEGIN(INITIAL); return END_FUNCTION; /* Keyword */ /* see Note above */ mjsousa@1010: VAR return VAR; /* Keyword */ mjsousa@1010: CONSTANT return CONSTANT; /* Keyword */ etisserant@0: etisserant@0: etisserant@0: /*****************************/ etisserant@0: /* B 1.5.2 - Function Blocks */ etisserant@0: /*****************************/ mjsousa@1010: /* Note: The following END_FUNCTION_BLOCK rule includes a BEGIN(INITIAL); command. mjsousa@1016: * This is necessary in case the input program being parsed has syntax errors that force mjsousa@1010: * flex's main state machine to never change to the il_state or the st_state mjsousa@1010: * after changing to the body_state. mjsousa@1010: * This BEGIN(INITIAL) command forces the flex state machine to re-synchronise with mjsousa@1010: * the input stream even in the presence of buggy code! 
mjsousa@1010: */ mjsousa@1010: FUNCTION_BLOCK return FUNCTION_BLOCK; /* Keyword */ mjsousa@1010: END_FUNCTION_BLOCK BEGIN(INITIAL); return END_FUNCTION_BLOCK; /* Keyword */ /* see Note above */ mjsousa@1010: VAR_TEMP return VAR_TEMP; /* Keyword */ mjsousa@1010: VAR return VAR; /* Keyword */ mjsousa@1010: NON_RETAIN return NON_RETAIN; /* Keyword */ mjsousa@1010: END_VAR return END_VAR; /* Keyword */ etisserant@0: etisserant@0: etisserant@0: /**********************/ etisserant@0: /* B 1.5.3 - Programs */ etisserant@0: /**********************/ mjsousa@1010: /* Note: The following END_PROGRAM rule includes a BEGIN(INITIAL); command. mjsousa@1016: * This is necessary in case the input program being parsed has syntax errors that force mjsousa@1010: * flex's main state machine to never change to the il_state or the st_state mjsousa@1010: * after changing to the body_state. mjsousa@1010: * This BEGIN(INITIAL) command forces the flex state machine to re-synchronise with mjsousa@1010: * the input stream even in the presence of buggy code! mjsousa@1010: */ mjsousa@1010: PROGRAM return PROGRAM; /* Keyword */ mjsousa@1010: END_PROGRAM BEGIN(INITIAL); return END_PROGRAM; /* Keyword */ /* see Note above */ etisserant@0: etisserant@0: etisserant@0: /********************************************/ etisserant@0: /* B 1.6 Sequential Function Chart elements */ etisserant@0: /********************************************/ etisserant@0: /* NOTE: the following identifiers/tokens clash with the R and S IL operators, as well etisserant@0: .* as other identifiers that may be used as variable names inside IL and ST programs. etisserant@0: * They will have to be handled when we include parsing of SFC... For now, simply etisserant@0: * ignore them! 
etisserant@0: */ etisserant@1: mario@82: ACTION return ACTION; /* Keyword */ mario@82: END_ACTION return END_ACTION; /* Keyword */ mario@82: mario@82: TRANSITION return TRANSITION; /* Keyword */ mario@82: END_TRANSITION return END_TRANSITION; /* Keyword */ mario@82: FROM return FROM; /* Keyword */ mario@82: TO return TO; /* Keyword */ mario@82: mario@82: INITIAL_STEP return INITIAL_STEP; /* Keyword */ mario@82: STEP return STEP; /* Keyword */ mario@82: END_STEP return END_STEP; /* Keyword */ etisserant@0: mario@74: /* PRIORITY is not a keyword, so we only return it when mario@74: * it is explicitly required and we are not expecting any identifiers mario@74: * that could also use the same letter sequence (i.e. an identifier: priority) mario@74: */ mario@86: PRIORITY return PRIORITY; mario@74: mario@68: { etisserant@0: L return L; etisserant@0: D return D; etisserant@0: SD return SD; etisserant@0: DS return DS; etisserant@0: SL return SL; etisserant@0: N return N; etisserant@0: P return P; Laurent@627: P0 return P0; Laurent@627: P1 return P1; etisserant@0: R return R; etisserant@0: S return S; etisserant@1: } etisserant@0: etisserant@0: etisserant@0: /********************************/ etisserant@0: /* B 1.7 Configuration elements */ etisserant@0: /********************************/ mjsousa@1010: /* Note: The following END_CONFIGURATION rule will never get to be used, as we have mjsousa@1010: * another identical rule above (closer to the rules handling the transitions mjsousa@1010: * of the main state machine) that will always execute before this one. mjsousa@1010: * Note: The following END_CONFIGURATION rule includes a BEGIN(INITIAL); command. 
mjsousa@1010: * This is not strictly necessary, but I place it here so it follows the same mjsousa@1010: * pattern used in END_FUNCTION, END_PROGRAM, and END_FUNCTION_BLOCK mjsousa@1010: */ mjsousa@1010: CONFIGURATION return CONFIGURATION; /* Keyword */ mjsousa@1010: END_CONFIGURATION BEGIN(INITIAL); return END_CONFIGURATION; /* Keyword */ /* see 2 Notes above! */ mjsousa@1010: TASK return TASK; /* Keyword */ mjsousa@1010: RESOURCE return RESOURCE; /* Keyword */ mjsousa@1010: ON return ON; /* Keyword */ mjsousa@1010: END_RESOURCE return END_RESOURCE; /* Keyword */ mjsousa@1010: VAR_CONFIG return VAR_CONFIG; /* Keyword */ mjsousa@1010: VAR_ACCESS return VAR_ACCESS; /* Keyword */ mjsousa@1010: END_VAR return END_VAR; /* Keyword */ mjsousa@1010: WITH return WITH; /* Keyword */ mjsousa@1010: PROGRAM return PROGRAM; /* Keyword */ mjsousa@1010: RETAIN return RETAIN; /* Keyword */ mjsousa@1010: NON_RETAIN return NON_RETAIN; /* Keyword */ mjsousa@1010: READ_WRITE return READ_WRITE; /* Keyword */ mjsousa@1010: READ_ONLY return READ_ONLY; /* Keyword */ mario@74: mario@74: /* PRIORITY, SINGLE and INTERVAL are not keywords, so we only return them when mario@74: * it is explicitly required and we are not expecting any identifiers mario@74: * that could also use the same letter sequence (i.e. an identifier: priority, ...) 
mario@74: */ mario@74: { etisserant@0: PRIORITY return PRIORITY; etisserant@0: SINGLE return SINGLE; etisserant@0: INTERVAL return INTERVAL; mario@74: } etisserant@0: etisserant@0: /***********************************/ etisserant@0: /* B 2.1 Instructions and Operands */ etisserant@0: /***********************************/ lbessard@3: \n return EOL; etisserant@0: etisserant@0: etisserant@0: /*******************/ etisserant@0: /* B 2.2 Operators */ etisserant@0: /*******************/ etisserant@0: /* NOTE: we can't have flex return the same token for etisserant@0: * ANDN and &N, neither for AND and &, since etisserant@0: * AND and ANDN are considered valid variable etisserant@0: * function or functionblock type names! etisserant@0: * This means that the parser may decide that the etisserant@0: * AND or ANDN strings found in the source code etisserant@0: * are being used as variable names etisserant@0: * and not as operators, and will therefore transform etisserant@0: * these tokens into identifier tokens! etisserant@0: * We can't have the parser thinking that the source etisserant@0: * code contained the string AND (which may be interpreted etisserant@0: * as a variable name) when in reality the source code etisserant@0: * merely contained the character &, so we use two etisserant@0: * different tokens for & and AND (and similarly etisserant@0: * ANDN and &N)! etisserant@0: */ mario@68: /* The following tokens clash with ST expression operators and Standard Functions */ mario@73: /* They are also keywords! 
*/ mario@82: AND return AND; /* Keyword */ mario@82: MOD return MOD; /* Keyword */ mario@82: OR return OR; /* Keyword */ mario@82: XOR return XOR; /* Keyword */ mario@82: NOT return NOT; /* Keyword */ mario@68: mario@68: /* The following tokens clash with Standard Functions */ mario@82: /* They are keywords because they are a function name */ mario@73: { mario@82: ADD return ADD; /* Keyword (Standard Function) */ mario@82: DIV return DIV; /* Keyword (Standard Function) */ mario@82: EQ return EQ; /* Keyword (Standard Function) */ mario@82: GE return GE; /* Keyword (Standard Function) */ mario@82: GT return GT; /* Keyword (Standard Function) */ mario@82: LE return LE; /* Keyword (Standard Function) */ mario@82: LT return LT; /* Keyword (Standard Function) */ mario@82: MUL return MUL; /* Keyword (Standard Function) */ mario@82: NE return NE; /* Keyword (Standard Function) */ mario@82: SUB return SUB; /* Keyword (Standard Function) */ mario@73: } mario@68: mario@68: /* The following tokens clash with SFC action qualifiers */ mario@82: /* They are not keywords! */ mario@73: { mario@68: S return S; mario@68: R return R; mario@73: } mario@68: mario@68: /* The following tokens clash with ST expression operators */ mario@82: & return AND2; /* NOT a Delimiter! */ mario@68: mario@68: /* The following tokens have no clashes */ mario@82: /* They are not keywords! 
*/ mario@73: { etisserant@0: LD return LD; etisserant@0: LDN return LDN; etisserant@0: ST return ST; etisserant@0: STN return STN; etisserant@0: S1 return S1; etisserant@0: R1 return R1; etisserant@0: CLK return CLK; etisserant@0: CU return CU; etisserant@0: CD return CD; etisserant@0: PV return PV; etisserant@0: IN return IN; etisserant@0: PT return PT; etisserant@0: ANDN return ANDN; etisserant@0: &N return ANDN2; etisserant@0: ORN return ORN; etisserant@0: XORN return XORN; etisserant@0: CAL return CAL; etisserant@0: CALC return CALC; etisserant@0: CALCN return CALCN; etisserant@0: RET return RET; etisserant@0: RETC return RETC; etisserant@0: RETCN return RETCN; etisserant@0: JMP return JMP; etisserant@0: JMPC return JMPC; etisserant@0: JMPCN return JMPCN; mario@73: } etisserant@0: etisserant@0: /***********************/ etisserant@0: /* B 3.1 - Expressions */ etisserant@0: /***********************/ mario@82: "**" return OPER_EXP; /* NOT a Delimiter! */ mario@82: "<>" return OPER_NE; /* NOT a Delimiter! */ mario@82: ">=" return OPER_GE; /* NOT a Delimiter! */ mario@82: "<=" return OPER_LE; /* NOT a Delimiter! */ mario@82: & return AND2; /* NOT a Delimiter! 
*/ mario@82: AND return AND; /* Keyword */ mario@82: XOR return XOR; /* Keyword */ mario@82: OR return OR; /* Keyword */ mario@82: NOT return NOT; /* Keyword */ mario@82: MOD return MOD; /* Keyword */ etisserant@0: etisserant@0: etisserant@0: /*****************************************/ etisserant@0: /* B 3.2.2 Subprogram Control Statements */ etisserant@0: /*****************************************/ mario@82: := return ASSIGN; /* Delimiter */ mario@82: => return SENDTO; /* Delimiter */ mario@82: RETURN return RETURN; /* Keyword */ etisserant@0: etisserant@0: etisserant@0: /********************************/ etisserant@0: /* B 3.2.3 Selection Statements */ etisserant@0: /********************************/ mario@82: IF return IF; /* Keyword */ mario@82: THEN return THEN; /* Keyword */ mario@82: ELSIF return ELSIF; /* Keyword */ mario@82: ELSE return ELSE; /* Keyword */ mario@82: END_IF return END_IF; /* Keyword */ mario@82: mario@82: CASE return CASE; /* Keyword */ mario@82: OF return OF; /* Keyword */ mario@82: ELSE return ELSE; /* Keyword */ mario@82: END_CASE return END_CASE; /* Keyword */ etisserant@0: etisserant@0: etisserant@0: /********************************/ etisserant@0: /* B 3.2.4 Iteration Statements */ etisserant@0: /********************************/ mario@82: FOR return FOR; /* Keyword */ mario@82: TO return TO; /* Keyword */ mario@82: BY return BY; /* Keyword */ mario@82: DO return DO; /* Keyword */ mario@82: END_FOR return END_FOR; /* Keyword */ mario@82: mario@82: WHILE return WHILE; /* Keyword */ mario@82: DO return DO; /* Keyword */ mario@82: END_WHILE return END_WHILE; /* Keyword */ mario@82: mario@82: REPEAT return REPEAT; /* Keyword */ mario@82: UNTIL return UNTIL; /* Keyword */ mario@82: END_REPEAT return END_REPEAT; /* Keyword */ mario@82: mario@82: EXIT return EXIT; /* Keyword */ etisserant@0: etisserant@0: msousa@257: etisserant@0: etisserant@0: etisserant@0: etisserant@0: /********************************************************/ 
etisserant@0: /********************************************************/ etisserant@0: /********************************************************/ etisserant@0: /***** *****/ etisserant@0: /***** *****/ etisserant@0: /***** N O W W O R K W I T H V A L U E S *****/ etisserant@0: /***** *****/ etisserant@0: /***** *****/ etisserant@0: /********************************************************/ etisserant@0: /********************************************************/ etisserant@0: /********************************************************/ etisserant@0: etisserant@0: etisserant@0: /********************************************/ etisserant@0: /* B.1.4.1 Directly Represented Variables */ etisserant@0: /********************************************/ lbessard@175: {direct_variable} {yylval.ID=strdup(yytext); return get_direct_variable_token(yytext);} etisserant@0: etisserant@0: etisserant@0: /******************************************/ etisserant@0: /* B 1.4.3 - Declaration & Initialisation */ etisserant@0: /******************************************/ etisserant@0: {incompl_location} {yylval.ID=strdup(yytext); return incompl_location_token;} etisserant@0: etisserant@0: etisserant@0: /************************/ etisserant@0: /* B 1.2.3.1 - Duration */ etisserant@0: /************************/ etisserant@0: {fixed_point} {yylval.ID=strdup(yytext); return fixed_point_token;} msousa@547: {interval} {/*fprintf(stderr, "entering time_literal_state ##%s##\n", yytext);*/ unput_and_mark('#'); yy_push_state(time_literal_state);} msousa@547: {erroneous_interval} {return erroneous_interval_token;} msousa@547: msousa@547: { msousa@547: {integer}d {yylval.ID=strdup(yytext); yylval.ID[yyleng-1] = '\0'; return integer_d_token;} msousa@547: {integer}h {yylval.ID=strdup(yytext); yylval.ID[yyleng-1] = '\0'; return integer_h_token;} msousa@547: {integer}m {yylval.ID=strdup(yytext); yylval.ID[yyleng-1] = '\0'; return integer_m_token;} msousa@547: {integer}s {yylval.ID=strdup(yytext); 
yylval.ID[yyleng-1] = '\0'; return integer_s_token;} msousa@547: {integer}ms {yylval.ID=strdup(yytext); yylval.ID[yyleng-2] = '\0'; return integer_ms_token;} msousa@547: {fixed_point}d {yylval.ID=strdup(yytext); yylval.ID[yyleng-1] = '\0'; return fixed_point_d_token;} msousa@547: {fixed_point}h {yylval.ID=strdup(yytext); yylval.ID[yyleng-1] = '\0'; return fixed_point_h_token;} msousa@547: {fixed_point}m {yylval.ID=strdup(yytext); yylval.ID[yyleng-1] = '\0'; return fixed_point_m_token;} msousa@547: {fixed_point}s {yylval.ID=strdup(yytext); yylval.ID[yyleng-1] = '\0'; return fixed_point_s_token;} msousa@547: {fixed_point}ms {yylval.ID=strdup(yytext); yylval.ID[yyleng-2] = '\0'; return fixed_point_ms_token;} msousa@547: msousa@547: _ /* do nothing - eat it up!*/ msousa@616: \# {/*fprintf(stderr, "popping from time_literal_state (###)\n");*/ yy_pop_state(); return end_interval_token;} msousa@616: . {/*fprintf(stderr, "time_literal_state: found invalid character '%s'. Aborting!\n", yytext);*/ ERROR;} msousa@547: \n {ERROR;} msousa@547: } etisserant@0: /*******************************/ etisserant@0: /* B.1.2.2 Character Strings */ etisserant@0: /*******************************/ etisserant@0: {double_byte_character_string} {yylval.ID=strdup(yytext); return double_byte_character_string_token;} etisserant@0: {single_byte_character_string} {yylval.ID=strdup(yytext); return single_byte_character_string_token;} etisserant@0: etisserant@0: etisserant@0: /******************************/ etisserant@0: /* B.1.2.1 Numeric literals */ etisserant@0: /******************************/ etisserant@0: {integer} {yylval.ID=strdup(yytext); return integer_token;} etisserant@0: {real} {yylval.ID=strdup(yytext); return real_token;} etisserant@0: {binary_integer} {yylval.ID=strdup(yytext); return binary_integer_token;} etisserant@0: {octal_integer} {yylval.ID=strdup(yytext); return octal_integer_token;} etisserant@0: {hex_integer} {yylval.ID=strdup(yytext); return hex_integer_token;} 
etisserant@0: etisserant@0: etisserant@0: /*****************************************/ etisserant@0: /* B.1.1 Letters, digits and identifiers */ etisserant@0: /*****************************************/ mjsousa@866: {identifier}/({st_whitespace_or_pragma_or_comment})"=>" {yylval.ID=strdup(yytext); return sendto_identifier_token;} mjsousa@866: {identifier}/({il_whitespace_or_pragma_or_comment})"=>" {yylval.ID=strdup(yytext); return sendto_identifier_token;} etisserant@0: {identifier} {yylval.ID=strdup(yytext); mario@75: // printf("returning identifier...: %s, %d\n", yytext, get_identifier_token(yytext)); etisserant@0: return get_identifier_token(yytext);} etisserant@0: etisserant@0: etisserant@0: etisserant@0: etisserant@0: etisserant@0: etisserant@0: /************************************************/ etisserant@0: /************************************************/ etisserant@0: /************************************************/ etisserant@0: /***** *****/ etisserant@0: /***** *****/ etisserant@0: /***** T H E L E F T O V E R S . . . *****/ etisserant@0: /***** *****/ etisserant@0: /***** *****/ etisserant@0: /************************************************/ etisserant@0: /************************************************/ etisserant@0: /************************************************/ etisserant@0: etisserant@0: /* do the single character tokens... etisserant@0: * etisserant@0: * e.g.: ':' '(' ')' '+' '*' ... etisserant@0: */ etisserant@0: . {return yytext[0];} etisserant@0: etisserant@0: etisserant@0: %% etisserant@0: etisserant@0: msousa@757: /*************************/ msousa@757: /* Tracking Functions... 
*/ msousa@757: /*************************/ msousa@757: mjsousa@880: #define MAX_LINE_LENGTH 1024 msousa@757: msousa@757: tracking_t *GetNewTracking(FILE* in_file) { msousa@757: tracking_t* new_env = new tracking_t; msousa@757: new_env->eof = 0; msousa@757: new_env->lineNumber = 0; msousa@757: new_env->currentChar = 0; msousa@757: new_env->lineLength = 0; msousa@757: new_env->currentTokenStart = 0; mjsousa@879: new_env->buffer = (char*)malloc(MAX_LINE_LENGTH); msousa@757: new_env->in_file = in_file; msousa@757: return new_env; msousa@757: } msousa@757: msousa@757: mjsousa@879: void FreeTracking(tracking_t *tracking) { mjsousa@879: free(tracking->buffer); mjsousa@879: delete tracking; mjsousa@879: } mjsousa@879: mjsousa@879: msousa@757: /* GetNextChar: reads a character from input */ msousa@757: int GetNextChar(char *b, int maxBuffer) { msousa@757: char *p; msousa@757: msousa@757: if ( current_tracking->eof ) msousa@757: return 0; msousa@757: msousa@757: while ( current_tracking->currentChar >= current_tracking->lineLength ) { msousa@757: current_tracking->currentChar = 0; msousa@757: current_tracking->currentTokenStart = 1; msousa@757: current_tracking->eof = false; msousa@757: mjsousa@879: p = fgets(current_tracking->buffer, MAX_LINE_LENGTH, current_tracking->in_file); msousa@757: if ( p == NULL ) { msousa@757: if ( ferror(current_tracking->in_file) ) msousa@757: return 0; msousa@757: current_tracking->eof = true; msousa@757: return 0; msousa@757: } msousa@757: msousa@757: current_tracking->lineLength = strlen(current_tracking->buffer); mjsousa@880: mjsousa@880: /* only increment line number if the buffer was big enough to read the whole line! 
*/ mjsousa@880: char last_char = current_tracking->buffer[current_tracking->lineLength - 1]; mjsousa@880: if (('\n' == last_char) || ('\r' == last_char)) // '\r' ---> CR, '\n' ---> LF mjsousa@880: current_tracking->lineNumber++; msousa@757: } msousa@757: msousa@757: b[0] = current_tracking->buffer[current_tracking->currentChar]; msousa@757: if (b[0] == ' ' || b[0] == '\t') msousa@757: current_tracking->currentTokenStart++; msousa@757: current_tracking->currentChar++; msousa@757: msousa@757: return b[0]==0?0:1; msousa@757: } msousa@757: msousa@757: msousa@757: msousa@757: etisserant@0: /***********************************/ etisserant@0: /* Utility function definitions... */ etisserant@0: /***********************************/ etisserant@0: etisserant@0: /* print the include file stack to stderr... */ etisserant@0: void print_include_stack(void) { etisserant@0: int i; etisserant@0: etisserant@0: if ((include_stack_ptr - 1) >= 0) etisserant@0: fprintf (stderr, "in file "); etisserant@0: for (i = include_stack_ptr - 1; i >= 0; i--) lbessard@136: fprintf (stderr, "included from file %s:%d\n", include_stack[i].filename, include_stack[i].env->lineNumber); etisserant@0: } etisserant@0: etisserant@0: msousa@756: msousa@756: /* set the internal state variables of lexical analyser to process a new include file */ msousa@756: void handle_include_file_(FILE *filehandle, const char *filename) { msousa@756: if (include_stack_ptr >= MAX_INCLUDE_DEPTH) { msousa@756: fprintf(stderr, "Includes nested too deeply\n"); msousa@756: exit( 1 ); msousa@756: } msousa@756: msousa@756: yyin = filehandle; msousa@756: msousa@756: include_stack[include_stack_ptr].buffer_state = YY_CURRENT_BUFFER; msousa@756: include_stack[include_stack_ptr].env = current_tracking; msousa@756: include_stack[include_stack_ptr].filename = current_filename; msousa@756: msousa@756: current_filename = strdup(filename); msousa@756: current_tracking = GetNewTracking(yyin); msousa@756: include_stack_ptr++; msousa@756: 
msousa@756: /* switch input buffer to new file... */ msousa@756: yy_switch_to_buffer(yy_create_buffer(yyin, YY_BUF_SIZE)); msousa@756: } msousa@756: msousa@756: msousa@756: msousa@756: /* insert the code (in ) into the source code we are parsing. msousa@756: * This is done by creating an artificial file with that new source code, and then 'including' the file msousa@756: */ msousa@757: void include_string_(const char *source_code) { msousa@756: FILE *tmp_file = tmpfile(); msousa@756: msousa@756: if(tmp_file == NULL) { msousa@756: perror("Error creating temp file."); msousa@756: exit(EXIT_FAILURE); msousa@756: } msousa@756: msousa@756: fwrite((void *)source_code, 1, strlen(source_code), tmp_file); msousa@756: rewind(tmp_file); msousa@756: msousa@756: /* now parse the tmp file, by asking flex to handle it as if it had been included with the (*#include ... *) pragma... */ msousa@756: handle_include_file_(tmp_file, ""); msousa@756: //fclose(tmp_file); /* do NOT close file. It must only be closed when we finish reading from it! */ msousa@756: } msousa@756: msousa@756: msousa@756: msousa@756: /* Open an include file, and set the internal state variables of lexical analyser to process a new include file */ msousa@756: void include_file(const char *filename) { msousa@756: FILE *filehandle = NULL; msousa@756: msousa@756: for (int i = 0; (INCLUDE_DIRECTORIES[i] != NULL) && (filehandle == NULL); i++) { msousa@756: char *full_name; msousa@756: full_name = strdup3(INCLUDE_DIRECTORIES[i], "/", filename); msousa@756: if (full_name == NULL) { msousa@756: fprintf(stderr, "Out of memory!\n"); msousa@756: exit( 1 ); msousa@756: } msousa@756: filehandle = fopen(full_name, "r"); msousa@756: free(full_name); msousa@756: } msousa@756: msousa@756: if (NULL == filehandle) { msousa@756: fprintf(stderr, "Error opening included file %s\n", filename); msousa@756: exit( 1 ); msousa@756: } msousa@756: msousa@756: /* now process the new file... 
*/ msousa@756: handle_include_file_(filehandle, filename); msousa@756: } msousa@756: msousa@756: msousa@756: msousa@756: msousa@756: etisserant@0: /* return all the text in the current token back to the input stream, except the first n chars. */ etisserant@0: void unput_text(unsigned int n) { etisserant@0: /* it seems that flex has a bug in that it will not correctly count the line numbers etisserant@0: * if we return newlines back to the input stream. These newlines will be re-counted etisserant@0: * a second time when they are processed again by flex. etisserant@0: * We therefore determine how many newlines are in the text we are returning, etisserant@0: * and decrement the line counter acordingly... etisserant@0: */ mjsousa@879: /* mjsousa@879: unsigned int i; lbessard@136: etisserant@0: for (i = n; i < strlen(yytext); i++) etisserant@0: if (yytext[i] == '\n') mjsousa@879: current_tracking->lineNumber--; mjsousa@879: */ etisserant@0: /* now return all the text back to the input stream... */ etisserant@0: yyless(n); etisserant@0: } etisserant@0: etisserant@0: msousa@547: /* return all the text in the current token back to the input stream, msousa@547: * but first return to the stream an additional character to mark the end of the token. msousa@547: */ msousa@547: void unput_and_mark(const char c) { msousa@547: char *yycopy = strdup( yytext ); /* unput() destroys yytext, so we copy it first */ msousa@547: unput(c); msousa@547: for (int i = yyleng-1; i >= 0; i--) msousa@547: unput(yycopy[i]); msousa@547: msousa@547: free(yycopy); msousa@547: } msousa@547: msousa@547: msousa@547: mjsousa@1016: /* The body_state tries to find a ';' before a END_PROGRAM, END_FUNCTION or END_FUNCTION_BLOCK or END_ACTION mjsousa@1016: * To do so, it must ignore comments and pragmas. This means that we cannot do this in a signle lex rule. 
mjsousa@1016: * However, we must store any text we consume in every rule, so we can push it back into the buffer mjsousa@1016: * once we have decided if we are parsing ST or IL code. The following functions manage that buffer used by mjsousa@1016: * the body_state. mjsousa@1016: */ mjsousa@1016: /* The buffer used by the body_state state */ mjsousa@1016: char *bodystate_buffer = NULL; mjsousa@1016: mjsousa@1016: /* append text to bodystate_buffer */ mjsousa@1016: void append_bodystate_buffer(const char *text) { mjsousa@1020: //printf("<<>> %d <%s><%s>\n", bodystate_buffer, text, (NULL != bodystate_buffer)?bodystate_buffer:"NULL"); mjsousa@1016: long int old_len = 0; mjsousa@1016: if (NULL != bodystate_buffer) old_len = strlen(bodystate_buffer); mjsousa@1016: bodystate_buffer = (char *)realloc(bodystate_buffer, old_len + strlen(text) + 1); mjsousa@1016: if (NULL == bodystate_buffer) ERROR; mjsousa@1016: strcpy(bodystate_buffer + old_len, text); mjsousa@1016: //printf("=<%s> %d %d\n", (NULL != bodystate_buffer)?bodystate_buffer:NULL, old_len + strlen(text) + 1, bodystate_buffer); mjsousa@1016: } mjsousa@1016: mjsousa@1016: /* Return all data in bodystate_buffer back to flex, and empty bodystate_buffer. 
*/ mjsousa@1016: void unput_bodystate_buffer(void) { mjsousa@1016: if (NULL == bodystate_buffer) ERROR; mjsousa@1016: //printf("<<>>\n%s\n", bodystate_buffer); mjsousa@1016: mjsousa@1016: for (long int i = strlen(bodystate_buffer)-1; i >= 0; i--) mjsousa@1016: unput(bodystate_buffer[i]); mjsousa@1016: mjsousa@1016: free(bodystate_buffer); mjsousa@1016: bodystate_buffer = NULL; mjsousa@1016: } mjsousa@1016: mjsousa@1016: mjsousa@1016: /* Return true if bodystate_buffer is empty */ mjsousa@1016: int isempty_bodystate_buffer(void) { mjsousa@1016: return (NULL == bodystate_buffer); mjsousa@1016: } mjsousa@1016: mjsousa@1016: mjsousa@1016: mjsousa@1016: etisserant@0: /* Called by flex when it reaches the end-of-file */ etisserant@0: int yywrap(void) etisserant@0: { etisserant@0: /* We reached the end of the input file... */ etisserant@0: etisserant@0: /* Should we continue with another file? */ etisserant@0: /* If so: etisserant@0: * open the new file... etisserant@0: * return 0; etisserant@0: */ etisserant@0: msousa@737: /* to stop processing... etisserant@0: * return 1; etisserant@0: */ etisserant@0: etisserant@0: return 1; /* Stop scanning at end of input file. */ etisserant@0: } etisserant@0: etisserant@0: etisserant@0: msousa@757: /*******************************/ msousa@757: /* Public Interface for Bison. */ msousa@757: /*******************************/ msousa@757: msousa@757: /* The following functions will be called from inside bison code! */ msousa@757: msousa@757: void include_string(const char *source_code) {include_string_(source_code);} msousa@757: msousa@757: msousa@757: /* Tell flex which file to parse. This function will not imediately start parsing the file. msousa@757: * To parse the file, you then need to call yyparse() msousa@757: * mjsousa@761: * Returns NULL on error opening the file (and a valid errno), or 0 on success. mjsousa@761: * Caller must close the file! 
mjsousa@761: */ mjsousa@761: FILE *parse_file(const char *filename) { msousa@757: FILE *filehandle = NULL; msousa@757: mjsousa@761: if((filehandle = fopen(filename, "r")) != NULL) { mjsousa@761: yyin = filehandle; mjsousa@761: current_filename = strdup(filename); mjsousa@761: current_tracking = GetNewTracking(yyin); mjsousa@761: } mjsousa@761: return filehandle; msousa@757: } msousa@757: msousa@757: msousa@757: msousa@757: msousa@757: msousa@757: etisserant@0: /*************************************/ etisserant@0: /* Include a main() function to test */ etisserant@0: /* the token parsing by flex.... */ etisserant@0: /*************************************/ etisserant@0: #ifdef TEST_MAIN etisserant@0: etisserant@0: #include "../util/symtable.hh" etisserant@0: etisserant@0: yystype yylval; etisserant@0: YYLTYPE yylloc; etisserant@0: etisserant@0: mario@15: mario@15: etisserant@0: int get_identifier_token(const char *identifier_str) {return 0;} lbessard@175: int get_direct_variable_token(const char *direct_variable_str) {return 0;} etisserant@0: etisserant@0: etisserant@0: int main(int argc, char **argv) { etisserant@0: etisserant@0: FILE *in_file; etisserant@0: int res; lbessard@136: etisserant@0: if (argc == 1) { etisserant@0: /* Work as an interactive (command line) parser... */ etisserant@0: while((res=yylex())) etisserant@0: fprintf(stderr, "(line %d)token: %d\n", yylineno, res); etisserant@0: } else { etisserant@0: /* Work as non-interactive (file) parser... */ etisserant@0: if((in_file = fopen(argv[1], "r")) == NULL) { etisserant@0: char *errmsg = strdup2("Error opening main file ", argv[1]); etisserant@0: perror(errmsg); etisserant@0: free(errmsg); etisserant@0: return -1; etisserant@0: } etisserant@0: etisserant@0: /* parse the file... 
*/ etisserant@0: yyin = in_file; etisserant@0: current_filename = argv[1]; etisserant@0: while(1) { etisserant@0: res=yylex(); etisserant@0: fprintf(stderr, "(line %d)token: %d (%s)\n", yylineno, res, yylval.ID); etisserant@0: } etisserant@0: } lbessard@136: lbessard@136: return 0; etisserant@0: etisserant@0: } etisserant@0: #endif