/* * matiec - a compiler for the programming languages defined in IEC 61131-3 * * Copyright (C) 2003-2011 Mario de Sousa (msousa@fe.up.pt) * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. * * * This code is made available on the understanding that it will not be * used in safety-critical situations without a full and competent review. */ /* * An IEC 61131-3 compiler. * * Based on the * FINAL DRAFT - IEC 61131-3, 2nd Ed. (2001-12-10) * */ /* * Stage 1 * ======= * * This file contains the lexical tokens definitions, from which * the flex utility will generate a lexical parser function. */ /*****************************/ /* Lexical Parser Options... */ /*****************************/ /* The lexical analyser will never work in interactive mode, * i.e., it will only process programs saved to files, and never * programs being written interactively by the user. * This option saves the resulting parser from calling the * isatty() function, which seems to be generating some compile * errors under some (older?) versions of flex. */ %option never-interactive /* Have the lexical analyser use a 'char *yytext' instead of an * array of char 'char yytext[??]' to store the lexical token. */ %pointer /* Have the lexical analyser ignore the case of letters. * This will occur for all the tokens and keywords, but * the resulting text handed up to the syntax parser * will not be changed, and will keep the original case * of the letters in the input file. */ %option case-insensitive /* Have the generated lexical analyser keep track of the * line number it is currently analysing. * This is used to pass up to the syntax parser * the number of the line on which the current * token was found. It will enable the syntax parser * to generate more informative error messages... */ %option yylineno /* required for the use of the yy_pop_state() and * yy_push_state() functions */ %option stack /* The '%option stack' also requests the inclusion of * the yy_top_state() function, however this function is not * currently being used. This means that the compiler * complains about the existence of this function. * The following option removes the yy_top_state() * function from the resulting c code, so the compiler * no longer complains. */ %option noyy_top_state /* We will be using unput() in our flex code, so we cannot set the following option!... */ /* %option nounput */ /* The '%option debug' makes the generated scanner run in * debug mode. %option debug */ /**************************************************/ /* External Variable and Function declarations... */ /**************************************************/ %{ /* Define TEST_MAIN to include a main() function. * Useful for testing the parser generated by flex. */ /* #define TEST_MAIN */ /* If the lexical parser is compiled by itself, we need to define the following * constant to some string. Under normal circumstances LIBDIRECTORY is set * in the syntax parser header file... 
*/ #ifdef TEST_MAIN #define DEFAULT_LIBDIR "just_testing" #endif /* Required for strdup() */ #include <string.h> /* Required only for the declaration of abstract syntax classes * (class symbol_c; class token_c; class list_c;) * These will not be used in flex, but the token type union defined * in iec_bison.hh contains pointers to these classes, so we must include * it here. */ #include "../absyntax/absyntax.hh" /* iec_bison.hh is generated by bison. * Contains the definition of the token constants, and the * token value type YYSTYPE (in our case, a 'const char *') */ #include "iec_bison.hh" #include "stage1_2_priv.hh" /* Variable defined by the bison parser, * where the value of the tokens will be stored */ extern YYSTYPE yylval; /* The name of the file currently being parsed... * Note that flex accesses and updates this global variable * appropriately whenever it comes across an (*#include *) directive... */ const char *current_filename = NULL; /* Variable defined by the bison parser. * It must be initialised with the location * of the token being parsed. * This is only needed if we want to keep * track of the locations, in order to give * more meaningful error messages! */ /* extern YYLTYPE yylloc; */ #define YY_INPUT(buf,result,max_size) {\ result = GetNextChar(buf, max_size);\ if ( result <= 0 )\ result = YY_NULL;\ } /* Macro that is executed for every action. * We use it to pass the location of the token * back to the bison parser... */ #define YY_USER_ACTION {\ previous_tracking = *current_tracking; \ yylloc.first_line = current_tracking->lineNumber; \ yylloc.first_column = current_tracking->currentChar; \ yylloc.first_file = current_filename; \ yylloc.first_order = current_order; \ \ UpdateTracking(yytext); \ \ yylloc.last_line = current_tracking->lineNumber; \ yylloc.last_column = current_tracking->currentChar - 1; \ yylloc.last_file = current_filename; \ yylloc.last_order = current_order; \ \ current_tracking->currentTokenStart = current_tracking->currentChar; \ current_order++; \ } /* Since this lexical parser we defined only works on ASCII-based * systems, we might as well make sure it is being compiled on * one... * Let's check a few random characters... */ #if (('a' != 0x61) || ('A' != 0x41) || ('z' != 0x7A) || ('Z' != 0x5A) || \ ('0' != 0x30) || ('9' != 0x39) || ('(' != 0x28) || ('[' != 0x5B)) #error This lexical analyser is not portable to a non ASCII based system. #endif /* Function only called from within flex, but defined * in iec.y! * We declare it here... * * Search for a symbol in either of the two symbol tables * and return the token id of the first symbol found. * Searches first in the variables, and only if not found * does it continue searching in the library elements */ //token_id_t get_identifier_token(const char *identifier_str); int get_identifier_token(const char *identifier_str); %} /***************************************************/ /* Forward Declaration of functions defined later. */ /***************************************************/ %{ void UpdateTracking(const char *text); /* return the character back to the input stream. */ void unput_char(const char c); /* return all the text in the current token back to the input stream. */ void unput_text(int n); /* return all the text in the current token back to the input stream, * but first return to the stream an additional character to mark the end of the token. 
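 * Illustrative example (not from the original source; the text "ABC" and the marker ';'
 * are arbitrary choices): if yytext currently holds "ABC", then calling unput_and_mark(';')
 * pushes the characters back so that the scanner will next read "ABC" followed by ';',
 * i.e. the re-scanned token text ends up terminated by the ';' marker character.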
*/ void unput_and_mark(const char mark_char); void include_file(const char *include_filename); /* The body_state tries to find a ';' before an END_PROGRAM, END_FUNCTION or END_FUNCTION_BLOCK or END_ACTION * and ignores ';' inside comments and pragmas. This means that we cannot do this in a single lex rule. * Body_state therefore stores ALL text we consume in every rule, so we can push it back into the buffer * once we have decided if we are parsing ST or IL code. The following functions manage that buffer used by * the body_state. */ void append_bodystate_buffer(const char *text, int is_whitespace = 0); void unput_bodystate_buffer(void); int isempty_bodystate_buffer(void); void del_bodystate_buffer(void); int GetNextChar(char *b, int maxBuffer); %} /****************************/ /* Lexical Parser States... */ /****************************/ /* NOTE: Our parser can parse st or il code, intermixed * within the same file. * With IL we come across the issue of the EOL (end of line) token. * ST, and the declaration parts of IL, do not use this token! * If the lexical analyser were to issue this token during ST * language parsing, or during the declaration of data types, * function headers, etc. in IL, the syntax parser would crash. * * We can solve this issue using one of three methods: * (1) Augment all the syntax that does not accept the EOL * token to simply ignore it. This makes the syntax * definition (in iec.y) very cluttered! * (2) Let the lexical parser figure out which language * it is parsing, and decide whether or not to issue * the EOL token. This requires the lexical parser * to have knowledge of the syntax, making for a poor * overall organisation of the code. It would also make it * very difficult to understand the lexical parser, as it * would use several states, and a state machine to transition * between the states. The state transitions would be * intermingled with the lexical parser definition! * (3) Use a mixture of (1) and (2). The lexical analyser * merely distinguishes between function headers and function * bodies, but no longer makes a distinction between il and * st language bodies. When parsing a body, it will return * the EOL token. In other states '\n' will be ignored as * whitespace. * The ST language syntax has been augmented in the syntax * parser configuration to ignore any EOL tokens that it may * come across! * This option has both the drawbacks of options (1) and (2), but * much less intensely. * The syntax that gets cluttered is limited to the ST statements * (which is rather limited, compared to the function headers and * data type declarations, etc...), while the state machine in * the lexical parser becomes very simple. All state transitions * can be handled within the lexical parser by itself, and can be * easily identified. Thus the knowledge of the syntax required by * the lexical parser is very limited! * * Amazingly enough, I (Mario) got to implement option (3) * at first, requiring two basic states, decl and body. * The lexical parser will enter the body state when * it is parsing the body of a function/program/function block. The * state transition is done when we find an END_VAR that is not followed * by a VAR! This is the syntax knowledge that gets included in the * lexical analyser with this option! * Unfortunately, getting the st syntax parser to ignore EOL tokens anywhere * they might appear leads to conflicts. This is due to the fact * that the syntax parser uses the single look-ahead token to remove * possible conflicts. 
When we insert a possible EOL, the single * look-ahead token becomes the EOL, which means the potential conflicts * could no longer be resolved. * Removing these conflicts would make the st syntax parser very convoluted, * and adding the extraneous EOL would make it very cluttered. * This option was therefore dropped in favour of another! * * I ended up implementing (2). Unfortunately the lexical analyser cannot * easily distinguish between il and st code, since function * calls in il are very similar to function block calls in st. * We therefore use an extra 'body' state. When the lexical parser * finds that last END_VAR, it enters the body state. This state * must figure out what language is being parsed from the first few * tokens, and switch to the correct state (st, il or sfc) according to the * language. This means that we insert quite a bit of knowledge of the * syntax of the languages into the lexical parser. This is ugly, but it * works, and at least it is possible to keep all the state changes together * to make it easier to remove them later on if need be. * Once the language being parsed has been identified, * the body state returns any matched text back to the buffer with unput(), * to be later matched correctly by the appropriate language parser (st, il or sfc). * * Additionally, in the sfc state it may further recursively enter the body state * once again. This is because an sfc body may contain ACTIONS, which are then * written in one of the three languages (ST, IL or SFC), so once again we need * to figure out which language the ACTION in the SFC was written in. We already * have all that done in the body state, so we recursively transition to the body * state once again. * Note that in this case, when coming out of the st/il state (whichever language * the action was written in) the sfc state will become active again. This is done by * pushing and popping the previously active state! * * The sfc_qualifier_state is required because when parsing actions within an * sfc, we will be expecting action qualifiers (N, P, R, S, DS, SD, ...). In order * for bison to work correctly, these qualifiers must be returned as tokens. However, * these tokens are not reserved keywords, which means it should be possible to * define variables/functions/FBs with any of these names (including * S and R, which are special because they are also IL operators). So, when we are not * expecting any action qualifiers, flex does not return these tokens, and is free * to interpret them as previously defined variables/functions/... as the case may be. * * The time_literal_state is required because TIME# literals are decomposed into * portions, and we want to send these portions one by one to bison. Each portion will * represent the value in days/hours/minutes/seconds/ms. * Unfortunately, some of these portions may also be lexically analysed as an identifier. So, * we need to disable lexical identification of identifiers while parsing TIME# literals! * e.g.: TIME#55d_4h_56m * We would like to return to bison the tokens 'TIME' '#' '55d' '_' '4h' '_' '56m' * Unfortunately, flex will join '_' and '4h' to create a legal {identifier} '_4h', * and return that identifier instead! So, we added this state! * * The ignore_pou_state is only used when bison says it is doing the pre-parsing. * During pre-parsing, the main state machine will only transition between * INITIAL and ignore_pou_state, and from here back to INITIAL. All other * transitions are inhibited. 
This inhibition is actually just enforced by making * sure that the INITIAL ---> ignore_pou_state transition is tested before all other * transitions coming out of INITIAL state. All other transitions are unaffected, as they * never get a chance to be evaluated when bison is doing pre-parsing. * Pre-parsing is a first quick scan through the whole input source code simply * to determine the list of POUs and datatypes that will be defined in that * code. Basically, the objective is to fill up the previously_declared_xxxxx * maps, without processing the code itself. Once these maps have been filled up, * bison will throw away the AST (abstract syntax tree) created up to that point, * and scan through the same source code again, but this time creating a correct AST. * This pre-scan allows the source code to reference POUs and datatypes that are * only declared after they are used! * * * Here is a main state machine... * --+ * | these states are * +------------> get_pou_name_state ----> ignore_pou_state | only active * | | | when bison is * | ------------------------------------------+ | doing the * | | | pre-parsing!! * | v --+ * +---> INITIAL <-------> config * | \ * | V * | header_state * | | * | V * vardecl_list_state <------> var_decl * ^ | * | | [using push()] * | | * | V * | body, * | | * | | * | ------------------- * | | | | * | v v v * | st il sfc * | | | | [using pop() when leaving st/il/sfc => goes to vardecl_list_state] * | | | | * ----------------------- * * NOTE:- When inside sfc, and an action or transition in ST/IL is found, then * we also push() to the body state. This means that sometimes, when pop()ing * from st and il, the state machine may return to the sfc state! * - The transitions form sfc to body will be decided by bison, which will * tell flex to do the transition by calling cmd_goto_body_state(). * * * Possible state changes are: * INITIAL -> goto(ignore_pou_state) * (This transition state is only used when bison says it is doing the pre-parsing.) * (This transition takes precedence over all other transitions!) * (when a FUNCTION, FUNCTION_BLOCK, PROGRAM or CONFIGURATION is found) * * INITIAL -> goto(config_state) * (when a CONFIGURATION is found) * * INITIAL -> goto(header_state) * (when a FUNCTION, FUNCTION_BLOCK, or PROGRAM is found) * * header_state -> goto(vardecl_list_state) * (When the first VAR token is found, i.e. at begining of first VAR .. END_VAR declaration) * * vardecl_list_state -> push current state (vardecl_list_state), and goto(vardecl_state) * (when a VAR token is found) * vardecl_state -> pop() to (vardecl_list_state) * (when a END_VAR token is found) * * vardecl_list_state -> push current state (vardecl_list_state), and goto(body_state) * (when the last END_VAR is found!) 
* * body_state -> goto(sfc_state) * (when it figures out it is parsing sfc language) * body_state -> goto(st_state) * (when it figures out it is parsing st language) * body_state -> goto(il_state) * (when it figures out it is parsing il language) * st_state -> pop() to vardecl_list_state * (when a END_FUNCTION, END_FUNCTION_BLOCK, END_PROGRAM, * END_ACTION or END_TRANSITION is found) * il_state -> pop() to vardecl_list_state * (when a END_FUNCTION, END_FUNCTION_BLOCK, END_PROGRAM, * END_ACTION or END_TRANSITION is found) * sfc_state -> pop() to vardecl_list_state * (when a END_FUNCTION, END_FUNCTION_BLOCK, or END_PROGRAM is found) * * ignore_pou_state -> goto(INITIAL) * (when a END_FUNCTION, END_FUNCTION_BLOCK, END_PROGRAM or END_CONFIGURATION is found) * vardecl_list_state -> goto(INITIAL) * (when a END_FUNCTION, END_FUNCTION_BLOCK, or END_PROGRAM is found) * config_state -> goto(INITIAL) * (when a END_CONFIGURATION is found) * * * sfc_state -> push current state(sfc_state); goto(body_state) * (when parsing an action. This transition is requested by bison) * sfc_state -> push current state(sfc_state); goto(sfc_qualifier_state) * (when expecting an action qualifier. This transition is requested by bison) * sfc_qualifier_state -> pop() to sfc_state * (when no longer expecting an action qualifier. This transition is requested by bison) * * config_state -> push(config_state); goto(task_init_state) * (when parsing a task initialisation. This transition is requested by bison) * task_init_state -> pop() * (when no longer parsing task initialisation parameters. This transition is requested by bison) * * * There is another secondary state machine for parsing comments, another for file_includes, * and yet another for time literals. */ /* Bison is in the pre-parsing stage, and we are parsing a POU. Ignore everything up to the end of the POU! */ %x ignore_pou_state %x get_pou_name_state /* we are parsing a configuration. */ %s config_state /* Inside a configuration, we are parsing a task initialisation parameters */ /* This means that PRIORITY, SINGLE and INTERVAL must be handled as * tokens, and not as possible identifiers. Note that the above words * are not keywords. */ %s task_init_state /* we are looking for the first VAR inside a function's, program's or function block's declaration */ /* This is not exclusive (%x) as we must be able to parse the identifier and data types of a function/FB */ %s header_state /* we are parsing a function, program or function block sequence of VAR..END_VAR delcarations */ %x vardecl_list_state /* a substate of the vardecl_list_state: we are inside a specific VAR .. END_VAR */ %s vardecl_state /* we will be parsing a function body/action/transition. Whether il/st/sfc remains to be determined */ %x body_state /* we are parsing il code -> flex must return the EOL tokens! */ %s il_state /* we are parsing st code -> flex must not return the EOL tokens! */ %s st_state /* we are parsing sfc code -> flex must not return the EOL tokens! */ %s sfc_state /* we are parsing sfc code, and expecting an action qualifier. */ %s sfc_qualifier_state /* we are parsing sfc code, and expecting the priority token. */ %s sfc_priority_state /* we are parsing a TIME# literal. We must not return any {identifier} tokens. */ %x time_literal_state /* we are parsing a comment. */ %x comment_state /*******************/ /* File #include's */ /*******************/ /* We extend the IEC 61131-3 standard syntax to allow inclusion * of other files, using the IEC 61131-3 pragma directive... 
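 * Illustrative example (not part of the original comment; the file name is hypothetical):
 * a line such as
 *   {#include "standard_lib.txt"}
 * makes the scanner switch to reading tokens from that file, and return to the current
 * file once the end of the included file is reached.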
* The accepted syntax is: * {#include ""} */ /* the "include" states are used for picking up the name of an include file */ %x include_beg %x include_filename %x include_end file_include_pragma_filename [^\"]* file_include_pragma_beg "{#include"{st_whitespace}\" file_include_pragma_end \"{st_whitespace}"}" file_include_pragma {file_include_pragma_beg}{file_include_pragma_filename}{file_include_pragma_end} %{ /* A counter to track the order by which each token is processed. * NOTE: This counter is not exactly linear (i.e., it does not get incremented by 1 for each token). * i.e.. it may get incremented by more than one between two consecutive tokens. * This is due to the fact that the counter gets incremented every 'user action' in flex, * however not every user action will result in a token being passed to bison. * Nevertheless this is still OK, as we are only interested in the relative * ordering of tokens... */ static long int current_order = 0; typedef struct { int eof; int lineNumber; int currentChar; int lineLength; int currentTokenStart; FILE *in_file; } tracking_t; /* A forward declaration of a function defined at the end of this file. */ void FreeTracking(tracking_t *tracking); #define MAX_INCLUDE_DEPTH 16 typedef struct { YY_BUFFER_STATE buffer_state; tracking_t *env; const char *filename; } include_stack_t; tracking_t * current_tracking = NULL; tracking_t previous_tracking; include_stack_t include_stack[MAX_INCLUDE_DEPTH]; int include_stack_ptr = 0; const char *INCLUDE_DIRECTORIES[] = { DEFAULT_LIBDIR, ".", "/lib", "/usr/lib", "/usr/lib/iec", NULL /* must end with NULL!! */ }; %} /*****************************/ /* Prelimenary constructs... */ /*****************************/ /* PRAGMAS */ /* ======= */ /* In order to allow the declaration of POU prototypes (Function, FB, Program, ...), * especially the prototypes of Functions and FBs defined in the standard * (i.e. standard functions and FBs), we extend the IEC 61131-3 standard syntax * with two pragmas to indicate that the code is to be parsed (going through the * lexical, syntactical, and semantic analysers), but no code is to be generated. * * The accepted syntax is: * {no_code_generation begin} * ... prototypes ... * {no_code_generation end} * * When parsing these prototypes the abstract syntax tree will be populated as usual, * allowing the semantic analyser to correctly analyse the semantics of calls to these * functions/FBs. However, stage4 will simply ignore all IEC61131-3 code * between the above two pragmas. */ disable_code_generation_pragma "{disable code generation}" enable_code_generation_pragma "{enable code generation}" /* Any other pragma... */ pragma ("{"[^}]*"}")|("{{"([^}]|"}"[^}])*"}}") /* COMMENTS */ /* ======== */ /* In order to allow nested comments, comments are handled by a specific comment_state state */ /* Whenever a "(*" is found, we push the current state onto the stack, and enter a new instance of the comment_state state. * Whenever a "*)" is found, we pop a state off the stack */ /* comments... */ comment_beg "(*" comment_end "*)" /* However, bison has a shift/reduce conflict in bison, when parsing formal function/FB * invocations with the 'NOT =>' syntax (which needs two look ahead * tokens to be parsed correctly - and bison being LALR(1) only supports one). * The current work around requires flex to completely parse the ' =>' * sequence. This sequence includes whitespace and/or comments between the * and the "=>" token. 
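 * Illustrative example (not in the original comment; the names CMD_TMR, Q, trigger and
 * out_var are arbitrary): in a formal invocation such as
 *   CMD_TMR(IN := trigger, NOT Q => out_var);
 * after reading 'NOT' and 'Q' the parser still cannot tell whether 'NOT Q' is a boolean
 * expression being passed to an input, or the start of a negated output connection;
 * only the following '=>' (possibly separated by whitespace or a comment) decides it,
 * which is why flex must match the identifier together with its trailing ' =>' context.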
* * This flex rule (sendto_identifier_token) uses the whitespace/comment as trailing context, * which means we can not use the comment_state method of specifying/finding and ignoring * comments. * * For this reason only, we must also define what a complete comment looks like, so * it may be used in this rule. Since the rule uses the whitespace_or_comment * construct as trailing context, this definition of comment must not use any * trailing context either. * * Aditionally, it is not possible to define nested comments in flex without the use of * states, so for this particular location, we do NOT support nested comments. */ /* NOTE: this seemingly unnecessary complex definition is required * to be able to eat up comments such as: * '(* Testing... ! ***** ******)' * without using the trailing context command in flex (/{context}) * since {comment} itself will later be used with * trailing context ({comment}/{context}) */ not_asterisk [^*] not_close_parenthesis_nor_asterisk [^*)] asterisk "*" comment_text ({not_asterisk})|(({asterisk}+){not_close_parenthesis_nor_asterisk}) comment "(*"({comment_text}*)({asterisk}+)")" /* 3.1 Whitespace */ /* ============== */ /* * Whitespace is clearly defined (see IEC 61131-3 v2, section 2.1.4) * * Whitespace definition includes the newline character. * * However, the standard is inconsistent in that in IL the newline character * is considered a token (EOL - end of line). * In our implementation we therefore have two definitions of whitespace * - one for ST, that includes the newline character * - one for IL without the newline character. * * IL whitespace is only active while parsing IL code, whereas ST whitespace * is used in all other circumstances. Additionally, when parsing IL, the newline * character is treated as the EOL token. * The above requires the use of a state machine in the lexical parser to track which * language is being parsed. This requires that the lexical parser (i.e. flex) * have some knowledge of the syntax itself. * * NOTE: Our definition of whitespace will only work in ASCII! * * NOTE: we cannot use * st_whitespace [:space:]* * since we use {st_whitespace} as trailing context. In our case * this would not constitute "dangerous trailing context", but the * lexical generator (i.e. flex) does not know this (since it does * not know which characters belong to the set [:space:]), and will * generate a "dangerous trailing context" warning! * We use this alternative just to stop the flex utility from * generating the invalid (in this case) warning... */ /* NOTE: il_whitespace_char is not currenty used, be we include it for completeness */ st_whitespace_char [ \f\n\r\t\v] il_whitespace_char [ \f\r\t\v] st_whitespace [ \f\n\r\t\v]* il_whitespace [ \f\r\t\v]* st_whitespace_or_pragma_or_commentX ({st_whitespace})|({pragma})|({comment}) il_whitespace_or_pragma_or_commentX ({il_whitespace})|({pragma})|({comment}) st_whitespace_or_pragma_or_comment {st_whitespace_or_pragma_or_commentX}* il_whitespace_or_pragma_or_comment {il_whitespace_or_pragma_or_commentX}* qualified_identifier {identifier}(\.{identifier})+ /*****************************************/ /* B.1.1 Letters, digits and identifiers */ /*****************************************/ /* NOTE: The following definitions only work if the host computer * is using the ASCII maping. For e.g., with EBCDIC [A-Z] * contains non-alphabetic characters! * The correct way of doing it would be to use * the [:upper:] etc... definitions. * * Unfortunately, further on we need all printable * characters (i.e. 
[:print:]), but excluding '$'. * Flex does not allow sets to be composed by excluding * elements. Sets may only be constructed by adding new * elements, which means that we have to revert to * [\x20\x21\x23\x25\x26\x28-x7E] for the definition * of the printable characters with the required exceptions. * The above also implies the use of ASCII, but now we have * no way to work around it| * * The conclusion is that our parser is limited to ASCII * based host computers!! */ letter [A-Za-z] digit [0-9] octal_digit [0-7] hex_digit {digit}|[A-F] identifier ({letter}|(_({letter}|{digit})))((_?({letter}|{digit}))*) /*******************/ /* B.1.2 Constants */ /*******************/ /******************************/ /* B.1.2.1 Numeric literals */ /******************************/ integer {digit}((_?{digit})*) /* Some helper symbols for parsing TIME literals... */ integer_0_59 (0(_?))*([0-5](_?))?{digit} integer_0_19 (0(_?))*([0-1](_?))?{digit} integer_20_23 (0(_?))*2(_?)[0-3] integer_0_23 {integer_0_19}|{integer_20_23} integer_0_999 {digit}((_?{digit})?)((_?{digit})?) binary_integer 2#{bit}((_?{bit})*) bit [0-1] octal_integer 8#{octal_digit}((_?{octal_digit})*) hex_integer 16#{hex_digit}((_?{hex_digit})*) exponent [Ee]([+-]?){integer} /* The correct definition for real would be: * real {integer}\.{integer}({exponent}?) * * Unfortunately, the spec also defines fixed_point (B 1.2.3.1) as: * fixed_point {integer}\.{integer} * * This means that {integer}\.{integer} could be interpreted * as either a fixed_point or a real. * I have opted to interpret {integer}\.{integer} as a fixed_point. * In order to do this, the definition of real has been changed to: * real {integer}\.{integer}{exponent} * * This means that the syntax parser now needs to define a real to be * either a real_token or a fixed_point_token! */ real {integer}\.{integer}{exponent} /*******************************/ /* B.1.2.2 Character Strings */ /*******************************/ /* common_character_representation := |'$$' |'$L'|'$N'|'$P'|'$R'|'$T' |'$l'|'$n'|'$p'|'$r'|'$t' NOTE: $ = 0x24 " = 0x22 ' = 0x27 printable chars in ASCII: 0x20-0x7E */ esc_char_u $L|$N|$P|$R|$T esc_char_l $l|$n|$p|$r|$t esc_char $$|{esc_char_u}|{esc_char_l} double_byte_char (${hex_digit}{hex_digit}{hex_digit}{hex_digit}) single_byte_char (${hex_digit}{hex_digit}) /* WARNING: * This definition is only valid in ASCII... * * Flex includes the function print_char() that defines * all printable characters portably (i.e. whatever character * encoding is currently being used , ASCII, EBCDIC, etc...) * Unfortunately, we cannot generate the definition of * common_character_representation portably, since flex * does not allow definition of sets by subtracting * elements in one set from another set. * This means we must build up the defintion of * common_character_representation using only set addition, * which leaves us with the only choice of defining the * characters non-portably... 
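 * Illustrative example (not from the original comment; the literal is arbitrary): with
 * the definitions below, a single-byte string such as 'IT$'S $24$N' is accepted - here
 * $' stands for an embedded single quote, $24 for the character with hexadecimal code
 * 16#24 (i.e. '$'), and $N for a newline.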
*/ common_character_representation [\x20\x21\x23\x25\x26\x28-\x7E]|{esc_char} double_byte_character_representation $\"|'|{double_byte_char}|{common_character_representation} single_byte_character_representation $'|\"|{single_byte_char}|{common_character_representation} double_byte_character_string \"({double_byte_character_representation}*)\" single_byte_character_string '({single_byte_character_representation}*)' /************************/ /* B 1.2.3.1 - Duration */ /************************/ fixed_point {integer}\.{integer} /* NOTE: The IEC 61131-3 v2 standard has an incorrect formal syntax definition of duration, * as its definition does not match the standard's text. * IEC 61131-3 v3 (committee draft) seems to have this fixed, so we use that * definition instead! * * duration::= ('T' | 'TIME') '#' ['+'|'-'] interval * interval::= days | hours | minutes | seconds | milliseconds * fixed_point ::= integer [ '.' integer] * days ::= fixed_point 'd' | integer 'd' ['_'] [ hours ] * hours ::= fixed_point 'h' | integer 'h' ['_'] [ minutes ] * minutes ::= fixed_point 'm' | integer 'm' ['_'] [ seconds ] * seconds ::= fixed_point 's' | integer 's' ['_'] [ milliseconds ] * milliseconds ::= fixed_point 'ms' * * * The original IEC 61131-3 v2 definition is: * duration ::= ('T' | 'TIME') '#' ['-'] interval * interval ::= days | hours | minutes | seconds | milliseconds * fixed_point ::= integer [ '.' integer] * days ::= fixed_point 'd' | integer 'd' ['_'] hours * hours ::= fixed_point 'h' | integer 'h' ['_'] minutes * minutes ::= fixed_point 'm' | integer 'm' ['_'] seconds * seconds ::= fixed_point 's' | integer 's' ['_'] milliseconds * milliseconds ::= fixed_point 'ms' */ interval_ms_X ({integer_0_999}(\.{integer})?)ms interval_s_X {integer_0_59}s(_?{interval_ms_X})?|({integer_0_59}(\.{integer})?s) interval_m_X {integer_0_59}m(_?{interval_s_X})?|({integer_0_59}(\.{integer})?m) interval_h_X {integer_0_23}h(_?{interval_m_X})?|({integer_0_23}(\.{integer})?h) interval_ms {integer}ms|({fixed_point}ms) interval_s {integer}s(_?{interval_ms_X})?|({fixed_point}s) interval_m {integer}m(_?{interval_s_X})?|({fixed_point}m) interval_h {integer}h(_?{interval_m_X})?|({fixed_point}h) interval_d {integer}d(_?{interval_h_X})?|({fixed_point}d) interval {interval_ms}|{interval_s}|{interval_m}|{interval_h}|{interval_d} /* to help provide nice error messages, we also parse an incorrect but plausible interval... */ /* NOTE that this erroneous interval will be parsed outside the time_literal_state, so must not * be able to parse any other legal lexcial construct (besides a legal interval, but that * is OK as this rule will appear _after_ the rule to parse legal intervals!). */ fixed_point_or_integer {fixed_point}|{integer} erroneous_interval ({fixed_point_or_integer}d_?)?({fixed_point_or_integer}h_?)?({fixed_point_or_integer}m_?)?({fixed_point_or_integer}s_?)?({fixed_point_or_integer}ms)? /********************************************/ /* B.1.4.1 Directly Represented Variables */ /********************************************/ /* The correct definition, if the standard were to be followed... */ location_prefix [IQM] size_prefix [XBWDL] direct_variable_standard %{location_prefix}({size_prefix}?){integer}((.{integer})*) /* For the MatPLC, we will accept % * as a direct variable, this being mapped onto the MatPLC point * named */ /* TODO: we should not restrict it to only the accepted syntax * of as specified by the standard. MatPLC point names * have a more permissive syntax. * * e.g. 
"P__234" * Is a valid MatPLC point name, but not a valid !! * The same happens with names such as "333", "349+23", etc... * How can we handle these more expressive names in our case? * Remember that some direct variable may remain anonymous, with * declarations such as: * VAR * AT %I3 : BYTE := 255; * END_VAR * in which case we are currently using "%I3" as the variable * name. */ /* direct_variable_matplc %{identifier} */ /* direct_variable {direct_variable_standard}|{direct_variable_matplc} */ direct_variable {direct_variable_standard} /******************************************/ /* B 1.4.3 - Declaration & Initialisation */ /******************************************/ incompl_location %[IQM]\* %% /* fprintf(stderr, "flex: state %d\n", YY_START); */ /*****************************************************/ /*****************************************************/ /*****************************************************/ /***** *****/ /***** *****/ /***** F I R S T T H I N G S F I R S T *****/ /***** *****/ /***** *****/ /*****************************************************/ /*****************************************************/ /*****************************************************/ /***********************************************************/ /* Handle requests sent by bison for flex to change state. */ /***********************************************************/ if (get_goto_body_state()) { yy_push_state(body_state); rst_goto_body_state(); } if (get_goto_sfc_qualifier_state()) { yy_push_state(sfc_qualifier_state); rst_goto_sfc_qualifier_state(); } if (get_goto_sfc_priority_state()) { yy_push_state(sfc_priority_state); rst_goto_sfc_priority_state(); } if (get_goto_task_init_state()) { yy_push_state(task_init_state); rst_goto_task_init_state(); } if (get_pop_state()) { yy_pop_state(); rst_pop_state(); } /***************************/ /* Handle the pragmas! */ /***************************/ /* We start off by searching for the pragmas we handle in the lexical parser. */ {file_include_pragma} unput_text(0); yy_push_state(include_beg); /* Pragmas sent to syntax analyser (bison) */ /* NOTE: In the vardecl_list_state we only process the pragmas between two consecutive VAR .. END_VAR blocks. * We do not process any pragmas trailing after the last END_VAR. We leave that to the body_state. * This is because the pragmas are stored in a statement_list or instruction_list (in bison), * but these lists must start with the special tokens start_IL_body_token/start_ST_body_token. * This means that these special tokens must be generated (by the body_state) before processing * the pragme => we cannot process the trailing pragmas in the vardecl_list_state state. */ {disable_code_generation_pragma} return disable_code_generation_pragma_token; {enable_code_generation_pragma} return enable_code_generation_pragma_token; {disable_code_generation_pragma}/(VAR) return disable_code_generation_pragma_token; {enable_code_generation_pragma}/(VAR) return enable_code_generation_pragma_token; {disable_code_generation_pragma} append_bodystate_buffer(yytext); /* in body state we do not process any tokens, we simply store them for later processing! */ {enable_code_generation_pragma} append_bodystate_buffer(yytext); /* in body state we do not process any tokens, we simply store them for later processing! */ /* Any other pragma we find, we just pass it up to the syntax parser... */ /* Note that the state is exclusive, so we have to include it here too. 
*/ {pragma} append_bodystate_buffer(yytext); /* in body state we do not process any tokens, we simply store them for later processing! */ {pragma} {/* return the pragma without the enclosing '{' and '}' */ int cut = yytext[1]=='{'?2:1; yytext[strlen(yytext)-cut] = '\0'; yylval.ID=strdup(yytext+cut); return pragma_token; } {pragma}/(VAR) {/* return the pragma without the enclosing '{' and '}' */ int cut = yytext[1]=='{'?2:1; yytext[strlen(yytext)-cut] = '\0'; yylval.ID=strdup(yytext+cut); return pragma_token; } /*********************************/ /* Handle the file includes! */ /*********************************/ {file_include_pragma_beg} BEGIN(include_filename); {file_include_pragma_filename} { /* set the internal state variables of the lexical analyser to process a new include file */ include_file(yytext); /* switch to whatever state was active before the include file */ yy_pop_state(); /* now process the new file... */ } <<EOF>> { /* NOTE: Currently bison is incorrectly using END_OF_INPUT in many rules * when checking for syntax errors in the input source code. * This means that in reality flex will be asked to carry on reading the input * even after it has reached the end of all (including the main) input files. * In other words, we will be called to return more tokens, even after we have * already returned an END_OF_INPUT token. In this case, we must carry on returning * more END_OF_INPUT tokens. * * However, in the above case we will be asked to carry on reading more tokens * from the main input file, after we have reached the end. For this to work * correctly, we cannot close the main input file! * * This is why we WILL be called with include_stack_ptr == 0 multiple times, * and why we must handle it as a special case * that leaves the include_stack_ptr unchanged, and returns END_OF_INPUT once again. * * As a corollary, flex can never safely close the main input file, and we must ask * bison to close it! */ if (include_stack_ptr == 0) { // fclose(yyin); // Must not do this!! // FreeTracking(current_tracking); // Must not do this!! /* yyterminate() terminates the scanner and returns a 0 to the * scanner's caller, indicating "all done". * * Our syntax parser (written with bison) has the token * END_OF_INPUT associated with the value 0, so even though * we don't explicitly return the token END_OF_INPUT, * calling yyterminate() is equivalent to doing that. */ yyterminate(); } else { fclose(yyin); FreeTracking(current_tracking); --include_stack_ptr; yy_delete_buffer(YY_CURRENT_BUFFER); yy_switch_to_buffer((include_stack[include_stack_ptr]).buffer_state); current_tracking = include_stack[include_stack_ptr].env; /* removing constness of char *. This is actually safe, * since the only real const char * that is stored on the stack is * the first one (i.e. the one that gets stored in include_stack[0], * which is never free'd!) */ /* NOTE: We do __NOT__ free the malloc()'d memory since * pointers to this filename will be kept by many objects * in the abstract syntax tree. * This will later be used to provide correct error * messages during semantic analysis (stage 3) */ /* free((char *)current_filename); */ current_filename = include_stack[include_stack_ptr].filename; yy_push_state(include_end); } } {file_include_pragma_end} yy_pop_state(); /* handle the artificial file includes created by include_string(), which do not end with a '}' */ . unput_text(0); yy_pop_state(); /*********************************/ /* Handle all the state changes! 
*/ /*********************************/ /* INITIAL -> header_state */ { FUNCTION{st_whitespace} if (get_preparse_state()) BEGIN(get_pou_name_state); else {BEGIN(header_state);/* printf("\nChanging to header_state\n"); */} return FUNCTION; FUNCTION_BLOCK{st_whitespace} if (get_preparse_state()) BEGIN(get_pou_name_state); else {BEGIN(header_state);/* printf("\nChanging to header_state\n"); */} return FUNCTION_BLOCK; PROGRAM{st_whitespace} if (get_preparse_state()) BEGIN(get_pou_name_state); else {BEGIN(header_state);/* printf("\nChanging to header_state\n"); */} return PROGRAM; CONFIGURATION{st_whitespace} if (get_preparse_state()) BEGIN(get_pou_name_state); else {BEGIN(config_state);/* printf("\nChanging to config_state\n"); */} return CONFIGURATION; } { {identifier} BEGIN(ignore_pou_state); yylval.ID=strdup(yytext); return identifier_token; . BEGIN(ignore_pou_state); unput_text(0); } { END_FUNCTION unput_text(0); BEGIN(INITIAL); END_FUNCTION_BLOCK unput_text(0); BEGIN(INITIAL); END_PROGRAM unput_text(0); BEGIN(INITIAL); END_CONFIGURATION unput_text(0); BEGIN(INITIAL); .|\n {}/* Ignore text inside POU! (including the '\n' character!) */ } /* header_state -> (vardecl_list_state) */ /* NOTE: This transition assumes that all POUs with code (Function, FB, and Program) will always contain * at least one VAR_XXX block. * How about functions that do not declare variables, and go directly to the body_state??? * - According to Section 2.5.1.3 (Function Declaration), item 2 in the list, a FUNCTION * must have at least one input argument, so a correct declaration will have at least * one VAR_INPUT ... END_VAR construct! * - According to Section 2.5.2.2 (Function Block Declaration), a FUNCTION_BLOCK * must have at least one input argument, so a correct declaration will have at least * one VAR_INPUT ... END_VAR construct! * - According to Section 2.5.3 (Programs), a PROGRAM must have at least one input * argument, so a correct declaration will have at least one VAR_INPUT ... END_VAR * construct! * * All the above means that we needn't worry about PROGRAMs, FUNCTIONs or * FUNCTION_BLOCKs that do not have at least one END_VAR before the body_state. * If the code has an error, and no END_VAR before the body, we will simply * continue in this state, until the end of the FUNCTION, FUNCTION_BLOCK * or PROGRAM. * * WARNING: From 2016-05 (May 2016) onwards, matiec supports a non-standard option in which a Function * may be declared with no Input, Output or IN_OUT variables. This means that the above * assumption is no longer valid. * * NOTE: Some code being parsed may be erroneous and not contain any VAR ... END_VAR block. * To generate error messages that make sense, the flex state machine should not get lost * in these situations. We therefore consider the possibility of finding * END_FUNCTION, END_FUNCTION_BLOCK or END_PROGRAM when inside the header_state. */ { VAR | /* execute the next rule's action, i.e. fall-through! */ VAR_INPUT | VAR_OUTPUT | VAR_IN_OUT | VAR_EXTERNAL | VAR_GLOBAL | VAR_TEMP | VAR_CONFIG | VAR_ACCESS unput_text(0); BEGIN(vardecl_list_state); END_FUNCTION | /* execute the next rule's action, i.e. fall-through! */ END_FUNCTION_BLOCK | END_PROGRAM unput_text(0); BEGIN(vardecl_list_state); /* Notice that we do NOT go directly to body_state, as that requires a push(). * If we were to push to body_state here, then the corresponding pop() at the * end of body_state would return to header_state. 
* After this pop() header_state would not return to INITIAL as it should, but * would instead enter an infitie loop push()ing again to body_state */ } /* vardecl_list_state -> (vardecl_state | body_state | INITIAL) */ { /* NOTE: vardecl_list_state is an exclusive state, i.e. when in this state * default rules do not apply! This means that when in this state identifiers * are not recognised! * NOTE: Notice that we only change to vardecl_state if the VAR*** is followed by * at least one whitespace. This is to dintinguish the VAR declaration * from identifiers starting with 'var' (e.g. a variable named 'varint') * NOTE: Notice that we cannot use st_whitespace here, as it can legally be empty. * We therefore use st_whitespace_char instead. */ VAR_INPUT{st_whitespace_char} | /* execute the next rule's action, i.e. fall-through! */ VAR_OUTPUT{st_whitespace_char} | VAR_IN_OUT{st_whitespace_char} | VAR_EXTERNAL{st_whitespace_char} | VAR_GLOBAL{st_whitespace_char} | VAR_TEMP{st_whitespace_char} | VAR_CONFIG{st_whitespace_char} | VAR_ACCESS{st_whitespace_char} | VAR{st_whitespace_char} unput_text(0); yy_push_state(vardecl_state); //printf("\nChanging to vardecl_state\n"); END_FUNCTION{st_whitespace} unput_text(0); BEGIN(INITIAL); END_FUNCTION_BLOCK{st_whitespace} unput_text(0); BEGIN(INITIAL); END_PROGRAM{st_whitespace} unput_text(0); BEGIN(INITIAL); /* NOTE: Handling of whitespace... * - Must come __before__ the next rule for any single character '.' * - If the rules were reversed, any whitespace with a single space (' ') * would be handled by the '.' rule instead of the {whitespace} rule! */ {st_whitespace} /* Eat any whitespace */ /* anything else, just change to body_state! */ . unput_text(0); yy_push_state(body_state); //printf("\nChanging to body_state\n"); } /* vardecl_list_state -> pop to $previous_state (vardecl_list_state) */ { END_VAR yy_pop_state(); return END_VAR; /* pop back to vardecl_list_state */ } /* body_state -> (il_state | st_state | sfc_state) */ { {st_whitespace} {/* In body state we do not process any tokens, * we simply store them for later processing! * NOTE: we must return ALL text when in body_state, including * all comments and whitespace, so as not * to lose track of the line_number and column number * used when printing debugging messages. * NOTE: some of the following rules depend on the fact that * the body state buffer is either empty or only contains white space up to * that point. Since the vardecl_list_state will eat up all * whitespace before entering the body_state, the contents of the bodystate_buffer * will _never_ start with whitespace if the previous state was vardecl_list_state. * However, it is possible to enter the body_state from other states (e.g. when * parsing SFC code, that contains transitions or actions in other languages) */ append_bodystate_buffer(yytext, 1 /* is whitespace */); } /* 'INITIAL_STEP' always used in beginning of SFCs !! */ INITIAL_STEP { if (isempty_bodystate_buffer()) {unput_text(0); del_bodystate_buffer(); BEGIN(sfc_state);} else {append_bodystate_buffer(yytext);} } /* ':=', at the very beginning of a 'body', occurs only in transitions and not Function, FB, or Program bodies! */ := { if (isempty_bodystate_buffer()) {unput_text(0); del_bodystate_buffer(); BEGIN(st_state);} /* We do _not_ return a start_ST_body_token here, as bison does not expect it! */ else {append_bodystate_buffer(yytext);} } /* check if ';' occurs before an END_FUNCTION, END_FUNCTION_BLOCK, END_PROGRAM, END_ACTION or END_TRANSITION. 
(If true => we are parsing ST; If false => parsing IL). */ END_ACTION | /* execute the next rule's action, i.e. fall-through! */ END_FUNCTION | END_FUNCTION_BLOCK | END_TRANSITION | END_PROGRAM { append_bodystate_buffer(yytext); unput_bodystate_buffer(); BEGIN(il_state); /*printf("returning start_IL_body_token\n");*/ return start_IL_body_token;} .|\n { append_bodystate_buffer(yytext); if (strcmp(yytext, ";") == 0) {unput_bodystate_buffer(); BEGIN(st_state); /*printf("returning start_ST_body_token\n");*/ return start_ST_body_token;} } /* The following rules are not really necessary. They just make compilation faster in case the ST Statement List starts with one fot he following... */ RETURN | /* execute the next rule's action, i.e. fall-through! */ IF | CASE | FOR | WHILE | EXIT | REPEAT { if (isempty_bodystate_buffer()) {unput_text(0); del_bodystate_buffer(); BEGIN(st_state); return start_ST_body_token;} else {append_bodystate_buffer(yytext);} } } /* end of body_state lexical parser */ /* (il_state | st_state) -> pop to $previous_state (vardecl_list_state or sfc_state) */ { END_FUNCTION yy_pop_state(); unput_text(0); END_FUNCTION_BLOCK yy_pop_state(); unput_text(0); END_PROGRAM yy_pop_state(); unput_text(0); END_TRANSITION yy_pop_state(); unput_text(0); END_ACTION yy_pop_state(); unput_text(0); } /* sfc_state -> pop to $previous_state (vardecl_list_state or sfc_state) */ { END_FUNCTION yy_pop_state(); unput_text(0); END_FUNCTION_BLOCK yy_pop_state(); unput_text(0); END_PROGRAM yy_pop_state(); unput_text(0); } /* config -> INITIAL */ END_CONFIGURATION BEGIN(INITIAL); return END_CONFIGURATION; /***************************************/ /* Next is to to remove all whitespace */ /***************************************/ /* NOTE: pragmas are handled right at the beginning... */ /* The whitespace */ {st_whitespace} /* Eat any whitespace */ {il_whitespace} /* Eat any whitespace */ /* NOTE: Due to the need of having the following rule have higher priority, * the following rule was moved to an earlier position in this file. {st_whitespace} {...} */ /* The comments */ {comment_beg} yy_push_state(comment_state); {comment_beg} yy_push_state(comment_state); { {comment_beg} {if (get_opt_nested_comments()) yy_push_state(comment_state);} {comment_end} yy_pop_state(); . /* Ignore text inside comment! */ \n /* Ignore text inside comment! */ } /*****************************************/ /* B.1.1 Letters, digits and identifiers */ /*****************************************/ /* NOTE: 'R1', 'IN', etc... are IL operators, and therefore tokens * On the other hand, the spec does not define them as keywords, * which means they may be re-used for variable names, etc...! * The syntax parser already caters for the possibility of these * tokens being used for variable names in their declarations. * When they are declared, they will be added to the variable symbol table! * Further appearances of these tokens must no longer be parsed * as R1_tokens etc..., but rather as variable_name_tokens! * * That is why the first thing we do with identifiers, even before * checking whether they may be a 'keyword', is to check whether * they have been previously declared as a variable name, * * However, we have a dilema! Should we here also check for * prev_declared_derived_function_name_token? * If we do, then the 'MOD' default library function (defined in * the standard) will always be returned as a function name, and * it will therefore not be possible to use it as an operator as * in the following ST expression 'X := Y MOD Z;' ! 
* If we don't, then it will not even be possible to use 'MOD' * as a function, as in 'X := MOD(Y, Z);' * We solve this by NOT testing for function names here, and * handling this function and keyword clash in bison! */ /* NOTE: The following code has been commented out as most users do not want matiec * to allow the use of 'R1', 'IN' ... IL operators as identifiers, * even though a literal reading of the standard allows this. * We could add this as a command line option, but it is not yet done. * For now we just comment out the code, but leave the commented code * in so we can re-activate it quickly (without having to go through old commits * in the mercurial repository to figure out the missing code)! */ /* {identifier} {int token = get_identifier_token(yytext); // fprintf(stderr, "flex: analysing identifier '%s'...", yytext); if ((token == prev_declared_variable_name_token) || // (token == prev_declared_derived_function_name_token) || // DO NOT add this condition! (token == prev_declared_fb_name_token)) { // if (token != identifier_token) // * NOTE: if we replace the above uncommented conditions with * the simple test of (token != identifier_token), then * 'MOD' et al must be removed from the * library_symbol_table as a default function name! * // yylval.ID=strdup(yytext); // fprintf(stderr, "returning token %d\n", token); return token; } // otherwise, leave it for the other lexical parser rules... // fprintf(stderr, "rejecting\n"); REJECT; } */ /******************************************************/ /******************************************************/ /******************************************************/ /***** *****/ /***** *****/ /***** N O W D O T H E K E Y W O R D S *****/ /***** *****/ /***** *****/ /******************************************************/ /******************************************************/ /******************************************************/ REF {if (get_opt_ref_standard_extensions()) return REF; else{REJECT;}} /* Keyword in IEC 61131-3 v3 */ DREF {if (get_opt_ref_standard_extensions()) return DREF; else{REJECT;}} /* Keyword in IEC 61131-3 v3 */ REF_TO {if (get_opt_ref_standard_extensions()) return REF_TO; else{REJECT;}} /* Keyword in IEC 61131-3 v3 */ NULL {if (get_opt_ref_standard_extensions()) return NULL_token; else{REJECT;}} /* Keyword in IEC 61131-3 v3 */ EN return EN; /* Keyword */ ENO return ENO; /* Keyword */ /******************************/ /* B 1.2.1 - Numeric Literals */ /******************************/ TRUE return TRUE; /* Keyword */ BOOL#1 return boolean_true_literal_token; BOOL#TRUE return boolean_true_literal_token; SAFEBOOL#1 {if (get_opt_safe_extensions()) {return safeboolean_true_literal_token;} else{REJECT;}} /* Keyword (Data Type) */ SAFEBOOL#TRUE {if (get_opt_safe_extensions()) {return safeboolean_true_literal_token;} else{REJECT;}} /* Keyword (Data Type) */ FALSE return FALSE; /* Keyword */ BOOL#0 return boolean_false_literal_token; BOOL#FALSE return boolean_false_literal_token; SAFEBOOL#0 {if (get_opt_safe_extensions()) {return safeboolean_false_literal_token;} else{REJECT;}} /* Keyword (Data Type) */ SAFEBOOL#FALSE {if (get_opt_safe_extensions()) {return safeboolean_false_literal_token;} else{REJECT;}} /* Keyword (Data Type) */ /************************/ /* B 1.2.3.1 - Duration */ /************************/ t# return T_SHARP; /* Delimiter */ T# return T_SHARP; /* Delimiter */ TIME return TIME; /* Keyword (Data Type) */ /************************************/ /* B 1.2.3.2 - Time of day and Date */ 
/************************************/ TIME_OF_DAY return TIME_OF_DAY; /* Keyword (Data Type) */ TOD return TIME_OF_DAY; /* Keyword (Data Type) */ DATE return DATE; /* Keyword (Data Type) */ d# return D_SHARP; /* Delimiter */ D# return D_SHARP; /* Delimiter */ DATE_AND_TIME return DATE_AND_TIME; /* Keyword (Data Type) */ DT return DATE_AND_TIME; /* Keyword (Data Type) */ /***********************************/ /* B 1.3.1 - Elementary Data Types */ /***********************************/ BOOL return BOOL; /* Keyword (Data Type) */ BYTE return BYTE; /* Keyword (Data Type) */ WORD return WORD; /* Keyword (Data Type) */ DWORD return DWORD; /* Keyword (Data Type) */ LWORD return LWORD; /* Keyword (Data Type) */ SINT return SINT; /* Keyword (Data Type) */ INT return INT; /* Keyword (Data Type) */ DINT return DINT; /* Keyword (Data Type) */ LINT return LINT; /* Keyword (Data Type) */ USINT return USINT; /* Keyword (Data Type) */ UINT return UINT; /* Keyword (Data Type) */ UDINT return UDINT; /* Keyword (Data Type) */ ULINT return ULINT; /* Keyword (Data Type) */ REAL return REAL; /* Keyword (Data Type) */ LREAL return LREAL; /* Keyword (Data Type) */ WSTRING return WSTRING; /* Keyword (Data Type) */ STRING return STRING; /* Keyword (Data Type) */ TIME return TIME; /* Keyword (Data Type) */ DATE return DATE; /* Keyword (Data Type) */ DT return DT; /* Keyword (Data Type) */ TOD return TOD; /* Keyword (Data Type) */ DATE_AND_TIME return DATE_AND_TIME; /* Keyword (Data Type) */ TIME_OF_DAY return TIME_OF_DAY; /* Keyword (Data Type) */ /* A non-standard extension! */ VOID {if (runtime_options.allow_void_datatype) {return VOID;} else {REJECT;}} /*****************************************************************/ /* Keywords defined in "Safety Software Technical Specification" */ /*****************************************************************/ /* * NOTE: The following keywords are defined in * "Safety Software Technical Specification, * Part 1: Concepts and Function Blocks, * Version 1.0 – Official Release" * written by PLCopen - Technical Committee 5 * * We only support these extensions and keywords * if the appropriate command line option is given. */ SAFEBOOL {if (get_opt_safe_extensions()) {return SAFEBOOL;} else {REJECT;}} SAFEBYTE {if (get_opt_safe_extensions()) {return SAFEBYTE;} else {REJECT;}} SAFEWORD {if (get_opt_safe_extensions()) {return SAFEWORD;} else {REJECT;}} SAFEDWORD {if (get_opt_safe_extensions()) {return SAFEDWORD;} else{REJECT;}} SAFELWORD {if (get_opt_safe_extensions()) {return SAFELWORD;} else{REJECT;}} SAFEREAL {if (get_opt_safe_extensions()) {return SAFEREAL;} else{REJECT;}} SAFELREAL {if (get_opt_safe_extensions()) {return SAFELREAL;} else{REJECT;}} SAFESINT {if (get_opt_safe_extensions()) {return SAFESINT;} else{REJECT;}} SAFEINT {if (get_opt_safe_extensions()) {return SAFEINT;} else{REJECT;}} SAFEDINT {if (get_opt_safe_extensions()) {return SAFEDINT;} else{REJECT;}} SAFELINT {if (get_opt_safe_extensions()) {return SAFELINT;} else{REJECT;}} SAFEUSINT {if (get_opt_safe_extensions()) {return SAFEUSINT;} else{REJECT;}} SAFEUINT {if (get_opt_safe_extensions()) {return SAFEUINT;} else{REJECT;}} SAFEUDINT {if (get_opt_safe_extensions()) {return SAFEUDINT;} else{REJECT;}} SAFEULINT {if (get_opt_safe_extensions()) {return SAFEULINT;} else{REJECT;}} /* SAFESTRING and SAFEWSTRING are not yet supported, i.e. checked correctly, in the semantic analyser (stage 3) */ /* so it is best not to support them at all... 
/********************************/ /* B 1.3.2 - Generic data types */ /********************************/ /* Strangely, the following symbols do not seem to be required! */ /* But we include them so they become reserved words, and do not * get passed up to bison as an identifier... */ ANY return ANY; /* Keyword (Data Type) */ ANY_DERIVED return ANY_DERIVED; /* Keyword (Data Type) */ ANY_ELEMENTARY return ANY_ELEMENTARY; /* Keyword (Data Type) */ ANY_MAGNITUDE return ANY_MAGNITUDE; /* Keyword (Data Type) */ ANY_NUM return ANY_NUM; /* Keyword (Data Type) */ ANY_REAL return ANY_REAL; /* Keyword (Data Type) */ ANY_INT return ANY_INT; /* Keyword (Data Type) */ ANY_BIT return ANY_BIT; /* Keyword (Data Type) */ ANY_STRING return ANY_STRING; /* Keyword (Data Type) */ ANY_DATE return ANY_DATE; /* Keyword (Data Type) */ /********************************/ /* B 1.3.3 - Derived data types */ /********************************/ ":=" return ASSIGN; /* Delimiter */ ".." return DOTDOT; /* Delimiter */ TYPE return TYPE; /* Keyword */ END_TYPE return END_TYPE; /* Keyword */ ARRAY return ARRAY; /* Keyword */ OF return OF; /* Keyword */ STRUCT return STRUCT; /* Keyword */ END_STRUCT return END_STRUCT; /* Keyword */ /*********************/ /* B 1.4 - Variables */ /*********************/ /******************************************/ /* B 1.4.3 - Declaration & Initialisation */ /******************************************/ VAR_INPUT return VAR_INPUT; /* Keyword */ VAR_OUTPUT return VAR_OUTPUT; /* Keyword */ VAR_IN_OUT return VAR_IN_OUT; /* Keyword */ VAR_EXTERNAL return VAR_EXTERNAL; /* Keyword */ VAR_GLOBAL return VAR_GLOBAL; /* Keyword */ END_VAR return END_VAR; /* Keyword */ RETAIN return RETAIN; /* Keyword */ NON_RETAIN return NON_RETAIN; /* Keyword */ R_EDGE return R_EDGE; /* Keyword */ F_EDGE return F_EDGE; /* Keyword */ AT return AT; /* Keyword */ /***********************/ /* B 1.5.1 - Functions */ /***********************/ /* Note: The following END_FUNCTION rule includes a BEGIN(INITIAL); command. * This is necessary in case the input program being parsed has syntax errors that force * flex's main state machine to never change to the il_state or the st_state * after changing to the body_state. * This BEGIN(INITIAL) command forces the flex state machine to re-synchronise with * the input stream even in the presence of buggy code! */ FUNCTION return FUNCTION; /* Keyword */ END_FUNCTION BEGIN(INITIAL); return END_FUNCTION; /* Keyword */ /* see Note above */ VAR return VAR; /* Keyword */ CONSTANT return CONSTANT; /* Keyword */
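/* (Illustrative ST skeleton for the B 1.5.1 rules above; 'ADD_ONE' and 'in1' are
 * hypothetical names. FUNCTION, INT, VAR_INPUT, END_VAR and END_FUNCTION are all
 * returned to bison as keyword tokens, and the END_FUNCTION rule additionally
 * switches the scanner back to the INITIAL start condition, as explained in the
 * Note above:
 *
 *    FUNCTION ADD_ONE : INT
 *      VAR_INPUT  in1 : INT;  END_VAR
 *      ADD_ONE := in1 + 1;
 *    END_FUNCTION
 * )
 */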
/*****************************/ /* B 1.5.2 - Function Blocks */ /*****************************/ /* Note: The following END_FUNCTION_BLOCK rule includes a BEGIN(INITIAL); command. * This is necessary in case the input program being parsed has syntax errors that force * flex's main state machine to never change to the il_state or the st_state * after changing to the body_state. * This BEGIN(INITIAL) command forces the flex state machine to re-synchronise with * the input stream even in the presence of buggy code! */ FUNCTION_BLOCK return FUNCTION_BLOCK; /* Keyword */ END_FUNCTION_BLOCK BEGIN(INITIAL); return END_FUNCTION_BLOCK; /* Keyword */ /* see Note above */ VAR_TEMP return VAR_TEMP; /* Keyword */ VAR return VAR; /* Keyword */ NON_RETAIN return NON_RETAIN; /* Keyword */ END_VAR return END_VAR; /* Keyword */ /**********************/ /* B 1.5.3 - Programs */ /**********************/ /* Note: The following END_PROGRAM rule includes a BEGIN(INITIAL); command. * This is necessary in case the input program being parsed has syntax errors that force * flex's main state machine to never change to the il_state or the st_state * after changing to the body_state. * This BEGIN(INITIAL) command forces the flex state machine to re-synchronise with * the input stream even in the presence of buggy code! */ PROGRAM return PROGRAM; /* Keyword */ END_PROGRAM BEGIN(INITIAL); return END_PROGRAM; /* Keyword */ /* see Note above */ /********************************************/ /* B 1.6 Sequential Function Chart elements */ /********************************************/ /* NOTE: the following identifiers/tokens clash with the R and S IL operators, as well * as other identifiers that may be used as variable names inside IL and ST programs. * They will have to be handled when we include parsing of SFC... For now, simply * ignore them! */ ACTION return ACTION; /* Keyword */ END_ACTION return END_ACTION; /* Keyword */ TRANSITION return TRANSITION; /* Keyword */ END_TRANSITION return END_TRANSITION; /* Keyword */ FROM return FROM; /* Keyword */ TO return TO; /* Keyword */ INITIAL_STEP return INITIAL_STEP; /* Keyword */ STEP return STEP; /* Keyword */ END_STEP return END_STEP; /* Keyword */ /* PRIORITY is not a keyword, so we only return it when * it is explicitly required and we are not expecting any identifiers * that could also use the same letter sequence (i.e. an identifier: priority) */ PRIORITY return PRIORITY; { L return L; D return D; SD return SD; DS return DS; SL return SL; N return N; P return P; P0 return P0; P1 return P1; R return R; S return S; } /********************************/ /* B 1.7 Configuration elements */ /********************************/ /* Note: The following END_CONFIGURATION rule will never get to be used, as we have * another identical rule above (closer to the rules handling the transitions * of the main state machine) that will always execute before this one. * Note: The following END_CONFIGURATION rule includes a BEGIN(INITIAL); command. * This is not strictly necessary, but I place it here so it follows the same * pattern used in END_FUNCTION, END_PROGRAM, and END_FUNCTION_BLOCK */ CONFIGURATION return CONFIGURATION; /* Keyword */ END_CONFIGURATION BEGIN(INITIAL); return END_CONFIGURATION; /* Keyword */ /* see 2 Notes above! */
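/* (Illustrative sketch of the configuration syntax handled by this section; the
 * names conf1, res1, plc_cpu, fast_task, prog0 and prog_type are hypothetical.
 * It also shows why PRIORITY, SINGLE and INTERVAL are only recognised as tokens
 * inside a task initialisation (see the scoped rules below): everywhere else those
 * words remain available as ordinary identifiers.
 *
 *    CONFIGURATION conf1
 *      RESOURCE res1 ON plc_cpu
 *        TASK fast_task (INTERVAL := T#10ms, PRIORITY := 1);
 *        PROGRAM prog0 WITH fast_task : prog_type;
 *      END_RESOURCE
 *    END_CONFIGURATION
 * )
 */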
TASK return TASK; /* Keyword */ RESOURCE return RESOURCE; /* Keyword */ ON return ON; /* Keyword */ END_RESOURCE return END_RESOURCE; /* Keyword */ VAR_CONFIG return VAR_CONFIG; /* Keyword */ VAR_ACCESS return VAR_ACCESS; /* Keyword */ END_VAR return END_VAR; /* Keyword */ WITH return WITH; /* Keyword */ PROGRAM return PROGRAM; /* Keyword */ RETAIN return RETAIN; /* Keyword */ NON_RETAIN return NON_RETAIN; /* Keyword */ READ_WRITE return READ_WRITE; /* Keyword */ READ_ONLY return READ_ONLY; /* Keyword */ /* PRIORITY, SINGLE and INTERVAL are not keywords, so we only return them when * they are explicitly required and we are not expecting any identifiers * that could also use the same letter sequence (i.e. an identifier: priority, ...) */ { PRIORITY return PRIORITY; SINGLE return SINGLE; INTERVAL return INTERVAL; } /***********************************/ /* B 2.1 Instructions and Operands */ /***********************************/ \n return EOL; /*******************/ /* B 2.2 Operators */ /*******************/ /* NOTE: we can't have flex return the same token for * ANDN and &N, nor for AND and &, since * AND and ANDN are considered valid variable, * function or function block type names! * This means that the parser may decide that the * AND or ANDN strings found in the source code * are being used as variable names * and not as operators, and will therefore transform * these tokens into identifier tokens! * We can't have the parser thinking that the source * code contained the string AND (which may be interpreted * as a variable name) when in reality the source code * merely contained the character &, so we use two * different tokens for & and AND (and similarly * ANDN and &N)! */ /* The following tokens clash with ST expression operators and Standard Functions */ /* They are also keywords! */ AND return AND; /* Keyword */ MOD return MOD; /* Keyword */ OR return OR; /* Keyword */ XOR return XOR; /* Keyword */ NOT return NOT; /* Keyword */ /* The following tokens clash with Standard Functions */ /* They are keywords because they are function names */ { ADD return ADD; /* Keyword (Standard Function) */ DIV return DIV; /* Keyword (Standard Function) */ EQ return EQ; /* Keyword (Standard Function) */ GE return GE; /* Keyword (Standard Function) */ GT return GT; /* Keyword (Standard Function) */ LE return LE; /* Keyword (Standard Function) */ LT return LT; /* Keyword (Standard Function) */ MUL return MUL; /* Keyword (Standard Function) */ NE return NE; /* Keyword (Standard Function) */ SUB return SUB; /* Keyword (Standard Function) */ } /* The following tokens clash with SFC action qualifiers */ /* They are not keywords! */ { S return S; R return R; } /* The following tokens clash with ST expression operators */ & return AND2; /* NOT a Delimiter! */ /* The following tokens have no clashes */ /* They are not keywords! */ { LD return LD; LDN return LDN; ST return ST; STN return STN; S1 return S1; R1 return R1; CLK return CLK; CU return CU; CD return CD; PV return PV; IN return IN; PT return PT; ANDN return ANDN; &N return ANDN2; ORN return ORN; XORN return XORN; CAL return CAL; CALC return CALC; CALCN return CALCN; RET return RET; RETC return RETC; RETCN return RETCN; JMP return JMP; JMPC return JMPC; JMPCN return JMPCN; } /***********************/ /* B 3.1 - Expressions */ /***********************/ "**" return OPER_EXP; /* NOT a Delimiter! */ "<>" return OPER_NE; /* NOT a Delimiter! */ ">=" return OPER_GE; /* NOT a Delimiter! */ "<=" return OPER_LE; /* NOT a Delimiter!
*/ & return AND2; /* NOT a Delimiter! */ AND return AND; /* Keyword */ XOR return XOR; /* Keyword */ OR return OR; /* Keyword */ NOT return NOT; /* Keyword */ MOD return MOD; /* Keyword */ /*****************************************/ /* B 3.2.2 Subprogram Control Statements */ /*****************************************/ := return ASSIGN; /* Delimiter */ => return SENDTO; /* Delimiter */ RETURN return RETURN; /* Keyword */ /********************************/ /* B 3.2.3 Selection Statements */ /********************************/ IF return IF; /* Keyword */ THEN return THEN; /* Keyword */ ELSIF return ELSIF; /* Keyword */ ELSE return ELSE; /* Keyword */ END_IF return END_IF; /* Keyword */ CASE return CASE; /* Keyword */ OF return OF; /* Keyword */ ELSE return ELSE; /* Keyword */ END_CASE return END_CASE; /* Keyword */ /********************************/ /* B 3.2.4 Iteration Statements */ /********************************/ FOR return FOR; /* Keyword */ TO return TO; /* Keyword */ BY return BY; /* Keyword */ DO return DO; /* Keyword */ END_FOR return END_FOR; /* Keyword */ WHILE return WHILE; /* Keyword */ DO return DO; /* Keyword */ END_WHILE return END_WHILE; /* Keyword */ REPEAT return REPEAT; /* Keyword */ UNTIL return UNTIL; /* Keyword */ END_REPEAT return END_REPEAT; /* Keyword */ EXIT return EXIT; /* Keyword */ /********************************************************/ /********************************************************/ /********************************************************/ /***** *****/ /***** *****/ /***** N O W W O R K W I T H V A L U E S *****/ /***** *****/ /***** *****/ /********************************************************/ /********************************************************/ /********************************************************/ /********************************************/ /* B.1.4.1 Directly Represented Variables */ /********************************************/ {direct_variable} {yylval.ID=strdup(yytext); return get_direct_variable_token(yytext);} /******************************************/ /* B 1.4.3 - Declaration & Initialisation */ /******************************************/ {incompl_location} {yylval.ID=strdup(yytext); return incompl_location_token;} /************************/ /* B 1.2.3.1 - Duration */ /************************/ {fixed_point} {yylval.ID=strdup(yytext); return fixed_point_token;} {interval} {/*fprintf(stderr, "entering time_literal_state ##%s##\n", yytext);*/ unput_and_mark('#'); yy_push_state(time_literal_state);} {erroneous_interval} {return erroneous_interval_token;} { {integer}d {yylval.ID=strdup(yytext); yylval.ID[yyleng-1] = '\0'; return integer_d_token;} {integer}h {yylval.ID=strdup(yytext); yylval.ID[yyleng-1] = '\0'; return integer_h_token;} {integer}m {yylval.ID=strdup(yytext); yylval.ID[yyleng-1] = '\0'; return integer_m_token;} {integer}s {yylval.ID=strdup(yytext); yylval.ID[yyleng-1] = '\0'; return integer_s_token;} {integer}ms {yylval.ID=strdup(yytext); yylval.ID[yyleng-2] = '\0'; return integer_ms_token;} {fixed_point}d {yylval.ID=strdup(yytext); yylval.ID[yyleng-1] = '\0'; return fixed_point_d_token;} {fixed_point}h {yylval.ID=strdup(yytext); yylval.ID[yyleng-1] = '\0'; return fixed_point_h_token;} {fixed_point}m {yylval.ID=strdup(yytext); yylval.ID[yyleng-1] = '\0'; return fixed_point_m_token;} {fixed_point}s {yylval.ID=strdup(yytext); yylval.ID[yyleng-1] = '\0'; return fixed_point_s_token;} {fixed_point}ms {yylval.ID=strdup(yytext); yylval.ID[yyleng-2] = '\0'; return fixed_point_ms_token;} _ /* do nothing - eat it 
up!*/ \# {/*fprintf(stderr, "popping from time_literal_state (###)\n");*/ yy_pop_state(); return end_interval_token;} . {/*fprintf(stderr, "time_literal_state: found invalid character '%s'. Aborting!\n", yytext);*/ ERROR;} \n {ERROR;} } /*******************************/ /* B.1.2.2 Character Strings */ /*******************************/ {double_byte_character_string} {yylval.ID=strdup(yytext); return double_byte_character_string_token;} {single_byte_character_string} {yylval.ID=strdup(yytext); return single_byte_character_string_token;} /******************************/ /* B.1.2.1 Numeric literals */ /******************************/ {integer} {yylval.ID=strdup(yytext); return integer_token;} {real} {yylval.ID=strdup(yytext); return real_token;} {binary_integer} {yylval.ID=strdup(yytext); return binary_integer_token;} {octal_integer} {yylval.ID=strdup(yytext); return octal_integer_token;} {hex_integer} {yylval.ID=strdup(yytext); return hex_integer_token;} /*****************************************/ /* B.1.1 Letters, digits and identifiers */ /*****************************************/ {identifier}/({st_whitespace_or_pragma_or_comment})"=>" {yylval.ID=strdup(yytext); return sendto_identifier_token;} {identifier}/({il_whitespace_or_pragma_or_comment})"=>" {yylval.ID=strdup(yytext); return sendto_identifier_token;} {identifier} {yylval.ID=strdup(yytext); // printf("returning identifier...: %s, %d\n", yytext, get_identifier_token(yytext)); return get_identifier_token(yytext);} /************************************************/ /************************************************/ /************************************************/ /***** *****/ /***** *****/ /***** T H E L E F T O V E R S . . . *****/ /***** *****/ /***** *****/ /************************************************/ /************************************************/ /************************************************/ /* do the single character tokens... * * e.g.: ':' '(' ')' '+' '*' ... */ . {return yytext[0];} %% /*************************/ /* Tracking Functions... */ /*************************/ #define MAX_LINE_LENGTH 1024 tracking_t *GetNewTracking(FILE* in_file) { tracking_t* new_env = new tracking_t; new_env->eof = 0; new_env->lineNumber = 1; new_env->currentChar = 0; new_env->lineLength = 0; new_env->currentTokenStart = 0; new_env->in_file = in_file; return new_env; } void FreeTracking(tracking_t *tracking) { delete tracking; } void UpdateTracking(const char *text) { const char *newline, *token = text; while ((newline = strchr(token, '\n')) != NULL) { token = newline + 1; current_tracking->lineNumber++; current_tracking->currentChar = 1; } current_tracking->currentChar += strlen(token); } /* GetNextChar: reads a character from input */ int GetNextChar(char *b, int maxBuffer) { int res = fgetc(current_tracking->in_file); if ( res == EOF ) return 0; *b = (char)res; return 1; } /***********************************/ /* Utility function definitions... */ /***********************************/ /* print the include file stack to stderr... 
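 * (Walks include_stack from the most recent include context downwards, so error
 * messages can show the full chain of (*#include ... *) directives that led to the
 * file currently being parsed.)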
*/ void print_include_stack(void) { int i; if ((include_stack_ptr - 1) >= 0) fprintf (stderr, "in file "); for (i = include_stack_ptr - 1; i >= 0; i--) fprintf (stderr, "included from file %s:%d\n", include_stack[i].filename, include_stack[i].env->lineNumber); } /* set the internal state variables of the lexical analyser to process a new include file */ void handle_include_file_(FILE *filehandle, const char *filename) { if (include_stack_ptr >= MAX_INCLUDE_DEPTH) { fprintf(stderr, "Includes nested too deeply\n"); exit( 1 ); } yyin = filehandle; include_stack[include_stack_ptr].buffer_state = YY_CURRENT_BUFFER; include_stack[include_stack_ptr].env = current_tracking; include_stack[include_stack_ptr].filename = current_filename; current_filename = strdup(filename); current_tracking = GetNewTracking(yyin); include_stack_ptr++; /* switch input buffer to new file... */ yy_switch_to_buffer(yy_create_buffer(yyin, YY_BUF_SIZE)); } /* insert the code (given in the source_code argument) into the source code we are parsing. * This is done by creating an artificial file with that new source code, and then 'including' the file */ void include_string_(const char *source_code) { FILE *tmp_file = tmpfile(); if(tmp_file == NULL) { perror("Error creating temp file."); exit(EXIT_FAILURE); } fwrite((void *)source_code, 1, strlen(source_code), tmp_file); rewind(tmp_file); /* now parse the tmp file, by asking flex to handle it as if it had been included with the (*#include ... *) pragma... */ handle_include_file_(tmp_file, ""); //fclose(tmp_file); /* do NOT close file. It must only be closed when we finish reading from it! */ } /* Open an include file, and set the internal state variables of the lexical analyser to process a new include file */ void include_file(const char *filename) { FILE *filehandle = NULL; for (int i = 0; (INCLUDE_DIRECTORIES[i] != NULL) && (filehandle == NULL); i++) { char *full_name; full_name = strdup3(INCLUDE_DIRECTORIES[i], "/", filename); if (full_name == NULL) { fprintf(stderr, "Out of memory!\n"); exit( 1 ); } filehandle = fopen(full_name, "r"); free(full_name); } if (NULL == filehandle) { fprintf(stderr, "Error opening included file %s\n", filename); exit( 1 ); } /* now process the new file... */ handle_include_file_(filehandle, filename); } /* return the specified character to the input stream */ /* WARNING: this function destroys the contents of yytext */ void unput_char(const char c) { /* NOTE: The following commented-out code is not necessary, as we currently use a different algorithm: * - make a backup/snapshot of the current tracking data (in previous_tracking variable) * (done in YY_USER_ACTION) * - restore the previous tracking state when we unput any text... * (in unput_text() and unput_and_mark() ) */ // /* We will later be processing this same character again when it is read from the input stream, // * and therefore we will be incrementing the line number and character column accordingly. // * We must therefore try to 'undo' the changes to the line number and character column // * so this character is not counted twice! // */ // if (c == '\n') { // current_tracking->lineNumber--; // /* We should now set the current_tracking->currentChar to the length of the previous line // * But we currently have no way of knowing it, so we simply set it to 0. // * I (msousa) don't think this is currently an issue because I don't believe the code // * ever calls unput_char() with a '\n', so we leave it for now // */ // current_tracking->currentChar = 0; // } else if (current_tracking->currentChar > 0) { // current_tracking->currentChar--; // } unput(c); // unput() destroys the contents of yytext !! }
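/* (Worked example of the snapshot/restore algorithm described above, using a purely
 * hypothetical token: suppose a rule has just matched "AT%IX1.1" and calls
 * unput_text(2). The characters "%IX1.1" are pushed back onto the input stream,
 * *current_tracking is rewound to the previous_tracking snapshot taken in
 * YY_USER_ACTION, and UpdateTracking() is then re-run over the kept prefix "AT",
 * so line/column tracking stays consistent with what will be scanned next.)
 */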
/* return all the text in the current token back to the input stream, except the first n chars. */ void unput_text(int n) { if (n < 0) ERROR; signed int i; // must be signed! The iteration may end with -1 when this function is called with n=0 !! char *yycopy = strdup( yytext ); /* unput_char() destroys yytext, so we copy it first */ for (int i = yyleng-1; i >= n; i--) unput_char(yycopy[i]); *current_tracking = previous_tracking; yycopy[n] = '\0'; UpdateTracking(yycopy); free(yycopy); } /* return all the text in the current token back to the input stream, * but first return to the stream an additional character to mark the end of the token. */ void unput_and_mark(const char mark_char) { char *yycopy = strdup( yytext ); /* unput_char() destroys yytext, so we copy it first */ unput_char(mark_char); for (int i = yyleng-1; i >= 0; i--) unput_char(yycopy[i]); free(yycopy); *current_tracking = previous_tracking; } /* The body_state tries to find a ';' before an END_PROGRAM, END_FUNCTION, END_FUNCTION_BLOCK or END_ACTION, * and ignores ';' inside comments and pragmas. This means that we cannot do this in a single lex rule. * Body_state therefore stores ALL text we consume in every rule, so we can push it back into the buffer * once we have decided if we are parsing ST or IL code. The following functions manage that buffer used by * the body_state. */ /* The buffer used by the body_state state */ char *bodystate_buffer = NULL; bool bodystate_is_whitespace = 1; // TRUE (1) if buffer is empty, or only contains whitespace. tracking_t bodystate_init_tracking; /* append text to bodystate_buffer */ void append_bodystate_buffer(const char *text, int is_whitespace) { // printf("<<>> %d <%s><%s>\n", bodystate_buffer, text, (NULL != bodystate_buffer)?bodystate_buffer:"NULL"); long int old_len = 0; // make backup of tracking if we are starting off a new body_state_buffer if (NULL == bodystate_buffer) bodystate_init_tracking = *current_tracking; // set bodystate_is_whitespace flag if we are starting a new buffer if (NULL == bodystate_buffer) bodystate_is_whitespace = 1; // set bodystate_is_whitespace flag to FALSE if we are adding non white space to buffer if (!is_whitespace) bodystate_is_whitespace = 0; if (NULL != bodystate_buffer) old_len = strlen(bodystate_buffer); bodystate_buffer = (char *)realloc(bodystate_buffer, old_len + strlen(text) + 1); if (NULL == bodystate_buffer) ERROR; strcpy(bodystate_buffer + old_len, text); //printf("=<%s> %d %d\n", (NULL != bodystate_buffer)?bodystate_buffer:NULL, old_len + strlen(text) + 1, bodystate_buffer); } /* Return all data in bodystate_buffer back to flex, and empty bodystate_buffer. */ void unput_bodystate_buffer(void) { if (NULL == bodystate_buffer) ERROR; // printf("<<>>\n%s\n", bodystate_buffer); for (long int i = strlen(bodystate_buffer)-1; i >= 0; i--) unput_char(bodystate_buffer[i]); free(bodystate_buffer); bodystate_buffer = NULL; bodystate_is_whitespace = 1; *current_tracking = bodystate_init_tracking; }
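/* (Usage sketch, based only on the description above; the actual body_state rules
 * live earlier in this file. While in body_state each rule buffers whatever it
 * consumed, e.g.
 *
 *     append_bodystate_buffer(yytext, 0);   // 0 -> not whitespace
 *
 * and once it has been decided whether the body is ST or IL, the buffered text is
 * pushed back with unput_bodystate_buffer() so the ST or IL rules proper can
 * re-scan it, with location tracking restored from bodystate_init_tracking.)
 */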
/* Return true if bodystate_buffer is empty or only contains whitespace!! */ int isempty_bodystate_buffer(void) { if (NULL == bodystate_buffer) return 1; if (bodystate_is_whitespace) return 1; return 0; } /* Delete all data in bodystate_buffer. */ /* Will be used to delete ST whitespace when not needed. If not deleted this whitespace * will be prepended to the next text block of code being appended to bodystate_buffer, * which may cause trouble if it is IL code */ void del_bodystate_buffer(void) { free(bodystate_buffer); bodystate_buffer = NULL; bodystate_is_whitespace = 1; } /* Called by flex when it reaches the end-of-file */ int yywrap(void) { /* We reached the end of the input file... */ /* Should we continue with another file? */ /* If so: * open the new file... * return 0; */ /* to stop processing... * return 1; */ return 1; /* Stop scanning at end of input file. */ } /*******************************/ /* Public Interface for Bison. */ /*******************************/ /* The following functions will be called from inside bison code! */ void include_string(const char *source_code) {include_string_(source_code);} /* Tell flex which file to parse. This function will not immediately start parsing the file. * To parse the file, you then need to call yyparse() * * Returns NULL on error opening the file (and a valid errno), or the open file handle on success. * Caller must close the file! */ FILE *parse_file(const char *filename) { FILE *filehandle = NULL; if((filehandle = fopen(filename, "r")) != NULL) { yyin = filehandle; current_filename = strdup(filename); current_tracking = GetNewTracking(yyin); } return filehandle; } /*************************************/ /* Include a main() function to test */ /* the token parsing by flex.... */ /*************************************/ #ifdef TEST_MAIN #include "../util/symtable.hh" yystype yylval; YYLTYPE yylloc; int get_identifier_token(const char *identifier_str) {return 0;} int get_direct_variable_token(const char *direct_variable_str) {return 0;} int main(int argc, char **argv) { FILE *in_file; int res; if (argc == 1) { /* Work as an interactive (command line) parser... */ while((res=yylex())) fprintf(stderr, "(line %d)token: %d\n", yylineno, res); } else { /* Work as non-interactive (file) parser... */ if((in_file = fopen(argv[1], "r")) == NULL) { char *errmsg = strdup2("Error opening main file ", argv[1]); perror(errmsg); free(errmsg); return -1; } /* parse the file... */ yyin = in_file; current_filename = argv[1]; while((res=yylex()) != 0) { fprintf(stderr, "(line %d)token: %d (%s)\n", yylineno, res, yylval.ID); } } return 0; } #endif
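/* Typical call sequence from the stage 1/2 driver (illustrative sketch only; the
 * real driver code in matiec does more option handling and error reporting):
 *
 *   FILE *fh = parse_file("example.st");   // sets up yyin, filename and tracking
 *   if (fh == NULL) { perror("example.st"); return -1; }
 *   int res = yyparse();                   // the bison parser pulls tokens via yylex()
 *   fclose(fh);                            // caller must close the file (see above)
 *   return res;
 */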