etisserant@0: /*
msousa@264: * matiec - a compiler for the programming languages defined in IEC 61131-3
msousa@264: *
msousa@264: * Copyright (C) 2003-2011 Mario de Sousa (msousa@fe.up.pt)
msousa@264: *
msousa@264: * This program is free software: you can redistribute it and/or modify
msousa@264: * it under the terms of the GNU General Public License as published by
msousa@264: * the Free Software Foundation, either version 3 of the License, or
msousa@264: * (at your option) any later version.
msousa@264: *
msousa@264: * This program is distributed in the hope that it will be useful,
msousa@264: * but WITHOUT ANY WARRANTY; without even the implied warranty of
msousa@264: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
msousa@264: * GNU General Public License for more details.
msousa@264: *
msousa@264: * You should have received a copy of the GNU General Public License
msousa@264: * along with this program. If not, see <http://www.gnu.org/licenses/>.
msousa@264: *
etisserant@0: *
etisserant@0: * This code is made available on the understanding that it will not be
etisserant@0: * used in safety-critical situations without a full and competent review.
etisserant@0: */
etisserant@0:
etisserant@0: /*
msousa@264: * An IEC 61131-3 compiler.
etisserant@0: *
etisserant@0: * Based on the
etisserant@0: * FINAL DRAFT - IEC 61131-3, 2nd Ed. (2001-12-10)
etisserant@0: *
etisserant@0: */
etisserant@0:
etisserant@0: /*
etisserant@0: * Stage 1
etisserant@0: * =======
etisserant@0: *
etisserant@0: * This file contains the lexical tokens definitions, from which
etisserant@0: * the flex utility will generate a lexical parser function.
etisserant@0: */
etisserant@0:
etisserant@0:
etisserant@0:
etisserant@0:
etisserant@0: /*****************************/
etisserant@0: /* Lexical Parser Options... */
etisserant@0: /*****************************/
etisserant@0:
etisserant@0: /* The lexical analyser will never work in interactive mode,
etisserant@0: * i.e., it will only process programs saved to files, and never
etisserant@0: * programs being written inter-actively by the user.
etisserant@0: * This option saves the resulting parser from calling the
etisserant@0: * isatty() function, that seems to be generating some compile
etisserant@0: * errors under some (older?) versions of flex.
etisserant@0: */
etisserant@0: %option never-interactive
etisserant@0:
etisserant@0: /* Have the lexical analyser use a 'char *yytext' instead of an
etisserant@0: * array of char 'char yytext[??]' to store the lexical token.
etisserant@0: */
etisserant@0: %pointer
etisserant@0:
etisserant@0:
etisserant@0: /* Have the lexical analyser ignore the case of letters.
etisserant@0: * This will occur for all the tokens and keywords, but
etisserant@0: * the resulting text handed up to the syntax parser
etisserant@0: * will not be changed, and keep the original case
etisserant@0: * of the letters in the input file.
etisserant@0: */
etisserant@0: %option case-insensitive
etisserant@0:
etisserant@0: /* Have the generated lexical analyser keep track of the
etisserant@0: * line number it is currently analysing.
etisserant@0: * This is used to pass up to the syntax parser
etisserant@0: * the number of the line on which the current
etisserant@0: * token was found. It will enable the syntax parser
etisserant@0: * to generate more informative error messages...
etisserant@0: */
etisserant@0: %option yylineno
etisserant@0:
etisserant@0: /* required for the use of the yy_pop_state() and
etisserant@0: * yy_push_state() functions
etisserant@0: */
etisserant@0: %option stack
etisserant@0:
etisserant@0: /* The '%option stack' also requests the inclusion of
etisserant@0: * the yy_top_state(), however this function is not
etisserant@0: * currently being used. This means that the compiler
etisserant@0: * is complaining about the existence of this function.
etisserant@0: * The following option removes the yy_top_state()
etisserant@0: * function from the resulting c code, so the compiler
etisserant@0: * no longer complains.
etisserant@0: */
etisserant@0: %option noyy_top_state
etisserant@0:
msousa@547: /* We will be using unput() in our flex code, so we cannot set the following option!... */
msousa@547: /*
msousa@267: %option nounput
msousa@547: */
msousa@267:
etisserant@0: /**************************************************/
etisserant@0: /* External Variable and Function declarations... */
etisserant@0: /**************************************************/
etisserant@0:
etisserant@0:
etisserant@0: %{
etisserant@0: /* Define TEST_MAIN to include a main() function.
etisserant@0: * Useful for testing the parser generated by flex.
etisserant@0: */
etisserant@0: /*
etisserant@0: #define TEST_MAIN
etisserant@0: */
etisserant@0: /* If lexical parser is compiled by itself, we need to define the following
etisserant@0: * constant to some string. Under normal circumstances DEFAULT_LIBDIR is set
etisserant@0: * in the syntax parser header file...
etisserant@0: */
etisserant@0: #ifdef TEST_MAIN
etisserant@40: #define DEFAULT_LIBDIR "just_testing"
etisserant@0: #endif
etisserant@0:
etisserant@0:
etisserant@0:
etisserant@0: /* Required for strdup() */
etisserant@0: #include <string.h>
etisserant@0:
etisserant@0: /* Required only for the declaration of abstract syntax classes
etisserant@0: * (class symbol_c; class token_c; class list_c;)
etisserant@0: * These will not be used in flex, but the token type union defined
msousa@354: * in iec_bison.h contains pointers to these classes, so we must include
etisserant@0: * it here.
etisserant@0: */
etisserant@0: #include "../absyntax/absyntax.hh"
etisserant@0:
mario@15:
msousa@354: /* iec_bison.h is generated by bison.
etisserant@0: * Contains the definition of the token constants, and the
etisserant@0: * token value type YYSTYPE (in our case, a 'const char *')
etisserant@0: */
msousa@354: #include "iec_bison.h"
mario@15: #include "stage1_2_priv.hh"
mario@15:
etisserant@0:
etisserant@0: /* Variable defined by the bison parser,
etisserant@0: * where the value of the tokens will be stored
etisserant@0: */
etisserant@0: extern YYSTYPE yylval;
etisserant@0:
etisserant@0: /* The name of the file currently being parsed...
etisserant@0: * YY_USER_ACTION copies it into yylloc.first_file and yylloc.last_file,
etisserant@0: * so every token location handed to bison carries the file name.
etisserant@0: * It starts out as NULL.
etisserant@0: * Note that flex accesses and updates this global variable
msousa@757: * appropriately whenever it comes across an {#include "<filename>"} pragma...
msousa@757: */
msousa@757: const char *current_filename = NULL;
msousa@757:
mario@15:
etisserant@0:
etisserant@0: /* Historical switch asking (older versions of) flex not to emit yyunput(). */
msousa@267: /* NOTE: it seems that this #define is no longer needed. It has been
msousa@267: * replaced by %option nounput.
msousa@267: * Should we simply delete it?
msousa@267: * For now leave it in, in case someone is using an old version of flex.
msousa@267: * In any case, the most harm that deleting it can cause is a warning message
msousa@267: * when compiling iec.flex.c:
msousa@267: * warning: ‘void yyunput(int, char*)’ defined but not used
msousa@267: *
msousa@267: * NOTE(review): this file now DOES use unput(). '%option nounput' is
msousa@267: * deliberately left commented out in the options section for that reason,
msousa@267: * and unput_text() and unput_and_mark() are declared further down.
msousa@267: * If a flex version that honours YY_NO_UNPUT is ever used, this #define
msousa@267: * would presumably remove yyunput() and break those helpers -- confirm,
msousa@267: * and consider deleting the #define.
msousa@267: */
etisserant@0: #define YY_NO_UNPUT
etisserant@0:
etisserant@0: /* Variable defined by the bison parser.
etisserant@0: * It must be initialised with the location
etisserant@0: * of the token being parsed.
etisserant@0: * This is only needed if we want to keep
etisserant@0: * track of the locations, in order to give
etisserant@0: * more meaningful error messages!
etisserant@0: */
conti@415: /*
conti@415: *extern YYLTYPE yylloc;
conti@415: */
lbessard@136: #define YY_INPUT(buf,result,max_size) { /* flex's hook for refilling its input buffer */ \
lbessard@136: result = GetNextChar(buf, max_size); /* GetNextChar() is handed flex's buffer and max_size */ \
lbessard@136: if ( result <= 0 ) /* nothing could be read... */ \
lbessard@136: result = YY_NULL; /* ...so report end of input to flex */ \
lbessard@136: }
lbessard@136:
msousa@287:
msousa@287: /* A counter to track the order by which each token is processed.
msousa@287: * YY_USER_ACTION stores its value in yylloc.first_order and
msousa@287: * yylloc.last_order, and then increments it.
msousa@287: * NOTE: This counter is not exactly linear (i.e., it does not get incremented by 1 for each token).
msousa@287: * In other words, it may get incremented by more than one between two consecutive tokens.
msousa@287: * This is due to the fact that the counter gets incremented every 'user action' in flex,
msousa@287: * however not every user action will result in a token being passed to bison.
msousa@287: * Nevertheless this is still OK, as we are only interested in the relative
msousa@287: * ordering of tokens...
msousa@287: */
msousa@287: static long int current_order = 0;
msousa@287:
msousa@287:
etisserant@0: /* Macro that is executed for every action.
etisserant@0: * We use it to pass the location of the token
etisserant@0: * back to the bison parser...
etisserant@0: *
etisserant@0: * The location is written to yylloc as follows:
etisserant@0: *  - first_line and last_line both get current_tracking->lineNumber;
etisserant@0: *  - first_column gets current_tracking->currentTokenStart;
etisserant@0: *  - last_column gets current_tracking->currentChar - 1;
etisserant@0: *  - first_file and last_file both get current_filename;
etisserant@0: *  - first_order and last_order both get current_order.
etisserant@0: * Afterwards currentTokenStart is moved up to currentChar (the next token
etisserant@0: * starts where this one ended) and current_order is incremented.
etisserant@0: *
etisserant@0: * NOTE(review): both line fields come from the same lineNumber, so a token
etisserant@0: * spanning several lines is presumably reported on a single line -- confirm
etisserant@0: * against the code that maintains current_tracking.
etisserant@0: */
lbessard@136: #define YY_USER_ACTION {\
msousa@287: yylloc.first_line = current_tracking->lineNumber; \
msousa@287: yylloc.first_column = current_tracking->currentTokenStart; \
msousa@287: yylloc.first_file = current_filename; \
msousa@287: yylloc.first_order = current_order; \
msousa@287: yylloc.last_line = current_tracking->lineNumber; \
msousa@287: yylloc.last_column = current_tracking->currentChar - 1; \
msousa@287: yylloc.last_file = current_filename; \
msousa@287: yylloc.last_order = current_order; \
msousa@287: current_tracking->currentTokenStart = current_tracking->currentChar; \
msousa@287: current_order++; \
etisserant@0: }
etisserant@0:
etisserant@0: /* Since this lexical parser we defined only works in ASCII based
etisserant@0: * systems, we might as well make sure it is being compiled on
etisserant@0: * one...
etisserant@0: * Let's check a few random characters: the preprocessor compares the
etisserant@0: * character constants below against their ASCII codes, and stops the
etisserant@0: * compilation with an #error if any of them differs.
etisserant@0: */
etisserant@0: #if (('a' != 0x61) || ('A' != 0x41) || ('z' != 0x7A) || ('Z' != 0x5A) || \
etisserant@0: ('0' != 0x30) || ('9' != 0x39) || ('(' != 0x28) || ('[' != 0x5B))
etisserant@0: #error This lexical analyser is not portable to a non ASCII based system.
etisserant@0: #endif
etisserant@0:
etisserant@0:
etisserant@0: /* Function only called from within flex, but defined
etisserant@0: * in iec.y!
lbessard@3: * We declare it here...
etisserant@0: *
etisserant@0: * Search for a symbol in either of the two symbol tables
etisserant@0: * and return the token id of the first symbol found.
etisserant@0: * Searches first in the variables, and only if not found
etisserant@0: * does it continue searching in the library elements
etisserant@0: *
etisserant@0: * identifier_str: the text of the identifier to look up.
etisserant@0: * returns: the token id, as an int.
etisserant@0: * NOTE(review): what is returned when the identifier is in neither table
etisserant@0: * is decided in iec.y and cannot be seen from here.
etisserant@0: */
etisserant@0: //token_id_t get_identifier_token(const char *identifier_str);
etisserant@0: int get_identifier_token(const char *identifier_str);
etisserant@0: %}
etisserant@0:
etisserant@0:
etisserant@0: /***************************************************/
etisserant@0: /* Forward Declaration of functions defined later. */
etisserant@0: /***************************************************/
etisserant@0:
etisserant@0: %{
etisserant@0: /* return all the text in the current token back to the input stream.
etisserant@0: * NOTE(review): the meaning of n cannot be seen from this declaration;
etisserant@0: * presumably it is the number of leading characters of the token that are
etisserant@0: * NOT returned -- confirm against the definition.
etisserant@0: */
etisserant@0: void unput_text(unsigned int n);
msousa@547: /* return all the text in the current token back to the input stream,
msousa@547: * but first return to the stream an additional character (c) to mark the end of the token.
msousa@547: */
msousa@547: void unput_and_mark(const char c);
msousa@756:
msousa@756: void include_file(const char *include_filename); /* NOTE(review): presumably switches the scanner to the named file for the {#include ...} pragma -- confirm */
msousa@757:
msousa@757: int GetNextChar(char *b, int maxBuffer); /* input source used by YY_INPUT; a result <= 0 is treated there as end of input */
etisserant@0: %}
etisserant@0:
etisserant@0:
etisserant@0:
etisserant@0: /****************************/
etisserant@0: /* Lexical Parser States... */
etisserant@0: /****************************/
etisserant@0:
etisserant@0: /* NOTE: Our parser can parse st or il code, intermixed
etisserant@0: * within the same file.
etisserant@0: * With IL we come across the issue of the EOL (end of line) token.
etisserant@0: * ST, and the declaration parts of IL do not use this token!
etisserant@0: * If the lexical analyser were to issue this token during ST
etisserant@0: * language parsing, or during the declaration of data types,
etisserant@0: * function headers, etc. in IL, the syntax parser would crash.
etisserant@0: *
etisserant@0: * We can solve this issue using one of three methods:
etisserant@0: * (1) Augment all the syntax that does not accept the EOL
etisserant@0: * token to simply ignore it. This makes the syntax
etisserant@0: * definition (in iec.y) very cluttered!
etisserant@0: * (2) Let the lexical parser figure out which language
etisserant@0: * it is parsing, and decide whether or not to issue
etisserant@0: * the EOL token. This requires the lexical parser
etisserant@0: * to have knowledge of the syntax!, making for a poor
etisserant@0: * overall organisation of the code. It would also make it
etisserant@0: * very difficult to understand the lexical parser as it
etisserant@0: * would use several states, and a state machine to transition
etisserant@0: * between the states. The state transitions would be
etisserant@0: * intermingled with the lexical parser definition!
etisserant@0: * (3) Use a mixture of (1) and (2). The lexical analyser
etisserant@0: * merely distinguishes between function headers and function
etisserant@0: * bodies, but no longer makes a distinction between il and
etisserant@0: * st language bodies. When parsing a body, it will return
etisserant@0: * the EOL token. In other states '\n' will be ignored as
etisserant@0: * whitespace.
etisserant@0: * The ST language syntax has been augmented in the syntax
etisserant@0: * parser configuration to ignore any EOL tokens that it may
etisserant@0: * come across!
etisserant@0: * This option has both drawbacks of option (1) and (2), but
etisserant@0: * much less intensely.
etisserant@0: * The syntax that gets cluttered is limited to the ST statements
etisserant@0: * (which is rather limited, compared to the function headers and
etisserant@0: * data type declarations, etc...), while the state machine in
etisserant@0: * the lexical parser becomes very simple. All state transitions
etisserant@0: * can be handled within the lexical parser by itself, and can be
etisserant@0: * easily identified. Thus knowledge of the syntax required by
etisserant@0: * the lexical parser is very limited!
etisserant@0: *
etisserant@0: * Amazingly enough, I (Mario) got to implement option (3)
etisserant@0: * at first, requiring two basic states, decl and body.
etisserant@0: * The lexical parser will enter the body state when
etisserant@0: * it is parsing the body of a function/program/function block. The
etisserant@0: * state transition is done when we find a VAR_END that is not followed
etisserant@0: * by a VAR! This is the syntax knowledge that gets included in the
etisserant@0: * lexical analyser with this option!
etisserant@0: * Unfortunately, getting the st syntax parser to ignore EOL anywhere
etisserant@0: * where they might appear leads to conflicts. This is due to the fact
etisserant@0: * that the syntax parser uses the single look-ahead token to remove
etisserant@0: * possible conflicts. When we insert a possible EOL, the single
etisserant@0: * look ahead token becomes the EOL, which means the potential conflicts
etisserant@0: * could no longer be resolved.
etisserant@0: * Removing these conflicts would make the st syntax parser very convoluted,
etisserant@0: * and adding the extraneous EOL would make it very cluttered.
etisserant@0: * This option was therefore dropped in favour of another!
etisserant@0: *
etisserant@0: * I ended up implementing (2). Unfortunately the lexical analyser can
etisserant@0: * not easily distinguish between il and st code, since function
etisserant@0: * calls in il are very similar to function block calls in st.
etisserant@0: * We therefore use an extra 'body' state. When the lexical parser
etisserant@0: * finds that last END_VAR, it enters the body state. This state
etisserant@0: * must figure out what language is being parsed from the first few
mario@68: * tokens, and switch to the correct state (st, il or sfc) according to the
etisserant@0: * language. This means that we insert quite a bit of knowledge of the
etisserant@0: * syntax of the languages into the lexical parser. This is ugly, but it
etisserant@0: * works, and at least it is possible to keep all the state changes together
etisserant@0: * to make it easier to remove them later on if need be.
mario@68: * Once the language being parsed has been identified,
mario@68: * the body state returns any matched text back to the buffer with unput(),
mario@68: * to be later matched correctly by the appropriate language parser (st, il or sfc).
mario@68: *
mario@68: * Additionally, in sfc state it may further recursively enter the body state
mario@68: * once again. This is because an sfc body may contain ACTIONS, which are then
mario@68: * written in one of the three languages (ST, IL or SFC), so once again we need
mario@68: * to figure out which language the ACTION in the SFC was written in. We already
mario@68: * have all that done in the body state, so we recursively transition to the body
mario@68: * state once again.
mario@68: * Note that in this case, when coming out of the st/il state (whichever language
mario@68: * the action was written in) the sfc state will become active again. This is done by
mario@68: * pushing and popping the previously active state!
mario@68: *
mario@68: * The sfc_qualifier_state is required because when parsing actions within an
mario@68: * sfc, we will be expecting action qualifiers (N, P, R, S, DS, SD, ...). In order
mario@68: * for bison to work correctly, these qualifiers must be returned as tokens. However,
mario@68: * these tokens are not reserved keywords, which means it should be possible to
mario@68: * define variables/functions/FBs with any of these names (including
mario@68: * S and R which are special because they are also IL operators). So, when we are not
mario@68: * expecting any action qualifiers, flex does not return these tokens, and is free
mario@68: * to interpret them as previously defined variables/functions/... as the case may be.
mario@68: *
msousa@547: * The time_literal_state is required because TIME# literals are decomposed into
msousa@547: * portions, and we want to send these portions one by one to bison. Each portion will
msousa@547: * represent the value in days/hours/minutes/seconds/ms.
msousa@547: * Unfortunately, some of these portions may also be lexically analysed as an identifier. So,
msousa@547: * we need to disable lexical identification of identifiers while parsing TIME# literals!
msousa@547: * e.g.: TIME#55d_4h_56m
msousa@547: * We would like to return to bison the tokens 'TIME' '#' '55d' '_' '4h' '_' '56m'
msousa@547: * Unfortunately, flex will join '_' and '4h' to create a legal {identifier} '_4h',
msousa@547: * and return that identifier instead! So, we added this state!
msousa@547: *
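mario@68: * The main state machine has 7 possible states (INITIAL, config, decl, body, st, il, sfc),
mario@68: * plus the auxiliary states sfc_qualifier, sfc_priority, task_init, time_literal
mario@68: * and the three include states, which are declared further down.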
etisserant@0: * Possible state changes are:
mario@68: * INITIAL -> goto(decl_state)
mario@68: * (when a FUNCTION, FUNCTION_BLOCK, or PROGRAM is found,
mario@68: * and followed by a VAR declaration)
mario@68: * INITIAL -> goto(body_state)
mario@68: * (when a FUNCTION, FUNCTION_BLOCK, or PROGRAM is found,
mario@68: * and _not_ followed by a VAR declaration)
mario@68: * (This transition is actually commented out, since the syntax
mario@68: * does not allow the declaration of functions, FBs, or programs
mario@68: * without any VAR declaration!)
mario@68: * INITIAL -> goto(config_state)
mario@68: * (when a CONFIGURATION is found)
mario@68: * decl_state -> push(decl_state); goto(body_state)
mario@68: * (when the last END_VAR is found, i.e. the function body starts)
mario@68: * decl_state -> push(decl_state); goto(sfc_state)
mario@68: * (when it figures out it is parsing sfc language)
mario@68: * body_state -> goto(st_state)
mario@68: * (when it figures out it is parsing st language)
mario@68: * body_state -> goto(il_state)
mario@68: * (when it figures out it is parsing il language)
mario@68: * st_state -> pop()
mario@68: * (when a END_FUNCTION, END_FUNCTION_BLOCK, END_PROGRAM,
mario@68: * END_ACTION or END_TRANSITION is found)
mario@68: * il_state -> pop()
mario@68: * (when a END_FUNCTION, END_FUNCTION_BLOCK, END_PROGRAM,
mario@68: * END_ACTION or END_TRANSITION is found)
mario@68: * decl_state -> goto(INITIAL)
mario@68: * (when a END_FUNCTION, END_FUNCTION_BLOCK, or END_PROGRAM is found)
mario@68: * sfc_state -> goto(INITIAL)
mario@68: * (when a END_FUNCTION, END_FUNCTION_BLOCK, or END_PROGRAM is found)
mario@68: * config_state -> goto(INITIAL)
mario@68: * (when a END_CONFIGURATION is found)
mario@68: * sfc_state -> push(sfc_state); goto(body_state)
mario@68: * (when parsing an action. This transition is requested by bison)
mario@68: * sfc_state -> push(sfc_state); goto(sfc_qualifier_state)
mario@68: * (when expecting an action qualifier. This transition is requested by bison)
mario@68: * sfc_qualifier_state -> pop()
mario@68: * (when no longer expecting an action qualifier. This transition is requested by bison)
mario@74: * config_state -> push(config_state); goto(task_init_state)
mario@74: * (when parsing a task initialisation. This transition is requested by bison)
mario@74: * task_init_state -> pop()
mario@74: * (when no longer parsing task initialisation parameters. This transition is requested by bison)
mario@74: *
mario@74: */
mario@68:
mario@68:
etisserant@0: /* NOTE: a state declared with %s is inclusive: rules that name no state
etisserant@0: * stay active while it is the current state. A state declared with %x
etisserant@0: * is exclusive: only the rules that explicitly name it are active.
etisserant@0: *
etisserant@0: * we are parsing a configuration. */
lbessard@3: %s config_state
etisserant@0:
mario@74: /* Inside a configuration, we are parsing task initialisation parameters */
mario@74: /* This means that PRIORITY, SINGLE and INTERVAL must be handled as
mario@74: * tokens, and not as possible identifiers. Note that the above words
mario@74: * are not keywords.
mario@74: */
mario@74: %s task_init_state
mario@74:
etisserant@0: /* we are parsing a function, program or function block declaration */
lbessard@3: %s decl_state
etisserant@0:
msousa@737: /* we will be parsing a function body. Whether il/st/sfc remains to be determined.
msousa@737: * This state is exclusive (%x): only the rules written for it are active. */
mario@68: %x body_state
etisserant@0:
etisserant@0: /* we are parsing il code -> flex must return the EOL tokens! */
lbessard@3: %s il_state
etisserant@0:
etisserant@0: /* we are parsing st code -> flex must not return the EOL tokens! */
lbessard@3: %s st_state
etisserant@0:
mario@68: /* we are parsing sfc code -> flex must not return the EOL tokens! */
lbessard@3: %s sfc_state
etisserant@0:
mario@68: /* we are parsing sfc code, and expecting an action qualifier. */
mario@68: %s sfc_qualifier_state
etisserant@0:
mario@86: /* we are parsing sfc code, and expecting the priority token. */
mario@86: %s sfc_priority_state
etisserant@0:
msousa@547: /* we are parsing a TIME# literal. We must not return any {identifier} tokens.
msousa@547: * This state is exclusive (%x), so rules that name no state are not active in it. */
msousa@547: %x time_literal_state
mario@75:
mario@75:
etisserant@0: /*******************/
etisserant@0: /* File #include's */
etisserant@0: /*******************/
etisserant@0:
etisserant@0: /* We extend the IEC 61131-3 standard syntax to allow inclusion
etisserant@0: * of other files, using the IEC 61131-3 pragma directive...
etisserant@0: * The accepted syntax is:
etisserant@0: * {#include "<filename>"}
etisserant@0: *
etisserant@0: * The pattern is built from the name definitions below:
etisserant@0: *  - file_include_pragma_beg: the text {#include, optional whitespace
etisserant@0: *    (newlines included), and the opening double quote;
etisserant@0: *  - file_include_pragma_filename: any run of characters other than a
etisserant@0: *    double quote (the run may be empty);
etisserant@0: *  - file_include_pragma_end: the closing double quote, optional
etisserant@0: *    whitespace, and the closing brace;
etisserant@0: *  - file_include_pragma: the three parts above, in sequence.
etisserant@0: * Since this scanner is generated with the case-insensitive option, the
etisserant@0: * word 'include' is matched in any letter case.
etisserant@0: */
etisserant@0:
etisserant@0: /* the "include" states are used for picking up the name of an include file */
etisserant@0: %x include_beg
etisserant@0: %x include_filename
etisserant@0: %x include_end
etisserant@0:
etisserant@0:
etisserant@0: file_include_pragma_filename [^\"]*
etisserant@0: file_include_pragma_beg "{#include"{st_whitespace_only}\"
etisserant@0: file_include_pragma_end \"{st_whitespace_only}"}"
etisserant@0: file_include_pragma {file_include_pragma_beg}{file_include_pragma_filename}{file_include_pragma_end}
etisserant@0:
etisserant@0:
etisserant@0: %{
etisserant@0: #define MAX_INCLUDE_DEPTH 16 /* number of entries in include_stack[] */
etisserant@0:
etisserant@0: typedef struct { /* position tracking state for one input file */
msousa@757: int eof; /* NOTE(review): presumably set once in_file is exhausted -- confirm */
msousa@757: int lineNumber; /* YY_USER_ACTION reports it as the first and last line of a token */
msousa@757: int currentChar; /* YY_USER_ACTION reports currentChar - 1 as the last column of a token */
msousa@757: int lineLength; /* NOTE(review): presumably the length of the line held in buffer -- confirm */
msousa@757: int currentTokenStart; /* first column of the current token; YY_USER_ACTION resets it to currentChar */
msousa@757: char *buffer; /* NOTE(review): presumably the text of the line being read -- confirm */
msousa@757: FILE *in_file; /* NOTE(review): presumably the stream the characters come from -- confirm */
msousa@757: } tracking_t;
msousa@757:
msousa@757: typedef struct { /* one entry of the include stack */
etisserant@0: YY_BUFFER_STATE buffer_state; /* a flex input buffer handle */
msousa@757: tracking_t *env; /* a position tracking state (see tracking_t) */
etisserant@0: const char *filename; /* a file name */
etisserant@0: } include_stack_t;
etisserant@0:
msousa@757: tracking_t *current_tracking = NULL; /* tracking state read and updated by YY_USER_ACTION; starts out as NULL */
etisserant@0: include_stack_t include_stack[MAX_INCLUDE_DEPTH];
etisserant@0: int include_stack_ptr = 0; /* NOTE(review): presumably the number of entries in use in include_stack[] -- confirm */
etisserant@0:
etisserant@0: const char *INCLUDE_DIRECTORIES[] = { /* NOTE(review): presumably searched in this order for included files -- confirm */
etisserant@40: DEFAULT_LIBDIR, /* only defined in this file when TEST_MAIN is set; otherwise it must come from outside */
etisserant@40: ".",
etisserant@40: "/lib",
etisserant@40: "/usr/lib",
etisserant@40: "/usr/lib/iec",
etisserant@0: NULL /* must end with NULL!! */
etisserant@0: };
etisserant@0: %}
etisserant@0:
etisserant@0:
etisserant@0:
etisserant@0: /*****************************/
etisserant@0: /* Preliminary constructs... */
etisserant@0: /*****************************/
etisserant@0:
msousa@267: /* In order to allow the declaration of POU prototypes (Function, FB, Program, ...),
msousa@267: * especially the prototypes of Functions and FBs defined in the standard
msousa@267: * (i.e. standard functions and FBs), we extend the IEC 61131-3 standard syntax
msousa@267: * with two pragmas to indicate that the code is to be parsed (going through the
msousa@267: * lexical, syntactical, and semantic analysers), but no code is to be generated.
msousa@267: *
msousa@267: * The accepted syntax is:
msousa@267: * {disable code generation}
msousa@267: * ... prototypes ...
msousa@267: * {enable code generation}
msousa@267: *
msousa@267: * Both pragmas are matched as literal text, i.e. with exactly one space
msousa@267: * between the words and none next to the braces; only the letter case may
msousa@267: * vary, since this scanner is generated with the case-insensitive option.
msousa@267: *
msousa@267: * When parsing these prototypes the abstract syntax tree will be populated as usual,
msousa@267: * allowing the semantic analyser to correctly analyse the semantics of calls to these
msousa@267: * functions/FBs. However, stage4 will simply ignore all IEC61131-3 code
msousa@267: * between the above two pragmas.
msousa@267: */
msousa@267:
msousa@267: disable_code_generation_pragma "{disable code generation}"
msousa@267: enable_code_generation_pragma "{enable code generation}"
msousa@267:
msousa@267:
msousa@267: /* Any other pragma...
msousa@267: * i.e. an opening brace, any run of characters other than a closing
msousa@267: * brace (newlines included), and the first closing brace after it.
msousa@267: */
etisserant@0:
etisserant@0: pragma "{"[^}]*"}"
etisserant@0:
etisserant@0: /* NOTE: this seemingly unnecessary complex definition is required
etisserant@0: * to be able to eat up comments such as:
etisserant@0: * '(* Testing... ! ***** ******)'
etisserant@0: * without using the trailing context command in flex (/{context})
etisserant@0: * since {comment} itself will later be used with
etisserant@0: * trailing context ({comment}/{context})
etisserant@0: *
etisserant@0: * How it reads:
etisserant@0: *  - comment_text is one unit of the comment body: either a single
etisserant@0: *    character that is not an asterisk, or a run of asterisks followed by
etisserant@0: *    a character that is neither an asterisk nor a closing parenthesis;
etisserant@0: *  - comment is "(*", any number of such units, and then a run of
etisserant@0: *    asterisks immediately followed by ")".
etisserant@0: * Consequently a comment ends at the first "*)" found after the opening
etisserant@0: * "(*", which means that comments do not nest.
etisserant@0: */
etisserant@0: not_asterisk [^*]
etisserant@0: not_close_parenthesis_nor_asterisk [^*)]
etisserant@0: asterisk "*"
etisserant@0: comment_text {not_asterisk}|(({asterisk}+){not_close_parenthesis_nor_asterisk})
etisserant@0:
etisserant@0: comment "(*"({comment_text}*)({asterisk}+)")"
etisserant@0:
etisserant@0:
etisserant@0: /*
etisserant@0: 3.1 Whitespace
etisserant@0: (NOTE: Whitespace IS clearly defined, to include newline!!! See section 2.1.4!!!)
etisserant@0: No definition of whitespace is given, in other words, the characters that may be used to separate language tokens are not precisely defined. One may nevertheless make an intelligent guess of using the space (' '), and other characters also commonly considered whitespace in other programming languages (horizontal tab, vertical tab, form feed, etc.).
etisserant@0: The main question is whether the newline character should be considered whitespace. IL language statements use an EOL token (End Of Line) to distinguish between some language constructs. The EOL token itself is openly defined as "normally consist[ing] of the 'paragraph separator' ", leaving the final choice open to each implementation. If we choose the newline character to represent the EOL token, it may then not be considered whitespace.
etisserant@0: On the other hand, some examples that come in a non-normative annex of the specification allow function declarations to span multiple lines, which means that the newline character is being considered as whitespace.
etisserant@0: Our implementation works around this issue by including the new line character in the whitespace while parsing function declarations and the ST language, and parsing it as the EOL token only while parsing IL language statements. This requires the use of a state machine in the lexical parser that needs at least some knowledge of the syntax itself.
etisserant@0: */
etisserant@0: /* NOTE: Our definition of whitespace will only work in ASCII!
etisserant@0: *
etisserant@0: * Since the IL language needs to know the location of newline
etisserant@0: * (token EOL -> '\n' ), we need one definition of whitespace
etisserant@0: * for each language...
etisserant@0: */
etisserant@0: /*
etisserant@0: * NOTE: we cannot use
etisserant@0: * st_whitespace [:space:]*
etisserant@0: * since we use {st_whitespace} as trailing context. In our case
etisserant@0: * this would not constitute "dangerous trailing context", but the
etisserant@0: * lexical generator (i.e. flex) does not know this (since it does
etisserant@0: * not know which characters belong to the set [:space:]), and will
etisserant@0: * generate a "dangerous trailing context" warning!
etisserant@0: * We use this alternative just to stop the flex utility from
etisserant@0: * generating the invalid (in this case) warning...
etisserant@0: *
etisserant@0: * The definitions below come in pairs. The only difference is that
etisserant@0: * st_whitespace_only includes '\n' in its character set, while
etisserant@0: * il_whitespace_only leaves it out.
etisserant@0: *  - xx_whitespace_only: a (possibly empty) run of blank characters;
etisserant@0: *  - xx_whitespace_text: one such run, or one {comment}, or one {pragma};
etisserant@0: *  - xx_whitespace: any number of the above, so it also matches the
etisserant@0: *    empty string;
etisserant@0: *  - the _no_pragma variants are the same, but do not accept {pragma}.
etisserant@0: * qualified_identifier is one {identifier}, optionally followed by further
etisserant@0: * {identifier}s, each introduced by a '.'. {identifier} itself is only
etisserant@0: * defined further down, in section B.1.1.
etisserant@0: */
etisserant@0:
etisserant@0: st_whitespace_only [ \f\n\r\t\v]*
etisserant@0: il_whitespace_only [ \f\r\t\v]*
etisserant@0:
etisserant@0: st_whitespace_text {st_whitespace_only}|{comment}|{pragma}
etisserant@0: il_whitespace_text {il_whitespace_only}|{comment}|{pragma}
etisserant@0:
etisserant@0: st_whitespace {st_whitespace_text}*
etisserant@0: il_whitespace {il_whitespace_text}*
etisserant@0:
etisserant@0: st_whitespace_text_no_pragma {st_whitespace_only}|{comment}
etisserant@0: il_whitespace_text_no_pragma {il_whitespace_only}|{comment}
etisserant@0:
etisserant@0: st_whitespace_no_pragma {st_whitespace_text_no_pragma}*
etisserant@0: il_whitespace_no_pragma {il_whitespace_text_no_pragma}*
etisserant@0:
mario@72: qualified_identifier {identifier}(\.{identifier})*
etisserant@0:
etisserant@0:
etisserant@0:
etisserant@0: /*****************************************/
etisserant@0: /* B.1.1 Letters, digits and identifiers */
etisserant@0: /*****************************************/
etisserant@0: /* NOTE: The following definitions only work if the host computer
etisserant@0: * is using the ASCII mapping. For e.g., with EBCDIC [A-Z]
etisserant@0: * contains non-alphabetic characters!
etisserant@0: * The correct way of doing it would be to use
etisserant@0: * the [:upper:] etc... definitions.
etisserant@0: *
etisserant@0: * Unfortunately, further on we need all printable
etisserant@0: * characters (i.e. [:print:]), but excluding '$'.
etisserant@0: * Flex does not allow sets to be composed by excluding
etisserant@0: * elements. Sets may only be constructed by adding new
etisserant@0: * elements, which means that we have to revert to
etisserant@0: * [\x20\x21\x23\x25\x26\x28-\x7E] for the definition
etisserant@0: * of the printable characters with the required exceptions.
etisserant@0: * The above also implies the use of ASCII, but now we have
etisserant@0: * no way to work around it!
etisserant@0: *
etisserant@0: * The conclusion is that our parser is limited to ASCII
etisserant@0: * based host computers!!
etisserant@0: *
etisserant@0: * hex_digit only lists the upper case letters A-F; the lower case ones
etisserant@0: * are accepted too, because this scanner is generated with the
etisserant@0: * case-insensitive option.
etisserant@0: * identifier starts with a letter, or with an underscore that is
etisserant@0: * immediately followed by a letter or digit; after that come any number
etisserant@0: * of letters or digits, each optionally preceded by one underscore.
etisserant@0: * So an identifier never contains two consecutive underscores, and
etisserant@0: * never ends in an underscore.
etisserant@0: */
etisserant@0: letter [A-Za-z]
etisserant@0: digit [0-9]
etisserant@0: octal_digit [0-7]
etisserant@0: hex_digit {digit}|[A-F]
etisserant@0: identifier ({letter}|(_({letter}|{digit})))((_?({letter}|{digit}))*)
etisserant@0:
etisserant@0: /*******************/
etisserant@0: /* B.1.2 Constants */
etisserant@0: /*******************/
etisserant@0:
etisserant@0: /******************************/
etisserant@0: /* B.1.2.1 Numeric literals */
etisserant@0: /******************************/
etisserant@0: integer {digit}((_?{digit})*)
msousa@547:
msousa@547: /* Some helper symbols for parsing TIME literals... */
msousa@547: integer_0_59 (0(_?))*([0-5](_?))?{digit}
msousa@547: integer_0_19 (0(_?))*([0-1](_?))?{digit}
msousa@547: integer_20_23 (0(_?))*2(_?)[0-3]
msousa@547: integer_0_23 {integer_0_19}|{integer_20_23}
msousa@547: integer_0_999 {digit}((_?{digit})?)((_?{digit})?)
msousa@547:
msousa@547:
etisserant@0: binary_integer 2#{bit}((_?{bit})*)
etisserant@0: bit [0-1]
etisserant@0: octal_integer 8#{octal_digit}((_?{octal_digit})*)
etisserant@0: hex_integer 16#{hex_digit}((_?{hex_digit})*)
etisserant@0: exponent [Ee]([+-]?){integer}
etisserant@0: /* The correct definition for real would be:
etisserant@0: * real {integer}\.{integer}({exponent}?)
etisserant@0: *
etisserant@0: * Unfortunately, the spec also defines fixed_point (B 1.2.3.1) as:
etisserant@0: * fixed_point {integer}\.{integer}
etisserant@0: *
etisserant@0: * This means that {integer}\.{integer} could be interpreted
etisserant@0: * as either a fixed_point or a real.
etisserant@0: * I have opted to interpret {integer}\.{integer} as a fixed_point.
etisserant@0: * In order to do this, the definition of real has been changed to:
etisserant@0: * real {integer}\.{integer}{exponent}
etisserant@0: *
etisserant@0: * This means that the syntax parser now needs to define a real to be
etisserant@0: * either a real_token or a fixed_point_token!
etisserant@0: */
etisserant@0: real {integer}\.{integer}{exponent}
etisserant@0:
etisserant@0:
etisserant@0: /*******************************/
etisserant@0: /* B.1.2.2 Character Strings */
etisserant@0: /*******************************/
etisserant@0: /*
etisserant@0: common_character_representation :=
etisserant@0:
etisserant@0: |'$$'
etisserant@0: |'$L'|'$N'|'$P'|'$R'|'$T'
etisserant@0: |'$l'|'$n'|'$p'|'$r'|'$t'
etisserant@0:
etisserant@0: NOTE: $ = 0x24
etisserant@0: " = 0x22
etisserant@0: ' = 0x27
etisserant@0:
etisserant@0: printable chars in ASCII: 0x20-0x7E
etisserant@0: */
etisserant@0:
etisserant@0: esc_char_u $L|$N|$P|$R|$T
etisserant@0: esc_char_l $l|$n|$p|$r|$t
etisserant@0: esc_char $$|{esc_char_u}|{esc_char_l}
etisserant@0: double_byte_char (${hex_digit}{hex_digit}{hex_digit}{hex_digit})
etisserant@0: single_byte_char (${hex_digit}{hex_digit})
etisserant@0:
etisserant@0: /* WARNING:
etisserant@0: * This definition is only valid in ASCII...
etisserant@0: *
etisserant@0: * Flex includes the function print_char() that defines
etisserant@0: * all printable characters portably (i.e. whatever character
etisserant@0: * encoding is currently being used , ASCII, EBCDIC, etc...)
etisserant@0: * Unfortunately, we cannot generate the definition of
etisserant@0: * common_character_representation portably, since flex
etisserant@0: * does not allow definition of sets by subtracting
etisserant@0: * elements in one set from another set.
etisserant@0: * This means we must build up the definition of
etisserant@0: * common_character_representation using only set addition,
etisserant@0: * which leaves us with the only choice of defining the
etisserant@0: * characters non-portably...
etisserant@0: */
etisserant@0: common_character_representation [\x20\x21\x23\x25\x26\x28-\x7E]|{esc_char}
etisserant@0: double_byte_character_representation $\"|'|{double_byte_char}|{common_character_representation}
etisserant@0: single_byte_character_representation $'|\"|{single_byte_char}|{common_character_representation}
etisserant@0:
etisserant@0:
etisserant@0: double_byte_character_string \"({double_byte_character_representation}*)\"
etisserant@0: single_byte_character_string '({single_byte_character_representation}*)'
etisserant@0:
etisserant@0:
etisserant@0: /************************/
etisserant@0: /* B 1.2.3.1 - Duration */
etisserant@0: /************************/
etisserant@0: fixed_point {integer}\.{integer}
etisserant@0:
msousa@547:
msousa@547: /* NOTE: The IEC 61131-3 v2 standard has an incorrect formal syntax definition of duration,
msousa@547: * as its definition does not match the standard's text.
msousa@547: * IEC 61131-3 v3 (committee draft) seems to have this fixed, so we use that
msousa@547: * definition instead!
msousa@547: *
msousa@547: * duration::= ('T' | 'TIME') '#' ['+'|'-'] interval
msousa@547: * interval::= days | hours | minutes | seconds | milliseconds
msousa@547: * fixed_point ::= integer [ '.' integer]
msousa@547: * days ::= fixed_point 'd' | integer 'd' ['_'] [ hours ]
msousa@547: * hours ::= fixed_point 'h' | integer 'h' ['_'] [ minutes ]
msousa@547: * minutes ::= fixed_point 'm' | integer 'm' ['_'] [ seconds ]
msousa@547: * seconds ::= fixed_point 's' | integer 's' ['_'] [ milliseconds ]
msousa@547: * milliseconds ::= fixed_point 'ms'
msousa@547: *
msousa@547: *
msousa@547: * The original IEC 61131-3 v2 definition is:
msousa@547: * duration ::= ('T' | 'TIME') '#' ['-'] interval
msousa@547: * interval ::= days | hours | minutes | seconds | milliseconds
msousa@547: * fixed_point ::= integer [ '.' integer]
msousa@547: * days ::= fixed_point 'd' | integer 'd' ['_'] hours
msousa@547: * hours ::= fixed_point 'h' | integer 'h' ['_'] minutes
msousa@547: * minutes ::= fixed_point 'm' | integer 'm' ['_'] seconds
msousa@547: * seconds ::= fixed_point 's' | integer 's' ['_'] milliseconds
msousa@547: * milliseconds ::= fixed_point 'ms'
msousa@547:
msousa@547: */
msousa@547:
msousa@547: interval_ms_X ({integer_0_999}(\.{integer})?)ms
msousa@686: interval_s_X {integer_0_59}s(_?{interval_ms_X})?|({integer_0_59}(\.{integer})?s)
msousa@686: interval_m_X {integer_0_59}m(_?{interval_s_X})?|({integer_0_59}(\.{integer})?m)
msousa@686: interval_h_X {integer_0_23}h(_?{interval_m_X})?|({integer_0_23}(\.{integer})?h)
msousa@547:
msousa@547: interval_ms {integer}ms|({fixed_point}ms)
msousa@547: interval_s {integer}s(_?{interval_ms_X})?|({fixed_point}s)
msousa@547: interval_m {integer}m(_?{interval_s_X})?|({fixed_point}m)
msousa@547: interval_h {integer}h(_?{interval_m_X})?|({fixed_point}h)
msousa@547: interval_d {integer}d(_?{interval_h_X})?|({fixed_point}d)
msousa@547:
msousa@547: interval {interval_ms}|{interval_s}|{interval_m}|{interval_h}|{interval_d}
msousa@547:
msousa@686:
msousa@547: /* to help provide nice error messages, we also parse an incorrect but plausible interval... */
msousa@547: /* NOTE that this erroneous interval will be parsed outside the time_literal_state, so must not
msousa@547: * be able to parse any other legal lexical construct (besides a legal interval, but that
msousa@547: * is OK as this rule will appear _after_ the rule to parse legal intervals!).
msousa@547: */
msousa@547: fixed_point_or_integer {fixed_point}|{integer}
msousa@547: erroneous_interval ({fixed_point_or_integer}d_?)?({fixed_point_or_integer}h_?)?({fixed_point_or_integer}m_?)?({fixed_point_or_integer}s_?)?({fixed_point_or_integer}ms)?
etisserant@0:
etisserant@0: /********************************************/
etisserant@0: /* B.1.4.1 Directly Represented Variables */
etisserant@0: /********************************************/
etisserant@0: /* The correct definition, if the standard were to be followed... */
mario@11:
mario@11: location_prefix [IQM]
mario@11: size_prefix [XBWDL]
mario@11: /* NOTE: the '.' separating the hierarchical address fields must be escaped.
mario@11: * An unescaped '.' means "any character" to flex, so text such as
mario@11: * %IW1+2 would be swallowed as a single direct variable.
mario@11: */
mario@11: direct_variable_standard %{location_prefix}({size_prefix}?){integer}((\.{integer})*)
mario@11:
etisserant@0:
etisserant@0: /* For the MatPLC, we will accept %<identifier>
etisserant@0: * as a direct variable, this being mapped onto the MatPLC point
etisserant@0: * named <identifier>
etisserant@0: */
etisserant@0: /* TODO: we should not restrict it to only the accepted syntax
etisserant@0: * of <identifier> as specified by the standard. MatPLC point names
etisserant@0: * have a more permissive syntax.
etisserant@0: *
etisserant@0: * e.g. "P__234"
etisserant@0: * Is a valid MatPLC point name, but not a valid <identifier> !!
etisserant@0: * The same happens with names such as "333", "349+23", etc...
etisserant@0: * How can we handle these more expressive names in our case?
etisserant@0: * Remember that some direct variable may remain anonymous, with
etisserant@0: * declarations such as:
etisserant@0: * VAR
etisserant@0: * AT %I3 : BYTE := 255;
etisserant@0: * END_VAR
mario@11: * in which case we are currently using "%I3" as the variable
mario@11: * name.
mario@11: */
msousa@547: /* direct_variable_matplc %{identifier} */
msousa@547: /* direct_variable {direct_variable_standard}|{direct_variable_matplc} */
msousa@547: direct_variable {direct_variable_standard}
etisserant@0:
etisserant@0: /******************************************/
etisserant@0: /* B 1.4.3 - Declaration & Initialisation */
etisserant@0: /******************************************/
etisserant@0: incompl_location %[IQM]\*
etisserant@0:
etisserant@0:
etisserant@0:
etisserant@0:
etisserant@0: %%
etisserant@0: /* fprintf(stderr, "flex: state %d\n", YY_START); */
etisserant@0:
etisserant@0: /*****************************************************/
etisserant@0: /*****************************************************/
etisserant@0: /*****************************************************/
etisserant@0: /***** *****/
etisserant@0: /***** *****/
etisserant@0: /***** F I R S T T H I N G S F I R S T *****/
etisserant@0: /***** *****/
etisserant@0: /***** *****/
etisserant@0: /*****************************************************/
etisserant@0: /*****************************************************/
etisserant@0: /*****************************************************/
etisserant@0:
mario@68: /***********************************************************/
mario@68: /* Handle requests sent by bison for flex to change state. */
mario@68: /***********************************************************/
mario@13: if (get_goto_body_state()) {
mario@68: yy_push_state(body_state);
mario@13: rst_goto_body_state();
mario@6: }
lbessard@3:
mario@68: if (get_goto_sfc_qualifier_state()) {
mario@68: yy_push_state(sfc_qualifier_state);
mario@68: rst_goto_sfc_qualifier_state();
mario@68: }
mario@68:
mario@86: if (get_goto_sfc_priority_state()) {
mario@86: yy_push_state(sfc_priority_state);
mario@86: rst_goto_sfc_priority_state();
mario@86: }
mario@86:
mario@74: if (get_goto_task_init_state()) {
mario@74: yy_push_state(task_init_state);
mario@74: rst_goto_task_init_state();
mario@74: }
mario@74:
mario@68: if (get_pop_state()) {
mario@68: yy_pop_state();
mario@68: rst_pop_state();
mario@68: }
mario@68:
mario@68: /***************************/
etisserant@0: /* Handle the pragmas! */
mario@68: /***************************/
etisserant@0:
etisserant@0: /* We start off by searching for the pragmas we handle in the lexical parser. */
etisserant@0: {file_include_pragma} unput_text(0); yy_push_state(include_beg);
etisserant@0:
msousa@267: /* Pragmas sent to syntax analyser (bison) */
msousa@267: {disable_code_generation_pragma} return disable_code_generation_pragma_token;
msousa@267: {enable_code_generation_pragma} return enable_code_generation_pragma_token;
msousa@267: {disable_code_generation_pragma} return disable_code_generation_pragma_token;
msousa@267: {enable_code_generation_pragma} return enable_code_generation_pragma_token;
msousa@267:
etisserant@0: /* Any other pragma we find, we just pass it up to the syntax parser... */
mario@68: /* Note that the state is exclusive, so we have to include it here too. */
etisserant@0: {pragma} {/* return the pragma without the enclosing '{' and '}' */
mario@13: yytext[strlen(yytext)-1] = '\0'; /* overwrite the last character (the closing '}') */
etisserant@0: yylval.ID=strdup(yytext+1); /* copy the text that follows the opening '{' */
etisserant@0: return pragma_token;
etisserant@0: }
mario@68: {pragma} {/* return the pragma without the enclosing '{' and '}' */
mario@13: yytext[strlen(yytext)-1] = '\0'; /* overwrite the last character (the closing '}') */
etisserant@0: yylval.ID=strdup(yytext+1); /* copy the text that follows the opening '{' */
etisserant@0: return pragma_token;
etisserant@0: }
etisserant@0:
etisserant@0:
etisserant@0: /*********************************/
etisserant@0: /* Handle the file includes! */
etisserant@0: /*********************************/
etisserant@0: {file_include_pragma_beg} BEGIN(include_filename);
etisserant@0:
etisserant@0: {file_include_pragma_filename} {
msousa@756: /* set the internal state variables of lexical analyser to process a new include file */
msousa@756: include_file(yytext);
etisserant@0: /* switch to whatever state was active before the include file */
etisserant@0: yy_pop_state();
etisserant@0: /* now process the new file... */
etisserant@0: }
etisserant@0:
etisserant@0:
mario@76: <<EOF>> { /* End of the current input: either the main file or an included file.
mario@76: *
mario@76: * NOTE: We must not change the value of include_stack_ptr
mario@76: * just yet. We must only decrement it if we are NOT
mario@76: * at the end of the main file.
mario@76: * If we have finished parsing the main file, then we
mario@76: * must leave include_stack_ptr at 0, in case the
mario@76: * parser is called once again with a new file.
mario@76: * (In fact, we currently do just that!)
mario@76: */
msousa@757: fclose(yyin);
lbessard@136: free(current_tracking);
mario@76: if (include_stack_ptr == 0) {
mario@73: /* yyterminate() terminates the scanner and returns a 0 to the
mario@73: * scanner's caller, indicating "all done".
mario@73: *
mario@73: * Our syntax parser (written with bison) has the token
mario@73: * END_OF_INPUT associated to the value 0, so even though
mario@73: * we don't explicitly return the token END_OF_INPUT
mario@73: * calling yyterminate() is equivalent to doing that.
mario@73: */
etisserant@0: yyterminate();
msousa@737: } else {
lbessard@136: --include_stack_ptr;
etisserant@0: yy_delete_buffer(YY_CURRENT_BUFFER);
etisserant@0: yy_switch_to_buffer((include_stack[include_stack_ptr]).buffer_state);
lbessard@136: current_tracking = include_stack[include_stack_ptr].env;
etisserant@0: /* removing constness of char *. This is safe actually,
etisserant@0: * since the only real const char * that is stored on the stack is
etisserant@1: * the first one (i.e. the one that gets stored in include_stack[0],
etisserant@0: * which is never free'd!
etisserant@0: */
msousa@286: /* NOTE: We do __NOT__ free the malloc()'d memory since
msousa@286: * pointers to this filename will be kept by many objects
msousa@286: * in the abstract syntax tree.
msousa@286: * This will later be used to provide correct error
msousa@286: * messages during semantic analysis (stage 3)
msousa@286: */
msousa@286: /* free((char *)current_filename); */
etisserant@0: current_filename = include_stack[include_stack_ptr].filename;
etisserant@0: yy_push_state(include_end);
etisserant@0: }
etisserant@0: }
etisserant@0:
etisserant@0: {file_include_pragma_end} yy_pop_state();
msousa@756: /* handle the artificial file includes created by include_string(), which do not end with a '}' */
msousa@756: . unput_text(0); yy_pop_state();
etisserant@0:
etisserant@0:
etisserant@0: /*********************************/
etisserant@0: /* Handle all the state changes! */
etisserant@0: /*********************************/
etisserant@0:
lbessard@3: /* INITIAL -> decl_state */
etisserant@0: {
mario@68: /* NOTE: how about functions that do not declare variables, and go directly to the body_state???
etisserant@0: * - According to Section 2.5.1.3 (Function Declaration), item 2 in the list, a FUNCTION
etisserant@0: * must have at least one input argument, so a correct declaration will have at least
etisserant@0: * one VAR_INPUT ... VAR_END construct!
etisserant@0: * - According to Section 2.5.2.2 (Function Block Declaration), a FUNCTION_BLOCK
etisserant@0: * must have at least one input argument, so a correct declaration will have at least
etisserant@0: * one VAR_INPUT ... VAR_END construct!
etisserant@0: * - According to Section 2.5.3 (Programs), a PROGRAM must have at least one input
etisserant@0: * argument, so a correct declaration will have at least one VAR_INPUT ... VAR_END
etisserant@0: * construct!
etisserant@0: *
etisserant@0: * All the above means that we needn't worry about PROGRAMs, FUNCTIONs or
mario@68: * FUNCTION_BLOCKs that do not have at least one VAR_END before the body_state.
etisserant@0: * If the code has an error, and no VAR_END before the body, we will simply
lbessard@3: * continue in the decl_state state, until the end of the FUNCTION, FUNCTION_BLOCK
etisserant@0: * or PROGRAM.
etisserant@0: */
lbessard@3: FUNCTION BEGIN(decl_state); return FUNCTION;
lbessard@3: FUNCTION_BLOCK BEGIN(decl_state); return FUNCTION_BLOCK;
lbessard@3: PROGRAM BEGIN(decl_state); return PROGRAM;
lbessard@3: CONFIGURATION BEGIN(config_state); return CONFIGURATION;
etisserant@0: }
etisserant@0:
mario@68: /* INITIAL -> body_state */
etisserant@0: /* required if the function, program, etc.. has no VAR block! */
mario@6: /* We comment it out since the standard does not allow this. */
mario@6: /* NOTE: Even if we were to include the following code, it */
mario@6: /* would have no effect whatsoever since the above */
mario@6: /* rules will take precedence! */
mario@6: /*
etisserant@0: {
mario@68: FUNCTION BEGIN(body_state); return FUNCTION;
mario@68: FUNCTION_BLOCK BEGIN(body_state); return FUNCTION_BLOCK;
mario@68: PROGRAM BEGIN(body_state); return PROGRAM;
etisserant@0: }
mario@6: */
mario@6:
mario@68: /* decl_state -> (body_state | sfc_state) */
lbessard@3: {
mario@6: END_VAR{st_whitespace}VAR {unput_text(strlen("END_VAR"));
mario@6: return END_VAR;
mario@6: }
mario@6: END_VAR{st_whitespace}INITIAL_STEP {unput_text(strlen("END_VAR"));
mario@9: yy_push_state(sfc_state);
mario@6: return END_VAR;
mario@6: }
mario@6: END_VAR{st_whitespace} {unput_text(strlen("END_VAR"));
mario@6: cmd_goto_body_state();
mario@6: return END_VAR;
mario@6: }
etisserant@0: }
etisserant@0:
mario@68: /* body_state -> (il_state | st_state) */
mario@68: {
mario@13: {st_whitespace_no_pragma} /* Eat any whitespace */
msousa@757: {qualified_identifier}{st_whitespace}":=" unput_text(0); BEGIN(st_state);
msousa@757: {direct_variable_standard}{st_whitespace}":=" unput_text(0); BEGIN(st_state);
lbessard@3: {qualified_identifier}"[" unput_text(0); BEGIN(st_state);
lbessard@3:
msousa@757: RETURN unput_text(0); BEGIN(st_state);
msousa@757: IF unput_text(0); BEGIN(st_state);
lbessard@3: CASE unput_text(0); BEGIN(st_state);
msousa@757: FOR unput_text(0); BEGIN(st_state);
lbessard@3: WHILE unput_text(0); BEGIN(st_state);
msousa@757: REPEAT unput_text(0); BEGIN(st_state);
lbessard@3: EXIT unput_text(0); BEGIN(st_state);
lbessard@151:
mario@6: /* ':=' occurs only in transitions, and not Function or FB bodies! */
msousa@757: := unput_text(0); BEGIN(st_state);
etisserant@0:
mario@68: /* Hopefully, the above rules (along with the last one),
mario@68: * used to distinguish ST from IL, are
mario@68: * enough to handle all occurrences. However, if
mario@68: * there is some situation where the compiler is getting confused,
mario@68: * we add the following rule to detect 'label:' in IL code. This will
mario@68: * allow the user to insert a label right at the beginning (which
mario@68: * will probably not be used further by his code) simply as a way
mario@68: * to force the compiler to interpret his code as IL code.
mario@68: */
mario@68: {identifier}{st_whitespace}":"{st_whitespace} unput_text(0); BEGIN(il_state);
etisserant@0:
etisserant@0: {identifier} {int token = get_identifier_token(yytext);
etisserant@0: if (token == prev_declared_fb_name_token) {
etisserant@0: /* the code has a call to a function block */
mario@68: /* NOTE: if we ever decide to allow the user to use IL operator tokens
mario@68: * (LD, ST, ...) as identifiers for variable names (including
mario@68: * function block instances), then the above inference/conclusion
mario@68: * may be incorrect, and this condition may have to be changed!
mario@68: */
lbessard@3: BEGIN(st_state);
etisserant@0: } else {
lbessard@3: BEGIN(il_state);
etisserant@0: }
etisserant@0: unput_text(0);
etisserant@0: }
mario@68:
lbessard@3: . unput_text(0); BEGIN(il_state);
mario@68: } /* end of body_state lexical parser */
lbessard@3:
lbessard@3: /* (il_state | st_state) -> $previous_state (decl_state or sfc_state) */
lbessard@3: {
lbessard@3: END_FUNCTION yy_pop_state(); unput_text(0);
lbessard@3: END_FUNCTION_BLOCK yy_pop_state(); unput_text(0);
lbessard@3: END_PROGRAM yy_pop_state(); unput_text(0);
lbessard@3: END_TRANSITION yy_pop_state(); unput_text(0);
mario@6: END_ACTION yy_pop_state(); unput_text(0);
lbessard@3: }
lbessard@3:
lbessard@4: /* sfc_state -> INITIAL */
lbessard@4: {
lbessard@4: END_FUNCTION yy_pop_state(); unput_text(0);
lbessard@4: END_FUNCTION_BLOCK yy_pop_state(); unput_text(0);
lbessard@4: END_PROGRAM yy_pop_state(); unput_text(0);
lbessard@4: }
lbessard@4:
lbessard@4: /* decl_state -> INITIAL */
lbessard@4: {
etisserant@0: END_FUNCTION BEGIN(INITIAL); return END_FUNCTION;
etisserant@0: END_FUNCTION_BLOCK BEGIN(INITIAL); return END_FUNCTION_BLOCK;
etisserant@0: END_PROGRAM BEGIN(INITIAL); return END_PROGRAM;
lbessard@3: }
etisserant@0: /* config -> INITIAL */
etisserant@0: END_CONFIGURATION BEGIN(INITIAL); return END_CONFIGURATION;
etisserant@0:
etisserant@0:
etisserant@0:
etisserant@0: /***************************************/
etisserant@0: /* Next is to remove all whitespace */
etisserant@0: /***************************************/
etisserant@0: /* NOTE: pragmas are handled right at the beginning... */
etisserant@0:
mario@75: {st_whitespace_no_pragma} /* Eat any whitespace */
mario@13: {il_whitespace_no_pragma} /* Eat any whitespace */
etisserant@0:
etisserant@0:
msousa@267:
etisserant@0: /*****************************************/
etisserant@0: /* B.1.1 Letters, digits and identifiers */
etisserant@0: /*****************************************/
etisserant@0: /* NOTE: 'R1', 'IN', etc... are IL operators, and therefore tokens
etisserant@0: * On the other hand, the spec does not define them as keywords,
etisserant@0: * which means they may be re-used for variable names, etc...!
etisserant@0: * The syntax parser already caters for the possibility of these
etisserant@0: * tokens being used for variable names in their declarations.
etisserant@0: * When they are declared, they will be added to the variable symbol table!
etisserant@0: * Further appearances of these tokens must no longer be parsed
etisserant@0: * as R1_tokens etc..., but rather as variable_name_tokens!
etisserant@0: *
etisserant@0: * That is why the first thing we do with identifiers, even before
etisserant@0: * checking whether they may be a 'keyword', is to check whether
etisserant@0: * they have been previously declared as a variable name,
etisserant@0: *
mario@13: * However, we have a dilemma! Should we here also check for
mario@13: * prev_declared_derived_function_name_token?
mario@13: * If we do, then the 'MOD' default library function (defined in
mario@13: * the standard) will always be returned as a function name, and
mario@13: * it will therefore not be possible to use it as an operator as
mario@13: * in the following ST expression 'X := Y MOD Z;' !
mario@13: * If we don't, then it will not be possible to use 'MOD'
mario@13: * as a function as in 'X := MOD(Y, Z);'
mario@13: * We solve this by NOT testing for function names here, and
mario@13: * handling this function and keyword clash in bison!
etisserant@0: */
mario@83: /*
etisserant@0: {identifier} {int token = get_identifier_token(yytext);
mario@81: // fprintf(stderr, "flex: analysing identifier '%s'...", yytext);
etisserant@0: if ((token == prev_declared_variable_name_token) ||
mario@13: // (token == prev_declared_derived_function_name_token) || // DO NOT add this condition!
etisserant@0: (token == prev_declared_fb_name_token)) {
mario@83: // if (token != identifier_token)
mario@83: // * NOTE: if we replace the above uncommented conditions with
mario@13: * the simple test of (token != identifier_token), then
mario@13: * 'MOD' et al must be removed from the
mario@13: * library_symbol_table as a default function name!
mario@83: * //
etisserant@0: yylval.ID=strdup(yytext);
mario@81: // fprintf(stderr, "returning token %d\n", token);
etisserant@0: return token;
etisserant@0: }
mario@83: // otherwise, leave it for the other lexical parser rules...
mario@81: // fprintf(stderr, "rejecting\n");
etisserant@0: REJECT;
etisserant@0: }
mario@83: */
etisserant@0:
etisserant@0: /******************************************************/
etisserant@0: /******************************************************/
etisserant@0: /******************************************************/
etisserant@0: /***** *****/
etisserant@0: /***** *****/
etisserant@0: /***** N O W D O T H E K E Y W O R D S *****/
etisserant@0: /***** *****/
etisserant@0: /***** *****/
etisserant@0: /******************************************************/
etisserant@0: /******************************************************/
etisserant@0: /******************************************************/
etisserant@0:
etisserant@0:
mario@82: EN return EN; /* Keyword */
mario@82: ENO return ENO; /* Keyword */
etisserant@0:
etisserant@0:
etisserant@0: /******************************/
etisserant@0: /* B 1.2.1 - Numeric Literals */
etisserant@0: /******************************/
mario@82: TRUE return TRUE; /* Keyword */
msousa@257: BOOL#1 return boolean_true_literal_token;
msousa@257: BOOL#TRUE return boolean_true_literal_token;
msousa@257: SAFEBOOL#1 {if (get_opt_safe_extensions()) {return safeboolean_true_literal_token;} else{REJECT;}} /* Keyword (Data Type) */
msousa@257: SAFEBOOL#TRUE {if (get_opt_safe_extensions()) {return safeboolean_true_literal_token;} else{REJECT;}} /* Keyword (Data Type) */
msousa@257:
mario@82: FALSE return FALSE; /* Keyword */
msousa@257: BOOL#0 return boolean_false_literal_token;
msousa@257: BOOL#FALSE return boolean_false_literal_token;
msousa@257: SAFEBOOL#0 {if (get_opt_safe_extensions()) {return safeboolean_false_literal_token;} else{REJECT;}} /* Keyword (Data Type) */
msousa@257: SAFEBOOL#FALSE {if (get_opt_safe_extensions()) {return safeboolean_false_literal_token;} else{REJECT;}} /* Keyword (Data Type) */
etisserant@0:
etisserant@0:
etisserant@0: /************************/
etisserant@0: /* B 1.2.3.1 - Duration */
etisserant@0: /************************/
mario@82: t# return T_SHARP; /* Delimiter */
mario@82: T# return T_SHARP; /* Delimiter */
mario@82: TIME return TIME; /* Keyword (Data Type) */
etisserant@0:
etisserant@0:
etisserant@0: /************************************/
etisserant@0: /* B 1.2.3.2 - Time of day and Date */
etisserant@0: /************************************/
mario@82: TIME_OF_DAY return TIME_OF_DAY; /* Keyword (Data Type) */
mario@82: TOD return TIME_OF_DAY; /* Keyword (Data Type) */
mario@82: DATE return DATE; /* Keyword (Data Type) */
mario@82: d# return D_SHARP; /* Delimiter */
mario@82: D# return D_SHARP; /* Delimiter */
mario@82: DATE_AND_TIME return DATE_AND_TIME; /* Keyword (Data Type) */
mario@82: DT return DATE_AND_TIME; /* Keyword (Data Type) */
etisserant@0:
etisserant@0:
etisserant@0: /***********************************/
etisserant@0: /* B 1.3.1 - Elementary Data Types */
etisserant@0: /***********************************/
msousa@257: BOOL return BOOL; /* Keyword (Data Type) */
msousa@257:
mario@82: BYTE return BYTE; /* Keyword (Data Type) */
mario@82: WORD return WORD; /* Keyword (Data Type) */
mario@82: DWORD return DWORD; /* Keyword (Data Type) */
mario@82: LWORD return LWORD; /* Keyword (Data Type) */
etisserant@0:
msousa@257: SINT return SINT; /* Keyword (Data Type) */
msousa@257: INT return INT; /* Keyword (Data Type) */
msousa@257: DINT return DINT; /* Keyword (Data Type) */
msousa@257: LINT return LINT; /* Keyword (Data Type) */
msousa@257:
msousa@257: USINT return USINT; /* Keyword (Data Type) */
msousa@257: UINT return UINT; /* Keyword (Data Type) */
msousa@257: UDINT return UDINT; /* Keyword (Data Type) */
msousa@257: ULINT return ULINT; /* Keyword (Data Type) */
msousa@257:
msousa@257: REAL return REAL; /* Keyword (Data Type) */
msousa@257: LREAL return LREAL; /* Keyword (Data Type) */
msousa@257:
msousa@257: WSTRING return WSTRING; /* Keyword (Data Type) */
msousa@257: STRING return STRING; /* Keyword (Data Type) */
msousa@257:
msousa@257: /* NOTE(review): the six rules below repeat patterns that already appear earlier
msousa@257: * in the rules (TIME, TIME_OF_DAY, TOD, DATE, DATE_AND_TIME, DT). For matches of
msousa@257: * equal length flex chooses the rule listed first, so these look unreachable;
msousa@257: * DT and TOD here also name different tokens than the earlier rules return
msousa@257: * (DATE_AND_TIME and TIME_OF_DAY) - TODO confirm which set is intended.
msousa@257: */
msousa@257: TIME return TIME; /* Keyword (Data Type) */
msousa@257: DATE return DATE; /* Keyword (Data Type) */
msousa@257: DT return DT; /* Keyword (Data Type) */
msousa@257: TOD return TOD; /* Keyword (Data Type) */
msousa@257: DATE_AND_TIME return DATE_AND_TIME; /* Keyword (Data Type) */
msousa@257: TIME_OF_DAY return TIME_OF_DAY; /* Keyword (Data Type) */
msousa@257:
msousa@257: /*****************************************************************/
msousa@257: /* Keywords defined in "Safety Software Technical Specification" */
msousa@257: /*****************************************************************/
msousa@257: /*
msousa@257: * NOTE: The following keywords are defined in
msousa@257: * "Safety Software Technical Specification,
msousa@257: * Part 1: Concepts and Function Blocks,
msousa@257: * Version 1.0 – Official Release"
msousa@257: * written by PLCopen - Technical Committee 5
msousa@257: *
msousa@257: * We only support these extensions and keywords
msousa@257: * if the appropriate command line option is given.
msousa@257: */
msousa@257: SAFEBOOL {if (get_opt_safe_extensions()) {return SAFEBOOL;} else {REJECT;}}
msousa@257:
msousa@257: SAFEBYTE {if (get_opt_safe_extensions()) {return SAFEBYTE;} else {REJECT;}}
msousa@257: SAFEWORD {if (get_opt_safe_extensions()) {return SAFEWORD;} else {REJECT;}}
msousa@257: SAFEDWORD {if (get_opt_safe_extensions()) {return SAFEDWORD;} else{REJECT;}}
msousa@257: SAFELWORD {if (get_opt_safe_extensions()) {return SAFELWORD;} else{REJECT;}}
msousa@257:
msousa@257: /* Safe floating point types: each keyword is only recognised when the safe */
msousa@257: /* extensions are enabled, and maps onto the token of the same name. */
msousa@257: SAFEREAL {if (get_opt_safe_extensions()) {return SAFEREAL;} else{REJECT;}}
msousa@257: SAFELREAL {if (get_opt_safe_extensions()) {return SAFELREAL;} else{REJECT;}}
msousa@257:
msousa@257: SAFESINT {if (get_opt_safe_extensions()) {return SAFESINT;} else{REJECT;}}
msousa@257: SAFEINT {if (get_opt_safe_extensions()) {return SAFEINT;} else{REJECT;}}
msousa@257: SAFEDINT {if (get_opt_safe_extensions()) {return SAFEDINT;} else{REJECT;}}
msousa@257: SAFELINT {if (get_opt_safe_extensions()) {return SAFELINT;} else{REJECT;}}
msousa@257:
msousa@257: SAFEUSINT {if (get_opt_safe_extensions()) {return SAFEUSINT;} else{REJECT;}}
msousa@257: SAFEUINT {if (get_opt_safe_extensions()) {return SAFEUINT;} else{REJECT;}}
msousa@257: SAFEUDINT {if (get_opt_safe_extensions()) {return SAFEUDINT;} else{REJECT;}}
msousa@257: SAFEULINT {if (get_opt_safe_extensions()) {return SAFEULINT;} else{REJECT;}}
msousa@257:
msousa@257: /* SAFESTRING and SAFEWSTRING are not yet supported, i.e. checked correctly, in the semantic analyser (stage 3) */
msousa@257: /* so it is best not to support them at all... */
msousa@257: /*
msousa@257: SAFEWSTRING {if (get_opt_safe_extensions()) {return SAFEWSTRING;} else{REJECT;}}
msousa@257: SAFESTRING {if (get_opt_safe_extensions()) {return SAFESTRING;} else{REJECT;}}
msousa@257: */
msousa@257:
msousa@257: SAFETIME {if (get_opt_safe_extensions()) {return SAFETIME;} else{REJECT;}}
msousa@257: SAFEDATE {if (get_opt_safe_extensions()) {return SAFEDATE;} else{REJECT;}}
msousa@257: SAFEDT {if (get_opt_safe_extensions()) {return SAFEDT;} else{REJECT;}}
msousa@257: SAFETOD {if (get_opt_safe_extensions()) {return SAFETOD;} else{REJECT;}}
msousa@257: SAFEDATE_AND_TIME {if (get_opt_safe_extensions()) {return SAFEDATE_AND_TIME;} else{REJECT;}}
msousa@257: SAFETIME_OF_DAY {if (get_opt_safe_extensions()) {return SAFETIME_OF_DAY;} else{REJECT;}}
etisserant@0:
etisserant@0: /********************************/
etisserant@0: /* B 1.3.2 - Generic data types */
etisserant@0: /********************************/
etisserant@0: /* Strangely, the following symbols do not seem to be required! */
etisserant@0: /* But we include them so they become reserved words, and do not
etisserant@0: * get passed up to bison as an identifier...
etisserant@0: */
mario@82: ANY return ANY; /* Keyword (Data Type) */
mario@82: ANY_DERIVED return ANY_DERIVED; /* Keyword (Data Type) */
mario@82: ANY_ELEMENTARY return ANY_ELEMENTARY; /* Keyword (Data Type) */
mario@82: ANY_MAGNITUDE return ANY_MAGNITUDE; /* Keyword (Data Type) */
mario@82: ANY_NUM return ANY_NUM; /* Keyword (Data Type) */
mario@82: ANY_REAL return ANY_REAL; /* Keyword (Data Type) */
mario@82: ANY_INT return ANY_INT; /* Keyword (Data Type) */
mario@82: ANY_BIT return ANY_BIT; /* Keyword (Data Type) */
mario@82: ANY_STRING return ANY_STRING; /* Keyword (Data Type) */
mario@82: ANY_DATE return ANY_DATE; /* Keyword (Data Type) */
etisserant@0:
etisserant@0:
etisserant@0: /********************************/
etisserant@0: /* B 1.3.3 - Derived data types */
etisserant@0: /********************************/
mario@82: ":=" return ASSIGN; /* Delimiter */
mario@82: ".." return DOTDOT; /* Delimiter */
mario@82: TYPE return TYPE; /* Keyword */
mario@82: END_TYPE return END_TYPE; /* Keyword */
mario@82: ARRAY return ARRAY; /* Keyword */
mario@82: OF return OF; /* Keyword */
mario@82: STRUCT return STRUCT; /* Keyword */
mario@82: END_STRUCT return END_STRUCT; /* Keyword */
etisserant@0:
etisserant@0:
etisserant@0: /*********************/
etisserant@0: /* B 1.4 - Variables */
etisserant@0: /*********************/
etisserant@0:
etisserant@0: /******************************************/
etisserant@0: /* B 1.4.3 - Declaration & Initialisation */
etisserant@0: /******************************************/
mario@82: VAR_INPUT return VAR_INPUT; /* Keyword */
mario@82: VAR_OUTPUT return VAR_OUTPUT; /* Keyword */
mario@82: VAR_IN_OUT return VAR_IN_OUT; /* Keyword */
mario@82: VAR_EXTERNAL return VAR_EXTERNAL; /* Keyword */
mario@82: VAR_GLOBAL return VAR_GLOBAL; /* Keyword */
mario@82: END_VAR return END_VAR; /* Keyword */
mario@82: RETAIN return RETAIN; /* Keyword */
mario@82: NON_RETAIN return NON_RETAIN; /* Keyword */
mario@82: R_EDGE return R_EDGE; /* Keyword */
mario@82: F_EDGE return F_EDGE; /* Keyword */
mario@82: AT return AT; /* Keyword */
etisserant@0:
etisserant@0:
etisserant@0: /***********************/
etisserant@0: /* B 1.5.1 - Functions */
etisserant@0: /***********************/
mario@82: FUNCTION return FUNCTION; /* Keyword */
mario@82: END_FUNCTION return END_FUNCTION; /* Keyword */
mario@82: VAR return VAR; /* Keyword */
mario@82: CONSTANT return CONSTANT; /* Keyword */
etisserant@0:
etisserant@0:
etisserant@0: /*****************************/
etisserant@0: /* B 1.5.2 - Function Blocks */
etisserant@0: /*****************************/
mario@82: FUNCTION_BLOCK return FUNCTION_BLOCK; /* Keyword */
mario@82: END_FUNCTION_BLOCK return END_FUNCTION_BLOCK; /* Keyword */
mario@82: VAR_TEMP return VAR_TEMP; /* Keyword */
mario@82: VAR return VAR; /* Keyword */
mario@82: NON_RETAIN return NON_RETAIN; /* Keyword */
mario@82: END_VAR return END_VAR; /* Keyword */
etisserant@0:
etisserant@0:
etisserant@0: /**********************/
etisserant@0: /* B 1.5.3 - Programs */
etisserant@0: /**********************/
mario@82: PROGRAM return PROGRAM; /* Keyword */
mario@82: END_PROGRAM return END_PROGRAM; /* Keyword */
etisserant@0:
etisserant@0:
etisserant@0: /********************************************/
etisserant@0: /* B 1.6 Sequential Function Chart elements */
etisserant@0: /********************************************/
etisserant@0: /* NOTE: the following identifiers/tokens clash with the R and S IL operators, as well
etisserant@0: * as other identifiers that may be used as variable names inside IL and ST programs.
etisserant@0: * They will have to be handled when we include parsing of SFC... For now, simply
etisserant@0: * ignore them!
etisserant@0: */
etisserant@1:
mario@82: ACTION return ACTION; /* Keyword */
mario@82: END_ACTION return END_ACTION; /* Keyword */
mario@82:
mario@82: TRANSITION return TRANSITION; /* Keyword */
mario@82: END_TRANSITION return END_TRANSITION; /* Keyword */
mario@82: FROM return FROM; /* Keyword */
mario@82: TO return TO; /* Keyword */
mario@82:
mario@82: INITIAL_STEP return INITIAL_STEP; /* Keyword */
mario@82: STEP return STEP; /* Keyword */
mario@82: END_STEP return END_STEP; /* Keyword */
etisserant@0:
mario@74: /* PRIORITY is not a keyword, so we only return it when
mario@74: * it is explicitly required and we are not expecting any identifiers
mario@74: * that could also use the same letter sequence (i.e. an identifier: priority)
mario@74: */
mario@86: PRIORITY return PRIORITY;
mario@74:
mario@68: {
etisserant@0: L return L;
etisserant@0: D return D;
etisserant@0: SD return SD;
etisserant@0: DS return DS;
etisserant@0: SL return SL;
etisserant@0: N return N;
etisserant@0: P return P;
Laurent@627: P0 return P0;
Laurent@627: P1 return P1;
etisserant@0: R return R;
etisserant@0: S return S;
etisserant@1: }
etisserant@0:
etisserant@0:
etisserant@0: /********************************/
etisserant@0: /* B 1.7 Configuration elements */
etisserant@0: /********************************/
mario@82: CONFIGURATION return CONFIGURATION; /* Keyword */
mario@82: END_CONFIGURATION return END_CONFIGURATION; /* Keyword */
mario@82: TASK return TASK; /* Keyword */
mario@82: RESOURCE return RESOURCE; /* Keyword */
mario@82: ON return ON; /* Keyword */
mario@82: END_RESOURCE return END_RESOURCE; /* Keyword */
mario@82: VAR_CONFIG return VAR_CONFIG; /* Keyword */
mario@82: VAR_ACCESS return VAR_ACCESS; /* Keyword */
mario@82: END_VAR return END_VAR; /* Keyword */
mario@82: WITH return WITH; /* Keyword */
mario@82: PROGRAM return PROGRAM; /* Keyword */
mario@82: RETAIN return RETAIN; /* Keyword */
mario@82: NON_RETAIN return NON_RETAIN; /* Keyword */
mario@82: READ_WRITE return READ_WRITE; /* Keyword */
mario@82: READ_ONLY return READ_ONLY; /* Keyword */
mario@74:
mario@74: /* PRIORITY, SINGLE and INTERVAL are not keywords, so we only return them when
mario@74: * they are explicitly required and we are not expecting any identifiers
mario@74: * that could also use the same letter sequence (i.e. an identifier: priority, ...)
mario@74: */
mario@74: {
etisserant@0: PRIORITY return PRIORITY;
etisserant@0: SINGLE return SINGLE;
etisserant@0: INTERVAL return INTERVAL;
mario@74: }
etisserant@0:
etisserant@0: /***********************************/
etisserant@0: /* B 2.1 Instructions and Operands */
etisserant@0: /***********************************/
lbessard@3: \n return EOL;
etisserant@0:
etisserant@0:
etisserant@0: /*******************/
etisserant@0: /* B 2.2 Operators */
etisserant@0: /*******************/
etisserant@0: /* NOTE: we can't have flex return the same token for
etisserant@0: * ANDN and &N, neither for AND and &, since
etisserant@0: * AND and ANDN are considered valid variable,
etisserant@0: * function or function block type names!
etisserant@0: * This means that the parser may decide that the
etisserant@0: * AND or ANDN strings found in the source code
etisserant@0: * are being used as variable names
etisserant@0: * and not as operators, and will therefore transform
etisserant@0: * these tokens into identifier tokens!
etisserant@0: * We can't have the parser thinking that the source
etisserant@0: * code contained the string AND (which may be interpreted
etisserant@0: * as a variable name) when in reality the source code
etisserant@0: * merely contained the character &, so we use two
etisserant@0: * different tokens for & and AND (and similarly
etisserant@0: * ANDN and &N)!
etisserant@0: */
mario@68: /* The following tokens clash with ST expression operators and Standard Functions */
mario@73: /* They are also keywords! */
mario@82: AND return AND; /* Keyword */
mario@82: MOD return MOD; /* Keyword */
mario@82: OR return OR; /* Keyword */
mario@82: XOR return XOR; /* Keyword */
mario@82: NOT return NOT; /* Keyword */
mario@68:
mario@68: /* The following tokens clash with Standard Functions */
mario@82: /* They are keywords because they are a function name */
mario@73: {
mario@82: ADD return ADD; /* Keyword (Standard Function) */
mario@82: DIV return DIV; /* Keyword (Standard Function) */
mario@82: EQ return EQ; /* Keyword (Standard Function) */
mario@82: GE return GE; /* Keyword (Standard Function) */
mario@82: GT return GT; /* Keyword (Standard Function) */
mario@82: LE return LE; /* Keyword (Standard Function) */
mario@82: LT return LT; /* Keyword (Standard Function) */
mario@82: MUL return MUL; /* Keyword (Standard Function) */
mario@82: NE return NE; /* Keyword (Standard Function) */
mario@82: SUB return SUB; /* Keyword (Standard Function) */
mario@73: }
mario@68:
mario@68: /* The following tokens clash with SFC action qualifiers */
mario@82: /* They are not keywords! */
mario@73: {
mario@68: S return S;
mario@68: R return R;
mario@73: }
mario@68:
mario@68: /* The following tokens clash with ST expression operators */
mario@82: & return AND2; /* NOT a Delimiter! */
mario@68:
mario@68: /* The following tokens have no clashes */
mario@82: /* They are not keywords! */
mario@73: {
etisserant@0: LD return LD;
etisserant@0: LDN return LDN;
etisserant@0: ST return ST;
etisserant@0: STN return STN;
etisserant@0: S1 return S1;
etisserant@0: R1 return R1;
etisserant@0: CLK return CLK;
etisserant@0: CU return CU;
etisserant@0: CD return CD;
etisserant@0: PV return PV;
etisserant@0: IN return IN;
etisserant@0: PT return PT;
etisserant@0: ANDN return ANDN;
etisserant@0: &N return ANDN2;
etisserant@0: ORN return ORN;
etisserant@0: XORN return XORN;
etisserant@0: CAL return CAL;
etisserant@0: CALC return CALC;
etisserant@0: CALCN return CALCN;
etisserant@0: RET return RET;
etisserant@0: RETC return RETC;
etisserant@0: RETCN return RETCN;
etisserant@0: JMP return JMP;
etisserant@0: JMPC return JMPC;
etisserant@0: JMPCN return JMPCN;
mario@73: }
etisserant@0:
etisserant@0: /***********************/
etisserant@0: /* B 3.1 - Expressions */
etisserant@0: /***********************/
mario@82: "**" return OPER_EXP; /* NOT a Delimiter! */
mario@82: "<>" return OPER_NE; /* NOT a Delimiter! */
mario@82: ">=" return OPER_GE; /* NOT a Delimiter! */
mario@82: "<=" return OPER_LE; /* NOT a Delimiter! */
mario@82: & return AND2; /* NOT a Delimiter! */
mario@82: AND return AND; /* Keyword */
mario@82: XOR return XOR; /* Keyword */
mario@82: OR return OR; /* Keyword */
mario@82: NOT return NOT; /* Keyword */
mario@82: MOD return MOD; /* Keyword */
etisserant@0:
etisserant@0:
etisserant@0: /*****************************************/
etisserant@0: /* B 3.2.2 Subprogram Control Statements */
etisserant@0: /*****************************************/
mario@82: := return ASSIGN; /* Delimiter */
mario@82: => return SENDTO; /* Delimiter */
mario@82: RETURN return RETURN; /* Keyword */
etisserant@0:
etisserant@0:
etisserant@0: /********************************/
etisserant@0: /* B 3.2.3 Selection Statements */
etisserant@0: /********************************/
mario@82: IF return IF; /* Keyword */
mario@82: THEN return THEN; /* Keyword */
mario@82: ELSIF return ELSIF; /* Keyword */
mario@82: ELSE return ELSE; /* Keyword */
mario@82: END_IF return END_IF; /* Keyword */
mario@82:
mario@82: CASE return CASE; /* Keyword */
mario@82: OF return OF; /* Keyword */
mario@82: ELSE return ELSE; /* Keyword */
mario@82: END_CASE return END_CASE; /* Keyword */
etisserant@0:
etisserant@0:
etisserant@0: /********************************/
etisserant@0: /* B 3.2.4 Iteration Statements */
etisserant@0: /********************************/
mario@82: FOR return FOR; /* Keyword */
mario@82: TO return TO; /* Keyword */
mario@82: BY return BY; /* Keyword */
mario@82: DO return DO; /* Keyword */
mario@82: END_FOR return END_FOR; /* Keyword */
mario@82:
mario@82: WHILE return WHILE; /* Keyword */
mario@82: DO return DO; /* Keyword */
mario@82: END_WHILE return END_WHILE; /* Keyword */
mario@82:
mario@82: REPEAT return REPEAT; /* Keyword */
mario@82: UNTIL return UNTIL; /* Keyword */
mario@82: END_REPEAT return END_REPEAT; /* Keyword */
mario@82:
mario@82: EXIT return EXIT; /* Keyword */
etisserant@0:
etisserant@0:
msousa@257:
etisserant@0:
etisserant@0:
etisserant@0:
etisserant@0: /********************************************************/
etisserant@0: /********************************************************/
etisserant@0: /********************************************************/
etisserant@0: /***** *****/
etisserant@0: /***** *****/
etisserant@0: /***** N O W W O R K W I T H V A L U E S *****/
etisserant@0: /***** *****/
etisserant@0: /***** *****/
etisserant@0: /********************************************************/
etisserant@0: /********************************************************/
etisserant@0: /********************************************************/
etisserant@0:
etisserant@0:
etisserant@0: /********************************************/
etisserant@0: /* B.1.4.1 Directly Represented Variables */
etisserant@0: /********************************************/
lbessard@175: {direct_variable} {yylval.ID=strdup(yytext); return get_direct_variable_token(yytext);}
etisserant@0:
etisserant@0:
etisserant@0: /******************************************/
etisserant@0: /* B 1.4.3 - Declaration & Initialisation */
etisserant@0: /******************************************/
etisserant@0: {incompl_location} {yylval.ID=strdup(yytext); return incompl_location_token;}
etisserant@0:
etisserant@0:
etisserant@0: /************************/
etisserant@0: /* B 1.2.3.1 - Duration */
etisserant@0: /************************/
etisserant@0: {fixed_point} {yylval.ID=strdup(yytext); return fixed_point_token;}
msousa@547: {interval} {/*fprintf(stderr, "entering time_literal_state ##%s##\n", yytext);*/ unput_and_mark('#'); yy_push_state(time_literal_state);}
msousa@547: {erroneous_interval} {return erroneous_interval_token;}
msousa@547:
msousa@547: {
msousa@547: {integer}d {yylval.ID=strdup(yytext); yylval.ID[yyleng-1] = '\0'; return integer_d_token;}
msousa@547: {integer}h {yylval.ID=strdup(yytext); yylval.ID[yyleng-1] = '\0'; return integer_h_token;}
msousa@547: {integer}m {yylval.ID=strdup(yytext); yylval.ID[yyleng-1] = '\0'; return integer_m_token;}
msousa@547: {integer}s {yylval.ID=strdup(yytext); yylval.ID[yyleng-1] = '\0'; return integer_s_token;}
msousa@547: {integer}ms {yylval.ID=strdup(yytext); yylval.ID[yyleng-2] = '\0'; return integer_ms_token;}
msousa@547: {fixed_point}d {yylval.ID=strdup(yytext); yylval.ID[yyleng-1] = '\0'; return fixed_point_d_token;}
msousa@547: {fixed_point}h {yylval.ID=strdup(yytext); yylval.ID[yyleng-1] = '\0'; return fixed_point_h_token;}
msousa@547: {fixed_point}m {yylval.ID=strdup(yytext); yylval.ID[yyleng-1] = '\0'; return fixed_point_m_token;}
msousa@547: {fixed_point}s {yylval.ID=strdup(yytext); yylval.ID[yyleng-1] = '\0'; return fixed_point_s_token;}
msousa@547: {fixed_point}ms {yylval.ID=strdup(yytext); yylval.ID[yyleng-2] = '\0'; return fixed_point_ms_token;}
msousa@547:
msousa@547: _ /* do nothing - eat it up!*/
msousa@616: \# {/*fprintf(stderr, "popping from time_literal_state (###)\n");*/ yy_pop_state(); return end_interval_token;}
msousa@616: . {/*fprintf(stderr, "time_literal_state: found invalid character '%s'. Aborting!\n", yytext);*/ ERROR;}
msousa@547: \n {ERROR;}
msousa@547: }
etisserant@0: /*******************************/
etisserant@0: /* B.1.2.2 Character Strings */
etisserant@0: /*******************************/
etisserant@0: {double_byte_character_string} {yylval.ID=strdup(yytext); return double_byte_character_string_token;}
etisserant@0: {single_byte_character_string} {yylval.ID=strdup(yytext); return single_byte_character_string_token;}
etisserant@0:
etisserant@0:
etisserant@0: /******************************/
etisserant@0: /* B.1.2.1 Numeric literals */
etisserant@0: /******************************/
etisserant@0: {integer} {yylval.ID=strdup(yytext); return integer_token;}
etisserant@0: {real} {yylval.ID=strdup(yytext); return real_token;}
etisserant@0: {binary_integer} {yylval.ID=strdup(yytext); return binary_integer_token;}
etisserant@0: {octal_integer} {yylval.ID=strdup(yytext); return octal_integer_token;}
etisserant@0: {hex_integer} {yylval.ID=strdup(yytext); return hex_integer_token;}
etisserant@0:
etisserant@0:
etisserant@0: /*****************************************/
etisserant@0: /* B.1.1 Letters, digits and identifiers */
etisserant@0: /*****************************************/
lbessard@3: {identifier}/({st_whitespace})"=>" {yylval.ID=strdup(yytext); return sendto_identifier_token;}
lbessard@3: {identifier}/({il_whitespace})"=>" {yylval.ID=strdup(yytext); return sendto_identifier_token;}
etisserant@0: {identifier} {yylval.ID=strdup(yytext);
mario@75: // printf("returning identifier...: %s, %d\n", yytext, get_identifier_token(yytext));
etisserant@0: return get_identifier_token(yytext);}
etisserant@0:
etisserant@0:
etisserant@0:
etisserant@0:
etisserant@0:
etisserant@0:
etisserant@0: /************************************************/
etisserant@0: /************************************************/
etisserant@0: /************************************************/
etisserant@0: /***** *****/
etisserant@0: /***** *****/
etisserant@0: /***** T H E L E F T O V E R S . . . *****/
etisserant@0: /***** *****/
etisserant@0: /***** *****/
etisserant@0: /************************************************/
etisserant@0: /************************************************/
etisserant@0: /************************************************/
etisserant@0:
etisserant@0: /* do the single character tokens...
etisserant@0: *
etisserant@0: * e.g.: ':' '(' ')' '+' '*' ...
etisserant@0: */
etisserant@0: . {return yytext[0];}
etisserant@0:
etisserant@0:
etisserant@0: %%
etisserant@0:
etisserant@0:
msousa@757: /*************************/
msousa@757: /* Tracking Functions... */
msousa@757: /*************************/
msousa@757:
msousa@757: #define MAX_BUFFER_LENGTH 1000
msousa@757:
msousa@757: tracking_t *GetNewTracking(FILE* in_file) {
msousa@757: tracking_t* new_env = new tracking_t;
msousa@757: new_env->eof = 0;
msousa@757: new_env->lineNumber = 0;
msousa@757: new_env->currentChar = 0;
msousa@757: new_env->lineLength = 0;
msousa@757: new_env->currentTokenStart = 0;
msousa@757: new_env->buffer = (char*)malloc(MAX_BUFFER_LENGTH);
msousa@757: new_env->in_file = in_file;
msousa@757: return new_env;
msousa@757: }
msousa@757:
msousa@757:
msousa@757: /* GetNextChar: reads a character from input */
msousa@757: int GetNextChar(char *b, int maxBuffer) {
msousa@757: char *p;
msousa@757:
msousa@757: if ( current_tracking->eof )
msousa@757: return 0;
msousa@757:
msousa@757: while ( current_tracking->currentChar >= current_tracking->lineLength ) {
msousa@757: current_tracking->currentChar = 0;
msousa@757: current_tracking->currentTokenStart = 1;
msousa@757: current_tracking->eof = false;
msousa@757:
msousa@757: p = fgets(current_tracking->buffer, MAX_BUFFER_LENGTH, current_tracking->in_file);
msousa@757: if ( p == NULL ) {
msousa@757: if ( ferror(current_tracking->in_file) )
msousa@757: return 0;
msousa@757: current_tracking->eof = true;
msousa@757: return 0;
msousa@757: }
msousa@757:
msousa@757: current_tracking->lineNumber++;
msousa@757: current_tracking->lineLength = strlen(current_tracking->buffer);
msousa@757: }
msousa@757:
msousa@757: b[0] = current_tracking->buffer[current_tracking->currentChar];
msousa@757: if (b[0] == ' ' || b[0] == '\t')
msousa@757: current_tracking->currentTokenStart++;
msousa@757: current_tracking->currentChar++;
msousa@757:
msousa@757: return b[0]==0?0:1;
msousa@757: }
msousa@757:
msousa@757:
msousa@757:
msousa@757:
etisserant@0: /***********************************/
etisserant@0: /* Utility function definitions... */
etisserant@0: /***********************************/
etisserant@0:
etisserant@0: /* print the include file stack to stderr... */
etisserant@0: void print_include_stack(void) {
etisserant@0: int i;
etisserant@0:
etisserant@0: if ((include_stack_ptr - 1) >= 0)
etisserant@0: fprintf (stderr, "in file ");
etisserant@0: for (i = include_stack_ptr - 1; i >= 0; i--)
lbessard@136: fprintf (stderr, "included from file %s:%d\n", include_stack[i].filename, include_stack[i].env->lineNumber);
etisserant@0: }
etisserant@0:
etisserant@0:
msousa@756:
msousa@756: /* set the internal state variables of lexical analyser to process a new include file */
msousa@756: void handle_include_file_(FILE *filehandle, const char *filename) {
msousa@756: if (include_stack_ptr >= MAX_INCLUDE_DEPTH) {
msousa@756: fprintf(stderr, "Includes nested too deeply\n");
msousa@756: exit( 1 );
msousa@756: }
msousa@756:
msousa@756: yyin = filehandle;
msousa@756:
msousa@756: include_stack[include_stack_ptr].buffer_state = YY_CURRENT_BUFFER;
msousa@756: include_stack[include_stack_ptr].env = current_tracking;
msousa@756: include_stack[include_stack_ptr].filename = current_filename;
msousa@756:
msousa@756: current_filename = strdup(filename);
msousa@756: current_tracking = GetNewTracking(yyin);
msousa@756: include_stack_ptr++;
msousa@756:
msousa@756: /* switch input buffer to new file... */
msousa@756: yy_switch_to_buffer(yy_create_buffer(yyin, YY_BUF_SIZE));
msousa@756: }
msousa@756:
msousa@756:
msousa@756:
msousa@756: /* insert the code (in ) into the source code we are parsing.
msousa@756: * This is done by creating an artificial file with that new source code, and then 'including' the file
msousa@756: */
msousa@757: void include_string_(const char *source_code) {
msousa@756: FILE *tmp_file = tmpfile();
msousa@756:
msousa@756: if(tmp_file == NULL) {
msousa@756: perror("Error creating temp file.");
msousa@756: exit(EXIT_FAILURE);
msousa@756: }
msousa@756:
msousa@756: fwrite((void *)source_code, 1, strlen(source_code), tmp_file);
msousa@756: rewind(tmp_file);
msousa@756:
msousa@756: /* now parse the tmp file, by asking flex to handle it as if it had been included with the (*#include ... *) pragma... */
msousa@756: handle_include_file_(tmp_file, "");
msousa@756: //fclose(tmp_file); /* do NOT close file. It must only be closed when we finish reading from it! */
msousa@756: }
msousa@756:
msousa@756:
msousa@756:
msousa@756: /* Open an include file, and set the internal state variables of lexical analyser to process a new include file */
msousa@756: void include_file(const char *filename) {
msousa@756: FILE *filehandle = NULL;
msousa@756:
msousa@756: for (int i = 0; (INCLUDE_DIRECTORIES[i] != NULL) && (filehandle == NULL); i++) {
msousa@756: char *full_name;
msousa@756: full_name = strdup3(INCLUDE_DIRECTORIES[i], "/", filename);
msousa@756: if (full_name == NULL) {
msousa@756: fprintf(stderr, "Out of memory!\n");
msousa@756: exit( 1 );
msousa@756: }
msousa@756: filehandle = fopen(full_name, "r");
msousa@756: free(full_name);
msousa@756: }
msousa@756:
msousa@756: if (NULL == filehandle) {
msousa@756: fprintf(stderr, "Error opening included file %s\n", filename);
msousa@756: exit( 1 );
msousa@756: }
msousa@756:
msousa@756: /* now process the new file... */
msousa@756: handle_include_file_(filehandle, filename);
msousa@756: }
msousa@756:
msousa@756:
msousa@756:
msousa@756:
msousa@756:
etisserant@0: /* return all the text in the current token back to the input stream, except the first n chars. */
etisserant@0: void unput_text(unsigned int n) {
etisserant@0: /* it seems that flex has a bug in that it will not correctly count the line numbers
etisserant@0: * if we return newlines back to the input stream. These newlines will be re-counted
etisserant@0: * a second time when they are processed again by flex.
etisserant@0: * We therefore determine how many newlines are in the text we are returning,
etisserant@0: * and decrement the line counter acordingly...
etisserant@0: */
lbessard@136: /*unsigned int i;
lbessard@136:
etisserant@0: for (i = n; i < strlen(yytext); i++)
etisserant@0: if (yytext[i] == '\n')
lbessard@136: current_tracking->lineNumber--;*/
etisserant@0:
etisserant@0: /* now return all the text back to the input stream... */
etisserant@0: yyless(n);
etisserant@0: }
etisserant@0:
etisserant@0:
msousa@547: /* return all the text in the current token back to the input stream,
msousa@547: * but first return to the stream an additional character to mark the end of the token.
msousa@547: */
msousa@547: void unput_and_mark(const char c) {
msousa@547: char *yycopy = strdup( yytext ); /* unput() destroys yytext, so we copy it first */
msousa@547: unput(c);
msousa@547: for (int i = yyleng-1; i >= 0; i--)
msousa@547: unput(yycopy[i]);
msousa@547:
msousa@547: free(yycopy);
msousa@547: }
msousa@547:
msousa@547:
msousa@547:
/* Called by flex when it reaches the end-of-file.
 *
 * The return value tells flex how to proceed:
 *   0 - another input file has been opened, scanning continues with it;
 *   1 - there is no further input, scanning stops.
 *
 * No further file is ever opened here, so the answer is always 1.
 */
int yywrap(void)
{
  const int stop_scanning = 1;

  return stop_scanning;
}
etisserant@0:
etisserant@0:
etisserant@0:
msousa@757: /*******************************/
msousa@757: /* Public Interface for Bison. */
msousa@757: /*******************************/
msousa@757:
msousa@757: /* The following functions will be called from inside bison code! */
msousa@757:
msousa@757: void include_string(const char *source_code) {include_string_(source_code);}
msousa@757:
msousa@757:
msousa@757: /* Tell flex which file to parse. This function will not imediately start parsing the file.
msousa@757: * To parse the file, you then need to call yyparse()
msousa@757: *
msousa@757: * Returns -1 on error opening the file (and a valid errno), or 0 on success.
msousa@757: */
msousa@757: int parse_file(const char *filename) {
msousa@757: FILE *filehandle = NULL;
msousa@757:
msousa@757: if((filehandle = fopen(filename, "r")) == NULL)
msousa@757: return -1;
msousa@757:
msousa@757: yyin = filehandle;
msousa@757: current_filename = strdup(filename);
msousa@757: current_tracking = GetNewTracking(yyin);
msousa@757: return 0;
msousa@757: }
msousa@757:
msousa@757:
msousa@757:
msousa@757:
msousa@757:
msousa@757:
etisserant@0: /*************************************/
etisserant@0: /* Include a main() function to test */
etisserant@0: /* the token parsing by flex.... */
etisserant@0: /*************************************/
etisserant@0: #ifdef TEST_MAIN
etisserant@0:
etisserant@0: #include "../util/symtable.hh"
etisserant@0:
etisserant@0: yystype yylval;
etisserant@0: YYLTYPE yylloc;
etisserant@0:
etisserant@0:
mario@15:
mario@15:
/* Stand-in used by the test build only: every identifier yields token 0. */
int get_identifier_token(const char *identifier_str) {
  (void)identifier_str;
  return 0;
}
/* Stand-in used by the test build only: every direct variable yields token 0. */
int get_direct_variable_token(const char *direct_variable_str) {
  (void)direct_variable_str;
  return 0;
}
etisserant@0:
etisserant@0:
etisserant@0: int main(int argc, char **argv) {
etisserant@0:
etisserant@0: FILE *in_file;
etisserant@0: int res;
lbessard@136:
etisserant@0: if (argc == 1) {
etisserant@0: /* Work as an interactive (command line) parser... */
etisserant@0: while((res=yylex()))
etisserant@0: fprintf(stderr, "(line %d)token: %d\n", yylineno, res);
etisserant@0: } else {
etisserant@0: /* Work as non-interactive (file) parser... */
etisserant@0: if((in_file = fopen(argv[1], "r")) == NULL) {
etisserant@0: char *errmsg = strdup2("Error opening main file ", argv[1]);
etisserant@0: perror(errmsg);
etisserant@0: free(errmsg);
etisserant@0: return -1;
etisserant@0: }
etisserant@0:
etisserant@0: /* parse the file... */
etisserant@0: yyin = in_file;
etisserant@0: current_filename = argv[1];
etisserant@0: while(1) {
etisserant@0: res=yylex();
etisserant@0: fprintf(stderr, "(line %d)token: %d (%s)\n", yylineno, res, yylval.ID);
etisserant@0: }
etisserant@0: }
lbessard@136:
lbessard@136: return 0;
etisserant@0:
etisserant@0: }
etisserant@0: #endif