Continue changing STRING length from int8_t to uint8_t.
This fixes segmentation faults caused by overflow when converting from string to numeric types.
/*
* matiec - a compiler for the programming languages defined in IEC 61131-3
*
* Copyright (C) 2003-2011 Mario de Sousa (msousa@fe.up.pt)
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
*
* This code is made available on the understanding that it will not be
* used in safety-critical situations without a full and competent review.
*/
/*
* An IEC 61131-3 compiler.
*
* Based on the
* FINAL DRAFT - IEC 61131-3, 2nd Ed. (2001-12-10)
*
*/
/*
* Stage 1
* =======
*
* This file contains the lexical tokens definitions, from which
* the flex utility will generate a lexical parser function.
*/
/*****************************/
/* Lexical Parser Options... */
/*****************************/
/* The lexical analyser will never work in interactive mode,
* i.e., it will only process programs saved to files, and never
* programs being written inter-actively by the user.
* This option saves the resulting parser from calling the
* isatty() function, that seems to be generating some compile
* errors under some (older?) versions of flex.
*/
%option never-interactive
/* Have the lexical analyser use a 'char *yytext' instead of an
* array of char 'char yytext[??]' to store the lexical token.
*/
%pointer
/* Have the lexical analyser ignore the case of letters.
* This will occur for all the tokens and keywords, but
* the resulting text handed up to the syntax parser
* will not be changed, and keep the original case
* of the letters in the input file.
*/
%option case-insensitive
/* Have the generated lexical analyser keep track of the
* line number it is currently analysing.
* This is used to pass up to the syntax parser
* the number of the line on which the current
* token was found. It will enable the syntax parser
* to generate more informative error messages...
*/
%option yylineno
/* required for the use of the yy_pop_state() and
* yy_push_state() functions
*/
%option stack
/* The '%option stack' also requests the inclusion of
* the yy_top_state(), however this function is not
* currently being used. This means that the compiler
* is complaining about the existence of this function.
* The following option removes the yy_top_state()
* function from the resulting c code, so the compiler
* no longer complains.
*/
%option noyy_top_state
/* We will be using unput() in our flex code, so we cannot set the following option!... */
/*
%option nounput
*/
/* The '%option debug' makes the generated scanner run in
* debug mode.
%option debug
*/
/**************************************************/
/* External Variable and Function declarations... */
/**************************************************/
%{
/* Define TEST_MAIN to include a main() function.
* Useful for testing the parser generated by flex.
*/
/*
#define TEST_MAIN
*/
/* If the lexical parser is compiled by itself (i.e. TEST_MAIN is defined), we
* need to define DEFAULT_LIBDIR to some string, as it is used further down to
* initialise the INCLUDE_DIRECTORIES[] array.
* Under normal circumstances DEFAULT_LIBDIR is set in the syntax parser
* header file...
* NOTE(review): the definition is not visible in this file - confirm which header provides it.
*/
#ifdef TEST_MAIN
#define DEFAULT_LIBDIR "just_testing"
#endif
/* Required for strdup() */
#include <string.h>
/* Required only for the declaration of abstract syntax classes
* (class symbol_c; class token_c; class list_c;)
* These will not be used in flex, but the token type union defined
* in iec_bison.hh contains pointers to these classes, so we must include
* it here.
*/
#include "../absyntax/absyntax.hh"
/* iec_bison.hh is generated by bison.
* Contains the definition of the token constants, and the
* token value type YYSTYPE (in our case, a 'const char *')
*/
#include "iec_bison.hh"
#include "stage1_2_priv.hh"
/* Variable defined by the bison parser,
* where the value of the tokens will be stored
*/
extern YYSTYPE yylval;
/* The name of the file currently being parsed...
* Note that flex accesses and updates this global variable
* appropriately whenever it comes across a {#include "<filename>"} directive...
*/
const char *current_filename = NULL;
/* Variable defined by the bison parser.
* It must be initialised with the location
* of the token being parsed.
* This is only needed if we want to keep
* track of the locations, in order to give
* more meaningful error messages!
*/
/*
*extern YYLTYPE yylloc;
*/
/* Route all of the scanner's input through GetNextChar().
* Any result that is zero or negative is handed back to flex as YY_NULL.
*/
#define YY_INPUT(buf,result,max_size) {\
if ( (result = GetNextChar(buf, max_size)) <= 0 ) {\
result = YY_NULL;\
}\
}
/* Macro that is executed for every action.
* We use it to pass the location of the token
* back to the bison parser...
*
* The order of the statements matters:
*  - a copy of the current tracking state is first saved in previous_tracking;
*  - yylloc.first_xxx are filled in from the tracking state as it is _before_
*    UpdateTracking() is called with the matched text (yytext);
*  - yylloc.last_xxx are filled in from the tracking state as it is _after_
*    that call (the column being currentChar - 1);
*  - finally, currentTokenStart is set to the current position, and
*    current_order is incremented (once per action, not once per token).
* NOTE(review): UpdateTracking() presumably advances lineNumber/currentChar
*               over the matched text - confirm against its definition.
*/
#define YY_USER_ACTION {\
previous_tracking =*current_tracking; \
yylloc.first_line = current_tracking->lineNumber; \
yylloc.first_column = current_tracking->currentChar; \
yylloc.first_file = current_filename; \
yylloc.first_order = current_order; \
\
UpdateTracking(yytext); \
\
yylloc.last_line = current_tracking->lineNumber; \
yylloc.last_column = current_tracking->currentChar - 1; \
yylloc.last_file = current_filename; \
yylloc.last_order = current_order; \
\
current_tracking->currentTokenStart = current_tracking->currentChar; \
current_order++; \
}
/* The character sets used by this lexical parser are written down as
* ASCII codes, so it only works on ASCII based systems.
* Refuse to compile anywhere else: sampling a few characters is
* enough to detect a non ASCII host...
*/
#if !(('a' == 0x61) && ('A' == 0x41) && ('z' == 0x7A) && ('Z' == 0x5A) && \
('0' == 0x30) && ('9' == 0x39) && ('(' == 0x28) && ('[' == 0x5B))
#error This lexical analyser is not portable to a non ASCII based system.
#endif
/* Function only called from within flex, but defined
* in iec.y!
* We declare it here...
*
* Search for a symbol in either of the two symbol tables
* and return the token id of the first symbol found.
* Searches first in the variables, and only if not found
* does it continue searching in the library elements
*
* NOTE: the commented out prototype below is an alternative declaration
*       returning a token_id_t; the declaration actually in use returns a plain int.
*/
//token_id_t get_identifier_token(const char *identifier_str);
int get_identifier_token(const char *identifier_str);
%}
/***************************************************/
/* Forward Declaration of functions defined later. */
/***************************************************/
%{
/* Called by YY_USER_ACTION with the text of every match (yytext), between
* filling in the start and the end location of the token.
*/
void UpdateTracking(const char *text);
/* return the character back to the input stream. */
void unput_char(const char c);
/* return all the text in the current token back to the input stream. */
/* NOTE(review): the meaning of the parameter n is not visible here - confirm against the definition. */
void unput_text(int n);
/* return all the text in the current token back to the input stream,
* but first return to the stream an additional character to mark the end of the token.
*/
void unput_and_mark(const char mark_char);
/* NOTE(review): presumably makes the scanner continue reading from the named include file - confirm against the definition. */
void include_file(const char *include_filename);
/* The body_state tries to find a ';' before a END_PROGRAM, END_FUNCTION or END_FUNCTION_BLOCK or END_ACTION
* and ignores ';' inside comments and pragmas. This means that we cannot do this in a single lex rule.
* Body_state therefore stores ALL text we consume in every rule, so we can push it back into the buffer
* once we have decided if we are parsing ST or IL code. The following functions manage that buffer used by
* the body_state.
*/
void append_bodystate_buffer(const char *text, int is_whitespace = 0);
void unput_bodystate_buffer(void);
int isempty_bodystate_buffer(void);
void del_bodystate_buffer(void);
/* Input function used by the YY_INPUT macro: it is handed flex's buffer and the
* maximum size requested, and a return value <= 0 is reported to flex as YY_NULL.
*/
int GetNextChar(char *b, int maxBuffer);
%}
/****************************/
/* Lexical Parser States... */
/****************************/
/* NOTE: Our parser can parse st or il code, intermixed
* within the same file.
* With IL we come across the issue of the EOL (end of line) token.
* ST, and the declaration parts of IL do not use this token!
* If the lexical analyser were to issue this token during ST
* language parsing, or during the declaration of data types,
* function headers, etc. in IL, the syntax parser would crash.
*
* We can solve this issue using one of three methods:
* (1) Augment all the syntax that does not accept the EOL
* token to simply ignore it. This makes the syntax
* definition (in iec.y) very cluttered!
* (2) Let the lexical parser figure out which language
* it is parsing, and decide whether or not to issue
* the EOL token. This requires the lexical parser
* to have knowledge of the syntax!, making for a poor
* overall organisation of the code. It would also make it
* very difficult to understand the lexical parser as it
* would use several states, and a state machine to transition
* between the states. The state transitions would be
* intermingled with the lexical parser definition!
* (3) Use a mixture of (1) and (2). The lexical analyser
* merely distinguishes between function headers and function
* bodies, but no longer makes a distinction between il and
* st language bodies. When parsing a body, it will return
* the EOL token. In other states '\n' will be ignored as
* whitespace.
* The ST language syntax has been augmented in the syntax
* parser configuration to ignore any EOL tokens that it may
* come across!
* This option has both drawbacks of option (1) and (2), but
* much less intensely.
* The syntax that gets cluttered is limited to the ST statements
* (which is rather limited, compared to the function headers and
* data type declarations, etc...), while the state machine in
* the lexical parser becomes very simple. All state transitions
* can be handled within the lexical parser by itself, and can be
* easily identified. Thus knowledge of the syntax required by
* the lexical parser is very limited!
*
* Amazingly enough, I (Mario) got to implement option (3)
* at first, requiring two basic states, decl and body.
* The lexical parser will enter the body state when
* it is parsing the body of a function/program/function block. The
* state transition is done when we find a VAR_END that is not followed
* by a VAR! This is the syntax knowledge that gets included in the
* lexical analyser with this option!
* Unfortunately, getting the st syntax parser to ignore EOL anywhere
* where they might appear leads to conflicts. This is due to the fact
* that the syntax parser uses the single look-ahead token to remove
* possible conflicts. When we insert a possible EOL, the single
* look ahead token becomes the EOL, which means the potential conflicts
* could no longer be resolved.
* Removing these conflicts would make the st syntax parser very convoluted,
* and adding the extraneous EOL would make it very cluttered.
* This option was therefore dropped in favour of another!
*
* I ended up implementing (2). Unfortunately the lexical analyser can
* not easily distinguish between il and st code, since function
* calls in il are very similar to function block calls in st.
* We therefore use an extra 'body' state. When the lexical parser
* finds that last END_VAR, it enters the body state. This state
* must figure out what language is being parsed from the first few
* tokens, and switch to the correct state (st, il or sfc) according to the
* language. This means that we insert quite a bit of knowledge of the
* syntax of the languages into the lexical parser. This is ugly, but it
* works, and at least it is possible to keep all the state changes together
* to make it easier to remove them later on if need be.
* Once the language being parsed has been identified,
* the body state returns any matched text back to the buffer with unput(),
* to be later matched correctly by the appropriate language parser (st, il or sfc).
*
* Additionally, in sfc state it may further recursively enter the body state
* once again. This is because an sfc body may contain ACTIONS, which are then
* written in one of the three languages (ST, IL or SFC), so once again we need
* to figure out which language the ACTION in the SFC was written in. We already
* have all that done in the body state, so we recursively transition to the body
* state once again.
* Note that in this case, when coming out of the st/il state (whichever language
* the action was written in) the sfc state will become active again. This is done by
* pushing and popping the previously active state!
*
* The sfc_qualifier_state is required because when parsing actions within an
* sfc, we will be expecting action qualifiers (N, P, R, S, DS, SD, ...). In order
* for bison to work correctly, these qualifiers must be returned as tokens. However,
* these tokens are not reserved keywords, which means it should be possible to
* define variables/functions/FBs with any of these names (including
* S and R which are special because they are also IL operators). So, when we are not
* expecting any action qualifiers, flex does not return these tokens, and is free
* to interpret them as previously defined variables/functions/... as the case may be.
*
* The time_literal_state is required because TIME# literals are decomposed into
* portions, and we want to send these portions one by one to bison. Each portion will
* represent the value in days/hours/minutes/seconds/ms.
* Unfortunately, some of these portions may also be lexically analysed as an identifier. So,
* we need to disable lexical identification of identifiers while parsing TIME# literals!
* e.g.: TIME#55d_4h_56m
* We would like to return to bison the tokens 'TIME' '#' '55d' '_' '4h' '_' '56m'
* Unfortunately, flex will join '_' and '4h' to create a legal {identifier} '_4h',
* and return that identifier instead! So, we added this state!
*
* The ignore_pou_state state is only used when bison says it is doing the pre-parsing.
* During pre-parsing, the main state machine will only transition between
* INITIAL and ignore_pou_state, and from here back to INITIAL. All other
* transitions are inhibited. This inhibition is actually just enforced by making
* sure that the INITIAL ---> ignore_pou_state transition is tested before all other
* transitions coming out of INITIAL state. All other transitions are unaffected, as they
* never get a chance to be evaluated when bison is doing pre-parsing.
* Pre-parsing is a first quick scan through the whole input source code simply
* to determine the list of POUs and datatypes that will be defined in that
* code. Basically, the objective is to fill up the previously_declared_xxxxx
* maps, without processing the code itself. Once these maps have been filled up,
* bison will throw away the AST (abstract syntax tree) created up to that point,
* and scan through the same source code again, but this time creating a correct AST.
* This pre-scan allows the source code to reference POUs and datatypes that are
* only declared after they are used!
*
*
* Here is a main state machine...
* --+
* | these states are
* +------------> get_pou_name_state ----> ignore_pou_state | only active
* | | | when bison is
* | ------------------------------------------+ | doing the
* | | | pre-parsing!!
* | v --+
* +---> INITIAL <-------> config
* | \
* | V
* | header_state
* | |
* | V
* vardecl_list_state <------> var_decl
* ^ |
* | | [using push()]
* | |
* | V
* | body,
* | |
* | |
* | -------------------
* | | | |
* | v v v
* | st il sfc
* | | | | [using pop() when leaving st/il/sfc => goes to vardecl_list_state]
* | | | |
* -----------------------
*
* NOTE:- When inside sfc, and an action or transition in ST/IL is found, then
* we also push() to the body state. This means that sometimes, when pop()ing
* from st and il, the state machine may return to the sfc state!
* - The transitions from sfc to body will be decided by bison, which will
* tell flex to do the transition by calling cmd_goto_body_state().
*
*
* Possible state changes are:
* INITIAL -> goto(ignore_pou_state)
* (This transition state is only used when bison says it is doing the pre-parsing.)
* (This transition takes precedence over all other transitions!)
* (when a FUNCTION, FUNCTION_BLOCK, PROGRAM or CONFIGURATION is found)
*
* INITIAL -> goto(config_state)
* (when a CONFIGURATION is found)
*
* INITIAL -> goto(header_state)
* (when a FUNCTION, FUNCTION_BLOCK, or PROGRAM is found)
*
* header_state -> goto(vardecl_list_state)
* (When the first VAR token is found, i.e. at beginning of first VAR .. END_VAR declaration)
*
* vardecl_list_state -> push current state (vardecl_list_state), and goto(vardecl_state)
* (when a VAR token is found)
* vardecl_state -> pop() to (vardecl_list_state)
* (when a END_VAR token is found)
*
* vardecl_list_state -> push current state (vardecl_list_state), and goto(body_state)
* (when the last END_VAR is found!)
*
* body_state -> goto(sfc_state)
* (when it figures out it is parsing sfc language)
* body_state -> goto(st_state)
* (when it figures out it is parsing st language)
* body_state -> goto(il_state)
* (when it figures out it is parsing il language)
* st_state -> pop() to vardecl_list_state
* (when a END_FUNCTION, END_FUNCTION_BLOCK, END_PROGRAM,
* END_ACTION or END_TRANSITION is found)
* il_state -> pop() to vardecl_list_state
* (when a END_FUNCTION, END_FUNCTION_BLOCK, END_PROGRAM,
* END_ACTION or END_TRANSITION is found)
* sfc_state -> pop() to vardecl_list_state
* (when a END_FUNCTION, END_FUNCTION_BLOCK, or END_PROGRAM is found)
*
* ignore_pou_state -> goto(INITIAL)
* (when a END_FUNCTION, END_FUNCTION_BLOCK, END_PROGRAM or END_CONFIGURATION is found)
* vardecl_list_state -> goto(INITIAL)
* (when a END_FUNCTION, END_FUNCTION_BLOCK, or END_PROGRAM is found)
* config_state -> goto(INITIAL)
* (when a END_CONFIGURATION is found)
*
*
* sfc_state -> push current state(sfc_state); goto(body_state)
* (when parsing an action. This transition is requested by bison)
* sfc_state -> push current state(sfc_state); goto(sfc_qualifier_state)
* (when expecting an action qualifier. This transition is requested by bison)
* sfc_qualifier_state -> pop() to sfc_state
* (when no longer expecting an action qualifier. This transition is requested by bison)
*
* config_state -> push(config_state); goto(task_init_state)
* (when parsing a task initialisation. This transition is requested by bison)
* task_init_state -> pop()
* (when no longer parsing task initialisation parameters. This transition is requested by bison)
*
*
* There is another secondary state machine for parsing comments, another for file_includes,
* and yet another for time literals.
*/
/* Bison is in the pre-parsing stage, and we are parsing a POU. Ignore everything up to the end of the POU! */
%x ignore_pou_state
%x get_pou_name_state
/* we are parsing a configuration. */
%s config_state
/* Inside a configuration, we are parsing a task initialisation parameters */
/* This means that PRIORITY, SINGLE and INTERVAL must be handled as
* tokens, and not as possible identifiers. Note that the above words
* are not keywords.
*/
%s task_init_state
/* we are looking for the first VAR inside a function's, program's or function block's declaration */
/* This is not exclusive (%x) as we must be able to parse the identifier and data types of a function/FB */
%s header_state
/* we are parsing a function, program or function block sequence of VAR..END_VAR declarations */
%x vardecl_list_state
/* a substate of the vardecl_list_state: we are inside a specific VAR .. END_VAR */
%s vardecl_state
/* we will be parsing a function body/action/transition. Whether il/st/sfc remains to be determined */
%x body_state
/* we are parsing il code -> flex must return the EOL tokens! */
%s il_state
/* we are parsing st code -> flex must not return the EOL tokens! */
%s st_state
/* we are parsing sfc code -> flex must not return the EOL tokens! */
%s sfc_state
/* we are parsing sfc code, and expecting an action qualifier. */
%s sfc_qualifier_state
/* we are parsing sfc code, and expecting the priority token. */
%s sfc_priority_state
/* we are parsing a TIME# literal. We must not return any {identifier} tokens. */
%x time_literal_state
/* we are parsing a comment. */
%x comment_state
/*******************/
/* File #include's */
/*******************/
/* We extend the IEC 61131-3 standard syntax to allow inclusion
* of other files, using the IEC 61131-3 pragma directive...
* The accepted syntax is:
* {#include "<filename>"}
*/
/* the "include" states are used for picking up the name of an include file */
%x include_beg
%x include_filename
%x include_end
file_include_pragma_filename [^\"]*
file_include_pragma_beg "{#include"{st_whitespace}\"
file_include_pragma_end \"{st_whitespace}"}"
file_include_pragma {file_include_pragma_beg}{file_include_pragma_filename}{file_include_pragma_end}
%{
/* A counter to track the order by which each token is processed.
* NOTE: This counter is not exactly linear (i.e., it does not get incremented by 1 for each token).
* i.e., it may get incremented by more than one between two consecutive tokens.
* This is due to the fact that the counter gets incremented every 'user action' in flex,
* however not every user action will result in a token being passed to bison.
* Nevertheless this is still OK, as we are only interested in the relative
* ordering of tokens...
*/
static long int current_order = 0;
/* Source position tracking state (the line/column bookkeeping that
* YY_USER_ACTION copies into yylloc).
*/
typedef struct {
int eof; /* NOTE(review): presumably set once the end of the input has been reached - confirm */
int lineNumber; /* copied into yylloc.first_line / yylloc.last_line */
int currentChar; /* copied into yylloc.first_column / (minus 1) yylloc.last_column */
int lineLength; /* NOTE(review): presumably the length of the line being scanned - confirm */
int currentTokenStart; /* set to currentChar by YY_USER_ACTION after every match */
FILE *in_file; /* NOTE(review): presumably the file being read - confirm */
} tracking_t;
/* A forward declaration of a function defined at the end of this file. */
void FreeTracking(tracking_t *tracking);
/* Number of entries in the include_stack[] array below. */
#define MAX_INCLUDE_DEPTH 16
/* One entry of the include stack: a flex buffer, together with the
* tracking state and the file name that go with it.
* NOTE(review): presumably these describe the file that was being parsed
*               when an include directive was found - confirm against include_file().
*/
typedef struct {
YY_BUFFER_STATE buffer_state;
tracking_t *env;
const char *filename;
} include_stack_t;
/* The tracking state currently in use (read and updated by YY_USER_ACTION). */
tracking_t * current_tracking = NULL;
/* Copy of *current_tracking, taken by YY_USER_ACTION at the start of every action. */
tracking_t previous_tracking;
include_stack_t include_stack[MAX_INCLUDE_DEPTH];
/* NOTE(review): presumably the index of the next free entry in include_stack[] - confirm */
int include_stack_ptr = 0;
/* NULL terminated list of directories.
* NOTE(review): presumably these are searched, in this order, when looking
*               for a file named in an include pragma - confirm against include_file().
*/
const char *INCLUDE_DIRECTORIES[] = {
DEFAULT_LIBDIR,
".",
"/lib",
"/usr/lib",
"/usr/lib/iec",
NULL /* must end with NULL!! */
};
%}
/*****************************/
/* Preliminary constructs... */
/*****************************/
/* PRAGMAS */
/* ======= */
/* In order to allow the declaration of POU prototypes (Function, FB, Program, ...),
* especially the prototypes of Functions and FBs defined in the standard
* (i.e. standard functions and FBs), we extend the IEC 61131-3 standard syntax
* with two pragmas to indicate that the code is to be parsed (going through the
* lexical, syntactical, and semantic analysers), but no code is to be generated.
*
* The accepted syntax is:
* {disable code generation}
* ... prototypes ...
* {enable code generation}
*
* When parsing these prototypes the abstract syntax tree will be populated as usual,
* allowing the semantic analyser to correctly analyse the semantics of calls to these
* functions/FBs. However, stage4 will simply ignore all IEC61131-3 code
* between the above two pragmas.
*/
disable_code_generation_pragma "{disable code generation}"
enable_code_generation_pragma "{enable code generation}"
/* Any other pragma...
* Either '{' ... '}' with no '}' in between,
* or '{{' ... '}}' where the text in between may contain single '}' characters.
*/
pragma ("{"[^}]*"}")|("{{"([^}]|"}"[^}])*"}}")
/* COMMENTS */
/* ======== */
/* In order to allow nested comments, comments are handled by a specific comment_state state */
/* Whenever a "(*" is found, we push the current state onto the stack, and enter a new instance of the comment_state state.
* Whenever a "*)" is found, we pop a state off the stack
*/
/* comments... */
comment_beg "(*"
comment_end "*)"
/* However, bison has a shift/reduce conflict when parsing formal function/FB
* invocations with the 'NOT <variable_name> =>' syntax (which needs two look ahead
* tokens to be parsed correctly - and bison being LALR(1) only supports one).
* The current work around requires flex to completely parse the '<variable_name> =>'
* sequence. This sequence includes whitespace and/or comments between the
* <variable_name> and the "=>" token.
*
* This flex rule (sendto_identifier_token) uses the whitespace/comment as trailing context,
* which means we can not use the comment_state method of specifying/finding and ignoring
* comments.
*
* For this reason only, we must also define what a complete comment looks like, so
* it may be used in this rule. Since the rule uses the whitespace_or_comment
* construct as trailing context, this definition of comment must not use any
* trailing context either.
*
* Additionally, it is not possible to define nested comments in flex without the use of
* states, so for this particular location, we do NOT support nested comments.
*/
/* NOTE: this seemingly unnecessary complex definition is required
* to be able to eat up comments such as:
* '(* Testing... ! ***** ******)'
* without using the trailing context command in flex (/{context})
* since {comment} itself will later be used with
* trailing context ({comment}/{context})
*/
not_asterisk [^*]
not_close_parenthesis_nor_asterisk [^*)]
asterisk "*"
comment_text ({not_asterisk})|(({asterisk}+){not_close_parenthesis_nor_asterisk})
comment "(*"({comment_text}*)({asterisk}+)")"
/* 3.1 Whitespace */
/* ============== */
/*
* Whitespace is clearly defined (see IEC 61131-3 v2, section 2.1.4)
*
* Whitespace definition includes the newline character.
*
* However, the standard is inconsistent in that in IL the newline character
* is considered a token (EOL - end of line).
* In our implementation we therefore have two definitions of whitespace
* - one for ST, that includes the newline character
* - one for IL without the newline character.
*
* IL whitespace is only active while parsing IL code, whereas ST whitespace
* is used in all other circumstances. Additionally, when parsing IL, the newline
* character is treated as the EOL token.
* The above requires the use of a state machine in the lexical parser to track which
* language is being parsed. This requires that the lexical parser (i.e. flex)
* have some knowledge of the syntax itself.
*
* NOTE: Our definition of whitespace will only work in ASCII!
*
* NOTE: we cannot use
* st_whitespace [:space:]*
* since we use {st_whitespace} as trailing context. In our case
* this would not constitute "dangerous trailing context", but the
* lexical generator (i.e. flex) does not know this (since it does
* not know which characters belong to the set [:space:]), and will
* generate a "dangerous trailing context" warning!
* We use this alternative just to stop the flex utility from
* generating the invalid (in this case) warning...
*/
/* NOTE: il_whitespace_char is not currently used, but we include it for completeness */
st_whitespace_char [ \f\n\r\t\v]
il_whitespace_char [ \f\r\t\v]
st_whitespace [ \f\n\r\t\v]*
il_whitespace [ \f\r\t\v]*
st_whitespace_or_pragma_or_commentX ({st_whitespace})|({pragma})|({comment})
il_whitespace_or_pragma_or_commentX ({il_whitespace})|({pragma})|({comment})
st_whitespace_or_pragma_or_comment {st_whitespace_or_pragma_or_commentX}*
il_whitespace_or_pragma_or_comment {il_whitespace_or_pragma_or_commentX}*
qualified_identifier {identifier}(\.{identifier})+
/*****************************************/
/* B.1.1 Letters, digits and identifiers */
/*****************************************/
/* NOTE: The following definitions only work if the host computer
* is using the ASCII mapping. E.g., with EBCDIC [A-Z]
* contains non-alphabetic characters!
* The correct way of doing it would be to use
* the [:upper:] etc... definitions.
*
* Unfortunately, further on we need all printable
* characters (i.e. [:print:]), but excluding '$'.
* Flex does not allow sets to be composed by excluding
* elements. Sets may only be constructed by adding new
* elements, which means that we have to revert to
* [\x20\x21\x23\x25\x26\x28-\x7E] for the definition
* of the printable characters with the required exceptions.
* The above also implies the use of ASCII, but now we have
* no way to work around it!
*
* The conclusion is that our parser is limited to ASCII
* based host computers!!
*/
letter [A-Za-z]
digit [0-9]
octal_digit [0-7]
/* NOTE: only the upper case letters A-F are listed here; the scanner is
* generated with '%option case-insensitive', so a-f are accepted too.
*/
hex_digit {digit}|[A-F]
/* An identifier starts with a letter, or with one '_' that is immediately
* followed by a letter or a digit. Each of the following letters/digits may be
* preceded by at most one '_'. An identifier can therefore neither contain two
* consecutive '_' nor end with a '_'.
*/
identifier ({letter}|(_({letter}|{digit})))((_?({letter}|{digit}))*)
/*******************/
/* B.1.2 Constants */
/*******************/
/******************************/
/* B.1.2.1 Numeric literals */
/******************************/
/* A sequence of decimal digits; two consecutive digits may be separated by a single '_'. */
integer {digit}((_?{digit})*)
/* Some helper symbols for parsing TIME literals... */
/* The range limited integers below (except integer_0_999) accept any number of leading
* zeros, and every digit but the last may be followed by a single '_'.
*/
/* optional tens digit 0-5, followed by one digit */
integer_0_59 (0(_?))*([0-5](_?))?{digit}
/* optional tens digit 0-1, followed by one digit */
integer_0_19 (0(_?))*([0-1](_?))?{digit}
/* a '2' followed by a digit in the range 0-3 */
integer_20_23 (0(_?))*2(_?)[0-3]
integer_0_23 {integer_0_19}|{integer_20_23}
/* one, two or three digits, optionally separated by a single '_' */
integer_0_999 {digit}((_?{digit})?)((_?{digit})?)
/* Based integers: the base prefix ('2#', '8#' or '16#') followed by digits of that base,
* where two consecutive digits may be separated by a single '_'.
* NOTE: {bit} is defined right after being used in binary_integer.
*/
binary_integer 2#{bit}((_?{bit})*)
bit [0-1]
octal_integer 8#{octal_digit}((_?{octal_digit})*)
hex_integer 16#{hex_digit}((_?{hex_digit})*)
/* 'E' or 'e', an optional sign, and an {integer} */
exponent [Ee]([+-]?){integer}
/* The correct definition for real would be:
* real {integer}\.{integer}({exponent}?)
*
* Unfortunately, the spec also defines fixed_point (B 1.2.3.1) as:
* fixed_point {integer}\.{integer}
*
* This means that {integer}\.{integer} could be interpreted
* as either a fixed_point or a real.
* I have opted to interpret {integer}\.{integer} as a fixed_point.
* In order to do this, the definition of real has been changed to:
* real {integer}\.{integer}{exponent}
*
* This means that the syntax parser now needs to define a real to be
* either a real_token or a fixed_point_token!
*/
real {integer}\.{integer}{exponent}
/*******************************/
/* B.1.2.2 Character Strings */
/*******************************/
/*
common_character_representation :=
<any printable character except '$', '"' or "'">
|'$$'
|'$L'|'$N'|'$P'|'$R'|'$T'
|'$l'|'$n'|'$p'|'$r'|'$t'
NOTE: $ = 0x24
" = 0x22
' = 0x27
printable chars in ASCII: 0x20-0x7E
*/
esc_char_u $L|$N|$P|$R|$T
esc_char_l $l|$n|$p|$r|$t
esc_char $$|{esc_char_u}|{esc_char_l}
double_byte_char (${hex_digit}{hex_digit}{hex_digit}{hex_digit})
single_byte_char (${hex_digit}{hex_digit})
/* WARNING:
* This definition is only valid in ASCII...
*
* Flex includes the function print_char() that defines
* all printable characters portably (i.e. whatever character
* encoding is currently being used , ASCII, EBCDIC, etc...)
* Unfortunately, we cannot generate the definition of
* common_character_representation portably, since flex
* does not allow definition of sets by subtracting
* elements in one set from another set.
* This means we must build up the definition of
* common_character_representation using only set addition,
* which leaves us with the only choice of defining the
* characters non-portably...
*/
common_character_representation [\x20\x21\x23\x25\x26\x28-\x7E]|{esc_char}
double_byte_character_representation $\"|'|{double_byte_char}|{common_character_representation}
single_byte_character_representation $'|\"|{single_byte_char}|{common_character_representation}
double_byte_character_string \"({double_byte_character_representation}*)\"
single_byte_character_string '({single_byte_character_representation}*)'
/************************/
/* B 1.2.3.1 - Duration */
/************************/
fixed_point {integer}\.{integer}
/* NOTE: The IEC 61131-3 v2 standard has an incorrect formal syntax definition of duration,
* as its definition does not match the standard's text.
* IEC 61131-3 v3 (committee draft) seems to have this fixed, so we use that
* definition instead!
*
* duration::= ('T' | 'TIME') '#' ['+'|'-'] interval
* interval::= days | hours | minutes | seconds | milliseconds
* fixed_point ::= integer [ '.' integer]
* days ::= fixed_point 'd' | integer 'd' ['_'] [ hours ]
* hours ::= fixed_point 'h' | integer 'h' ['_'] [ minutes ]
* minutes ::= fixed_point 'm' | integer 'm' ['_'] [ seconds ]
* seconds ::= fixed_point 's' | integer 's' ['_'] [ milliseconds ]
* milliseconds ::= fixed_point 'ms'
*
*
* The original IEC 61131-3 v2 definition is:
* duration ::= ('T' | 'TIME') '#' ['-'] interval
* interval ::= days | hours | minutes | seconds | milliseconds
* fixed_point ::= integer [ '.' integer]
* days ::= fixed_point 'd' | integer 'd' ['_'] hours
* hours ::= fixed_point 'h' | integer 'h' ['_'] minutes
* minutes ::= fixed_point 'm' | integer 'm' ['_'] seconds
* seconds ::= fixed_point 's' | integer 's' ['_'] milliseconds
* milliseconds ::= fixed_point 'ms'
*/
/* The "_X" definitions describe the components that may FOLLOW a larger time unit.
 * They are built from integer_0_999 / integer_0_59 / integer_0_23 (defined elsewhere in this
 * file; presumably limited to the ranges their names suggest) instead of plain {integer}.
 * For s, m and h each definition has two alternatives:
 *   - <integer><unit>, optionally followed by an optional '_' and the next smaller component;
 *   - a number with an optional fractional part followed by <unit> and nothing else.
 */
interval_ms_X ({integer_0_999}(\.{integer})?)ms
interval_s_X {integer_0_59}s(_?{interval_ms_X})?|({integer_0_59}(\.{integer})?s)
interval_m_X {integer_0_59}m(_?{interval_s_X})?|({integer_0_59}(\.{integer})?m)
interval_h_X {integer_0_23}h(_?{interval_m_X})?|({integer_0_23}(\.{integer})?h)
/* The definitions without "_X" describe the FIRST (most significant) component of an
 * interval. It starts with an unrestricted {integer} or {fixed_point}; whatever follows
 * it must be one of the "_X" components above.
 */
interval_ms {integer}ms|({fixed_point}ms)
interval_s {integer}s(_?{interval_ms_X})?|({fixed_point}s)
interval_m {integer}m(_?{interval_s_X})?|({fixed_point}m)
interval_h {integer}h(_?{interval_m_X})?|({fixed_point}h)
interval_d {integer}d(_?{interval_h_X})?|({fixed_point}d)
/* A legal interval may start with any of the five units. */
interval {interval_ms}|{interval_s}|{interval_m}|{interval_h}|{interval_d}
/* to help provide nice error messages, we also parse an incorrect but plausible interval... */
/* NOTE that this erroneous interval will be parsed outside the time_literal_state, so must not
 * be able to parse any other legal lexical construct (besides a legal interval, but that
* is OK as this rule will appear _after_ the rule to parse legal intervals!).
*/
/* In the erroneous interval every unit accepts either form of number. */
fixed_point_or_integer {fixed_point}|{integer}
/* Components appear in the order d, h, m, s, ms. Each one is optional, and each of the
 * first four may be followed by a single '_'. No range checking is done on the numbers.
 * NOTE: since every component is optional, the pattern as written can also match the empty string.
 */
erroneous_interval ({fixed_point_or_integer}d_?)?({fixed_point_or_integer}h_?)?({fixed_point_or_integer}m_?)?({fixed_point_or_integer}s_?)?({fixed_point_or_integer}ms)?
/********************************************/
/* B.1.4.1 Directly Represented Variables */
/********************************************/
/* The correct definition, if the standard were to be followed... */
/* direct_variable ::= '%' location_prefix [size_prefix] integer {'.' integer}
 *
 * location_prefix: I (input), Q (output) or M (memory).
 * size_prefix    : X, B, W, D or L; may be omitted.
 *
 * NOTE: the '.' separating the hierarchical address fields must be escaped.
 *       An unescaped '.' matches ANY character (except newline), which would make
 *       text such as "%MW1*2" be returned as one single direct variable.
 */
location_prefix [IQM]
size_prefix [XBWDL]
direct_variable_standard %{location_prefix}({size_prefix}?){integer}((\.{integer})*)
/* For the MatPLC, we will accept %<identifier>
* as a direct variable, this being mapped onto the MatPLC point
* named <identifier>
*/
/* TODO: we should not restrict it to only the accepted syntax
* of <identifier> as specified by the standard. MatPLC point names
* have a more permissive syntax.
*
* e.g. "P__234"
* Is a valid MatPLC point name, but not a valid <identifier> !!
* The same happens with names such as "333", "349+23", etc...
* How can we handle these more expressive names in our case?
* Remember that some direct variable may remain anonymous, with
* declarations such as:
* VAR
* AT %I3 : BYTE := 255;
* END_VAR
* in which case we are currently using "%I3" as the variable
* name.
*/
/* direct_variable_matplc %{identifier} */
/* direct_variable {direct_variable_standard}|{direct_variable_matplc} */
/* Only the standard syntax is currently accepted as a direct variable. */
direct_variable {direct_variable_standard}
/******************************************/
/* B 1.4.3 - Declaration & Initialisation */
/******************************************/
/* An incompletely specified location: '%', one location prefix letter (I, Q or M) and a literal '*'. */
incompl_location %[IQM]\*
%%
	/* fprintf(stderr, "flex: state %d\n", YY_START); */
	/*****************************************************/
	/*****************************************************/
	/*****************************************************/
	/***** *****/
	/***** *****/
	/***** F I R S T T H I N G S F I R S T *****/
	/***** *****/
	/***** *****/
	/*****************************************************/
	/*****************************************************/
	/*****************************************************/
	/***********************************************************/
	/* Handle requests sent by bison for flex to change state. */
	/***********************************************************/
	/* This code is placed before the first rule, so flex copies it to the top of the
	 * scanning routine and executes it each time the scanning routine is entered.
	 * Every pending request is serviced and then the matching rst_xxx() function is
	 * called (presumably clearing the request so it is not serviced twice).
	 * The push requests are checked first; the pop request is checked last.
	 */
	if (get_goto_body_state()) {
	  yy_push_state(body_state);
	  rst_goto_body_state();
	}
	if (get_goto_sfc_qualifier_state()) {
	  yy_push_state(sfc_qualifier_state);
	  rst_goto_sfc_qualifier_state();
	}
	if (get_goto_sfc_priority_state()) {
	  yy_push_state(sfc_priority_state);
	  rst_goto_sfc_priority_state();
	}
	if (get_goto_task_init_state()) {
	  yy_push_state(task_init_state);
	  rst_goto_task_init_state();
	}
	if (get_pop_state()) {
	  yy_pop_state();
	  rst_pop_state();
	}
/***************************/
/* Handle the pragmas! */
/***************************/
/* We start off by searching for the pragmas we handle in the lexical parser. */
<INITIAL>{file_include_pragma} unput_text(0); yy_push_state(include_beg); /* NOTE(review): unput_text(0) presumably pushes the whole match back so that it gets re-scanned in the include_beg state - confirm */
	/* Pragmas sent to syntax analyser (bison) */
	/* NOTE: In the vardecl_list_state we only process the pragmas between two consecutive VAR .. END_VAR blocks.
	 *       We do not process any pragmas trailing after the last END_VAR. We leave that to the body_state.
	 *       This is because the pragmas are stored in a statement_list or instruction_list (in bison),
	 *       but these lists must start with the special tokens start_IL_body_token/start_ST_body_token.
	 *       This means that these special tokens must be generated (by the body_state) before processing
	 *       the pragma => we cannot process the trailing pragmas in the vardecl_list_state state.
	 */
{disable_code_generation_pragma} return disable_code_generation_pragma_token;
{enable_code_generation_pragma} return enable_code_generation_pragma_token;
	/* In vardecl_list_state the pragma is only matched when it is immediately followed by VAR (trailing context). */
<vardecl_list_state>{disable_code_generation_pragma}/(VAR) return disable_code_generation_pragma_token;
<vardecl_list_state>{enable_code_generation_pragma}/(VAR) return enable_code_generation_pragma_token;
<body_state>{disable_code_generation_pragma} append_bodystate_buffer(yytext); /* in body state we do not process any tokens, we simply store them for later processing! */
<body_state>{enable_code_generation_pragma} append_bodystate_buffer(yytext); /* in body state we do not process any tokens, we simply store them for later processing! */
	/* Any other pragma we find, we just pass it up to the syntax parser... */
	/* Note that the <body_state> state is exclusive, so we have to include it here too. */
<body_state>{pragma} append_bodystate_buffer(yytext); /* in body state we do not process any tokens, we simply store them for later processing! */
{pragma} {
		  /* Pass the pragma up to bison without its enclosing braces.
		   * The pragma is delimited either by '{' ... '}' or by '{{' ... '}}';
		   * the same number of characters is removed from both ends.
		   */
		  const int n_braces = (yytext[1] == '{') ? 2 : 1;
		  char *pragma_text = yytext + n_braces;
		  yytext[strlen(yytext) - n_braces] = '\0';   /* chop the closing brace(s) */
		  yylval.ID = strdup(pragma_text);            /* skip the opening brace(s) */
		  return pragma_token;
		}
<vardecl_list_state>{pragma}/(VAR) {
		  /* Same as the rule above, but in vardecl_list_state a pragma is only
		   * recognised when it is immediately followed by VAR (trailing context).
		   * The enclosing '{' ... '}' or '{{' ... '}}' are removed.
		   */
		  const int n_braces = (yytext[1] == '{') ? 2 : 1;
		  char *pragma_text = yytext + n_braces;
		  yytext[strlen(yytext) - n_braces] = '\0';   /* chop the closing brace(s) */
		  yylval.ID = strdup(pragma_text);            /* skip the opening brace(s) */
		  return pragma_token;
		}
/*********************************/
/* Handle the file includes! */
/*********************************/
<include_beg>{file_include_pragma_beg} BEGIN(include_filename); /* the beginning of the include pragma has been consumed; the file name is expected next */
<include_filename>{file_include_pragma_filename} {
/* set the internal state variables of lexical analyser to process a new include file */
include_file(yytext);
/* switch to whatever state was active before the include file */
/* (this pops the state that was pushed when the include pragma was first recognised) */
yy_pop_state();
/* now process the new file... */
}
<<EOF>> {
		  /* End of the current input buffer: either an included file or the main file.
		   *
		   * NOTE: bison currently (and incorrectly) uses END_OF_INPUT in many of the rules
		   *       that check for syntax errors. As a consequence flex may be asked for more
		   *       tokens even after it has reached the end of all input files (including
		   *       the main one) and has already returned END_OF_INPUT. When that happens
		   *       we must simply keep on returning END_OF_INPUT.
		   *
		   *       Those extra requests read from the main input file after its end has
		   *       been reached, so the main input file must remain open. This is why this
		   *       rule WILL run several times with include_stack_ptr == 0, and why that
		   *       case leaves include_stack_ptr untouched and just returns END_OF_INPUT again.
		   *
		   *       As a corollary, flex can never safely close the main input file;
		   *       bison must be asked to close it!
		   */
		  if (include_stack_ptr != 0) {
		    /* We have reached the end of an included file:
		     * release it and carry on with the file that included it.
		     */
		    fclose(yyin);
		    FreeTracking(current_tracking);
		    include_stack_ptr--;
		    yy_delete_buffer(YY_CURRENT_BUFFER);
		    yy_switch_to_buffer(include_stack[include_stack_ptr].buffer_state);
		    current_tracking = include_stack[include_stack_ptr].env;
		    /* NOTE: the previous current_filename is deliberately NOT free()'d here.
		     *       Many objects of the abstract syntax tree keep pointers to that
		     *       malloc()'d file name, which is later used to print correct error
		     *       messages during semantic analysis (stage 3).
		     *       (Dropping the constness of the char * in order to free it would be
		     *       safe, as the only real const char * on the stack is the one stored
		     *       in include_stack[0], which is never free'd.)
		     */
		    current_filename = include_stack[include_stack_ptr].filename;
		    yy_push_state(include_end);
		  } else {
		    /* End of the main input file.
		     * We must NOT call fclose(yyin) nor FreeTracking(current_tracking) here!
		     *
		     * yyterminate() terminates the scanner and returns 0 to the scanner's
		     * caller, meaning "all done". Our bison parser has the END_OF_INPUT token
		     * associated to the value 0, so calling yyterminate() is equivalent to
		     * explicitly returning END_OF_INPUT.
		     */
		    yyterminate();
		  }
		}
<include_end>{file_include_pragma_end} yy_pop_state(); /* consume the end of the include pragma and leave the include_end state */
	/* handle the artificial file includes created by include_string(), which do not end with a '}' */
	/* Any other single character is handed back via unput_text(0) before leaving the include_end state. */
<include_end>. unput_text(0); yy_pop_state();
/*********************************/
/* Handle all the state changes! */
/*********************************/
/* INITIAL -> header_state */
<INITIAL>{
	/* A POU keyword found at the top level. The token is always returned to bison.
	 * When get_preparse_state() is true we go to get_pou_name_state, which extracts the
	 * name of the POU and then skips the rest of it. Otherwise we start parsing the POU
	 * for real (header_state, or config_state for a CONFIGURATION).
	 */
FUNCTION{st_whitespace} if (get_preparse_state()) BEGIN(get_pou_name_state); else {BEGIN(header_state);/* printf("\nChanging to header_state\n"); */} return FUNCTION;
FUNCTION_BLOCK{st_whitespace} if (get_preparse_state()) BEGIN(get_pou_name_state); else {BEGIN(header_state);/* printf("\nChanging to header_state\n"); */} return FUNCTION_BLOCK;
PROGRAM{st_whitespace} if (get_preparse_state()) BEGIN(get_pou_name_state); else {BEGIN(header_state);/* printf("\nChanging to header_state\n"); */} return PROGRAM;
CONFIGURATION{st_whitespace} if (get_preparse_state()) BEGIN(get_pou_name_state); else {BEGIN(config_state);/* printf("\nChanging to config_state\n"); */} return CONFIGURATION;
}
<get_pou_name_state>{
	/* The identifier following the POU keyword is returned as an identifier_token (with a
	 * strdup()'d copy of its text). If something else is found instead, that character is
	 * handed back with unput_text(0). In both cases we carry on in ignore_pou_state.
	 */
{identifier} BEGIN(ignore_pou_state); yylval.ID=strdup(yytext); return identifier_token;
. BEGIN(ignore_pou_state); unput_text(0);
}
<ignore_pou_state>{
	/* Skip everything up to the END_xxx keyword. The keyword itself is handed back with
	 * unput_text(0) so that it gets scanned again in the INITIAL state.
	 */
END_FUNCTION unput_text(0); BEGIN(INITIAL);
END_FUNCTION_BLOCK unput_text(0); BEGIN(INITIAL);
END_PROGRAM unput_text(0); BEGIN(INITIAL);
END_CONFIGURATION unput_text(0); BEGIN(INITIAL);
.|\n {}/* Ignore text inside POU! (including the '\n' character!)) */
}
/* header_state -> (vardecl_list_state) */
/* NOTE: This transition assumes that all POUs with code (Function, FB, and Program) will always contain
* at least one VAR_XXX block.
* How about functions that do not declare variables, and go directly to the body_state???
* - According to Section 2.5.1.3 (Function Declaration), item 2 in the list, a FUNCTION
* must have at least one input argument, so a correct declaration will have at least
* one VAR_INPUT ... VAR_END construct!
* - According to Section 2.5.2.2 (Function Block Declaration), a FUNCTION_BLOCK
* must have at least one input argument, so a correct declaration will have at least
* one VAR_INPUT ... VAR_END construct!
* - According to Section 2.5.3 (Programs), a PROGRAM must have at least one input
* argument, so a correct declaration will have at least one VAR_INPUT ... VAR_END
* construct!
*
* All the above means that we needn't worry about PROGRAMs, FUNCTIONs or
* FUNCTION_BLOCKs that do not have at least one VAR_END before the body_state.
* If the code has an error, and no VAR_END before the body, we will simply
* continue in the <vardecl_state> state, until the end of the FUNCTION, FUNCTION_BLOCK
 * or PROGRAM.
*
* WARNING: From 2016-05 (May 2016) onwards, matiec supports a non-standard option in which a Function
* may be declared with no Input, Output or IN_OUT variables. This means that the above
* assumption is no longer valid.
*
* NOTE: Some code being parsed may be erroneous and not contain any VAR END_VAR block.
* To generate error messages that make sense, the flex state machine should not get lost
* in these situations. We therefore consider the possibility of finding
* END_FUNCTION, END_FUNCTION_BLOCK or END_PROGRAM when inside the header_state.
*/
<header_state>{
	/* The first VAR_xxx keyword ends the header: the keyword is handed back with
	 * unput_text(0) and is scanned again in vardecl_list_state.
	 */
VAR | /* execute the next rule's action, i.e. fall-through! */
VAR_INPUT |
VAR_OUTPUT |
VAR_IN_OUT |
VAR_EXTERNAL |
VAR_GLOBAL |
VAR_TEMP |
VAR_CONFIG |
VAR_ACCESS unput_text(0); BEGIN(vardecl_list_state);
	/* A POU with no VAR block at all: the END_xxx keyword is treated the same way. */
END_FUNCTION | /* execute the next rule's action, i.e. fall-through! */
END_FUNCTION_BLOCK |
END_PROGRAM unput_text(0); BEGIN(vardecl_list_state);
	/* Notice that we do NOT go directly to body_state, as that requires a push().
	 * If we were to push to body_state here, then the corresponding pop() at the
	 * end of body_state would return to header_state.
	 * After this pop() header_state would not return to INITIAL as it should, but
	 * would instead enter an infinite loop push()ing again to body_state
	 */
}
/* vardecl_list_state -> (vardecl_state | body_state | INITIAL) */
<vardecl_list_state>{
	/* NOTE: vardecl_list_state is an exclusive state, i.e. when in this state
	 *       default rules do not apply! This means that when in this state identifiers
	 *       are not recognised!
	 * NOTE: Notice that we only change to vardecl_state if the VAR*** is followed by
	 *       at least one whitespace. This is to distinguish the VAR declaration
	 *       from identifiers starting with 'var' (e.g. a variable named 'varint')
	 * NOTE: Notice that we cannot use st_whitespace here, as it can legally be empty.
	 *       We therefore use st_whitespace_char instead.
	 */
VAR_INPUT{st_whitespace_char} | /* execute the next rule's action, i.e. fall-through! */
VAR_OUTPUT{st_whitespace_char} |
VAR_IN_OUT{st_whitespace_char} |
VAR_EXTERNAL{st_whitespace_char} |
VAR_GLOBAL{st_whitespace_char} |
VAR_TEMP{st_whitespace_char} |
VAR_CONFIG{st_whitespace_char} |
VAR_ACCESS{st_whitespace_char} |
VAR{st_whitespace_char} unput_text(0); yy_push_state(vardecl_state); //printf("\nChanging to vardecl_state\n");
	/* End of the POU: hand the keyword back and let the INITIAL state rules process it. */
END_FUNCTION{st_whitespace} unput_text(0); BEGIN(INITIAL);
END_FUNCTION_BLOCK{st_whitespace} unput_text(0); BEGIN(INITIAL);
END_PROGRAM{st_whitespace} unput_text(0); BEGIN(INITIAL);
	/* NOTE: Handling of whitespace...
	 *   - Must come __before__ the next rule for any single character '.'
	 *   - If the rules were reversed, any whitespace with a single space (' ')
	 *     would be handled by the '.' rule instead of the {whitespace} rule!
	 */
{st_whitespace} /* Eat any whitespace */
	/* anything else, just change to body_state! */
. unput_text(0); yy_push_state(body_state); //printf("\nChanging to body_state\n");
}
	/* vardecl_state -> pop to $previous_state (vardecl_list_state) */
<vardecl_state>{
END_VAR yy_pop_state(); return END_VAR; /* pop back to vardecl_list_state */
}
/* body_state -> (il_state | st_state | sfc_state) */
<body_state>{
	/* In this state the text of the body is accumulated in the bodystate buffer until we can
	 * tell which language it is written in. The buffer is then handed back to flex
	 * (unput_bodystate_buffer()) and scanned again in the il_state or st_state, or the
	 * buffer is discarded (del_bodystate_buffer()) when the decision is taken right at the start.
	 */
{st_whitespace} {/* In body state we do not process any tokens,
* we simply store them for later processing!
* NOTE: we must return ALL text when in body_state, including
* all comments and whitespace, so as not
* to lose track of the line_number and column number
* used when printing debugging messages.
* NOTE: some of the following rules depend on the fact that
* the body state buffer is either empty or only contains white space up to
* that point. Since the vardecl_list_state will eat up all
* whitespace before entering the body_state, the contents of the bodystate_buffer
* will _never_ start with whitespace if the previous state was vardecl_list_state.
* However, it is possible to enter the body_state from other states (e.g. when
* parsing SFC code, that contains transitions or actions in other languages)
*/
append_bodystate_buffer(yytext, 1 /* is whitespace */);
}
	/* 'INITIAL_STEP' always used in beginning of SFCs !! */
INITIAL_STEP { if (isempty_bodystate_buffer()) {unput_text(0); del_bodystate_buffer(); BEGIN(sfc_state);}
else {append_bodystate_buffer(yytext);}
}
	/* ':=', at the very beginning of a 'body', occurs only in transitions and not Function, FB, or Program bodies! */
:= { if (isempty_bodystate_buffer()) {unput_text(0); del_bodystate_buffer(); BEGIN(st_state);} /* We do _not_ return a start_ST_body_token here, as bison does not expect it! */
else {append_bodystate_buffer(yytext);}
}
	/* check if ';' occurs before an END_FUNCTION, END_FUNCTION_BLOCK, END_PROGRAM, END_ACTION or END_TRANSITION. (If true => we are parsing ST; If false => parsing IL). */
END_ACTION | /* execute the next rule's action, i.e. fall-through! */
END_FUNCTION |
END_FUNCTION_BLOCK |
END_TRANSITION |
END_PROGRAM { append_bodystate_buffer(yytext); unput_bodystate_buffer(); BEGIN(il_state); /*printf("returning start_IL_body_token\n");*/ return start_IL_body_token;}
	/* Any other single character is stored; a ';' found before any END_xxx keyword means the body is ST. */
.|\n { append_bodystate_buffer(yytext);
if (strcmp(yytext, ";") == 0)
{unput_bodystate_buffer(); BEGIN(st_state); /*printf("returning start_ST_body_token\n");*/ return start_ST_body_token;}
}
	/* The following rules are not really necessary. They just make compilation faster in case the ST Statement List starts with one of the following... */
RETURN | /* execute the next rule's action, i.e. fall-through! */
IF |
CASE |
FOR |
WHILE |
EXIT |
REPEAT { if (isempty_bodystate_buffer()) {unput_text(0); del_bodystate_buffer(); BEGIN(st_state); return start_ST_body_token;}
else {append_bodystate_buffer(yytext);}
}
} /* end of body_state lexical parser */
/* (il_state | st_state) -> pop to $previous_state (vardecl_list_state or sfc_state) */
<il_state,st_state>{
	/* The END_xxx keyword ends the body: leave the il_state/st_state and hand the keyword
	 * back with unput_text(0), so it is scanned again by the rules of the state we pop back to.
	 */
END_FUNCTION yy_pop_state(); unput_text(0);
END_FUNCTION_BLOCK yy_pop_state(); unput_text(0);
END_PROGRAM yy_pop_state(); unput_text(0);
END_TRANSITION yy_pop_state(); unput_text(0);
END_ACTION yy_pop_state(); unput_text(0);
}
	/* sfc_state -> pop to $previous_state (vardecl_list_state or sfc_state) */
<sfc_state>{
	/* Same handling as in the il_state/st_state. Note that END_TRANSITION and END_ACTION are not handled here. */
END_FUNCTION yy_pop_state(); unput_text(0);
END_FUNCTION_BLOCK yy_pop_state(); unput_text(0);
END_PROGRAM yy_pop_state(); unput_text(0);
}
/* config -> INITIAL */
END_CONFIGURATION BEGIN(INITIAL); return END_CONFIGURATION;
	/*************************************/
	/* Next is to remove all whitespace  */
	/*************************************/
	/* NOTE: pragmas are handled right at the beginning... */
	/* The whitespace */
<INITIAL,header_state,config_state,vardecl_state,st_state,sfc_state,task_init_state,sfc_qualifier_state>{st_whitespace} /* Eat any whitespace */
<il_state>{il_whitespace} /* Eat any whitespace */
	/* NOTE: Due to the need of having the following rule have higher priority,
	 *       the following rule was moved to an earlier position in this file.
	<body_state>{st_whitespace} {...}
	 */
	/* The comments */
	/* The states listed explicitly in the first rule are given their own rule because the
	 * second (unqualified) rule is not active in exclusive states.
	 */
<get_pou_name_state,ignore_pou_state,body_state,vardecl_list_state>{comment_beg} yy_push_state(comment_state);
{comment_beg} yy_push_state(comment_state);
<comment_state>{
	/* A nested comment is only tracked (pushed) when get_opt_nested_comments() is true;
	 * otherwise the nested {comment_beg} is simply ignored as text inside the comment.
	 */
{comment_beg} {if (get_opt_nested_comments()) yy_push_state(comment_state);}
{comment_end} yy_pop_state();
. /* Ignore text inside comment! */
\n /* Ignore text inside comment! */
}
/*****************************************/
/* B.1.1 Letters, digits and identifiers */
/*****************************************/
/* NOTE: 'R1', 'IN', etc... are IL operators, and therefore tokens
* On the other hand, the spec does not define them as keywords,
* which means they may be re-used for variable names, etc...!
* The syntax parser already caters for the possibility of these
* tokens being used for variable names in their declarations.
* When they are declared, they will be added to the variable symbol table!
* Further appearances of these tokens must no longer be parsed
* as R1_tokens etc..., but rather as variable_name_tokens!
*
* That is why the first thing we do with identifiers, even before
* checking whether they may be a 'keyword', is to check whether
* they have been previously declared as a variable name,
*
 * However, we have a dilemma! Should we here also check for
* prev_declared_derived_function_name_token?
* If we do, then the 'MOD' default library function (defined in
* the standard) will always be returned as a function name, and
* it will therefore not be possible to use it as an operator as
* in the following ST expression 'X := Y MOD Z;' !
* If we don't, then even it will not be possible to use 'MOD'
 * as a function as in 'X := MOD(Y, Z);'
* We solve this by NOT testing for function names here, and
* handling this function and keyword clash in bison!
*/
/* NOTE: The following code has been commented out as most users do not want matiec
* to allow the use of 'R1', 'IN' ... IL operators as identifiers,
* even though a literal reading of the standard allows this.
 * We could add this as a command line option, but it is not yet done.
* For now we just comment out the code, but leave it the commented code
* in so we can re-activate quickly (without having to go through old commits
* in the mercurial repository to figure out the missing code!
*/
/*
{identifier} {int token = get_identifier_token(yytext);
// fprintf(stderr, "flex: analysing identifier '%s'...", yytext);
if ((token == prev_declared_variable_name_token) ||
// (token == prev_declared_derived_function_name_token) || // DO NOT add this condition!
(token == prev_declared_fb_name_token)) {
// if (token != identifier_token)
// * NOTE: if we replace the above uncommented conditions with
* the simple test of (token != identifier_token), then
* 'MOD' et al must be removed from the
* library_symbol_table as a default function name!
* //
yylval.ID=strdup(yytext);
// fprintf(stderr, "returning token %d\n", token);
return token;
}
// otherwise, leave it for the other lexical parser rules...
// fprintf(stderr, "rejecting\n");
REJECT;
}
*/
/******************************************************/
/******************************************************/
/******************************************************/
/***** *****/
/***** *****/
/***** N O W D O T H E K E Y W O R D S *****/
/***** *****/
/***** *****/
/******************************************************/
/******************************************************/
/******************************************************/
REF {if (get_opt_ref_standard_extensions()) return REF; else{REJECT;}} /* Keyword in IEC 61131-3 v3 */
	/* REF, DREF, REF_TO and NULL are only returned as keywords when
	 * get_opt_ref_standard_extensions() is true. Otherwise REJECT makes flex
	 * fall back to the next best rule matching the input.
	 */
DREF {if (get_opt_ref_standard_extensions()) return DREF; else{REJECT;}} /* Keyword in IEC 61131-3 v3 */
REF_TO {if (get_opt_ref_standard_extensions()) return REF_TO; else{REJECT;}} /* Keyword in IEC 61131-3 v3 */
NULL {if (get_opt_ref_standard_extensions()) return NULL_token; else{REJECT;}} /* Keyword in IEC 61131-3 v3 */
EN return EN; /* Keyword */
ENO return ENO; /* Keyword */
	/******************************/
	/* B 1.2.1 - Numeric Literals */
	/******************************/
	/* The typed boolean literals (BOOL#..., SAFEBOOL#...) are returned as a single token.
	 * The SAFEBOOL# variants are only recognised when get_opt_safe_extensions() is true.
	 */
TRUE return TRUE; /* Keyword */
BOOL#1 return boolean_true_literal_token;
BOOL#TRUE return boolean_true_literal_token;
SAFEBOOL#1 {if (get_opt_safe_extensions()) {return safeboolean_true_literal_token;} else{REJECT;}} /* Keyword (Data Type) */
SAFEBOOL#TRUE {if (get_opt_safe_extensions()) {return safeboolean_true_literal_token;} else{REJECT;}} /* Keyword (Data Type) */
FALSE return FALSE; /* Keyword */
BOOL#0 return boolean_false_literal_token;
BOOL#FALSE return boolean_false_literal_token;
SAFEBOOL#0 {if (get_opt_safe_extensions()) {return safeboolean_false_literal_token;} else{REJECT;}} /* Keyword (Data Type) */
SAFEBOOL#FALSE {if (get_opt_safe_extensions()) {return safeboolean_false_literal_token;} else{REJECT;}} /* Keyword (Data Type) */
/************************/
/* B 1.2.3.1 - Duration */
/************************/
t# return T_SHARP; /* Delimiter */
T# return T_SHARP; /* Delimiter */
TIME return TIME; /* Keyword (Data Type) */
	/************************************/
	/* B 1.2.3.2 - Time of day and Date */
	/************************************/
	/* NOTE: the short forms TOD and DT are returned to bison as the very same tokens
	 *       as their long forms TIME_OF_DAY and DATE_AND_TIME.
	 */
TIME_OF_DAY return TIME_OF_DAY; /* Keyword (Data Type) */
TOD return TIME_OF_DAY; /* Keyword (Data Type) */
DATE return DATE; /* Keyword (Data Type) */
d# return D_SHARP; /* Delimiter */
D# return D_SHARP; /* Delimiter */
DATE_AND_TIME return DATE_AND_TIME; /* Keyword (Data Type) */
DT return DATE_AND_TIME; /* Keyword (Data Type) */
	/***********************************/
	/* B 1.3.1 - Elementary Data Types */
	/***********************************/
BOOL return BOOL; /* Keyword (Data Type) */
BYTE return BYTE; /* Keyword (Data Type) */
WORD return WORD; /* Keyword (Data Type) */
DWORD return DWORD; /* Keyword (Data Type) */
LWORD return LWORD; /* Keyword (Data Type) */
SINT return SINT; /* Keyword (Data Type) */
INT return INT; /* Keyword (Data Type) */
DINT return DINT; /* Keyword (Data Type) */
LINT return LINT; /* Keyword (Data Type) */
USINT return USINT; /* Keyword (Data Type) */
UINT return UINT; /* Keyword (Data Type) */
UDINT return UDINT; /* Keyword (Data Type) */
ULINT return ULINT; /* Keyword (Data Type) */
REAL return REAL; /* Keyword (Data Type) */
LREAL return LREAL; /* Keyword (Data Type) */
WSTRING return WSTRING; /* Keyword (Data Type) */
STRING return STRING; /* Keyword (Data Type) */
	/* NOTE(review): the six rules below repeat patterns that already have a rule further up
	 *       (TIME, TIME_OF_DAY, TOD, DATE, DATE_AND_TIME and DT). When two rules match
	 *       the same text flex uses the rule listed first, so the rules below never fire.
	 *       In particular, DT and TOD reach bison as DATE_AND_TIME and TIME_OF_DAY,
	 *       and NOT as the DT and TOD tokens returned here.
	 */
TIME return TIME; /* Keyword (Data Type) */
DATE return DATE; /* Keyword (Data Type) */
DT return DT; /* Keyword (Data Type) */
TOD return TOD; /* Keyword (Data Type) */
DATE_AND_TIME return DATE_AND_TIME; /* Keyword (Data Type) */
TIME_OF_DAY return TIME_OF_DAY; /* Keyword (Data Type) */
	/* A non-standard extension! */
	/* VOID is only a keyword when runtime_options.allow_void_datatype is set; otherwise the rule is REJECTed. */
VOID {if (runtime_options.allow_void_datatype) {return VOID;} else {REJECT;}}
/*****************************************************************/
/* Keywords defined in "Safety Software Technical Specification" */
/*****************************************************************/
/*
 * NOTE: The following keywords are defined in
* "Safety Software Technical Specification,
* Part 1: Concepts and Function Blocks,
* Version 1.0 – Official Release"
* written by PLCopen - Technical Committee 5
*
* We only support these extensions and keywords
 * if the appropriate command line option is given.
*/
SAFEBOOL {if (get_opt_safe_extensions()) {return SAFEBOOL;} else {REJECT;}}
	/* Every SAFExxx keyword is only recognised when get_opt_safe_extensions() is true.
	 * Otherwise REJECT makes flex fall back to the next best rule matching the input.
	 * Each keyword must return the token with the very same name.
	 */
SAFEBYTE {if (get_opt_safe_extensions()) {return SAFEBYTE;} else {REJECT;}}
SAFEWORD {if (get_opt_safe_extensions()) {return SAFEWORD;} else {REJECT;}}
SAFEDWORD {if (get_opt_safe_extensions()) {return SAFEDWORD;} else{REJECT;}}
SAFELWORD {if (get_opt_safe_extensions()) {return SAFELWORD;} else{REJECT;}}
SAFEREAL {if (get_opt_safe_extensions()) {return SAFEREAL;} else{REJECT;}}
SAFELREAL {if (get_opt_safe_extensions()) {return SAFELREAL;} else{REJECT;}}
SAFESINT {if (get_opt_safe_extensions()) {return SAFESINT;} else{REJECT;}}
SAFEINT {if (get_opt_safe_extensions()) {return SAFEINT;} else{REJECT;}}
SAFEDINT {if (get_opt_safe_extensions()) {return SAFEDINT;} else{REJECT;}}
SAFELINT {if (get_opt_safe_extensions()) {return SAFELINT;} else{REJECT;}}
SAFEUSINT {if (get_opt_safe_extensions()) {return SAFEUSINT;} else{REJECT;}}
SAFEUINT {if (get_opt_safe_extensions()) {return SAFEUINT;} else{REJECT;}}
SAFEUDINT {if (get_opt_safe_extensions()) {return SAFEUDINT;} else{REJECT;}}
SAFEULINT {if (get_opt_safe_extensions()) {return SAFEULINT;} else{REJECT;}}
	/* SAFESTRING and SAFEWSTRING are not yet supported, i.e. checked correctly, in the semantic analyser (stage 3) */
	/*  so it is best not to support them at all... */
	/*
	SAFEWSTRING {if (get_opt_safe_extensions()) {return SAFEWSTRING;} else{REJECT;}}
	SAFESTRING {if (get_opt_safe_extensions()) {return SAFESTRING;} else{REJECT;}}
	*/
SAFETIME {if (get_opt_safe_extensions()) {return SAFETIME;} else{REJECT;}}
SAFEDATE {if (get_opt_safe_extensions()) {return SAFEDATE;} else{REJECT;}}
SAFEDT {if (get_opt_safe_extensions()) {return SAFEDT;} else{REJECT;}}
SAFETOD {if (get_opt_safe_extensions()) {return SAFETOD;} else{REJECT;}}
SAFEDATE_AND_TIME {if (get_opt_safe_extensions()) {return SAFEDATE_AND_TIME;} else{REJECT;}}
SAFETIME_OF_DAY {if (get_opt_safe_extensions()) {return SAFETIME_OF_DAY;} else{REJECT;}}
/********************************/
/* B 1.3.2 - Generic data types */
/********************************/
/* Strangely, the following symbols do not seem to be required! */
/* But we include them so they become reserved words, and do not
* get passed up to bison as an identifier...
*/
ANY return ANY; /* Keyword (Data Type) */
ANY_DERIVED return ANY_DERIVED; /* Keyword (Data Type) */
ANY_ELEMENTARY return ANY_ELEMENTARY; /* Keyword (Data Type) */
ANY_MAGNITUDE return ANY_MAGNITUDE; /* Keyword (Data Type) */
ANY_NUM return ANY_NUM; /* Keyword (Data Type) */
ANY_REAL return ANY_REAL; /* Keyword (Data Type) */
ANY_INT return ANY_INT; /* Keyword (Data Type) */
ANY_BIT return ANY_BIT; /* Keyword (Data Type) */
ANY_STRING return ANY_STRING; /* Keyword (Data Type) */
ANY_DATE return ANY_DATE; /* Keyword (Data Type) */
	/********************************/
	/* B 1.3.3 - Derived data types */
	/********************************/
	/* The two delimiters are written as quoted strings so that ':', '=' and '.' are matched literally. */
":=" return ASSIGN; /* Delimiter */
".." return DOTDOT; /* Delimiter */
TYPE return TYPE; /* Keyword */
END_TYPE return END_TYPE; /* Keyword */
ARRAY return ARRAY; /* Keyword */
OF return OF; /* Keyword */
STRUCT return STRUCT; /* Keyword */
END_STRUCT return END_STRUCT; /* Keyword */
/*********************/
/* B 1.4 - Variables */
/*********************/
/******************************************/
/* B 1.4.3 - Declaration & Initialisation */
/******************************************/
VAR_INPUT return VAR_INPUT; /* Keyword */
VAR_OUTPUT return VAR_OUTPUT; /* Keyword */
VAR_IN_OUT return VAR_IN_OUT; /* Keyword */
VAR_EXTERNAL return VAR_EXTERNAL; /* Keyword */
VAR_GLOBAL return VAR_GLOBAL; /* Keyword */
END_VAR return END_VAR; /* Keyword */
RETAIN return RETAIN; /* Keyword */
NON_RETAIN return NON_RETAIN; /* Keyword */
R_EDGE return R_EDGE; /* Keyword */
F_EDGE return F_EDGE; /* Keyword */
AT return AT; /* Keyword */
	/***********************/
	/* B 1.5.1 - Functions */
	/***********************/
	/* Note: The following END_FUNCTION rule includes a BEGIN(INITIAL); command.
	 *       This is necessary in case the input program being parsed has syntax errors that force
	 *       flex's main state machine to never change to the il_state or the st_state
	 *       after changing to the body_state.
	 *       This BEGIN(INITIAL) command forces the flex state machine to re-synchronise with
	 *       the input stream even in the presence of buggy code!
	 */
FUNCTION return FUNCTION; /* Keyword */
END_FUNCTION BEGIN(INITIAL); return END_FUNCTION; /* Keyword */ /* see Note above */
VAR return VAR; /* Keyword */
CONSTANT return CONSTANT; /* Keyword */
	/*****************************/
	/* B 1.5.2 - Function Blocks */
	/*****************************/
	/* Note: The following END_FUNCTION_BLOCK rule includes a BEGIN(INITIAL); command.
	 *       This is necessary in case the input program being parsed has syntax errors that force
	 *       flex's main state machine to never change to the il_state or the st_state
	 *       after changing to the body_state.
	 *       This BEGIN(INITIAL) command forces the flex state machine to re-synchronise with
	 *       the input stream even in the presence of buggy code!
	 */
FUNCTION_BLOCK return FUNCTION_BLOCK; /* Keyword */
END_FUNCTION_BLOCK BEGIN(INITIAL); return END_FUNCTION_BLOCK; /* Keyword */ /* see Note above */
VAR_TEMP return VAR_TEMP; /* Keyword */
	/* NOTE: VAR, NON_RETAIN and END_VAR are listed again here to follow the structure of the
	 *       standard. They repeat rules that appear earlier in this block, and as flex uses
	 *       the first of two rules matching the same text, the three rules below never fire.
	 */
VAR return VAR; /* Keyword */
NON_RETAIN return NON_RETAIN; /* Keyword */
END_VAR return END_VAR; /* Keyword */
	/**********************/
	/* B 1.5.3 - Programs */
	/**********************/
	/* Note: The following END_PROGRAM rule includes a BEGIN(INITIAL); command.
	 *       This is necessary in case the input program being parsed has syntax errors that force
	 *       flex's main state machine to never change to the il_state or the st_state
	 *       after changing to the body_state.
	 *       This BEGIN(INITIAL) command forces the flex state machine to re-synchronise with
	 *       the input stream even in the presence of buggy code!
	 */
PROGRAM return PROGRAM; /* Keyword */
END_PROGRAM BEGIN(INITIAL); return END_PROGRAM; /* Keyword */ /* see Note above */
/********************************************/
/* B 1.6 Sequential Function Chart elements */
/********************************************/
/* NOTE: the following identifiers/tokens clash with the R and S IL operators, as well
 * as other identifiers that may be used as variable names inside IL and ST programs.
* They will have to be handled when we include parsing of SFC... For now, simply
* ignore them!
*/
ACTION return ACTION; /* Keyword */
END_ACTION return END_ACTION; /* Keyword */
TRANSITION return TRANSITION; /* Keyword */
END_TRANSITION return END_TRANSITION; /* Keyword */
FROM return FROM; /* Keyword */
TO return TO; /* Keyword */
INITIAL_STEP return INITIAL_STEP; /* Keyword */
STEP return STEP; /* Keyword */
END_STEP return END_STEP; /* Keyword */
/* PRIORITY is not a keyword, so we only return it when
* it is explicitly required and we are not expecting any identifiers
* that could also use the same letter sequence (i.e. an identifier: piority)
*/
<sfc_priority_state>PRIORITY return PRIORITY;
<sfc_qualifier_state>{
L return L;
D return D;
SD return SD;
DS return DS;
SL return SL;
N return N;
P return P;
P0 return P0;
P1 return P1;
R return R;
S return S;
}
/********************************/
/* B 1.7 Configuration elements */
/********************************/
/* Note: The following END_CONFIGURATION rule will never get to be used, as we have
* another identical rule above (closer to the rules handling the transitions
* of the main state machine) that will always execute before this one.
* Note: The following END_CONFIGURATION rule includes a BEGIN(INITIAL); command.
* This is not strictly necessary, but I place it here so it follows the same
* pattern used in END_FUNCTION, END_PROGRAM, and END_FUNCTION_BLOCK
*/
CONFIGURATION return CONFIGURATION; /* Keyword */
END_CONFIGURATION BEGIN(INITIAL); return END_CONFIGURATION; /* Keyword */ /* see 2 Notes above! */
TASK return TASK; /* Keyword */
RESOURCE return RESOURCE; /* Keyword */
ON return ON; /* Keyword */
END_RESOURCE return END_RESOURCE; /* Keyword */
VAR_CONFIG return VAR_CONFIG; /* Keyword */
VAR_ACCESS return VAR_ACCESS; /* Keyword */
END_VAR return END_VAR; /* Keyword */
WITH return WITH; /* Keyword */
PROGRAM return PROGRAM; /* Keyword */
RETAIN return RETAIN; /* Keyword */
NON_RETAIN return NON_RETAIN; /* Keyword */
READ_WRITE return READ_WRITE; /* Keyword */
READ_ONLY return READ_ONLY; /* Keyword */
/* PRIORITY, SINGLE and INTERVAL are not a keywords, so we only return them when
* it is explicitly required and we are not expecting any identifiers
* that could also use the same letter sequence (i.e. an identifier: piority, ...)
*/
<task_init_state>{
PRIORITY return PRIORITY;
SINGLE return SINGLE;
INTERVAL return INTERVAL;
}
/***********************************/
/* B 2.1 Instructions and Operands */
/***********************************/
<il_state>\n return EOL;
/*******************/
/* B 2.2 Operators */
/*******************/
/* NOTE: we can't have flex return the same token for
* ANDN and &N, neither for AND and &, since
* AND and ANDN are considered valid variable
* function or functionblock type names!
* This means that the parser may decide that the
* AND or ANDN strings found in the source code
* are being used as variable names
* and not as operators, and will therefore transform
* these tokens into identifier tokens!
* We can't have the parser thinking that the source
* code contained the string AND (which may be interpreted
* as a variable name) when in reality the source code
* merely contained the character &, so we use two
* different tokens for & and AND (and similarly
* ANDN and &N)!
*/
/* The following tokens clash with ST expression operators and Standard Functions */
/* They are also keywords! */
AND return AND; /* Keyword */
MOD return MOD; /* Keyword */
OR return OR; /* Keyword */
XOR return XOR; /* Keyword */
NOT return NOT; /* Keyword */
/* The following tokens clash with Standard Functions */
/* They are keywords because they are a function name */
<il_state>{
ADD return ADD; /* Keyword (Standard Function) */
DIV return DIV; /* Keyword (Standard Function) */
EQ return EQ; /* Keyword (Standard Function) */
GE return GE; /* Keyword (Standard Function) */
GT return GT; /* Keyword (Standard Function) */
LE return LE; /* Keyword (Standard Function) */
LT return LT; /* Keyword (Standard Function) */
MUL return MUL; /* Keyword (Standard Function) */
NE return NE; /* Keyword (Standard Function) */
SUB return SUB; /* Keyword (Standard Function) */
}
/* The following tokens clash with SFC action qualifiers */
/* They are not keywords! */
<il_state>{
S return S;
R return R;
}
/* The following tokens clash with ST expression operators */
& return AND2; /* NOT a Delimiter! */
/* The following tokens have no clashes */
/* They are not keywords! */
<il_state>{
LD return LD;
LDN return LDN;
ST return ST;
STN return STN;
S1 return S1;
R1 return R1;
CLK return CLK;
CU return CU;
CD return CD;
PV return PV;
IN return IN;
PT return PT;
ANDN return ANDN;
&N return ANDN2;
ORN return ORN;
XORN return XORN;
CAL return CAL;
CALC return CALC;
CALCN return CALCN;
RET return RET;
RETC return RETC;
RETCN return RETCN;
JMP return JMP;
JMPC return JMPC;
JMPCN return JMPCN;
}
/***********************/
/* B 3.1 - Expressions */
/***********************/
"**" return OPER_EXP; /* NOT a Delimiter! */
"<>" return OPER_NE; /* NOT a Delimiter! */
">=" return OPER_GE; /* NOT a Delimiter! */
"<=" return OPER_LE; /* NOT a Delimiter! */
& return AND2; /* NOT a Delimiter! */
AND return AND; /* Keyword */
XOR return XOR; /* Keyword */
OR return OR; /* Keyword */
NOT return NOT; /* Keyword */
MOD return MOD; /* Keyword */
/*****************************************/
/* B 3.2.2 Subprogram Control Statements */
/*****************************************/
:= return ASSIGN; /* Delimiter */
=> return SENDTO; /* Delimiter */
RETURN return RETURN; /* Keyword */
/********************************/
/* B 3.2.3 Selection Statements */
/********************************/
IF return IF; /* Keyword */
THEN return THEN; /* Keyword */
ELSIF return ELSIF; /* Keyword */
ELSE return ELSE; /* Keyword */
END_IF return END_IF; /* Keyword */
CASE return CASE; /* Keyword */
OF return OF; /* Keyword */
ELSE return ELSE; /* Keyword */
END_CASE return END_CASE; /* Keyword */
/********************************/
/* B 3.2.4 Iteration Statements */
/********************************/
FOR return FOR; /* Keyword */
TO return TO; /* Keyword */
BY return BY; /* Keyword */
DO return DO; /* Keyword */
END_FOR return END_FOR; /* Keyword */
WHILE return WHILE; /* Keyword */
DO return DO; /* Keyword */
END_WHILE return END_WHILE; /* Keyword */
REPEAT return REPEAT; /* Keyword */
UNTIL return UNTIL; /* Keyword */
END_REPEAT return END_REPEAT; /* Keyword */
EXIT return EXIT; /* Keyword */
/********************************************************/
/********************************************************/
/********************************************************/
/***** *****/
/***** *****/
/***** N O W W O R K W I T H V A L U E S *****/
/***** *****/
/***** *****/
/********************************************************/
/********************************************************/
/********************************************************/
/********************************************/
/* B.1.4.1 Directly Represented Variables */
/********************************************/
{direct_variable} {yylval.ID=strdup(yytext); return get_direct_variable_token(yytext);}
/******************************************/
/* B 1.4.3 - Declaration & Initialisation */
/******************************************/
{incompl_location} {yylval.ID=strdup(yytext); return incompl_location_token;}
/************************/
/* B 1.2.3.1 - Duration */
/************************/
{fixed_point} {yylval.ID=strdup(yytext); return fixed_point_token;}
{interval} {/*fprintf(stderr, "entering time_literal_state ##%s##\n", yytext);*/ unput_and_mark('#'); yy_push_state(time_literal_state);}
{erroneous_interval} {return erroneous_interval_token;}
<time_literal_state>{
{integer}d {yylval.ID=strdup(yytext); yylval.ID[yyleng-1] = '\0'; return integer_d_token;}
{integer}h {yylval.ID=strdup(yytext); yylval.ID[yyleng-1] = '\0'; return integer_h_token;}
{integer}m {yylval.ID=strdup(yytext); yylval.ID[yyleng-1] = '\0'; return integer_m_token;}
{integer}s {yylval.ID=strdup(yytext); yylval.ID[yyleng-1] = '\0'; return integer_s_token;}
{integer}ms {yylval.ID=strdup(yytext); yylval.ID[yyleng-2] = '\0'; return integer_ms_token;}
{fixed_point}d {yylval.ID=strdup(yytext); yylval.ID[yyleng-1] = '\0'; return fixed_point_d_token;}
{fixed_point}h {yylval.ID=strdup(yytext); yylval.ID[yyleng-1] = '\0'; return fixed_point_h_token;}
{fixed_point}m {yylval.ID=strdup(yytext); yylval.ID[yyleng-1] = '\0'; return fixed_point_m_token;}
{fixed_point}s {yylval.ID=strdup(yytext); yylval.ID[yyleng-1] = '\0'; return fixed_point_s_token;}
{fixed_point}ms {yylval.ID=strdup(yytext); yylval.ID[yyleng-2] = '\0'; return fixed_point_ms_token;}
_ /* do nothing - eat it up!*/
\# {/*fprintf(stderr, "popping from time_literal_state (###)\n");*/ yy_pop_state(); return end_interval_token;}
. {/*fprintf(stderr, "time_literal_state: found invalid character '%s'. Aborting!\n", yytext);*/ ERROR;}
\n {ERROR;}
}
/*******************************/
/* B.1.2.2 Character Strings */
/*******************************/
{double_byte_character_string} {yylval.ID=strdup(yytext); return double_byte_character_string_token;}
{single_byte_character_string} {yylval.ID=strdup(yytext); return single_byte_character_string_token;}
/******************************/
/* B.1.2.1 Numeric literals */
/******************************/
{integer} {yylval.ID=strdup(yytext); return integer_token;}
{real} {yylval.ID=strdup(yytext); return real_token;}
{binary_integer} {yylval.ID=strdup(yytext); return binary_integer_token;}
{octal_integer} {yylval.ID=strdup(yytext); return octal_integer_token;}
{hex_integer} {yylval.ID=strdup(yytext); return hex_integer_token;}
/*****************************************/
/* B.1.1 Letters, digits and identifiers */
/*****************************************/
<st_state>{identifier}/({st_whitespace_or_pragma_or_comment})"=>" {yylval.ID=strdup(yytext); return sendto_identifier_token;}
<il_state>{identifier}/({il_whitespace_or_pragma_or_comment})"=>" {yylval.ID=strdup(yytext); return sendto_identifier_token;}
{identifier} {yylval.ID=strdup(yytext);
// printf("returning identifier...: %s, %d\n", yytext, get_identifier_token(yytext));
return get_identifier_token(yytext);}
/************************************************/
/************************************************/
/************************************************/
/***** *****/
/***** *****/
/***** T H E L E F T O V E R S . . . *****/
/***** *****/
/***** *****/
/************************************************/
/************************************************/
/************************************************/
/* do the single character tokens...
*
* e.g.: ':' '(' ')' '+' '*' ...
*/
. {return yytext[0];}
%%
/*************************/
/* Tracking Functions... */
/*************************/
#define MAX_LINE_LENGTH 1024
/* Allocate a tracking record for the input stream <in_file>.
 *
 * The record starts at line 1 with every other counter/flag zeroed.
 * It is allocated with 'new'; FreeTracking() releases it.
 */
tracking_t *GetNewTracking(FILE* in_file) {
  tracking_t *tracking = new tracking_t;

  tracking->in_file           = in_file;
  tracking->lineNumber        = 1;
  tracking->eof               = 0;
  tracking->currentChar       = 0;
  tracking->lineLength        = 0;
  tracking->currentTokenStart = 0;

  return tracking;
}
/* Release a tracking record (e.g. one allocated by GetNewTracking()). */
void FreeTracking(tracking_t *tracking)
{
  delete tracking;
}
/* Advance the line/column counters of current_tracking over <text>.
 *
 * Every '\n' found in <text> bumps lineNumber and resets currentChar to 1;
 * the length of whatever follows the last '\n' (or of the whole text, when it
 * contains no '\n') is then added to currentChar.
 */
void UpdateTracking(const char *text) {
  const char *tail = text;   /* start of the text after the last newline seen so far */

  for (const char *eol = strchr(tail, '\n'); eol != NULL; eol = strchr(tail, '\n')) {
    current_tracking->lineNumber++;
    current_tracking->currentChar = 1;
    tail = eol + 1;
  }

  current_tracking->currentChar += strlen(tail);
}
/* GetNextChar: read one character from the file being tracked
 * (current_tracking->in_file) and store it in *b.
 *
 * Returns 1 when a character was stored, 0 on EOF (in which case *b is left untouched).
 * NOTE: <maxBuffer> is not consulted; at most one character is ever stored.
 */
int GetNextChar(char *b, int maxBuffer) {
  const int ch = fgetc(current_tracking->in_file);

  if (EOF != ch) {
    *b = (char)ch;
    return 1;
  }
  return 0;
}
/***********************************/
/* Utility function definitions... */
/***********************************/
/* Print the include file stack to stderr, innermost entry first.
 * Nothing is printed when the stack is empty.
 */
void print_include_stack(void) {
  int level = include_stack_ptr - 1;

  if (level >= 0)
    fprintf (stderr, "in file ");

  while (level >= 0) {
    fprintf (stderr, "included from file %s:%d\n", include_stack[level].filename, include_stack[level].env->lineNumber);
    level--;
  }
}
/* Set the internal state variables of the lexical analyser to process a new include file.
 *
 * The current flex buffer, tracking data and file name are pushed onto include_stack,
 * and flex is then switched to a fresh buffer reading from <filehandle>.
 * Exits the program if the include stack is already full (MAX_INCLUDE_DEPTH).
 */
void handle_include_file_(FILE *filehandle, const char *filename) {
  if (MAX_INCLUDE_DEPTH <= include_stack_ptr) {
    fprintf(stderr, "Includes nested too deeply\n");
    exit( 1 );
  }

  yyin = filehandle;

  /* save the state of the file we are currently reading from... */
  include_stack[include_stack_ptr].buffer_state = YY_CURRENT_BUFFER;
  include_stack[include_stack_ptr].env          = current_tracking;
  include_stack[include_stack_ptr].filename     = current_filename;
  include_stack_ptr++;

  /* ...start with a clean state for the new file... */
  current_filename = strdup(filename);
  current_tracking = GetNewTracking(yyin);

  /* ...and switch input buffer to the new file. */
  yy_switch_to_buffer(yy_create_buffer(yyin, YY_BUF_SIZE));
}
/* Insert the code (in <source_code>) into the source code we are parsing.
 * This is done by creating an artificial (temporary) file with that new source code,
 * and then 'including' the file.
 *
 * Exits the program if the temporary file cannot be created, or if the source code
 * cannot be completely written to it (we must never parse a truncated copy).
 */
void include_string_(const char *source_code) {
  FILE *tmp_file = tmpfile();
  if(tmp_file == NULL) {
    perror("Error creating temp file.");
    exit(EXIT_FAILURE);
  }

  /* fwrite() returns the number of items written; anything short of <length> is an error. */
  const size_t length = strlen(source_code);
  if (fwrite(source_code, 1, length, tmp_file) != length) {
    perror("Error writing to temp file.");
    exit(EXIT_FAILURE);
  }
  rewind(tmp_file);

  /* now parse the tmp file, by asking flex to handle it as if it had been included with the (*#include ... *) pragma... */
  handle_include_file_(tmp_file, "");
  /* do NOT fclose(tmp_file) here. It must only be closed when we finish reading from it! */
}
/* Open an include file, and set the internal state variables of the lexical analyser
 * to process it.
 *
 * <filename> is looked up in each directory of the NULL terminated INCLUDE_DIRECTORIES
 * list, in order; the first one that can be opened for reading is used.
 * Exits the program if the file is not found in any directory, or on out of memory.
 */
void include_file(const char *filename) {
  FILE *filehandle = NULL;
  int dir = 0;

  while ((INCLUDE_DIRECTORIES[dir] != NULL) && (filehandle == NULL)) {
    char *candidate = strdup3(INCLUDE_DIRECTORIES[dir], "/", filename);
    if (candidate == NULL) {
      fprintf(stderr, "Out of memory!\n");
      exit( 1 );
    }
    filehandle = fopen(candidate, "r");
    free(candidate);
    dir++;
  }

  if (filehandle == NULL) {
    fprintf(stderr, "Error opening included file %s\n", filename);
    exit( 1 );
  }

  /* now process the new file... */
  handle_include_file_(filehandle, filename);
}
/* Return the character <c> to the input stream.
 *
 * WARNING: this function destroys the contents of yytext
 *
 * NOTE: No line number / column bookkeeping is done here.
 *       This function once contained (commented out) code that decremented
 *       current_tracking->lineNumber / current_tracking->currentChar, so that a
 *       character that is read again from the input stream would not be counted twice.
 *       That is not necessary, as we currently use a different algorithm:
 *        - make a backup/snapshot of the current tracking data (in previous_tracking variable)
 *          (done in YY_USER_ACTION)
 *        - restore the previous tracking state when we unput any text...
 *          (in unput_text() and unput_and_mark() )
 */
void unput_char(const char c) {
  unput(c);  /* flex's unput() destroys the contents of yytext !! */
}
/* return all the text in the current token back to the input stream, except the first n chars. */
void unput_text(int n) {
if (n < 0) ERROR;
signed int i; // must be signed! The iterartion may end with -1 when this function is called with n=0 !!
char *yycopy = strdup( yytext ); /* unput_char() destroys yytext, so we copy it first */
for (int i = yyleng-1; i >= n; i--)
unput_char(yycopy[i]);
*current_tracking = previous_tracking;
yycopy[n] = '\0';
UpdateTracking(yycopy);
free(yycopy);
}
/* Return all the text in the current token back to the input stream,
 * but first return to the stream an additional character (<mark_char>) to mark
 * the end of the token.
 * The tracking data is restored to the snapshot held in previous_tracking.
 */
void unput_and_mark(const char mark_char) {
  /* unput_char() destroys yytext, so work from a private copy */
  char *saved_token = strdup( yytext );

  unput_char(mark_char);

  /* push the token back, last character first */
  int pos = yyleng;
  while (pos > 0) {
    pos--;
    unput_char(saved_token[pos]);
  }

  free(saved_token);
  *current_tracking = previous_tracking;
}
/* The body_state tries to find a ';' before an END_PROGRAM, END_FUNCTION, END_FUNCTION_BLOCK or END_ACTION
 * and ignores ';' inside comments and pragmas. This means that we cannot do this in a single lex rule.
 * Body_state therefore stores ALL text we consume in every rule, so we can push it back into the buffer
 * once we have decided if we are parsing ST or IL code. The following functions manage that buffer used by
 * the body_state.
 */
/* The buffer used by the body_state state (heap allocated; NULL when no text is stored) */
char *bodystate_buffer = NULL;
/* Whitespace flag for the text stored in bodystate_buffer */
bool bodystate_is_whitespace = 1; // TRUE (1) if buffer is empty, or only contains whitespace.
/* Copy of the tracking data taken when text is first appended to an empty (NULL) buffer */
tracking_t bodystate_init_tracking;
/* Append <text> to bodystate_buffer, growing the buffer as needed.
 *
 * <is_whitespace> tells whether <text> consists only of whitespace; appending any
 * non-whitespace text clears the bodystate_is_whitespace flag.
 * When a new buffer is being started (bodystate_buffer is NULL), a backup of the
 * current tracking data is stored in bodystate_init_tracking.
 */
void append_bodystate_buffer(const char *text, int is_whitespace) {
  long int used = 0;   /* length of the text already stored in the buffer */

  if (NULL == bodystate_buffer) {
    /* starting off a new buffer: backup the tracking data and reset the whitespace flag */
    bodystate_init_tracking = *current_tracking;
    bodystate_is_whitespace = 1;
  } else {
    used = strlen(bodystate_buffer);
  }

  /* adding non white space to the buffer => buffer is no longer whitespace only */
  if (!is_whitespace)
    bodystate_is_whitespace = 0;

  bodystate_buffer = (char *)realloc(bodystate_buffer, used + strlen(text) + 1);
  if (NULL == bodystate_buffer) ERROR;

  strcpy(bodystate_buffer + used, text);
}
/* Return all data in bodystate_buffer back to flex, and empty bodystate_buffer.
 *
 * The tracking data is restored to the backup in bodystate_init_tracking.
 * Calling this function while bodystate_buffer is NULL is an error.
 */
void unput_bodystate_buffer(void) {
  if (NULL == bodystate_buffer) ERROR;

  /* push the text back, last character first */
  long int remaining = strlen(bodystate_buffer);
  while (remaining > 0) {
    remaining--;
    unput_char(bodystate_buffer[remaining]);
  }

  free(bodystate_buffer);
  bodystate_buffer        = NULL;
  bodystate_is_whitespace = 1;

  *current_tracking = bodystate_init_tracking;
}
/* Return true (1) if bodystate_buffer is empty or only contains whitespace,
 * and false (0) otherwise.
 */
int isempty_bodystate_buffer(void) {
  return ((NULL == bodystate_buffer) || bodystate_is_whitespace) ? 1 : 0;
}
/* Delete all data in bodystate_buffer, without returning it to flex.
 *
 * Will be used to delete ST whitespace when not needed. If not deleted, this whitespace
 * will be prepended to the next text block of code being appended to bodystate_buffer,
 * which may cause trouble if it is IL code.
 */
void del_bodystate_buffer(void) {
  free(bodystate_buffer);
  bodystate_is_whitespace = 1;   /* an empty buffer counts as whitespace only */
  bodystate_buffer        = NULL;
}
/* Called by flex when it reaches the end-of-file.
 *
 * Return value tells flex what to do next:
 *   0 - continue scanning (a new input file must have been opened first)
 *   1 - stop processing
 *
 * We never continue with another file, so we always return 1.
 */
int yywrap(void)
{
  const int stop_scanning = 1;

  return stop_scanning;
}
/*******************************/
/* Public Interface for Bison. */
/*******************************/
/* The following functions will be called from inside bison code! */
/* Public wrapper: forwards <source_code> unchanged to include_string_(). */
void include_string(const char *source_code) {
  include_string_(source_code);
}
/* Tell flex which file to parse. This function will not immediately start parsing the file.
 * To parse the file, you then need to call yyparse()
 *
 * Returns NULL on error opening the file (errno is left as set by fopen()),
 * or the open file handle on success.
 * On success yyin, current_filename and current_tracking are set up for the new file.
 * Caller must close the file!
 */
FILE *parse_file(const char *filename) {
  FILE *filehandle = fopen(filename, "r");

  if (NULL == filehandle)
    return NULL;

  yyin = filehandle;
  current_filename = strdup(filename);
  current_tracking = GetNewTracking(yyin);

  return filehandle;
}
/*************************************/
/* Include a main() function to test */
/* the token parsing by flex.... */
/*************************************/
#ifdef TEST_MAIN
#include "../util/symtable.hh"
yystype yylval;
YYLTYPE yylloc;
/* Stub used only by the TEST_MAIN build: ignores <identifier_str> and always returns 0. */
int get_identifier_token(const char *identifier_str) {
  (void)identifier_str;
  return 0;
}
/* Stub used only by the TEST_MAIN build: ignores <direct_variable_str> and always returns 0. */
int get_direct_variable_token(const char *direct_variable_str) {
  (void)direct_variable_str;
  return 0;
}
/* Test driver (only built with TEST_MAIN): runs the lexical analyser on its own
 * and prints every token it returns to stderr.
 *
 *   no arguments -> tokens are read from flex's default input
 *   argv[1]      -> tokens are read from the named file
 *
 * Returns 0 when the end of the input is reached, -1 if the file cannot be opened.
 */
int main(int argc, char **argv) {
  FILE *in_file;
  int res;

  if (argc == 1) {
    /* Work as an interactive (command line) parser... */
    while((res=yylex()))
      fprintf(stderr, "(line %d)token: %d\n", yylineno, res);
  } else {
    /* Work as non-interactive (file) parser... */
    if((in_file = fopen(argv[1], "r")) == NULL) {
      char *errmsg = strdup2("Error opening main file ", argv[1]);
      perror(errmsg);
      free(errmsg);
      return -1;
    }

    /* parse the file... */
    yyin = in_file;
    current_filename = argv[1];
    /* yylex() returns 0 at the end of the input: stop there instead of looping forever. */
    while((res=yylex())) {
      /* yylval.ID may not have been set by any rule yet; never hand a NULL pointer to %s */
      fprintf(stderr, "(line %d)token: %d (%s)\n", yylineno, res, (yylval.ID != NULL)? yylval.ID : "(null)");
    }
    fclose(in_file);
  }

  return 0;
}
#endif