stage1_2/iec_flex.ll
author Andrey Skvortsov <andrej.skvortzov@gmail.com>
Sun, 14 Oct 2018 20:14:13 +0300
changeset 1073 24ef30a9bcee
parent 1065 0066fe31a034
child 1074 c46b3d3c9441
permissions -rw-r--r--
revert commits improved performance of some extensible Standard Functions (ADD, MUL, AND, OR, XOR)

Following commits are reverted:
mjsousa 0b275a2 improve performance of some extensible Standard Functions (ADD, MUL, AND, OR, XOR) -- increase hardcoded limit to 499
mjsousa 2228799 improve performance of some extensible Standard Functions (ADD, MUL, AND, OR, XOR) -- Add comments!!
mjsousa ce81fa6 improve performance of some extensible Standard Functions (ADD, MUL, AND, OR, XOR)"

The reason is that they cause regression in some cases (if function is
used as argument for function block, for example) and this is not
fixed for a long time.
/*
 *  matiec - a compiler for the programming languages defined in IEC 61131-3
 *
 *  Copyright (C) 2003-2011  Mario de Sousa (msousa@fe.up.pt)
 *
 *  This program is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation, either version 3 of thest_whitespaceLicense, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 *
 * This code is made available on the understanding that it will not be
 * used in safety-critical situations without a full and competent review.
 */

/*
 * An IEC 61131-3 compiler.
 *
 * Based on the
 * FINAL DRAFT - IEC 61131-3, 2nd Ed. (2001-12-10)
 *
 */

/*
 * Stage 1
 * =======
 *
 * This file contains the lexical tokens definitions, from which
 * the flex utility will generate a lexical parser function.
 */




/*****************************/
/* Lexical Parser Options... */
/*****************************/

/* The lexical analyser will never work in interactive mode,
 * i.e., it will only process programs saved to files, and never
 * programs being written inter-actively by the user.
 * This option saves the resulting parser from calling the
 * isatty() function, that seems to be generating some compile
 * errors under some (older?) versions of flex.
 */
%option never-interactive

/* Have the lexical analyser use a 'char *yytext' instead of an
 * array of char 'char yytext[??]' to store the lexical token.
 */
%pointer


/* Have the lexical analyser ignore the case of letters.
 * This will occur for all the tokens and keywords, but
 * the resulting text handed up to the syntax parser
 * will not be changed, and keep the original case
 * of the letters in the input file.
 */
%option case-insensitive

/* Have the generated lexical analyser keep track of the
 * line number it is currently analysing.
 * This is used to pass up to the syntax parser
 * the number of the line on which the current
 * token was found. It will enable the syntax parser
 * to generate more informatve error messages...
 */
%option yylineno

/* required for the use of the yy_pop_state() and
 * yy_push_state() functions
 */
%option stack

/* The '%option stack' also requests the inclusion of 
 * the yy_top_state(), however this function is not
 * currently being used. This means that the compiler
 * is complaining about the existance of this function.
 * The following option removes the yy_top_state()
 * function from the resulting c code, so the compiler 
 * no longer complains.
 */
%option noyy_top_state

/* We will be using unput() in our flex code, so we cannot set the following option!... */
/*
%option nounput
*/

/* The '%option debug' makes the generated scanner run in
 * debug mode.
%option debug
 */

/**************************************************/
/* External Variable and Function declarations... */
/**************************************************/


%{
/* Define TEST_MAIN to include a main() function.
 * Useful for testing the parser generated by flex.
 */
/*
#define TEST_MAIN
*/
/* If lexical parser is compiled by itself, we need to define the following
 * constant to some string. Under normal circumstances LIBDIRECTORY is set
 * in the syntax parser header file...
 */
#ifdef TEST_MAIN
#define DEFAULT_LIBDIR "just_testing"
#endif



/* Required for strdup() */
#include <string.h>

/* Required only for the declaration of abstract syntax classes
 * (class symbol_c; class token_c; class list_c;)
 * These will not be used in flex, but the token type union defined
 * in iec_bison.hh contains pointers to these classes, so we must include
 * it here.
 */
#include "../absyntax/absyntax.hh"


/* iec_bison.hh is generated by bison.
 * Contains the definition of the token constants, and the
 * token value type YYSTYPE (in our case, a 'const char *')
 */
#include "iec_bison.hh"
#include "stage1_2_priv.hh"


/* Variable defined by the bison parser,
 * where the value of the tokens will be stored
 */
extern YYSTYPE yylval;

/* The name of the file currently being parsed...
 * Note that flex accesses and updates this global variable
 * apropriately whenever it comes across an (*#include <filename> *) directive...
 */
const char *current_filename = NULL;



/* Variable defined by the bison parser.
 * It must be initialised with the location
 * of the token being parsed.
 * This is only needed if we want to keep
 * track of the locations, in order to give
 * more meaningful error messages!
 */
/*
 *extern YYLTYPE yylloc;
b*/
#define YY_INPUT(buf,result,max_size)  {\
    result = GetNextChar(buf, max_size);\
    if (  result <= 0  )\
      result = YY_NULL;\
    }


/* Macro that is executed for every action.
 * We use it to pass the location of the token
 * back to the bison parser...
 */
#define YY_USER_ACTION {\
	previous_tracking   =*current_tracking;					\
	yylloc.first_line   = current_tracking->lineNumber;			\
	yylloc.first_column = current_tracking->currentChar;			\
	yylloc.first_file   = current_filename;					\
	yylloc.first_order  = current_order;					\
	\
	UpdateTracking(yytext);							\
	\
	yylloc.last_line    = current_tracking->lineNumber;			\
	yylloc.last_column  = current_tracking->currentChar - 1;		\
	yylloc.last_file    = current_filename;					\
	yylloc.last_order   = current_order;					\
	\
	current_tracking->currentTokenStart = current_tracking->currentChar;	\
	current_order++;							\
	}


/* Since this lexical parser we defined only works in ASCII based
 * systems, we might as well make sure it is being compiled on
 * one...
 * Lets check a few random characters...
 */
#if (('a' != 0x61) || ('A' != 0x41) || ('z' != 0x7A) || ('Z' != 0x5A) || \
     ('0' != 0x30) || ('9' != 0x39) || ('(' != 0x28) || ('[' != 0x5B))
#error This lexical analyser is not portable to a non ASCII based system.
#endif


/* Function only called from within flex, but defined
 * in iec.y!
 * We declare it here...
 *
 * Search for a symbol in either of the two symbol tables
 * and return the token id of the first symbol found.
 * Searches first in the variables, and only if not found
 * does it continue searching in the library elements
 */
//token_id_t get_identifier_token(const char *identifier_str);
int get_identifier_token(const char *identifier_str);
%}


/***************************************************/
/* Forward Declaration of functions defined later. */
/***************************************************/

%{
void UpdateTracking(const char *text);
/* return the character back to the input stream. */
void unput_char(const char c);
/* return all the text in the current token back to the input stream. */
void unput_text(int n);
/* return all the text in the current token back to the input stream, 
 * but first return to the stream an additional character to mark the end of the token. 
 */
void unput_and_mark(const char mark_char);

void include_file(const char *include_filename);

/* The body_state tries to find a ';' before a END_PROGRAM, END_FUNCTION or END_FUNCTION_BLOCK or END_ACTION
 * and ignores ';' inside comments and pragmas. This means that we cannot do this in a signle lex rule.
 * Body_state therefore stores ALL text we consume in every rule, so we can push it back into the buffer
 * once we have decided if we are parsing ST or IL code. The following functions manage that buffer used by
 * the body_state.
 */
void  append_bodystate_buffer(const char *text, int is_whitespace = 0);
void   unput_bodystate_buffer(void);
int  isempty_bodystate_buffer(void);

int GetNextChar(char *b, int maxBuffer);
%}



/****************************/
/* Lexical Parser States... */
/****************************/

/* NOTE: Our psrser can parse st or il code, intermixed
 *       within the same file.
 *       With IL we come across the issue of the EOL (end of line) token.
 *       ST, and the declaration parts of IL do not use this token!
 *       If the lexical analyser were to issue this token during ST
 *       language parsing, or during the declaration of data types,
 *       function headers, etc. in IL, the syntax parser would crash.
 *
 *       We can solve this issue using one of three methods:
 *        (1) Augment all the syntax that does not accept the EOL
 *            token to simply ignore it. This makes the syntax
 *            definition (in iec.y) very cluttered!
 *        (2) Let the lexical parser figure out which language
 *            it is parsing, and decide whether or not to issue
 *            the EOL token. This requires the lexical parser
 *            to have knowledge of the syntax!, making for a poor
 *            overall organisation of the code. It would also make it
 *            very difficult to understand the lexical parser as it
 *            would use several states, and a state machine to transition
 *            between the states. The state transitions would be
 *            intermingled with the lexical parser defintion!
 *        (3) Use a mixture of (1) and (2). The lexical analyser
 *            merely distinguishes between function headers and function
 *            bodies, but no longer makes a distinction between il and
 *            st language bodies. When parsing a body, it will return
 *            the EOL token. In other states '\n' will be ignored as
 *            whitespace.
 *            The ST language syntax has been augmented in the syntax
 *            parser configuration to ignore any EOL tokens that it may
 *            come across!
 *            This option has both drawbacks of option (1) and (2), but
 *            much less intensely.
 *            The syntax that gets cluttered is limited to the ST statements
 *            (which is rather limited, compared to the function headers and
 *            data type declarations, etc...), while the state machine in
 *            the lexical parser becomes very simple. All state transitions
 *            can be handled within the lexical parser by itself, and can be
 *            easily identified. Thus knowledge of the syntax required by
 *            the lexical parser is very limited!
 *
 * Amazingly enough, I (Mario) got to implement option (3)
 * at first, requiring two basic states, decl and body.
 * The lexical parser will enter the body state when
 * it is parsing the body of a function/program/function block. The
 * state transition is done when we find a VAR_END that is not followed
 * by a VAR! This is the syntax knowledge that gets included in the
 * lexical analyser with this option!
 * Unfortunately, getting the st syntax parser to ignore EOL anywhere
 * where they might appear leads to conflicts. This is due to the fact
 * that the syntax parser uses the single look-ahead token to remove
 * possible conflicts. When we insert a possible EOL, the single
 * look ahead token becomes the EOL, which means the potential conflicts
 * could no longer be resolved.
 * Removing these conflicts would make the st syntax parser very convoluted,
 * and adding the extraneous EOL would make it very cluttered.
 * This option was therefore dropped in favour of another!
 *
 * I ended up implementing (2). Unfortunately the lexical analyser can
 * not easily distinguish between il and st code, since function
 * calls in il are very similar to function block calls in st.
 * We therefore use an extra 'body' state. When the lexical parser
 * finds that last END_VAR, it enters the body state. This state
 * must figure out what language is being parsed from the first few
 * tokens, and switch to the correct state (st, il or sfc) according to the
 * language. This means that we insert quite a bit of knowledge of the
 * syntax of the languages into the lexical parser. This is ugly, but it
 * works, and at least it is possible to keep all the state changes together
 * to make it easier to remove them later on if need be.
 * Once the language being parsed has been identified, 
 * the body state returns any matched text back to the buffer with unput(),
 * to be later matched correctly by the apropriate language parser (st, il or sfc).
 *
 * Aditionally, in sfc state it may further recursively enter the body state
 * once again. This is because an sfc body may contain ACTIONS, which are then
 * written in one of the three languages (ST, IL or SFC), so once again we need
 * to figure out which language the ACTION in the SFC was written in. We already
 * ahve all that done in the body state, so we recursively transition to the body 
 * state once again.
 * Note that in this case, when coming out of the st/il state (whichever language
 * the action was written in) the sfc state will become active again. This is done by
 * pushing and poping the previously active state!
 *
 * The sfc_qualifier_state is required because when parsing actions within an
 * sfc, we will be expecting action qualifiers (N, P, R, S, DS, SD, ...). In order
 * to bison to work correctly, these qualifiers must be returned as tokens. However,
 * these tokens are not reserved keywords, which means it should be possible to
 * define variables/functions/FBs with any of these names (including 
 * S and R which are special because they are also IL operators). So, when we are not
 * expecting any action qualifiers, flex does not return these tokens, and is free
 * to interpret them as previously defined variables/functions/... as the case may be.
 *
 * The time_literal_state is required because TIME# literals are decomposed into 
 * portions, and wewant to send these portions one by one to bison. Each poertion will 
 * represent the value in days/hours/minutes/seconds/ms.
 * Unfortunately, some of these portions may also be lexically analysed as an identifier. So,
 * we need to disable lexical identification of identifiers while parsing TIME# literals!
 * e.g.:  TIME#55d_4h_56m
 *       We would like to return to bison the tokens 'TIME' '#' '55d' '_' '4h' '_' '56m'
 *       Unfortunately, flex will join '_' and '4h' to create a legal {identifier} '_4h',
 *       and return that identifier instead! So, we added this state!
 *
 * The ignore_pou_state state is only used when bison says it is doing the pre-parsing.
 * During pre-parsing, the main state machine will only transition between
 * INITIAL and ignore_pou_state, and from here back to INITIAL. All other
 * transitions are inhibited. This inhibition is actually just enforced by making
 * sure that the INITIAL ---> ignore_pou_state transition is tested before all other
 * transitions coming out of INITIAL state. All other transitions are unaffected, as they
 * never get a chance to be evaluated when bison is doing pre-parsing.
 * Pre-parsing is a first quick scan through the whole input source code simply
 * to determine the list of POUs and datatypes that will be defined in that
 * code. Basically, the objective is to fill up the previously_declared_xxxxx
 * maps, without processing the code itself. Once these maps have been filled up,
 * bison will throw away the AST (abstract syntax tree) created up to that point, 
 * and scan through the same source code again, but this time creating a correct AST.
 * This pre-scan allows the source code to reference POUs and datatypes that are
 * only declared after they are used!
 * 
 *
 * Here is a main state machine...
 *                                                                         --+  
 *                                                                           |  these states are
 *              +------------> get_pou_name_state  ----> ignore_pou_state    |  only active 
 *              |                                            |               |  when bison is 
 *              |  ------------------------------------------+               |  doing the 
 *              |  |                                                         |  pre-parsing!!
 *              |  v                                                       --+
 *       +---> INITIAL <-------> config
 *       |        \
 *       |        V
 *       |   header_state
 *       |        |
 *       |        V
 *     vardecl_list_state <------> var_decl
 *       ^        | 
 *       |        | [using push()]
 *       |        |
 *       |        V
 *       |       body, 
 *       |        |
 *       |        | 
 *       |   -------------------
 *       |   |       |         |
 *       |   v       v         v
 *       |  st      il        sfc
 *       |   |       |         |  [using pop() when leaving st/il/sfc => goes to vardecl_list_state]
 *       |   |       |         |
 *       -----------------------
 *
 * NOTE:- When inside sfc, and an action or transition in ST/IL is found, then 
 *        we also push() to the body state. This means that sometimes, when pop()ing
 *        from st and il, the state machine may return to the sfc state!
 *      - The transitions form sfc to body will be decided by bison, which will
 *        tell flex to do the transition by calling cmd_goto_body_state().
 *   
 * 
 * Possible state changes are:
 *   INITIAL -> goto(ignore_pou_state)
 *               (This transition state is only used when bison says it is doing the pre-parsing.)
 *               (This transition takes precedence over all other transitions!)
 *               (when a FUNCTION, FUNCTION_BLOCK, PROGRAM or CONFIGURATION is found)
 * 
 *   INITIAL -> goto(config_state)
 *                (when a CONFIGURATION is found)
 * 
 *   INITIAL -> goto(header_state)
 *               (when a FUNCTION, FUNCTION_BLOCK, or PROGRAM is found)
 * 
 *   header_state -> goto(vardecl_list_state)
 *               (When the first VAR token is found, i.e. at begining of first VAR .. END_VAR declaration)
 * 
 *  vardecl_list_state -> push current state (vardecl_list_state), and goto(vardecl_state) 
 *                (when a VAR token is found)
 *   vardecl_state -> pop() to (vardecl_list_state) 
 *                (when a END_VAR token is found)
 * 
 *   vardecl_list_state -> push current state (vardecl_list_state), and goto(body_state) 
 *                (when the last END_VAR is found!)
 *
 *   body_state    -> goto(sfc_state)
 *                     (when it figures out it is parsing sfc language)
 *   body_state    -> goto(st_state)
 *                     (when it figures out it is parsing st language)
 *   body_state    -> goto(il_state)
 *                     (when it figures out it is parsing il language)
 *   st_state      -> pop() to vardecl_list_state
 *                     (when a END_FUNCTION, END_FUNCTION_BLOCK, END_PROGRAM,
 *                      END_ACTION or END_TRANSITION is found)
 *   il_state      -> pop() to vardecl_list_state
 *                     (when a END_FUNCTION, END_FUNCTION_BLOCK, END_PROGRAM,
 *                      END_ACTION or END_TRANSITION is found)
 *   sfc_state     -> pop() to vardecl_list_state
 *                     (when a END_FUNCTION, END_FUNCTION_BLOCK, or END_PROGRAM is found)
 * 
 *   ignore_pou_state   -> goto(INITIAL)
 *                         (when a END_FUNCTION, END_FUNCTION_BLOCK, END_PROGRAM or END_CONFIGURATION is found)
 *   vardecl_list_state -> goto(INITIAL)
 *                         (when a END_FUNCTION, END_FUNCTION_BLOCK, or END_PROGRAM is found)
 *   config_state       -> goto(INITIAL)
 *                         (when a END_CONFIGURATION is found)
 * 
 *  
 *   sfc_state     -> push current state(sfc_state); goto(body_state)
 *                     (when parsing an action. This transition is requested by bison)
 *   sfc_state     -> push current state(sfc_state); goto(sfc_qualifier_state)
 *                     (when expecting an action qualifier. This transition is requested by bison)
 *   sfc_qualifier_state -> pop() to sfc_state
 *                     (when no longer expecting an action qualifier. This transition is requested by bison)
 *
 *   config_state  -> push(config_state); goto(task_init_state)
 *                     (when parsing a task initialisation. This transition is requested by bison)
 *   task_init_state -> pop()
 *                     (when no longer parsing task initialisation parameters. This transition is requested by bison)
 *
 * 
 * There is another secondary state machine for parsing comments, another for file_includes, 
 * and yet another for time literals.
 */


/* Bison is in the pre-parsing stage, and we are parsing a POU. Ignore everything up to the end of the POU! */
%x ignore_pou_state
%x get_pou_name_state

/* we are parsing a configuration. */
%s config_state

/* Inside a configuration, we are parsing a task initialisation parameters */
/* This means that PRIORITY, SINGLE and INTERVAL must be handled as
 * tokens, and not as possible identifiers. Note that the above words
 * are not keywords.
 */
%s task_init_state

/* we are looking for the first VAR inside a function's, program's or function block's declaration */
/* This is not exclusive (%x) as we must be able to parse the identifier and data types of a function/FB */
%s header_state

/* we are parsing a function, program or function block sequence of VAR..END_VAR delcarations */
%x vardecl_list_state 
/* a substate of the vardecl_list_state: we are inside a specific VAR .. END_VAR */
%s vardecl_state

/* we will be parsing a function body/action/transition. Whether il/st/sfc remains to be determined */
%x body_state

/* we are parsing il code -> flex must return the EOL tokens!       */
%s il_state

/* we are parsing st code -> flex must not return the EOL tokens!   */
%s st_state

/* we are parsing sfc code -> flex must not return the EOL tokens!  */
%s sfc_state

/* we are parsing sfc code, and expecting an action qualifier.      */
%s sfc_qualifier_state

/* we are parsing sfc code, and expecting the priority token.       */
%s sfc_priority_state

/* we are parsing a TIME# literal. We must not return any {identifier} tokens. */
%x time_literal_state

/* we are parsing a comment. */
%x comment_state


/*******************/
/* File #include's */
/*******************/

/* We extend the IEC 61131-3 standard syntax to allow inclusion
 * of other files, using the IEC 61131-3 pragma directive...
 * The accepted syntax is:
 *  {#include "<filename>"}
 */

/* the "include" states are used for picking up the name of an include file */
%x include_beg
%x include_filename
%x include_end


file_include_pragma_filename	[^\"]*
file_include_pragma_beg		"{#include"{st_whitespace}\"
file_include_pragma_end		\"{st_whitespace}"}"
file_include_pragma			{file_include_pragma_beg}{file_include_pragma_filename}{file_include_pragma_end}


%{

/* A counter to track the order by which each token is processed.
 * NOTE: This counter is not exactly linear (i.e., it does not get incremented by 1 for each token).
 *       i.e.. it may get incremented by more than one between two consecutive tokens.
 *       This is due to the fact that the counter gets incremented every 'user action' in flex,
 *       however not every user action will result in a token being passed to bison.
 *       Nevertheless this is still OK, as we are only interested in the relative
 *       ordering of tokens...
 */
static long int current_order = 0;
  
typedef struct {
    int eof;
    int lineNumber;
    int currentChar;
    int lineLength;
    int currentTokenStart;
    FILE *in_file;
  } tracking_t;

/* A forward declaration of a function defined at the end of this file. */
void FreeTracking(tracking_t *tracking);


#define MAX_INCLUDE_DEPTH 16

typedef struct {
	  YY_BUFFER_STATE buffer_state;
	  tracking_t *env;
	  const char *filename;
	} include_stack_t;

tracking_t * current_tracking = NULL;
tracking_t  previous_tracking;
include_stack_t include_stack[MAX_INCLUDE_DEPTH];
int include_stack_ptr = 0;

const char *INCLUDE_DIRECTORIES[] = {
	DEFAULT_LIBDIR,
	".",
	"/lib",
	"/usr/lib",
	"/usr/lib/iec",
	NULL /* must end with NULL!! */
	};
%}



/*****************************/
/* Prelimenary constructs... */
/*****************************/

/* PRAGMAS */
/* ======= */
/* In order to allow the declaration of POU prototypes (Function, FB, Program, ...),
 * especially the prototypes of Functions and FBs defined in the standard
 * (i.e. standard functions and FBs), we extend the IEC 61131-3 standard syntax 
 * with two pragmas to indicate that the code is to be parsed (going through the 
 * lexical, syntactical, and semantic analysers), but no code is to be generated.
 * 
 * The accepted syntax is:
 *  {no_code_generation begin}
 *    ... prototypes ...
 *  {no_code_generation end}
 * 
 * When parsing these prototypes the abstract syntax tree will be populated as usual,
 * allowing the semantic analyser to correctly analyse the semantics of calls to these
 * functions/FBs. However, stage4 will simply ignore all IEC61131-3 code
 * between the above two pragmas.
 */

disable_code_generation_pragma	"{disable code generation}"
enable_code_generation_pragma	"{enable code generation}"


/* Any other pragma... */
pragma ("{"[^}]*"}")|("{{"([^}]|"}"[^}])*"}}")



/* COMMENTS */
/* ======== */

/* In order to allow nested comments, comments are handled by a specific comment_state state */
/* Whenever a "(*" is found, we push the current state onto the stack, and enter a new instance of the comment_state state.
 * Whenever a "*)" is found, we pop a state off the stack
 */

/* comments... */
comment_beg  "(*"
comment_end  "*)"

/* However, bison has a shift/reduce conflict in bison, when parsing formal function/FB
 * invocations with the 'NOT <variable_name> =>' syntax (which needs two look ahead 
 * tokens to be parsed correctly - and bison being LALR(1) only supports one).
 * The current work around requires flex to completely parse the '<variable_name> =>'
 * sequence. This sequence includes whitespace and/or comments between the 
 * <variable_name> and the "=>" token.
 * 
 * This flex rule (sendto_identifier_token) uses the whitespace/comment as trailing context,
 * which means we can not use the comment_state method of specifying/finding and ignoring 
 * comments.
 * 
 * For this reason only, we must also define what a complete comment looks like, so
 * it may be used in this rule. Since the rule uses the whitespace_or_comment
 * construct as trailing context, this definition of comment must not use any
 * trailing context either.
 * 
 * Aditionally, it is not possible to define nested comments in flex without the use of
 * states, so for this particular location, we do NOT support nested comments.
 */
/* NOTE: this seemingly unnecessary complex definition is required
 *       to be able to eat up comments such as:
 *          '(* Testing... ! ***** ******)'
 *       without using the trailing context command in flex (/{context})
 *       since {comment} itself will later be used with
 *       trailing context ({comment}/{context})
 */
not_asterisk				[^*]
not_close_parenthesis_nor_asterisk	[^*)]
asterisk				"*"
comment_text	({not_asterisk})|(({asterisk}+){not_close_parenthesis_nor_asterisk})
comment		"(*"({comment_text}*)({asterisk}+)")"



/* 3.1 Whitespace */
/* ============== */
/*
 * Whitespace is clearly defined (see IEC 61131-3 v2, section 2.1.4)
 * 
 * Whitespace definition includes the newline character.
 * 
 * However, the standard is inconsistent in that in IL the newline character 
 * is considered a token (EOL - end of line). 
 * In our implementation we therefore have two definitions of whitespace
 *   - one for ST, that includes the newline character
 *   - one for IL without the newline character.
 *
 * IL whitespace is only active while parsing IL code, whereas ST whitespace
 * is used in all other circumstances. Additionally, when parsing IL, the newline
 * character is treated as the EOL token.
 * The above requires the use of a state machine in the lexical parser to track which
 * language is being parsed. This requires that the lexical parser (i.e. flex)
 * have some knowledge of the syntax itself.
 *
 * NOTE: Our definition of whitespace will only work in ASCII!
 *
 * NOTE: we cannot use
 *         st_whitespace	[:space:]*
 *       since we use {st_whitespace} as trailing context. In our case
 *       this would not constitute "dangerous trailing context", but the
 *       lexical generator (i.e. flex) does not know this (since it does
 *       not know which characters belong to the set [:space:]), and will
 *       generate a "dangerous trailing context" warning!
 *       We use this alternative just to stop the flex utility from
 *       generating the invalid (in this case) warning...
 */
/* NOTE: il_whitespace_char is not currenty used, be we include it for completeness */ 
st_whitespace_char		[ \f\n\r\t\v]
il_whitespace_char		[ \f\r\t\v]

st_whitespace			[ \f\n\r\t\v]*
il_whitespace			[ \f\r\t\v]*

st_whitespace_or_pragma_or_commentX	({st_whitespace})|({pragma})|({comment})
il_whitespace_or_pragma_or_commentX	({il_whitespace})|({pragma})|({comment})

st_whitespace_or_pragma_or_comment	{st_whitespace_or_pragma_or_commentX}*
il_whitespace_or_pragma_or_comment	{il_whitespace_or_pragma_or_commentX}*



qualified_identifier	{identifier}(\.{identifier})+



/*****************************************/
/* B.1.1 Letters, digits and identifiers */
/*****************************************/
/* NOTE: The following definitions only work if the host computer
 *       is using the ASCII maping. For e.g., with EBCDIC [A-Z]
 *       contains non-alphabetic characters!
 *       The correct way of doing it would be to use
 *       the [:upper:] etc... definitions.
 *
 *       Unfortunately, further on we need all printable
 *       characters (i.e. [:print:]), but excluding '$'.
 *       Flex does not allow sets to be composed by excluding
 *       elements. Sets may only be constructed by adding new
 *       elements, which means that we have to revert to
 *       [\x20\x21\x23\x25\x26\x28-x7E] for the definition
 *       of the printable characters with the required exceptions.
 *       The above also implies the use of ASCII, but now we have
 *       no way to work around it|
 *
 *       The conclusion is that our parser is limited to ASCII
 *       based host computers!!
 */
letter		[A-Za-z]
digit		[0-9]
octal_digit	[0-7]
hex_digit	{digit}|[A-F]
identifier	({letter}|(_({letter}|{digit})))((_?({letter}|{digit}))*)

/*******************/
/* B.1.2 Constants */
/*******************/

/******************************/
/* B.1.2.1   Numeric literals */
/******************************/
integer         {digit}((_?{digit})*)

/* Some helper symbols for parsing TIME literals... */
integer_0_59    (0(_?))*([0-5](_?))?{digit}
integer_0_19    (0(_?))*([0-1](_?))?{digit}
integer_20_23   (0(_?))*2(_?)[0-3]
integer_0_23    {integer_0_19}|{integer_20_23}
integer_0_999   {digit}((_?{digit})?)((_?{digit})?)


binary_integer  2#{bit}((_?{bit})*)
bit		[0-1]
octal_integer   8#{octal_digit}((_?{octal_digit})*)
hex_integer     16#{hex_digit}((_?{hex_digit})*)
exponent        [Ee]([+-]?){integer}
/* The correct definition for real would be:
 * real		{integer}\.{integer}({exponent}?)
 *
 * Unfortunately, the spec also defines fixed_point (B 1.2.3.1) as:
 * fixed_point		{integer}\.{integer}
 *
 * This means that {integer}\.{integer} could be interpreted
 * as either a fixed_point or a real.
 * I have opted to interpret {integer}\.{integer} as a fixed_point.
 * In order to do this, the definition of real has been changed to:
 * real		{integer}\.{integer}{exponent}
 *
 * This means that the syntax parser now needs to define a real to be
 * either a real_token or a fixed_point_token!
 */
real		{integer}\.{integer}{exponent}


/*******************************/
/* B.1.2.2   Character Strings */
/*******************************/
/*
common_character_representation :=
<any printable character except '$', '"' or "'">
|'$$'
|'$L'|'$N'|'$P'|'$R'|'$T'
|'$l'|'$n'|'$p'|'$r'|'$t'

NOTE: 	$ = 0x24
	" = 0x22
	' = 0x27

	printable chars in ASCII: 0x20-0x7E
*/

esc_char_u		$L|$N|$P|$R|$T
esc_char_l		$l|$n|$p|$r|$t
esc_char		$$|{esc_char_u}|{esc_char_l}
double_byte_char	(${hex_digit}{hex_digit}{hex_digit}{hex_digit})
single_byte_char	(${hex_digit}{hex_digit})

/* WARNING:
 * This definition is only valid in ASCII...
 *
 * Flex includes the function print_char() that defines
 * all printable characters portably (i.e. whatever character
 * encoding is currently being used , ASCII, EBCDIC, etc...)
 * Unfortunately, we cannot generate the definition of
 * common_character_representation portably, since flex
 * does not allow definition of sets by subtracting
 * elements in one set from another set.
 * This means we must build up the defintion of
 * common_character_representation using only set addition,
 * which leaves us with the only choice of defining the
 * characters non-portably...
 */
common_character_representation		[\x20\x21\x23\x25\x26\x28-\x7E]|{esc_char}
double_byte_character_representation 	$\"|'|{double_byte_char}|{common_character_representation}
single_byte_character_representation 	$'|\"|{single_byte_char}|{common_character_representation}


double_byte_character_string	\"({double_byte_character_representation}*)\"
single_byte_character_string	'({single_byte_character_representation}*)'


/************************/
/* B 1.2.3.1 - Duration */
/************************/
fixed_point		{integer}\.{integer}


/* NOTE: The IEC 61131-3 v2 standard has an incorrect formal syntax definition of duration,
 *       as its definition does not match the standard's text.
 *       IEC 61131-3 v3 (committee draft) seems to have this fixed, so we use that
 *       definition instead!
 *
 *       duration::= ('T' | 'TIME') '#' ['+'|'-'] interval
 *       interval::= days | hours | minutes | seconds | milliseconds
 *       fixed_point  ::= integer [ '.' integer]
 *       days         ::= fixed_point 'd' | integer 'd' ['_'] [ hours ]
 *       hours        ::= fixed_point 'h' | integer 'h' ['_'] [ minutes ]
 *       minutes      ::= fixed_point 'm' | integer 'm' ['_'] [ seconds ]
 *       seconds      ::= fixed_point 's' | integer 's' ['_'] [ milliseconds ]
 *       milliseconds ::= fixed_point 'ms'
 * 
 * 
 *  The original IEC 61131-3 v2 definition is:
 *       duration ::= ('T' | 'TIME') '#' ['-'] interval
 *       interval ::= days | hours | minutes | seconds | milliseconds
 *       fixed_point  ::= integer [ '.' integer]
 *       days         ::= fixed_point 'd' | integer 'd' ['_'] hours
 *       hours        ::= fixed_point 'h' | integer 'h' ['_'] minutes
 *       minutes      ::= fixed_point 'm' | integer 'm' ['_'] seconds
 *       seconds      ::= fixed_point 's' | integer 's' ['_'] milliseconds
 *       milliseconds ::= fixed_point 'ms'
 */

interval_ms_X		({integer_0_999}(\.{integer})?)ms
interval_s_X		{integer_0_59}s(_?{interval_ms_X})?|({integer_0_59}(\.{integer})?s)
interval_m_X		{integer_0_59}m(_?{interval_s_X})?|({integer_0_59}(\.{integer})?m)
interval_h_X		{integer_0_23}h(_?{interval_m_X})?|({integer_0_23}(\.{integer})?h)

interval_ms		{integer}ms|({fixed_point}ms)
interval_s		{integer}s(_?{interval_ms_X})?|({fixed_point}s)
interval_m		{integer}m(_?{interval_s_X})?|({fixed_point}m)
interval_h		{integer}h(_?{interval_m_X})?|({fixed_point}h)
interval_d		{integer}d(_?{interval_h_X})?|({fixed_point}d)

interval		{interval_ms}|{interval_s}|{interval_m}|{interval_h}|{interval_d}


/* to help provide nice error messages, we also parse an incorrect but plausible interval... */
/* NOTE that this erroneous interval will be parsed outside the time_literal_state, so must not 
 *      be able to parse any other legal lexcial construct (besides a legal interval, but that
 *      is OK as this rule will appear _after_ the rule to parse legal intervals!).
 */
fixed_point_or_integer  {fixed_point}|{integer}
erroneous_interval	({fixed_point_or_integer}d_?)?({fixed_point_or_integer}h_?)?({fixed_point_or_integer}m_?)?({fixed_point_or_integer}s_?)?({fixed_point_or_integer}ms)?

/********************************************/
/* B.1.4.1   Directly Represented Variables */
/********************************************/
/* The correct definition, if the standard were to be followed... */

location_prefix			[IQM]
size_prefix			[XBWDL]
direct_variable_standard	%{location_prefix}({size_prefix}?){integer}((.{integer})*)


/* For the MatPLC, we will accept %<identifier>
 * as a direct variable, this being mapped onto the MatPLC point
 * named <identifier>
 */
/* TODO: we should not restrict it to only the accepted syntax
 * of <identifier> as specified by the standard. MatPLC point names
 * have a more permissive syntax.
 *
 * e.g. "P__234"
 *    Is a valid MatPLC point name, but not a valid <identifier> !!
 *    The same happens with names such as "333", "349+23", etc...
 *    How can we handle these more expressive names in our case?
 *    Remember that some direct variable may remain anonymous, with
 *    declarations such as:
 *    VAR
 *       AT %I3 : BYTE := 255;
 *    END_VAR
 *    in which case we are currently using "%I3" as the variable
 *    name.
 */
/* direct_variable_matplc		%{identifier} */
/* direct_variable			{direct_variable_standard}|{direct_variable_matplc} */
direct_variable			{direct_variable_standard}

/******************************************/
/* B 1.4.3 - Declaration & Initialisation */
/******************************************/
incompl_location	%[IQM]\*




%%
	/* fprintf(stderr, "flex: state %d\n", YY_START); */

	/*****************************************************/
	/*****************************************************/
	/*****************************************************/
	/*****                                           *****/
	/*****                                           *****/
	/*****   F I R S T    T H I N G S    F I R S T   *****/
	/*****                                           *****/
	/*****                                           *****/
	/*****************************************************/
	/*****************************************************/
	/*****************************************************/

	/***********************************************************/
	/* Handle requests sent by bison for flex to change state. */
	/***********************************************************/
	if (get_goto_body_state()) {
	  yy_push_state(body_state);
	  rst_goto_body_state();
	}

	if (get_goto_sfc_qualifier_state()) {
	  yy_push_state(sfc_qualifier_state);
	  rst_goto_sfc_qualifier_state();
	}

	if (get_goto_sfc_priority_state()) {
	  yy_push_state(sfc_priority_state);
	  rst_goto_sfc_priority_state();
	}

	if (get_goto_task_init_state()) {
	  yy_push_state(task_init_state);
	  rst_goto_task_init_state();
	}

	if (get_pop_state()) {
	  yy_pop_state();
	  rst_pop_state();
	}

	/***************************/
	/* Handle the pragmas!     */
	/***************************/

	/* We start off by searching for the pragmas we handle in the lexical parser. */
<INITIAL>{file_include_pragma}	unput_text(0); yy_push_state(include_beg);

	/* Pragmas sent to syntax analyser (bison) */
	/* NOTE: In the vardecl_list_state we only process the pragmas between two consecutive VAR .. END_VAR blocks.
	 *       We do not process any pragmas trailing after the last END_VAR. We leave that to the body_state.
	 *       This is because the pragmas are stored in a statement_list or instruction_list (in bison),
	 *       but these lists must start with the special tokens start_IL_body_token/start_ST_body_token.
	 *       This means that these special tokens must be generated (by the body_state) before processing
	 *       the pragme => we cannot process the trailing pragmas in the vardecl_list_state state.
	 */
{disable_code_generation_pragma}				return disable_code_generation_pragma_token;
{enable_code_generation_pragma}					return enable_code_generation_pragma_token;
<vardecl_list_state>{disable_code_generation_pragma}/(VAR)	return disable_code_generation_pragma_token; 
<vardecl_list_state>{enable_code_generation_pragma}/(VAR)	return enable_code_generation_pragma_token;  
<body_state>{disable_code_generation_pragma}			append_bodystate_buffer(yytext); /* in body state we do not process any tokens, we simply store them for later processing! */
<body_state>{enable_code_generation_pragma}			append_bodystate_buffer(yytext); /* in body state we do not process any tokens, we simply store them for later processing! */
	/* Any other pragma we find, we just pass it up to the syntax parser...   */
	/* Note that the <body_state> state is exclusive, so we have to include it here too. */
<body_state>{pragma}					append_bodystate_buffer(yytext); /* in body state we do not process any tokens, we simply store them for later processing! */
{pragma}	{/* return the pragmma without the enclosing '{' and '}' */
		 int cut = yytext[1]=='{'?2:1;
		 yytext[strlen(yytext)-cut] = '\0';
		 yylval.ID=strdup(yytext+cut);
		 return pragma_token;
		}
<vardecl_list_state>{pragma}/(VAR) {/* return the pragmma without the enclosing '{' and '}' */
		 int cut = yytext[1]=='{'?2:1;
		 yytext[strlen(yytext)-cut] = '\0';
		 yylval.ID=strdup(yytext+cut);
		 return pragma_token;
		}


	/*********************************/
	/* Handle the file includes!     */
	/*********************************/
<include_beg>{file_include_pragma_beg}	BEGIN(include_filename);

<include_filename>{file_include_pragma_filename}	{
			  /* set the internal state variables of lexical analyser to process a new include file */
			  include_file(yytext);
			  /* switch to whatever state was active before the include file */
			  yy_pop_state();
			  /* now process the new file... */
			}


<<EOF>>			{     /* NOTE: Currently bison is incorrectly using END_OF_INPUT in many rules
			       *       when checking for syntax errors in the input source code.
			       *       This means that in reality flex will be asked to carry on reading the input
			       *       even after it has reached the end of all (including the main) input files.
			       *       In other owrds, we will be called to return more tokens, even after we have
			       *       already returned an END_OF_INPUT token. In this case, we must carry on returning
			       *       more END_OF_INPUT tokens.
			       * 
			       *       However, in the above case we will be asked to carry on reading more tokens 
			       *       from the main input file, after we have reached the end. For this to work
			       *       correctly, we cannot close the main input file!
			       * 
			       *       This is why we WILL be called with include_stack_ptr == 0 multiple times,
			       *       and why we must handle it as a special case
			       *       that leaves the include_stack_ptr unchanged, and returns END_OF_INPUT once again.
			       * 
			       *       As a corollory, flex can never safely close the main input file, and we must ask
			       *       bison to close it!
			       */
			  if (include_stack_ptr == 0) {
			      // fclose(yyin);           // Must not do this!!
			      // FreeTracking(current_tracking); // Must not do this!!
			      /* yyterminate() terminates the scanner and returns a 0 to the 
			       * scanner's  caller, indicating "all done".
			       *	
			       * Our syntax parser (written with bison) has the token	
			       * END_OF_INPUT associated to the value 0, so even though
			       * we don't explicitly return the token END_OF_INPUT
			       * calling yyterminate() is equivalent to doing that. 
			       */ 	
			    yyterminate();
			  } else {
			    fclose(yyin);
			    FreeTracking(current_tracking);
			    --include_stack_ptr;
			    yy_delete_buffer(YY_CURRENT_BUFFER);
			    yy_switch_to_buffer((include_stack[include_stack_ptr]).buffer_state);
			    current_tracking = include_stack[include_stack_ptr].env;
			      /* removing constness of char *. This is safe actually,
			       * since the only real const char * that is stored on the stack is
			       * the first one (i.e. the one that gets stored in include_stack[0],
			       * which is never free'd!
			       */
			    /* NOTE: We do __NOT__ free the malloc()'d memory since 
			     *       pointers to this filename will be kept by many objects
			     *       in the abstract syntax tree.
			     *       This will later be used to provide correct error
			     *       messages during semantic analysis (stage 3)
			     */
			    /* free((char *)current_filename); */
			    current_filename = include_stack[include_stack_ptr].filename;
			    yy_push_state(include_end);
			  }
			}

<include_end>{file_include_pragma_end}	yy_pop_state();
	/* handle the artificial file includes created by include_string(), which do not end with a '}' */
<include_end>.				unput_text(0); yy_pop_state(); 


	/*********************************/
	/* Handle all the state changes! */
	/*********************************/

	/* INITIAL -> header_state */
<INITIAL>{
FUNCTION{st_whitespace} 		if (get_preparse_state()) BEGIN(get_pou_name_state); else {BEGIN(header_state);/* printf("\nChanging to header_state\n"); */} return FUNCTION;
FUNCTION_BLOCK{st_whitespace}		if (get_preparse_state()) BEGIN(get_pou_name_state); else {BEGIN(header_state);/* printf("\nChanging to header_state\n"); */} return FUNCTION_BLOCK;
PROGRAM{st_whitespace}			if (get_preparse_state()) BEGIN(get_pou_name_state); else {BEGIN(header_state);/* printf("\nChanging to header_state\n"); */} return PROGRAM;
CONFIGURATION{st_whitespace}		if (get_preparse_state()) BEGIN(get_pou_name_state); else {BEGIN(config_state);/* printf("\nChanging to config_state\n"); */} return CONFIGURATION;
}

<get_pou_name_state>{
{identifier}			BEGIN(ignore_pou_state); yylval.ID=strdup(yytext); return identifier_token;
.				BEGIN(ignore_pou_state); unput_text(0);
}

<ignore_pou_state>{
END_FUNCTION			unput_text(0); BEGIN(INITIAL);
END_FUNCTION_BLOCK		unput_text(0); BEGIN(INITIAL);
END_PROGRAM			unput_text(0); BEGIN(INITIAL);
END_CONFIGURATION		unput_text(0); BEGIN(INITIAL);
.|\n				{}/* Ignore text inside POU! (including the '\n' character!)) */
}


	/* header_state -> (vardecl_list_state) */
	/* NOTE: This transition assumes that all POUs with code (Function, FB, and Program) will always contain
	 *       at least one VAR_XXX block.
	 *      How about functions that do not declare variables, and go directly to the body_state???
	 *      - According to Section 2.5.1.3 (Function Declaration), item 2 in the list, a FUNCTION
	 *        must have at least one input argument, so a correct declaration will have at least
	 *        one VAR_INPUT ... VAR_END construct!
	 *      - According to Section 2.5.2.2 (Function Block Declaration), a FUNCTION_BLOCK
	 *        must have at least one input argument, so a correct declaration will have at least
	 *        one VAR_INPUT ... VAR_END construct!
	 *      - According to Section 2.5.3 (Programs), a PROGRAM must have at least one input
	 *        argument, so a correct declaration will have at least one VAR_INPUT ... VAR_END
	 *        construct!
	 *
	 *       All the above means that we needn't worry about PROGRAMs, FUNCTIONs or
	 *       FUNCTION_BLOCKs that do not have at least one VAR_END before the body_state.
	 *       If the code has an error, and no VAR_END before the body, we will simply
	 *       continue in the <vardecl_state> state, until the end of the FUNCTION, FUNCTION_BLOCK
	 *       or PROGAM.
	 * 
	 * WARNING: From 2016-05 (May 2016) onwards, matiec supports a non-standard option in which a Function
	 *          may be declared with no Input, Output or IN_OUT variables. This means that the above 
	 *          assumption is no longer valid.
	 * 
	 * NOTE: Some code being parsed may be erroneous and not contain any VAR END_VAR block.
	 *       To generate error messages that make sense, the flex state machine should not get lost
	 *       in these situations. We therefore consider the possibility of finding 
	 *       END_FUNCTION, END_FUNCTION_BLOCK or END_PROGRAM when inside the header_state.
	 */
<header_state>{
VAR				| /* execute the next rule's action, i.e. fall-through! */
VAR_INPUT			|
VAR_OUTPUT			|
VAR_IN_OUT			|
VAR_EXTERNAL			|
VAR_GLOBAL			|
VAR_TEMP			|
VAR_CONFIG			|
VAR_ACCESS			unput_text(0); BEGIN(vardecl_list_state);

END_FUNCTION			| /* execute the next rule's action, i.e. fall-through! */
END_FUNCTION_BLOCK		| 
END_PROGRAM			unput_text(0); BEGIN(vardecl_list_state); 
				/* Notice that we do NOT go directly to body_state, as that requires a push().
				 * If we were to puch to body_state here, then the corresponding pop() at the
				 *end of body_state would return to header_state.
				 * After this pop() header_state would not return to INITIAL as it should, but
				 * would instead enter an infitie loop push()ing again to body_state
				 */
}


	/* vardecl_list_state -> (vardecl_state | body_state | INITIAL) */
<vardecl_list_state>{
				/* NOTE: vardecl_list_state is an exclusive state, i.e. when in this state
				 *       default rules do not apply! This means that when in this state identifiers
				 *       are not recognised!
				 * NOTE: Notice that we only change to vardecl_state if the VAR*** is followed by 
				 *       at least one whitespace. This is to dintinguish the VAR declaration
				 *       from identifiers starting with 'var' (e.g. a variable named 'varint')
				 * NOTE: Notice that we cannot use st_whitespace here, as it can legally be empty.
				 *       We therefore use st_whitespace_char instead.
				 */  
VAR_INPUT{st_whitespace_char}		| /* execute the next rule's action, i.e. fall-through! */
VAR_OUTPUT{st_whitespace_char}		|
VAR_IN_OUT{st_whitespace_char}		|
VAR_EXTERNAL{st_whitespace_char}	|
VAR_GLOBAL{st_whitespace_char}		|
VAR_TEMP{st_whitespace_char}		|
VAR_CONFIG{st_whitespace_char}		|
VAR_ACCESS{st_whitespace_char}		|
VAR{st_whitespace_char}			unput_text(0); yy_push_state(vardecl_state); //printf("\nChanging to vardecl_state\n");

END_FUNCTION{st_whitespace}		unput_text(0); BEGIN(INITIAL);
END_FUNCTION_BLOCK{st_whitespace}	unput_text(0); BEGIN(INITIAL);
END_PROGRAM{st_whitespace}		unput_text(0); BEGIN(INITIAL);

				/* NOTE: Handling of whitespace...
				 *   - Must come __before__ the next rule for any single character '.'
				 *   - If the rules were reversed, any whitespace with a single space (' ') 
				 *     would be handled by the '.' rule instead of the {whitespace} rule!
				 */
{st_whitespace}			/* Eat any whitespace */ 

				/* anything else, just change to body_state! */
.				unput_text(0); yy_push_state(body_state); //printf("\nChanging to body_state\n");
}


	/* vardecl_list_state -> pop to $previous_state (vardecl_list_state) */
<vardecl_state>{
END_VAR				yy_pop_state(); return END_VAR; /* pop back to vardecl_list_state */
}


	/* body_state -> (il_state | st_state | sfc_state) */
<body_state>{
{st_whitespace}			{/* In body state we do not process any tokens,
				  * we simply store them for later processing!
				  * NOTE: we must return ALL text when in body_state, including
				  * all comments and whitespace, so as not
				  * to lose track of the line_number and column number
				  * used when printing debugging messages.
				  * NOTE: some of the following rules depend on the fact that 
				  * the body state buffer is either empty or only contains white space up to
				  * that point. Since the vardecl_list_state will eat up all
				  * whitespace before entering the body_state, the contents of the bodystate_buffer
				  * will _never_ start with whitespace if the previous state was vardecl_list_state. 
				  * However, it is possible to enter the body_state from other states (e.g. when 
				  * parsing SFC code, that contains transitions or actions in other languages)
				  */
				 append_bodystate_buffer(yytext, 1 /* is whitespace */); 
				}
	/* 'INITIAL_STEP' always used in beginning of SFCs !! */
INITIAL_STEP			{ if (isempty_bodystate_buffer())	{unput_text(0); BEGIN(sfc_state);}
				  else					{append_bodystate_buffer(yytext);}
				}
 
	/* ':=', at the very beginning of a 'body', occurs only in transitions and not Function, FB, or Program bodies! */
:=				{ if (isempty_bodystate_buffer())	{unput_text(0); BEGIN(st_state);} /* We do _not_ return a start_ST_body_token here, as bison does not expect it! */
				  else				 	{append_bodystate_buffer(yytext);}
				}
 
	/* check if ';' occurs before an END_FUNCTION, END_FUNCTION_BLOCK, END_PROGRAM, END_ACTION or END_TRANSITION. (If true => we are parsing ST; If false => parsing IL). */
END_ACTION			| /* execute the next rule's action, i.e. fall-through! */
END_FUNCTION			|
END_FUNCTION_BLOCK		|
END_TRANSITION   		|
END_PROGRAM			{ append_bodystate_buffer(yytext); unput_bodystate_buffer(); BEGIN(il_state); /*printf("returning start_IL_body_token\n");*/ return start_IL_body_token;}
.|\n				{ append_bodystate_buffer(yytext);
				  if (strcmp(yytext, ";") == 0)
				    {unput_bodystate_buffer(); BEGIN(st_state); /*printf("returning start_ST_body_token\n");*/ return start_ST_body_token;}
				}
	/* The following rules are not really necessary. They just make compilation faster in case the ST Statement List starts with one fot he following... */
RETURN				| /* execute the next rule's action, i.e. fall-through! */
IF				|
CASE				|
FOR				|
WHILE				|
EXIT				|
REPEAT				{ if (isempty_bodystate_buffer())	{unput_text(0); BEGIN(st_state); return start_ST_body_token;}
				  else				 	{append_bodystate_buffer(yytext);}
				}

}	/* end of body_state lexical parser */


	/* (il_state | st_state) -> pop to $previous_state (vardecl_list_state or sfc_state) */
<il_state,st_state>{
END_FUNCTION		yy_pop_state(); unput_text(0);
END_FUNCTION_BLOCK	yy_pop_state(); unput_text(0);
END_PROGRAM		yy_pop_state(); unput_text(0);
END_TRANSITION		yy_pop_state(); unput_text(0);
END_ACTION		yy_pop_state(); unput_text(0);
}

	/* sfc_state -> pop to $previous_state (vardecl_list_state or sfc_state) */
<sfc_state>{
END_FUNCTION		yy_pop_state(); unput_text(0);
END_FUNCTION_BLOCK	yy_pop_state(); unput_text(0);
END_PROGRAM		yy_pop_state(); unput_text(0);
}

	/* config -> INITIAL */
END_CONFIGURATION	BEGIN(INITIAL); return END_CONFIGURATION;



	/***************************************/
	/* Next is to to remove all whitespace */
	/***************************************/
	/* NOTE: pragmas are handled right at the beginning... */

	/* The whitespace */
<INITIAL,header_state,config_state,vardecl_state,st_state,sfc_state,task_init_state,sfc_qualifier_state>{st_whitespace}	/* Eat any whitespace */
<il_state>{il_whitespace}		/* Eat any whitespace */
 /* NOTE: Due to the need of having the following rule have higher priority,
  *        the following rule was moved to an earlier position in this file.
<body_state>{st_whitespace}		{...}
 */

	/* The comments */
<get_pou_name_state,ignore_pou_state,body_state,vardecl_list_state>{comment_beg}		yy_push_state(comment_state);
{comment_beg}						yy_push_state(comment_state);
<comment_state>{
{comment_beg}						{if (get_opt_nested_comments()) yy_push_state(comment_state);}
{comment_end}						yy_pop_state();
.							/* Ignore text inside comment! */
\n							/* Ignore text inside comment! */
}

	/*****************************************/
	/* B.1.1 Letters, digits and identifiers */
	/*****************************************/
	/* NOTE: 'R1', 'IN', etc... are IL operators, and therefore tokens
	 *       On the other hand, the spec does not define them as keywords,
	 *       which means they may be re-used for variable names, etc...!
	 *       The syntax parser already caters for the possibility of these
	 *       tokens being used for variable names in their declarations.
	 *       When they are declared, they will be added to the variable symbol table!
	 *       Further appearances of these tokens must no longer be parsed
	 *       as R1_tokens etc..., but rather as variable_name_tokens!
	 *
	 *       That is why the first thing we do with identifiers, even before
	 *       checking whether they may be a 'keyword', is to check whether
	 *       they have been previously declared as a variable name,
	 *
	 *       However, we have a dilema! Should we here also check for
	 *       prev_declared_derived_function_name_token?
	 *       If we do, then the 'MOD' default library function (defined in
	 *       the standard) will always be returned as a function name, and
	 *       it will therefore not be possible to use it as an operator as 
	 *       in the following ST expression 'X := Y MOD Z;' !
	 *       If we don't, then even it will not be possible to use 'MOD'
	 *       as a funtion as in 'X := MOD(Y, Z);'
	 *       We solve this by NOT testing for function names here, and
	 *       handling this function and keyword clash in bison!
	 */
	/* NOTE: The following code has been commented out as most users do not want matiec
	 *       to allow the use of 'R1', 'IN' ... IL operators as identifiers, 
	 *       even though a literal reading of the standard allows this.
	 *       We could add this as a commadnd line option, but it is not yet done.
	 *       For now we just comment out the code, but leave it the commented code
	 *       in so we can re-activate quickly (without having to go through old commits
	 *       in the mercurial repository to figure out the missing code!
	 */
 /*
{identifier} 	{int token = get_identifier_token(yytext);
		 // fprintf(stderr, "flex: analysing identifier '%s'...", yytext); 
		 if ((token == prev_declared_variable_name_token) ||
//		     (token == prev_declared_derived_function_name_token) || // DO NOT add this condition!
		     (token == prev_declared_fb_name_token)) {
		 // if (token != identifier_token)
		 // * NOTE: if we replace the above uncommented conditions with
                  *       the simple test of (token != identifier_token), then 
                  *       'MOD' et al must be removed from the 
                  *       library_symbol_table as a default function name!
		  * //
		   yylval.ID=strdup(yytext);
		   // fprintf(stderr, "returning token %d\n", token); 
		   return token;
		 }
		 // otherwise, leave it for the other lexical parser rules... 
		 // fprintf(stderr, "rejecting\n"); 
		 REJECT;
		}
 */

	/******************************************************/
	/******************************************************/
	/******************************************************/
	/*****                                            *****/
	/*****                                            *****/
	/*****   N O W    D O   T H E   K E Y W O R D S   *****/
	/*****                                            *****/
	/*****                                            *****/
	/******************************************************/
	/******************************************************/
	/******************************************************/


REF	{if (get_opt_ref_standard_extensions()) return REF;        else{REJECT;}}		/* Keyword in IEC 61131-3 v3 */
DREF	{if (get_opt_ref_standard_extensions()) return DREF;       else{REJECT;}}		/* Keyword in IEC 61131-3 v3 */
REF_TO	{if (get_opt_ref_standard_extensions()) return REF_TO;     else{REJECT;}}		/* Keyword in IEC 61131-3 v3 */
NULL	{if (get_opt_ref_standard_extensions()) return NULL_token; else{REJECT;}}		/* Keyword in IEC 61131-3 v3 */

EN	return EN;			/* Keyword */
ENO	return ENO;			/* Keyword */


	/******************************/
	/* B 1.2.1 - Numeric Literals */
	/******************************/
TRUE		return TRUE;		/* Keyword */
BOOL#1  	return boolean_true_literal_token;
BOOL#TRUE	return boolean_true_literal_token;
SAFEBOOL#1	{if (get_opt_safe_extensions()) {return safeboolean_true_literal_token;} else{REJECT;}} /* Keyword (Data Type) */ 
SAFEBOOL#TRUE	{if (get_opt_safe_extensions()) {return safeboolean_true_literal_token;} else{REJECT;}} /* Keyword (Data Type) */

FALSE		return FALSE;		/* Keyword */
BOOL#0  	return boolean_false_literal_token;
BOOL#FALSE  	return boolean_false_literal_token;
SAFEBOOL#0	{if (get_opt_safe_extensions()) {return safeboolean_false_literal_token;} else{REJECT;}} /* Keyword (Data Type) */ 
SAFEBOOL#FALSE	{if (get_opt_safe_extensions()) {return safeboolean_false_literal_token;} else{REJECT;}} /* Keyword (Data Type) */


	/************************/
	/* B 1.2.3.1 - Duration */
	/************************/
t#		return T_SHARP;		/* Delimiter */
T#		return T_SHARP;		/* Delimiter */
TIME		return TIME;		/* Keyword (Data Type) */


	/************************************/
	/* B 1.2.3.2 - Time of day and Date */
	/************************************/
TIME_OF_DAY	return TIME_OF_DAY;	/* Keyword (Data Type) */
TOD		return TIME_OF_DAY;	/* Keyword (Data Type) */
DATE		return DATE;		/* Keyword (Data Type) */
d#		return D_SHARP;		/* Delimiter */
D#		return D_SHARP;		/* Delimiter */
DATE_AND_TIME	return DATE_AND_TIME;	/* Keyword (Data Type) */
DT		return DATE_AND_TIME;	/* Keyword (Data Type) */


	/***********************************/
	/* B 1.3.1 - Elementary Data Types */
	/***********************************/
BOOL		return BOOL;		/* Keyword (Data Type) */

BYTE		return BYTE;		/* Keyword (Data Type) */
WORD		return WORD;		/* Keyword (Data Type) */
DWORD		return DWORD;		/* Keyword (Data Type) */
LWORD		return LWORD;		/* Keyword (Data Type) */

SINT		return SINT;		/* Keyword (Data Type) */
INT		return INT;		/* Keyword (Data Type) */
DINT		return DINT;		/* Keyword (Data Type) */
LINT		return LINT;		/* Keyword (Data Type) */

USINT		return USINT;		/* Keyword (Data Type) */
UINT		return UINT;		/* Keyword (Data Type) */
UDINT		return UDINT;		/* Keyword (Data Type) */
ULINT		return ULINT;		/* Keyword (Data Type) */

REAL		return REAL;		/* Keyword (Data Type) */
LREAL		return LREAL;		/* Keyword (Data Type) */

WSTRING		return WSTRING;		/* Keyword (Data Type) */
STRING		return STRING;		/* Keyword (Data Type) */

TIME		return TIME;		/* Keyword (Data Type) */
DATE		return DATE;		/* Keyword (Data Type) */
DT		return DT;		/* Keyword (Data Type) */
TOD		return TOD;		/* Keyword (Data Type) */
DATE_AND_TIME	return DATE_AND_TIME;	/* Keyword (Data Type) */
TIME_OF_DAY	return TIME_OF_DAY;	/* Keyword (Data Type) */

					/* A non-standard extension! */
VOID		{if (runtime_options.allow_void_datatype) {return VOID;}          else {REJECT;}} 


	/*****************************************************************/
	/* Keywords defined in "Safety Software Technical Specification" */
	/*****************************************************************/
        /* 
         * NOTE: The following keywords are define in 
         *       "Safety Software Technical Specification,
         *        Part 1: Concepts and Function Blocks,  
         *        Version 1.0 – Official Release"
         *        written by PLCopen - Technical Committee 5
         *
         *        We only support these extensions and keywords
         *        if the apropriate command line option is given.
         */
SAFEBOOL	     {if (get_opt_safe_extensions()) {return SAFEBOOL;}          else {REJECT;}} 

SAFEBYTE	     {if (get_opt_safe_extensions()) {return SAFEBYTE;}          else {REJECT;}} 
SAFEWORD	     {if (get_opt_safe_extensions()) {return SAFEWORD;}          else {REJECT;}} 
SAFEDWORD	     {if (get_opt_safe_extensions()) {return SAFEDWORD;}         else{REJECT;}}
SAFELWORD	     {if (get_opt_safe_extensions()) {return SAFELWORD;}         else{REJECT;}}
               
SAFEREAL	     {if (get_opt_safe_extensions()) {return SAFESINT;}          else{REJECT;}}
SAFELREAL    	     {if (get_opt_safe_extensions()) {return SAFELREAL;}         else{REJECT;}}
                  
SAFESINT	     {if (get_opt_safe_extensions()) {return SAFESINT;}          else{REJECT;}}
SAFEINT	             {if (get_opt_safe_extensions()) {return SAFEINT;}           else{REJECT;}}
SAFEDINT	     {if (get_opt_safe_extensions()) {return SAFEDINT;}          else{REJECT;}}
SAFELINT             {if (get_opt_safe_extensions()) {return SAFELINT;}          else{REJECT;}}

SAFEUSINT            {if (get_opt_safe_extensions()) {return SAFEUSINT;}         else{REJECT;}}
SAFEUINT             {if (get_opt_safe_extensions()) {return SAFEUINT;}          else{REJECT;}}
SAFEUDINT            {if (get_opt_safe_extensions()) {return SAFEUDINT;}         else{REJECT;}}
SAFEULINT            {if (get_opt_safe_extensions()) {return SAFEULINT;}         else{REJECT;}}

 /* SAFESTRING and SAFEWSTRING are not yet supported, i.e. checked correctly, in the semantic analyser (stage 3) */
 /*  so it is best not to support them at all... */
 /*
SAFEWSTRING          {if (get_opt_safe_extensions()) {return SAFEWSTRING;}       else{REJECT;}}
SAFESTRING           {if (get_opt_safe_extensions()) {return SAFESTRING;}        else{REJECT;}}
 */

SAFETIME             {if (get_opt_safe_extensions()) {return SAFETIME;}          else{REJECT;}}
SAFEDATE             {if (get_opt_safe_extensions()) {return SAFEDATE;}          else{REJECT;}}
SAFEDT               {if (get_opt_safe_extensions()) {return SAFEDT;}            else{REJECT;}}
SAFETOD              {if (get_opt_safe_extensions()) {return SAFETOD;}           else{REJECT;}}
SAFEDATE_AND_TIME    {if (get_opt_safe_extensions()) {return SAFEDATE_AND_TIME;} else{REJECT;}}
SAFETIME_OF_DAY      {if (get_opt_safe_extensions()) {return SAFETIME_OF_DAY;}   else{REJECT;}}

	/********************************/
	/* B 1.3.2 - Generic data types */
	/********************************/
	/* Strangely, the following symbols do not seem to be required! */
	/* But we include them so they become reserved words, and do not
	 * get passed up to bison as an identifier...
	 */
ANY		return ANY;		/* Keyword (Data Type) */
ANY_DERIVED	return ANY_DERIVED;	/* Keyword (Data Type) */
ANY_ELEMENTARY	return ANY_ELEMENTARY;	/* Keyword (Data Type) */
ANY_MAGNITUDE	return ANY_MAGNITUDE;	/* Keyword (Data Type) */
ANY_NUM		return ANY_NUM;		/* Keyword (Data Type) */
ANY_REAL	return ANY_REAL;	/* Keyword (Data Type) */
ANY_INT		return ANY_INT;		/* Keyword (Data Type) */
ANY_BIT		return ANY_BIT;		/* Keyword (Data Type) */
ANY_STRING	return ANY_STRING;	/* Keyword (Data Type) */
ANY_DATE	return ANY_DATE;	/* Keyword (Data Type) */


	/********************************/
	/* B 1.3.3 - Derived data types */
	/********************************/
":="		return ASSIGN;		/* Delimiter */
".."		return DOTDOT;		/* Delimiter */
TYPE		return TYPE;		/* Keyword */
END_TYPE	return END_TYPE;	/* Keyword */
ARRAY		return ARRAY;		/* Keyword */
OF		return OF;		/* Keyword */
STRUCT		return STRUCT;		/* Keyword */
END_STRUCT	return END_STRUCT;	/* Keyword */


	/*********************/
	/* B 1.4 - Variables */
	/*********************/

	/******************************************/
	/* B 1.4.3 - Declaration & Initialisation */
	/******************************************/
VAR_INPUT	return VAR_INPUT;	/* Keyword */
VAR_OUTPUT	return VAR_OUTPUT;	/* Keyword */
VAR_IN_OUT	return VAR_IN_OUT;	/* Keyword */
VAR_EXTERNAL	return VAR_EXTERNAL;	/* Keyword */
VAR_GLOBAL	return VAR_GLOBAL;	/* Keyword */
END_VAR		return END_VAR;		/* Keyword */
RETAIN		return RETAIN;		/* Keyword */
NON_RETAIN	return NON_RETAIN;	/* Keyword */
R_EDGE		return R_EDGE;		/* Keyword */
F_EDGE		return F_EDGE;		/* Keyword */
AT		return AT;		/* Keyword */


	/***********************/
	/* B 1.5.1 - Functions */
	/***********************/
	/* Note: The following END_FUNCTION rule includes a BEGIN(INITIAL); command.
	 *       This is necessary in case the input program being parsed has syntax errors that force
	 *       flex's main state machine to never change to the il_state or the st_state
	 *       after changing to the body_state.
	 *       Ths BEGIN(INITIAL) command forces the flex state machine to re-synchronise with 
	 *       the input stream even in the presence of buggy code!
	 */
FUNCTION			return FUNCTION;			/* Keyword */
END_FUNCTION	BEGIN(INITIAL);	return END_FUNCTION;			/* Keyword */  /* see Note above */
VAR				return VAR;				/* Keyword */
CONSTANT			return CONSTANT;			/* Keyword */


	/*****************************/
	/* B 1.5.2 - Function Blocks */
	/*****************************/
	/* Note: The following END_FUNCTION_BLOCK rule includes a BEGIN(INITIAL); command.
	 *       This is necessary in case the input program being parsed has syntax errors that force
	 *       flex's main state machine to never change to the il_state or the st_state
	 *       after changing to the body_state.
	 *       Ths BEGIN(INITIAL) command forces the flex state machine to re-synchronise with 
	 *       the input stream even in the presence of buggy code!
	 */
FUNCTION_BLOCK				return FUNCTION_BLOCK;		/* Keyword */
END_FUNCTION_BLOCK	BEGIN(INITIAL);	return END_FUNCTION_BLOCK;	/* Keyword */  /* see Note above */
VAR_TEMP				return VAR_TEMP;		/* Keyword */
VAR					return VAR;			/* Keyword */
NON_RETAIN				return NON_RETAIN;		/* Keyword */
END_VAR					return END_VAR;			/* Keyword */


	/**********************/
	/* B 1.5.3 - Programs */
	/**********************/
	/* Note: The following END_PROGRAM rule includes a BEGIN(INITIAL); command.
	 *       This is necessary in case the input program being parsed has syntax errors that force
	 *       flex's main state machine to never change to the il_state or the st_state
	 *       after changing to the body_state.
	 *       Ths BEGIN(INITIAL) command forces the flex state machine to re-synchronise with 
	 *       the input stream even in the presence of buggy code!
	 */
PROGRAM				return PROGRAM;				/* Keyword */
END_PROGRAM	BEGIN(INITIAL);	return END_PROGRAM;			/* Keyword */  /* see Note above */


	/********************************************/
	/* B 1.6 Sequential Function Chart elements */
	/********************************************/
	/* NOTE: the following identifiers/tokens clash with the R and S IL operators, as well
	.* as other identifiers that may be used as variable names inside IL and ST programs.
	 * They will have to be handled when we include parsing of SFC... For now, simply
	 * ignore them!
	 */
	 
ACTION		return ACTION;			/* Keyword */
END_ACTION	return END_ACTION;		/* Keyword */

TRANSITION	return TRANSITION;		/* Keyword */
END_TRANSITION	return END_TRANSITION;		/* Keyword */
FROM		return FROM;			/* Keyword */
TO		return TO;			/* Keyword */

INITIAL_STEP	return INITIAL_STEP;		/* Keyword */
STEP		return STEP;			/* Keyword */
END_STEP	return END_STEP;		/* Keyword */

	/* PRIORITY is not a keyword, so we only return it when 
	 * it is explicitly required and we are not expecting any identifiers
	 * that could also use the same letter sequence (i.e. an identifier: piority)
	 */
<sfc_priority_state>PRIORITY	return PRIORITY;

<sfc_qualifier_state>{
L		return L;
D		return D;
SD		return SD;
DS		return DS;
SL		return SL;
N		return N;
P		return P;
P0		return P0;
P1		return P1;
R		return R;
S		return S;
}


	/********************************/
	/* B 1.7 Configuration elements */
	/********************************/
	/* Note: The following END_CONFIGURATION rule will never get to be used, as we have
	 *       another identical rule above (closer to the rules handling the transitions
	 *       of the main state machine) that will always execute before this one.
	 * Note: The following END_CONFIGURATION rule includes a BEGIN(INITIAL); command.
	 *       This is nt strictly necessary, but I place it here so it follwos the same
	 *       pattern used in END_FUNCTION, END_PROGRAM, and END_FUNCTION_BLOCK
	 */
CONFIGURATION				return CONFIGURATION;		/* Keyword */
END_CONFIGURATION	BEGIN(INITIAL); return END_CONFIGURATION;	/* Keyword */   /* see 2 Notes above! */
TASK					return TASK;			/* Keyword */
RESOURCE				return RESOURCE;		/* Keyword */
ON					return ON;			/* Keyword */
END_RESOURCE				return END_RESOURCE;		/* Keyword */
VAR_CONFIG				return VAR_CONFIG;		/* Keyword */
VAR_ACCESS				return VAR_ACCESS;		/* Keyword */
END_VAR					return END_VAR;			/* Keyword */
WITH					return WITH;			/* Keyword */
PROGRAM					return PROGRAM;			/* Keyword */
RETAIN					return RETAIN;			/* Keyword */
NON_RETAIN				return NON_RETAIN;		/* Keyword */
READ_WRITE				return READ_WRITE;		/* Keyword */
READ_ONLY				return READ_ONLY;		/* Keyword */

	/* PRIORITY, SINGLE and INTERVAL are not a keywords, so we only return them when 
	 * it is explicitly required and we are not expecting any identifiers
	 * that could also use the same letter sequence (i.e. an identifier: piority, ...)
	 */
<task_init_state>{
PRIORITY		return PRIORITY;
SINGLE			return SINGLE;
INTERVAL		return INTERVAL;
}

	/***********************************/
	/* B 2.1 Instructions and Operands */
	/***********************************/
<il_state>\n		return EOL;


	/*******************/
	/* B 2.2 Operators */
	/*******************/
	/* NOTE: we can't have flex return the same token for
	 *       ANDN and &N, neither for AND and &, since
	 *       AND and ANDN are considered valid variable
	 *       function or functionblock type names!
	 *       This means that the parser may decide that the
	 *       AND or ANDN strings found in the source code
	 *       are being used as variable names
	 *       and not as operators, and will therefore transform
	 *       these tokens into indentifier tokens!
	 *       We can't have the parser thinking that the source
	 *       code contained the string AND (which may be interpreted
	 *       as a vairable name) when in reality the source code
	 *       merely contained the character &, so we use two
	 *       different tokens for & and AND (and similarly
	 *       ANDN and &N)!
	 */
 /* The following tokens clash with ST expression operators and Standard Functions */
 /* They are also keywords! */
AND		return AND;		/* Keyword */
MOD		return MOD;		/* Keyword */
OR		return OR;		/* Keyword */
XOR		return XOR;		/* Keyword */
NOT		return NOT;		/* Keyword */

 /* The following tokens clash with Standard Functions */
 /* They are keywords because they are a function name */
<il_state>{
ADD		return ADD;		/* Keyword (Standard Function) */
DIV		return DIV;		/* Keyword (Standard Function) */
EQ		return EQ;		/* Keyword (Standard Function) */
GE		return GE;		/* Keyword (Standard Function) */
GT		return GT;		/* Keyword (Standard Function) */
LE		return LE;		/* Keyword (Standard Function) */
LT		return LT;		/* Keyword (Standard Function) */
MUL		return MUL;		/* Keyword (Standard Function) */
NE		return NE;		/* Keyword (Standard Function) */
SUB		return SUB;		/* Keyword (Standard Function) */
}

 /* The following tokens clash with SFC action qualifiers */
 /* They are not keywords! */
<il_state>{
S		return S;
R		return R;
}

 /* The following tokens clash with ST expression operators */
&		return AND2;		/* NOT a Delimiter! */

 /* The following tokens have no clashes */
 /* They are not keywords! */
<il_state>{
LD		return LD;
LDN		return LDN;
ST		return ST;
STN		return STN;
S1		return S1;
R1		return R1;
CLK		return CLK;
CU		return CU;
CD		return CD;
PV		return PV;
IN		return IN;
PT		return PT;
ANDN		return ANDN;
&N		return ANDN2;
ORN		return ORN;
XORN		return XORN;
CAL		return CAL;
CALC		return CALC;
CALCN		return CALCN;
RET		return RET;
RETC		return RETC;
RETCN		return RETCN;
JMP		return JMP;
JMPC		return JMPC;
JMPCN		return JMPCN;
}

	/***********************/
	/* B 3.1 - Expressions */
	/***********************/
"**"		return OPER_EXP;	/* NOT a Delimiter! */
"<>"		return OPER_NE;		/* NOT a Delimiter! */
">="		return OPER_GE;		/* NOT a Delimiter! */
"<="		return OPER_LE;		/* NOT a Delimiter! */
&		return AND2;		/* NOT a Delimiter! */
AND		return AND;		/* Keyword */
XOR		return XOR;		/* Keyword */
OR		return OR;		/* Keyword */
NOT		return NOT;		/* Keyword */
MOD		return MOD;		/* Keyword */


	/*****************************************/
	/* B 3.2.2 Subprogram Control Statements */
	/*****************************************/
:=		return ASSIGN;		/* Delimiter */
=>		return SENDTO;		/* Delimiter */
RETURN		return RETURN;		/* Keyword */


	/********************************/
	/* B 3.2.3 Selection Statements */
	/********************************/
IF		return IF;		/* Keyword */
THEN		return THEN;		/* Keyword */
ELSIF		return ELSIF;		/* Keyword */
ELSE		return ELSE;		/* Keyword */
END_IF		return END_IF;		/* Keyword */

CASE		return CASE;		/* Keyword */
OF		return OF;		/* Keyword */
ELSE		return ELSE;		/* Keyword */
END_CASE	return END_CASE;	/* Keyword */


	/********************************/
	/* B 3.2.4 Iteration Statements */
	/********************************/
FOR		return FOR;		/* Keyword */
TO		return TO;		/* Keyword */
BY		return BY;		/* Keyword */
DO		return DO;		/* Keyword */
END_FOR		return END_FOR;		/* Keyword */

WHILE		return WHILE;		/* Keyword */
DO		return DO;		/* Keyword */
END_WHILE	return END_WHILE;	/* Keyword */

REPEAT		return REPEAT;		/* Keyword */
UNTIL		return UNTIL;		/* Keyword */
END_REPEAT	return END_REPEAT;	/* Keyword */

EXIT		return EXIT;		/* Keyword */






	/********************************************************/
	/********************************************************/
	/********************************************************/
	/*****                                              *****/
	/*****                                              *****/
	/*****  N O W    W O R K    W I T H    V A L U E S  *****/
	/*****                                              *****/
	/*****                                              *****/
	/********************************************************/
	/********************************************************/
	/********************************************************/


	/********************************************/
	/* B.1.4.1   Directly Represented Variables */
	/********************************************/
{direct_variable}   {yylval.ID=strdup(yytext); return get_direct_variable_token(yytext);}


	/******************************************/
	/* B 1.4.3 - Declaration & Initialisation */
	/******************************************/
{incompl_location}	{yylval.ID=strdup(yytext); return incompl_location_token;}


	/************************/
	/* B 1.2.3.1 - Duration */
	/************************/
{fixed_point}		{yylval.ID=strdup(yytext); return fixed_point_token;}
{interval}		{/*fprintf(stderr, "entering time_literal_state ##%s##\n", yytext);*/ unput_and_mark('#'); yy_push_state(time_literal_state);}
{erroneous_interval}	{return erroneous_interval_token;}

<time_literal_state>{
{integer}d		{yylval.ID=strdup(yytext); yylval.ID[yyleng-1] = '\0'; return integer_d_token;}
{integer}h		{yylval.ID=strdup(yytext); yylval.ID[yyleng-1] = '\0'; return integer_h_token;}
{integer}m		{yylval.ID=strdup(yytext); yylval.ID[yyleng-1] = '\0'; return integer_m_token;}
{integer}s		{yylval.ID=strdup(yytext); yylval.ID[yyleng-1] = '\0'; return integer_s_token;}
{integer}ms		{yylval.ID=strdup(yytext); yylval.ID[yyleng-2] = '\0'; return integer_ms_token;}
{fixed_point}d		{yylval.ID=strdup(yytext); yylval.ID[yyleng-1] = '\0'; return fixed_point_d_token;}
{fixed_point}h		{yylval.ID=strdup(yytext); yylval.ID[yyleng-1] = '\0'; return fixed_point_h_token;}
{fixed_point}m		{yylval.ID=strdup(yytext); yylval.ID[yyleng-1] = '\0'; return fixed_point_m_token;}
{fixed_point}s		{yylval.ID=strdup(yytext); yylval.ID[yyleng-1] = '\0'; return fixed_point_s_token;}
{fixed_point}ms		{yylval.ID=strdup(yytext); yylval.ID[yyleng-2] = '\0'; return fixed_point_ms_token;}

_			/* do nothing - eat it up!*/
\#			{/*fprintf(stderr, "popping from time_literal_state (###)\n");*/ yy_pop_state(); return end_interval_token;}
.			{/*fprintf(stderr, "time_literal_state: found invalid character '%s'. Aborting!\n", yytext);*/ ERROR;}
\n			{ERROR;}
}
	/*******************************/
	/* B.1.2.2   Character Strings */
	/*******************************/
{double_byte_character_string} {yylval.ID=strdup(yytext); return double_byte_character_string_token;}
{single_byte_character_string} {yylval.ID=strdup(yytext); return single_byte_character_string_token;}


	/******************************/
	/* B.1.2.1   Numeric literals */
	/******************************/
{integer}		{yylval.ID=strdup(yytext); return integer_token;}
{real}			{yylval.ID=strdup(yytext); return real_token;}
{binary_integer}	{yylval.ID=strdup(yytext); return binary_integer_token;}
{octal_integer} 	{yylval.ID=strdup(yytext); return octal_integer_token;}
{hex_integer} 		{yylval.ID=strdup(yytext); return hex_integer_token;}


	/*****************************************/
	/* B.1.1 Letters, digits and identifiers */
	/*****************************************/
<st_state>{identifier}/({st_whitespace_or_pragma_or_comment})"=>"	{yylval.ID=strdup(yytext); return sendto_identifier_token;}
<il_state>{identifier}/({il_whitespace_or_pragma_or_comment})"=>"	{yylval.ID=strdup(yytext); return sendto_identifier_token;}
{identifier} 				{yylval.ID=strdup(yytext);
					 // printf("returning identifier...: %s, %d\n", yytext, get_identifier_token(yytext));
					 return get_identifier_token(yytext);}






	/************************************************/
	/************************************************/
	/************************************************/
	/*****                                      *****/
	/*****                                      *****/
	/*****   T H E    L E F T O V E R S . . .   *****/
	/*****                                      *****/
	/*****                                      *****/
	/************************************************/
	/************************************************/
	/************************************************/

	/* do the single character tokens...
	 *
	 *  e.g.:  ':'  '('  ')'  '+'  '*'  ...
	 */
.	{return yytext[0];}


%%


/*************************/
/* Tracking Functions... */
/*************************/

#define MAX_LINE_LENGTH 1024

tracking_t *GetNewTracking(FILE* in_file) {
  tracking_t* new_env = new tracking_t;
  new_env->eof         = 0;
  new_env->lineNumber  = 1;
  new_env->currentChar = 0;
  new_env->lineLength  = 0;
  new_env->currentTokenStart = 0;
  new_env->in_file = in_file;
  return new_env;
}


void FreeTracking(tracking_t *tracking) {
  delete tracking;
}


void UpdateTracking(const char *text) {
  const char *newline, *token = text;
  while ((newline = strchr(token, '\n')) != NULL) {
    token = newline + 1;
    current_tracking->lineNumber++;
    current_tracking->currentChar = 1;
  }
  current_tracking->currentChar += strlen(token);
}


/* GetNextChar: reads a character from input */
int GetNextChar(char *b, int maxBuffer) {
  int res = fgetc(current_tracking->in_file);
  if ( res == EOF ) 
    return 0;
  *b = (char)res;
  return 1;
}



/***********************************/
/* Utility function definitions... */
/***********************************/

/* print the include file stack to stderr... */
void print_include_stack(void) {
  int i;

  if ((include_stack_ptr - 1) >= 0)
    fprintf (stderr, "in file "); 
  for (i = include_stack_ptr - 1; i >= 0; i--)
    fprintf (stderr, "included from file %s:%d\n", include_stack[i].filename, include_stack[i].env->lineNumber);
}



/* set the internal state variables of lexical analyser to process a new include file */
void handle_include_file_(FILE *filehandle, const char *filename) {
  if (include_stack_ptr >= MAX_INCLUDE_DEPTH) {
    fprintf(stderr, "Includes nested too deeply\n");
    exit( 1 );
  }
  
  yyin = filehandle;
  
  include_stack[include_stack_ptr].buffer_state = YY_CURRENT_BUFFER;
  include_stack[include_stack_ptr].env = current_tracking;
  include_stack[include_stack_ptr].filename = current_filename;
  
  current_filename = strdup(filename);
  current_tracking = GetNewTracking(yyin);
  include_stack_ptr++;

  /* switch input buffer to new file... */
  yy_switch_to_buffer(yy_create_buffer(yyin, YY_BUF_SIZE));
}



/* insert the code (in <source_code>) into the source code we are parsing.
 * This is done by creating an artificial file with that new source code, and then 'including' the file
 */
void include_string_(const char *source_code) {
  FILE *tmp_file = tmpfile();
  
  if(tmp_file == NULL) {
    perror("Error creating temp file.");
    exit(EXIT_FAILURE);
  }

  fwrite((void *)source_code, 1, strlen(source_code), tmp_file);
  rewind(tmp_file);

  /* now parse the tmp file, by asking flex to handle it as if it had been included with the (*#include ... *) pragma... */
  handle_include_file_(tmp_file, "");
//fclose(tmp_file);  /* do NOT close file. It must only be closed when we finish reading from it! */
}



/* Open an include file, and set the internal state variables of lexical analyser to process a new include file */
void include_file(const char *filename) {
  FILE *filehandle = NULL;
  
  for (int i = 0; (INCLUDE_DIRECTORIES[i] != NULL) && (filehandle == NULL); i++) {
    char *full_name;
    full_name = strdup3(INCLUDE_DIRECTORIES[i], "/", filename);
    if (full_name == NULL) {
      fprintf(stderr, "Out of memory!\n");
      exit( 1 );
    }
    filehandle = fopen(full_name, "r");
    free(full_name);
  }

  if (NULL == filehandle) {
    fprintf(stderr, "Error opening included file %s\n", filename);
    exit( 1 );
  }

  /* now process the new file... */
  handle_include_file_(filehandle, filename);
}



/* return the specified character to the input stream */
/* WARNING: this function destroys the contents of yytext */
void unput_char(const char c) {
  /* NOTE: The following uncomented code is not necessary as we currently use a different algorithm:
   *          - make a backup/snapshot of the current tracking data (in previous_tracking variable)
   *             (done in YY_USER_ACTION)
   *          - restore the previous tracking state when we unput any text...
   *             (in unput_text() and unput_and_mark() )
   */
//   /* We will later be processing this same character again when it is read from the input strem,
//    * and therefore we will be incrementing the line number and character column acordingly.
//    * We must therefore try to 'undo' the changes to the line number and character column
//    * so this character is not counted twice!
//    */
//   if        (c == '\n') {
//     current_tracking->lineNumber--;
//     /* We should now set the current_tracking->currentChar to the length of the previous line
//      * But we currently have no way of knowing it, so we simply set it to 0.
//      * I (msousa) don't think this is currently an issue because I don't believe the code
//      * ever calls unput_char() with a '\n', so we leave it for now
//      */
//     current_tracking->currentChar = 0;
//   } else if (current_tracking->currentChar > 0) {
//     current_tracking->currentChar--;
//   }

  unput(c); // unput() destroys the contents of yytext !!
}


/* return all the text in the current token back to the input stream, except the first n chars. */
void unput_text(int n) {
  if (n < 0) ERROR;
  signed int i; // must be signed! The iterartion may end with -1 when this function is called with n=0 !!

  char *yycopy = strdup( yytext ); /* unput_char() destroys yytext, so we copy it first */
  for (int i = yyleng-1; i >= n; i--)
    unput_char(yycopy[i]);

  *current_tracking = previous_tracking;
  yycopy[n] = '\0';
  UpdateTracking(yycopy);
  
  free(yycopy);
}



/* return all the text in the current token back to the input stream, 
 * but first return to the stream an additional character to mark the end of the token. 
 */
void unput_and_mark(const char mark_char) {
  char *yycopy = strdup( yytext ); /* unput_char() destroys yytext, so we copy it first */
  unput_char(mark_char);
  for (int i = yyleng-1; i >= 0; i--)
    unput_char(yycopy[i]);

  free(yycopy);
  *current_tracking = previous_tracking;
}



/* The body_state tries to find a ';' before a END_PROGRAM, END_FUNCTION or END_FUNCTION_BLOCK or END_ACTION
 * and ignores ';' inside comments and pragmas. This means that we cannot do this in a signle lex rule.
 * Body_state therefore stores ALL text we consume in every rule, so we can push it back into the buffer
 * once we have decided if we are parsing ST or IL code. The following functions manage that buffer used by
 * the body_state.
 */
/* The buffer used by the body_state state */
char *bodystate_buffer        = NULL;
bool  bodystate_is_whitespace = 1; // TRUE (1) if buffer is empty, or only contains whitespace.
tracking_t bodystate_init_tracking;

/* append text to bodystate_buffer */
void  append_bodystate_buffer(const char *text, int is_whitespace) {
  // printf("<<<append_bodystate_buffer>>> %d <%s><%s>\n", bodystate_buffer, text, (NULL != bodystate_buffer)?bodystate_buffer:"NULL");
  long int old_len = 0;
  // make backup of tracking if we are starting off a new body_state_buffer
  if (NULL == bodystate_buffer) bodystate_init_tracking = *current_tracking;
  // set bodystate_is_whitespace flag if we are starting a new buffer
  if (NULL == bodystate_buffer) bodystate_is_whitespace = 1;
  // set bodystate_is_whitespace flag to FALSE if we are adding non white space to buffer
  if (!is_whitespace)           bodystate_is_whitespace = 0;

  if (NULL != bodystate_buffer) old_len = strlen(bodystate_buffer);
  bodystate_buffer = (char *)realloc(bodystate_buffer, old_len + strlen(text) + 1);
  if (NULL == bodystate_buffer) ERROR;
  strcpy(bodystate_buffer + old_len, text);
  //printf("=<%s> %d %d\n", (NULL != bodystate_buffer)?bodystate_buffer:NULL, old_len + strlen(text) + 1, bodystate_buffer);
}

/* Return all data in bodystate_buffer back to flex, and empty bodystate_buffer. */
void   unput_bodystate_buffer(void) {
  if (NULL == bodystate_buffer) ERROR;
  // printf("<<<unput_bodystate_buffer>>>\n%s\n", bodystate_buffer);
  
  for (long int i = strlen(bodystate_buffer)-1; i >= 0; i--)
    unput_char(bodystate_buffer[i]);
  
  free(bodystate_buffer);
  bodystate_buffer        = NULL;
  bodystate_is_whitespace = 1;  
  *current_tracking = bodystate_init_tracking;
}


/* Return true if bodystate_buffer is empty or ony contains whitespace!! */
int  isempty_bodystate_buffer(void) {
  if (NULL == bodystate_buffer) return 1;
  if (bodystate_is_whitespace)  return 1;
  return 0;
}




/* Called by flex when it reaches the end-of-file */
int yywrap(void)
{
  /* We reached the end of the input file... */

  /* Should we continue with another file? */
  /* If so:
   *   open the new file...
   *   return 0;
   */

  /* to stop processing...
   *   return 1;
   */

  return 1;  /* Stop scanning at end of input file. */
}



/*******************************/
/* Public Interface for Bison. */
/*******************************/

/* The following functions will be called from inside bison code! */

void include_string(const char *source_code) {include_string_(source_code);}


/* Tell flex which file to parse. This function will not imediately start parsing the file.
 * To parse the file, you then need to call yyparse()
 *
 * Returns NULL on error opening the file (and a valid errno), or 0 on success.
 * Caller must close the file!
 */
FILE *parse_file(const char *filename) {
  FILE *filehandle = NULL;

  if((filehandle = fopen(filename, "r")) != NULL) {
    yyin = filehandle;
    current_filename = strdup(filename);
    current_tracking = GetNewTracking(yyin);
  }
  return filehandle;
}






/*************************************/
/* Include a main() function to test */
/* the token parsing by flex....     */
/*************************************/
#ifdef TEST_MAIN

#include "../util/symtable.hh"

yystype yylval;
YYLTYPE yylloc;




int get_identifier_token(const char *identifier_str) {return 0;}
int get_direct_variable_token(const char *direct_variable_str) {return 0;}


int main(int argc, char **argv) {

  FILE *in_file;
  int res;
	
  if (argc == 1) {
    /* Work as an interactive (command line) parser... */
    while((res=yylex()))
      fprintf(stderr, "(line %d)token: %d\n", yylineno, res);
  } else {
    /* Work as non-interactive (file) parser... */
    if((in_file = fopen(argv[1], "r")) == NULL) {
      char *errmsg = strdup2("Error opening main file ", argv[1]);
      perror(errmsg);
      free(errmsg);
      return -1;
    }

    /* parse the file... */
    yyin = in_file;
    current_filename = argv[1];
    while(1) {
      res=yylex();
      fprintf(stderr, "(line %d)token: %d (%s)\n", yylineno, res, yylval.ID);
    }
  }
	
	return 0;

}
#endif