diff -r aad38592bdde -r c0bda77b37a0 stage1_2/iec_flex.ll --- a/stage1_2/iec_flex.ll Tue Aug 14 19:40:01 2012 +0200 +++ b/stage1_2/iec_flex.ll Wed Aug 22 16:46:17 2012 +0200 @@ -91,8 +91,10 @@ */ %option noyy_top_state -/* We will not be using unput() in our flex code... */ +/* We will be using unput() in our flex code, so we cannot set the following option!... */ +/* %option nounput +*/ /**************************************************/ /* External Variable and Function declarations... */ @@ -171,8 +173,9 @@ * track of the locations, in order to give * more meaningful error messages! */ -extern YYLTYPE yylloc; - +/* + *extern YYLTYPE yylloc; + */ #define YY_INPUT(buf,result,max_size) {\ result = GetNextChar(buf, max_size);\ if ( result <= 0 )\ @@ -208,7 +211,6 @@ current_order++; \ } - /* Since this lexical parser we defined only works in ASCII based * systems, we might as well make sure it is being compiled on * one... @@ -241,6 +243,10 @@ %{ /* return all the text in the current token back to the input stream. */ void unput_text(unsigned int n); +/* return all the text in the current token back to the input stream, + * but first return to the stream an additional character to mark the end of the token. + */ +void unput_and_mark(const char c); %} @@ -340,6 +346,16 @@ * expecting any action qualifiers, flex does not return these tokens, and is free * to interpret them as previously defined variables/functions/... as the case may be. * + * The time_literal_state is required because TIME# literals are decomposed into + * portions, and we want to send these portions one by one to bison. Each portion will + * represent the value in days/hours/minutes/seconds/ms. + * Unfortunately, some of these portions may also be lexically analysed as an identifier. So, + * we need to disable lexical identification of identifiers while parsing TIME# literals! 
+ * e.g.: TIME#55d_4h_56m + * We would like to return to bison the tokens 'TIME' '#' '55d' '_' '4h' '_' '56m' + * Unfortunately, flex will join '_' and '4h' to create a legal {identifier} '_4h', + * and return that identifier instead! So, we added this state! + * * The state machine has 7 possible states (INITIAL, config, decl, body, st, il, sfc) * Possible state changes are: * INITIAL -> goto(decl_state) @@ -418,7 +434,8 @@ /* we are parsing sfc code, and expecting the priority token. */ %s sfc_priority_state - +/* we are parsing a TIME# literal. We must not return any {identifier} tokens. This start condition is declared exclusive (%x), so only rules explicitly attached to it apply while it is active. */ +%x time_literal_state /*******************/ @@ -598,6 +615,15 @@ /* B.1.2.1 Numeric literals */ /******************************/ integer {digit}((_?{digit})*) + +/* Some helper symbols for parsing TIME literals: range-limited integers in which an optional '_' may follow a digit. integer_0_59, integer_0_19 and integer_20_23 accept any number of leading zeros before the value (0..59, 0..19 and 20..23 respectively); integer_0_23 is the union of the latter two; integer_0_999 is simply one to three digits. */ +integer_0_59 (0(_?))*([0-5](_?))?{digit} +integer_0_19 (0(_?))*([0-1](_?))?{digit} +integer_20_23 (0(_?))*2(_?)[0-3] +integer_0_23 {integer_0_19}|{integer_20_23} +integer_0_999 {digit}((_?{digit})?)((_?{digit})?) + + binary_integer 2#{bit}((_?{bit})*) bit [0-1] octal_integer 8#{octal_digit}((_?{octal_digit})*) @@ -673,21 +699,54 @@ /************************/ fixed_point {integer}\.{integer} -fixed_point_d {fixed_point}d -integer_d {integer}d - -fixed_point_h {fixed_point}h -integer_h {integer}h - -fixed_point_m {fixed_point}m -integer_m {integer}m - -fixed_point_s {fixed_point}s -integer_s {integer}s - -fixed_point_ms {fixed_point}ms -integer_ms {integer}ms - + +/* NOTE: The IEC 61131-3 v2 standard has an incorrect formal syntax definition of duration, + * as its definition does not match the standard's text. + * IEC 61131-3 v3 (committee draft) seems to have this fixed, so we use that + * definition instead! + * + * duration::= ('T' | 'TIME') '#' ['+'|'-'] interval + * interval::= days | hours | minutes | seconds | milliseconds + * fixed_point ::= integer [ '.' 
integer] + * days ::= fixed_point 'd' | integer 'd' ['_'] [ hours ] + * hours ::= fixed_point 'h' | integer 'h' ['_'] [ minutes ] + * minutes ::= fixed_point 'm' | integer 'm' ['_'] [ seconds ] + * seconds ::= fixed_point 's' | integer 's' ['_'] [ milliseconds ] + * milliseconds ::= fixed_point 'ms' + * + * + * The original IEC 61131-3 v2 definition is: + * duration ::= ('T' | 'TIME') '#' ['-'] interval + * interval ::= days | hours | minutes | seconds | milliseconds + * fixed_point ::= integer [ '.' integer] + * days ::= fixed_point 'd' | integer 'd' ['_'] hours + * hours ::= fixed_point 'h' | integer 'h' ['_'] minutes + * minutes ::= fixed_point 'm' | integer 'm' ['_'] seconds + * seconds ::= fixed_point 's' | integer 's' ['_'] milliseconds + * milliseconds ::= fixed_point 'ms' + + */ + +interval_ms_X ({integer_0_999}(\.{integer})?)ms +interval_s_X {integer_0_59}s(_?{interval_ms_X})? +interval_m_X {integer_0_59}m(_?{interval_s_X})? +interval_h_X {integer_0_23}h(_?{interval_m_X})? + +interval_ms {integer}ms|({fixed_point}ms) +interval_s {integer}s(_?{interval_ms_X})?|({fixed_point}s) +interval_m {integer}m(_?{interval_s_X})?|({fixed_point}m) +interval_h {integer}h(_?{interval_m_X})?|({fixed_point}h) +interval_d {integer}d(_?{interval_h_X})?|({fixed_point}d) + +interval {interval_ms}|{interval_s}|{interval_m}|{interval_h}|{interval_d} + +/* to help provide nice error messages, we also parse an incorrect but plausible interval... */ +/* NOTE that this erroneous interval will be parsed outside the time_literal_state, so must not + * be able to parse any other legal lexical construct (besides a legal interval, but that + * is OK as this rule will appear _after_ the rule to parse legal intervals!). NOTE(review): every component of erroneous_interval is optional, so the pattern can also match the empty string - confirm this is harmless. + */ +fixed_point_or_integer {fixed_point}|{integer} +erroneous_interval ({fixed_point_or_integer}d_?)?({fixed_point_or_integer}h_?)?({fixed_point_or_integer}m_?)?({fixed_point_or_integer}s_?)?({fixed_point_or_integer}ms)? 
/********************************************/ /* B.1.4.1 Directly Represented Variables */ @@ -719,9 +778,9 @@ * in which case we are currently using "%I3" as the variable * name. */ -direct_variable_matplc %{identifier} - -direct_variable {direct_variable_standard}|{direct_variable_matplc} +/* direct_variable_matplc %{identifier} */ +/* direct_variable {direct_variable_standard}|{direct_variable_matplc} */ +direct_variable {direct_variable_standard} /******************************************/ /* B 1.4.3 - Declaration & Initialisation */ @@ -1541,23 +1600,26 @@ /* B 1.2.3.1 - Duration */ /************************/ {fixed_point} {yylval.ID=strdup(yytext); return fixed_point_token;} - -{fixed_point_d} {yylval.ID=strdup(yytext); yylval.ID[yyleng-1] = '\0'; return fixed_point_d_token;} -{integer_d} {yylval.ID=strdup(yytext); yylval.ID[yyleng-1] = '\0'; return integer_d_token;} - -{fixed_point_h} {yylval.ID=strdup(yytext); yylval.ID[yyleng-1] = '\0'; return fixed_point_h_token;} -{integer_h} {yylval.ID=strdup(yytext); yylval.ID[yyleng-1] = '\0'; return integer_h_token;} - -{fixed_point_m} {yylval.ID=strdup(yytext); yylval.ID[yyleng-1] = '\0'; return fixed_point_m_token;} -{integer_m} {yylval.ID=strdup(yytext); yylval.ID[yyleng-1] = '\0'; return integer_m_token;} - -{fixed_point_s} {yylval.ID=strdup(yytext); yylval.ID[yyleng-1] = '\0'; return fixed_point_s_token;} -{integer_s} {yylval.ID=strdup(yytext); yylval.ID[yyleng-1] = '\0'; return integer_s_token;} - -{fixed_point_ms} {yylval.ID=strdup(yytext); yylval.ID[yyleng-2] = '\0'; return fixed_point_ms_token;} -{integer_ms} {yylval.ID=strdup(yytext); yylval.ID[yyleng-2] = '\0'; return integer_ms_token;} - - +{interval} {/*fprintf(stderr, "entering time_literal_state ##%s##\n", yytext);*/ /* push the whole matched interval back, followed by a '#' end marker, and re-scan it piece by piece in time_literal_state */ unput_and_mark('#'); yy_push_state(time_literal_state);} +{erroneous_interval} {return erroneous_interval_token;} + +<time_literal_state>{ +{integer}d {yylval.ID=strdup(yytext); yylval.ID[yyleng-1] = '\0'; return integer_d_token;} +{integer}h 
{yylval.ID=strdup(yytext); yylval.ID[yyleng-1] = '\0'; return integer_h_token;} +{integer}m {yylval.ID=strdup(yytext); yylval.ID[yyleng-1] = '\0'; return integer_m_token;} +{integer}s {yylval.ID=strdup(yytext); yylval.ID[yyleng-1] = '\0'; return integer_s_token;} +{integer}ms {yylval.ID=strdup(yytext); yylval.ID[yyleng-2] = '\0'; return integer_ms_token;} +{fixed_point}d {yylval.ID=strdup(yytext); yylval.ID[yyleng-1] = '\0'; return fixed_point_d_token;} +{fixed_point}h {yylval.ID=strdup(yytext); yylval.ID[yyleng-1] = '\0'; return fixed_point_h_token;} +{fixed_point}m {yylval.ID=strdup(yytext); yylval.ID[yyleng-1] = '\0'; return fixed_point_m_token;} +{fixed_point}s {yylval.ID=strdup(yytext); yylval.ID[yyleng-1] = '\0'; return fixed_point_s_token;} +{fixed_point}ms {yylval.ID=strdup(yytext); yylval.ID[yyleng-2] = '\0'; return fixed_point_ms_token;} + +_ /* do nothing - eat it up!*/ +\# {/*fprintf(stderr, "popping from time_literal_state (###)\n");*/ yy_pop_state(); return end_interval_token;} +. {/*fprintf(stderr, "time_literal_state: found invalid character '%s'. Aborting!\n", yytext);*/ ERROR;} +\n {ERROR;} +} /*******************************/ /* B.1.2.2 Character Strings */ /*******************************/ @@ -1645,6 +1707,20 @@ } +/* return all the text in the current token back to the input stream, + * but first return to the stream an additional character to mark the end of the token. The marker c is unput before any character of the token, and the token itself is unput from its last character (index yyleng-1) down to its first, so that c ends up marking the end of the re-scanned token text. NOTE(review): the strdup() result is indexed without a NULL check - confirm whether allocation failure needs handling here. + */ +void unput_and_mark(const char c) { + char *yycopy = strdup( yytext ); /* unput() destroys yytext, so we copy it first; the copy is released with free() once every character has been pushed back */ + unput(c); + for (int i = yyleng-1; i >= 0; i--) + unput(yycopy[i]); + + free(yycopy); +} + + + /* Called by flex when it reaches the end-of-file */ int yywrap(void) {