diff -r 57c08195c962 -r ce7b65e24676 stage1_2/iec_flex.ll --- a/stage1_2/iec_flex.ll Mon Jul 03 20:31:47 2017 +0100 +++ b/stage1_2/iec_flex.ll Thu Aug 03 22:16:35 2017 +0100 @@ -178,20 +178,24 @@ * back to the bison parser... */ #define YY_USER_ACTION {\ - yylloc.first_line = current_tracking->lineNumber; \ - yylloc.first_column = current_tracking->currentTokenStart; \ - yylloc.first_file = current_filename; \ - yylloc.first_order = current_order; \ - yylloc.last_line = current_tracking->lineNumber; \ - yylloc.last_column = current_tracking->currentChar - 1; \ - yylloc.last_file = current_filename; \ - yylloc.last_order = current_order; \ + previous_tracking =*current_tracking; \ + yylloc.first_line = current_tracking->lineNumber; \ + yylloc.first_column = current_tracking->currentChar; \ + yylloc.first_file = current_filename; \ + yylloc.first_order = current_order; \ + \ + UpdateTracking(yytext); \ + \ + yylloc.last_line = current_tracking->lineNumber; \ + yylloc.last_column = current_tracking->currentChar - 1; \ + yylloc.last_file = current_filename; \ + yylloc.last_order = current_order; \ + \ current_tracking->currentTokenStart = current_tracking->currentChar; \ current_order++; \ } - /* Since this lexical parser we defined only works in ASCII based * systems, we might as well make sure it is being compiled on * one... @@ -222,22 +226,25 @@ /***************************************************/ %{ +void UpdateTracking(const char *text); +/* return the character back to the input stream. */ +void unput_char(const char c); /* return all the text in the current token back to the input stream. */ -void unput_text(unsigned int n); +void unput_text(int n); /* return all the text in the current token back to the input stream, * but first return to the stream an additional character to mark the end of the token. 
*/ -void unput_and_mark(const char c); +void unput_and_mark(const char mark_char); void include_file(const char *include_filename); /* The body_state tries to find a ';' before a END_PROGRAM, END_FUNCTION or END_FUNCTION_BLOCK or END_ACTION - * To do so, it must ignore comments and pragmas. This means that we cannot do this in a signle lex rule. - * However, we must store any text we consume in every rule, so we can push it back into the buffer + * and ignores ';' inside comments and pragmas. This means that we cannot do this in a single lex rule. + * Body_state therefore stores ALL text we consume in every rule, so we can push it back into the buffer * once we have decided if we are parsing ST or IL code. The following functions manage that buffer used by * the body_state. */ -void append_bodystate_buffer(const char *yytext); +void append_bodystate_buffer(const char *text); void unput_bodystate_buffer(void); int isempty_bodystate_buffer(void); @@ -557,7 +564,6 @@ int currentChar; int lineLength; int currentTokenStart; - char *buffer; FILE *in_file; } tracking_t; @@ -573,7 +579,8 @@ const char *filename; } include_stack_t; -tracking_t *current_tracking = NULL; +tracking_t * current_tracking = NULL; +tracking_t previous_tracking; include_stack_t include_stack[MAX_INCLUDE_DEPTH]; int include_stack_ptr = 0; @@ -1165,7 +1172,15 @@ END_FUNCTION_BLOCK unput_text(0); BEGIN(INITIAL); END_PROGRAM unput_text(0); BEGIN(INITIAL); -. unput_text(0); yy_push_state(body_state); //printf("\nChanging to body_state\n");/* anything else, just change to body_state! */ + /* NOTE: Handling of whitespace... + * - Must come __before__ the next rule for any single character '.' + * - If the rules were reversed, any whitespace with a single space (' ') + * would be handled by the '.' rule instead of the {whitespace} rule! + */ +{st_whitespace} /* Eat any whitespace */ + + /* anything else, just change to body_state! */ +. 
unput_text(0); yy_push_state(body_state); //printf("\nChanging to body_state\n"); } @@ -1179,14 +1194,17 @@ { {st_whitespace} {/* In body state we do not process any tokens, * we simply store them for later processing! - * NOTE: all whitespace in the begining - * of body_state must be removed so we can - * detect ':=' in the beginning of TRANSACTION - * conditions preceded by whitespace. - * => only add to bodystate_buffer when not in beginning. + * NOTE: we must return ALL text when in body_state, including + * all comments and whitespace, so as not + * to lose track of the line_number and column number + * used when printing debugging messages. + * Note that some of the following rules depend on the fact that + * the body state buffer is either empty or only contains white space up to + * that point. However, since the vardecl_list_state will eat up all + * whitespace before entering the body_state, the contents of the bodystate_buffer + * will _never_ start with whitespace. */ - if (!isempty_bodystate_buffer()) - append_bodystate_buffer(yytext); + append_bodystate_buffer(yytext); } /* 'INITIAL_STEP' always used in beginning of SFCs !! */ INITIAL_STEP { if (isempty_bodystate_buffer()) {unput_text(0); BEGIN(sfc_state);} @@ -1249,7 +1267,7 @@ /* NOTE: pragmas are handled right at the beginning... */ /* The whitespace */ -{st_whitespace} /* Eat any whitespace */ +{st_whitespace} /* Eat any whitespace */ {il_whitespace} /* Eat any whitespace */ /* NOTE: Due to the need of having the following rule have higher priority, * the following rule was moved to an earlier position in this file. 
@@ -1903,59 +1921,40 @@ tracking_t *GetNewTracking(FILE* in_file) { tracking_t* new_env = new tracking_t; - new_env->eof = 0; - new_env->lineNumber = 0; + new_env->eof = 0; + new_env->lineNumber = 1; new_env->currentChar = 0; - new_env->lineLength = 0; + new_env->lineLength = 0; new_env->currentTokenStart = 0; - new_env->buffer = (char*)malloc(MAX_LINE_LENGTH); new_env->in_file = in_file; return new_env; } void FreeTracking(tracking_t *tracking) { - free(tracking->buffer); delete tracking; } +void UpdateTracking(const char *text) { + const char *newline, *token = text; + while ((newline = strchr(token, '\n')) != NULL) { + token = newline + 1; + current_tracking->lineNumber++; + current_tracking->currentChar = 1; + } + current_tracking->currentChar += strlen(token); +} + + /* GetNextChar: reads a character from input */ int GetNextChar(char *b, int maxBuffer) { - char *p; - - if ( current_tracking->eof ) + int res = fgetc(current_tracking->in_file); + if ( res == EOF ) return 0; - - while ( current_tracking->currentChar >= current_tracking->lineLength ) { - current_tracking->currentChar = 0; - current_tracking->currentTokenStart = 1; - current_tracking->eof = false; - - p = fgets(current_tracking->buffer, MAX_LINE_LENGTH, current_tracking->in_file); - if ( p == NULL ) { - if ( ferror(current_tracking->in_file) ) - return 0; - current_tracking->eof = true; - return 0; - } - - current_tracking->lineLength = strlen(current_tracking->buffer); - - /* only increment line number if the buffer was big enough to read the whole line! 
*/ - char last_char = current_tracking->buffer[current_tracking->lineLength - 1]; - if (('\n' == last_char) || ('\r' == last_char)) // '\r' ---> CR, '\n' ---> LF - current_tracking->lineNumber++; - } - - b[0] = current_tracking->buffer[current_tracking->currentChar]; - if (b[0] == ' ' || b[0] == '\t') - current_tracking->currentTokenStart++; - current_tracking->currentChar++; - - return b[0]==0?0:1; -} - + *b = (char)res; + return 1; +} @@ -2045,55 +2044,87 @@ +/* return the specified character to the input stream */ +/* WARNING: this function destroys the contents of yytext */ +void unput_char(const char c) { + /* NOTE: The following commented-out code is not necessary as we currently use a different algorithm: + * - make a backup/snapshot of the current tracking data (in previous_tracking variable) + * (done in YY_USER_ACTION) + * - restore the previous tracking state when we unput any text... + * (in unput_text() and unput_and_mark() ) + */ +// /* We will later be processing this same character again when it is read from the input stream, +// * and therefore we will be incrementing the line number and character column accordingly. +// * We must therefore try to 'undo' the changes to the line number and character column +// * so this character is not counted twice! +// */ +// if (c == '\n') { +// current_tracking->lineNumber--; +// /* We should now set the current_tracking->currentChar to the length of the previous line +// * But we currently have no way of knowing it, so we simply set it to 0. +// * I (msousa) don't think this is currently an issue because I don't believe the code +// * ever calls unput_char() with a '\n', so we leave it for now +// */ +// current_tracking->currentChar = 0; +// } else if (current_tracking->currentChar > 0) { +// current_tracking->currentChar--; +// } + + unput(c); // unput() destroys the contents of yytext !! +} /* return all the text in the current token back to the input stream, except the first n chars. 
*/ -void unput_text(unsigned int n) { - /* it seems that flex has a bug in that it will not correctly count the line numbers - * if we return newlines back to the input stream. These newlines will be re-counted - * a second time when they are processed again by flex. - * We therefore determine how many newlines are in the text we are returning, - * and decrement the line counter acordingly... - */ - /* - unsigned int i; +void unput_text(int n) { + if (n < 0) ERROR; + signed int i; // must be signed! The iteration may end with -1 when this function is called with n=0 !! + + char *yycopy = strdup( yytext ); /* unput_char() destroys yytext, so we copy it first */ + for (int i = yyleng-1; i >= n; i--) + unput_char(yycopy[i]); + + *current_tracking = previous_tracking; + yycopy[n] = '\0'; + UpdateTracking(yycopy); - for (i = n; i < strlen(yytext); i++) - if (yytext[i] == '\n') - current_tracking->lineNumber--; - */ - /* now return all the text back to the input stream... */ - yyless(n); -} + free(yycopy); +} + /* return all the text in the current token back to the input stream, * but first return to the stream an additional character to mark the end of the token. */ -void unput_and_mark(const char c) { - char *yycopy = strdup( yytext ); /* unput() destroys yytext, so we copy it first */ - unput(c); +void unput_and_mark(const char mark_char) { + char *yycopy = strdup( yytext ); /* unput_char() destroys yytext, so we copy it first */ + unput_char(mark_char); for (int i = yyleng-1; i >= 0; i--) - unput(yycopy[i]); + unput_char(yycopy[i]); free(yycopy); + *current_tracking = previous_tracking; } /* The body_state tries to find a ';' before a END_PROGRAM, END_FUNCTION or END_FUNCTION_BLOCK or END_ACTION - * To do so, it must ignore comments and pragmas. This means that we cannot do this in a signle lex rule. - * However, we must store any text we consume in every rule, so we can push it back into the buffer + * and ignores ';' inside comments and pragmas. 
This means that we cannot do this in a single lex rule. + * Body_state therefore stores ALL text we consume in every rule, so we can push it back into the buffer * once we have decided if we are parsing ST or IL code. The following functions manage that buffer used by * the body_state. */ /* The buffer used by the body_state state */ -char *bodystate_buffer = NULL; +char *bodystate_buffer = NULL; +bool bodystate_is_whitespace = 1; // TRUE (1) if buffer is empty, or only contains whitespace. +tracking_t bodystate_init_tracking; /* append text to bodystate_buffer */ void append_bodystate_buffer(const char *text) { - //printf("<<>> %d <%s><%s>\n", bodystate_buffer, text, (NULL != bodystate_buffer)?bodystate_buffer:"NULL"); + // printf("<<>> %d <%s><%s>\n", bodystate_buffer, text, (NULL != bodystate_buffer)?bodystate_buffer:"NULL"); long int old_len = 0; + // make backup of tracking if we are starting off a new body_state_buffer + if (NULL == bodystate_buffer) bodystate_init_tracking = *current_tracking; + if (NULL != bodystate_buffer) old_len = strlen(bodystate_buffer); bodystate_buffer = (char *)realloc(bodystate_buffer, old_len + strlen(text) + 1); if (NULL == bodystate_buffer) ERROR; @@ -2104,19 +2135,21 @@ /* Return all data in bodystate_buffer back to flex, and empty bodystate_buffer. */ void unput_bodystate_buffer(void) { if (NULL == bodystate_buffer) ERROR; - //printf("<<>>\n%s\n", bodystate_buffer); + // printf("<<>>\n%s\n", bodystate_buffer); for (long int i = strlen(bodystate_buffer)-1; i >= 0; i--) - unput(bodystate_buffer[i]); + unput_char(bodystate_buffer[i]); free(bodystate_buffer); - bodystate_buffer = NULL; -} - - -/* Return true if bodystate_buffer is empty */ + bodystate_buffer = NULL; + *current_tracking = bodystate_init_tracking; +} + + +/* Return true if bodystate_buffer is empty or only contains whitespace!! */ int isempty_bodystate_buffer(void) { - return (NULL == bodystate_buffer); + if (NULL == bodystate_buffer) return 1; + return 0; }