--- a/stage1_2/iec_flex.ll Mon Jul 03 20:31:47 2017 +0100
+++ b/stage1_2/iec_flex.ll Thu Aug 03 22:16:35 2017 +0100
@@ -178,20 +178,24 @@
* back to the bison parser...
*/
#define YY_USER_ACTION {\
- yylloc.first_line = current_tracking->lineNumber; \
- yylloc.first_column = current_tracking->currentTokenStart; \
- yylloc.first_file = current_filename; \
- yylloc.first_order = current_order; \
- yylloc.last_line = current_tracking->lineNumber; \
- yylloc.last_column = current_tracking->currentChar - 1; \
- yylloc.last_file = current_filename; \
- yylloc.last_order = current_order; \
+ previous_tracking =*current_tracking; \
+ yylloc.first_line = current_tracking->lineNumber; \
+ yylloc.first_column = current_tracking->currentChar; \
+ yylloc.first_file = current_filename; \
+ yylloc.first_order = current_order; \
+ \
+ UpdateTracking(yytext); \
+ \
+ yylloc.last_line = current_tracking->lineNumber; \
+ yylloc.last_column = current_tracking->currentChar - 1; \
+ yylloc.last_file = current_filename; \
+ yylloc.last_order = current_order; \
+ \
current_tracking->currentTokenStart = current_tracking->currentChar; \
current_order++; \
}
-
/* Since this lexical parser we defined only works in ASCII based
* systems, we might as well make sure it is being compiled on
* one...
@@ -222,22 +226,25 @@
/***************************************************/
%{
+void UpdateTracking(const char *text);
+/* return the character back to the input stream. */
+void unput_char(const char c);
/* return all the text in the current token back to the input stream. */
-void unput_text(unsigned int n);
+void unput_text(int n);
/* return all the text in the current token back to the input stream,
* but first return to the stream an additional character to mark the end of the token.
*/
-void unput_and_mark(const char c);
+void unput_and_mark(const char mark_char);
void include_file(const char *include_filename);
/* The body_state tries to find a ';' before a END_PROGRAM, END_FUNCTION or END_FUNCTION_BLOCK or END_ACTION
- * To do so, it must ignore comments and pragmas. This means that we cannot do this in a signle lex rule.
- * However, we must store any text we consume in every rule, so we can push it back into the buffer
+ * and ignores ';' inside comments and pragmas. This means that we cannot do this in a signle lex rule.
+ * Body_state therefore stores ALL text we consume in every rule, so we can push it back into the buffer
* once we have decided if we are parsing ST or IL code. The following functions manage that buffer used by
* the body_state.
*/
-void append_bodystate_buffer(const char *yytext);
+void append_bodystate_buffer(const char *text);
void unput_bodystate_buffer(void);
int isempty_bodystate_buffer(void);
@@ -557,7 +564,6 @@
int currentChar;
int lineLength;
int currentTokenStart;
- char *buffer;
FILE *in_file;
} tracking_t;
@@ -573,7 +579,8 @@
const char *filename;
} include_stack_t;
-tracking_t *current_tracking = NULL;
+tracking_t * current_tracking = NULL;
+tracking_t previous_tracking;
include_stack_t include_stack[MAX_INCLUDE_DEPTH];
int include_stack_ptr = 0;
@@ -1165,7 +1172,15 @@
END_FUNCTION_BLOCK unput_text(0); BEGIN(INITIAL);
END_PROGRAM unput_text(0); BEGIN(INITIAL);
-. unput_text(0); yy_push_state(body_state); //printf("\nChanging to body_state\n");/* anything else, just change to body_state! */
+ /* NOTE: Handling of whitespace...
+ * - Must come __before__ the next rule for any single character '.'
+ * - If the rules were reversed, any whitespace with a single space (' ')
+ * would be handled by the '.' rule instead of the {whitespace} rule!
+ */
+{st_whitespace} /* Eat any whitespace */
+
+ /* anything else, just change to body_state! */
+. unput_text(0); yy_push_state(body_state); //printf("\nChanging to body_state\n");
}
@@ -1179,14 +1194,17 @@
<body_state>{
{st_whitespace} {/* In body state we do not process any tokens,
* we simply store them for later processing!
- * NOTE: all whitespace in the begining
- * of body_state must be removed so we can
- * detect ':=' in the beginning of TRANSACTION
- * conditions preceded by whitespace.
- * => only add to bodystate_buffer when not in beginning.
+ * NOTE: we must return ALL text when in body_state, including
+ * all comments and whitespace, so as not
+ * to lose track of the line_number and column number
+ * used when printing debugging messages.
+ * Note that some of the following rules depend on the fact that
+ * the body state buffer is either empty or only contains white space up to
+ * that point. However, since the vardecl_list_state will eat up all
+ * whitespace before entering the body_state, the contents of the bodystate_buffer
+ * will _never_ start with whitespace.
*/
- if (!isempty_bodystate_buffer())
- append_bodystate_buffer(yytext);
+ append_bodystate_buffer(yytext);
}
/* 'INITIAL_STEP' always used in beginning of SFCs !! */
INITIAL_STEP { if (isempty_bodystate_buffer()) {unput_text(0); BEGIN(sfc_state);}
@@ -1249,7 +1267,7 @@
/* NOTE: pragmas are handled right at the beginning... */
/* The whitespace */
-<INITIAL,header_state,config_state,vardecl_list_state,vardecl_state,st_state,sfc_state,task_init_state,sfc_qualifier_state>{st_whitespace} /* Eat any whitespace */
+<INITIAL,header_state,config_state,vardecl_state,st_state,sfc_state,task_init_state,sfc_qualifier_state>{st_whitespace} /* Eat any whitespace */
<il_state>{il_whitespace} /* Eat any whitespace */
/* NOTE: Due to the need of having the following rule have higher priority,
* the following rule was moved to an earlier position in this file.
@@ -1903,59 +1921,40 @@
tracking_t *GetNewTracking(FILE* in_file) {
tracking_t* new_env = new tracking_t;
- new_env->eof = 0;
- new_env->lineNumber = 0;
+ new_env->eof = 0;
+ new_env->lineNumber = 1;
new_env->currentChar = 0;
- new_env->lineLength = 0;
+ new_env->lineLength = 0;
new_env->currentTokenStart = 0;
- new_env->buffer = (char*)malloc(MAX_LINE_LENGTH);
new_env->in_file = in_file;
return new_env;
}
void FreeTracking(tracking_t *tracking) {
- free(tracking->buffer);
delete tracking;
}
+void UpdateTracking(const char *text) {
+ const char *newline, *token = text;
+ while ((newline = strchr(token, '\n')) != NULL) {
+ token = newline + 1;
+ current_tracking->lineNumber++;
+ current_tracking->currentChar = 1;
+ }
+ current_tracking->currentChar += strlen(token);
+}
+
+
/* GetNextChar: reads a character from input */
int GetNextChar(char *b, int maxBuffer) {
- char *p;
-
- if ( current_tracking->eof )
+ int res = fgetc(current_tracking->in_file);
+ if ( res == EOF )
return 0;
-
- while ( current_tracking->currentChar >= current_tracking->lineLength ) {
- current_tracking->currentChar = 0;
- current_tracking->currentTokenStart = 1;
- current_tracking->eof = false;
-
- p = fgets(current_tracking->buffer, MAX_LINE_LENGTH, current_tracking->in_file);
- if ( p == NULL ) {
- if ( ferror(current_tracking->in_file) )
- return 0;
- current_tracking->eof = true;
- return 0;
- }
-
- current_tracking->lineLength = strlen(current_tracking->buffer);
-
- /* only increment line number if the buffer was big enough to read the whole line! */
- char last_char = current_tracking->buffer[current_tracking->lineLength - 1];
- if (('\n' == last_char) || ('\r' == last_char)) // '\r' ---> CR, '\n' ---> LF
- current_tracking->lineNumber++;
- }
-
- b[0] = current_tracking->buffer[current_tracking->currentChar];
- if (b[0] == ' ' || b[0] == '\t')
- current_tracking->currentTokenStart++;
- current_tracking->currentChar++;
-
- return b[0]==0?0:1;
-}
-
+ *b = (char)res;
+ return 1;
+}
@@ -2045,55 +2044,87 @@
+/* return the specified character to the input stream */
+/* WARNING: this function destroys the contents of yytext */
+void unput_char(const char c) {
+ /* NOTE: The following uncomented code is not necessary as we currently use a different algorithm:
+ * - make a backup/snapshot of the current tracking data (in previous_tracking variable)
+ * (done in YY_USER_ACTION)
+ * - restore the previous tracking state when we unput any text...
+ * (in unput_text() and unput_and_mark() )
+ */
+// /* We will later be processing this same character again when it is read from the input strem,
+// * and therefore we will be incrementing the line number and character column acordingly.
+// * We must therefore try to 'undo' the changes to the line number and character column
+// * so this character is not counted twice!
+// */
+// if (c == '\n') {
+// current_tracking->lineNumber--;
+// /* We should now set the current_tracking->currentChar to the length of the previous line
+// * But we currently have no way of knowing it, so we simply set it to 0.
+// * I (msousa) don't think this is currently an issue because I don't believe the code
+// * ever calls unput_char() with a '\n', so we leave it for now
+// */
+// current_tracking->currentChar = 0;
+// } else if (current_tracking->currentChar > 0) {
+// current_tracking->currentChar--;
+// }
+
+ unput(c); // unput() destroys the contents of yytext !!
+}
/* return all the text in the current token back to the input stream, except the first n chars. */
-void unput_text(unsigned int n) {
- /* it seems that flex has a bug in that it will not correctly count the line numbers
- * if we return newlines back to the input stream. These newlines will be re-counted
- * a second time when they are processed again by flex.
- * We therefore determine how many newlines are in the text we are returning,
- * and decrement the line counter acordingly...
- */
- /*
- unsigned int i;
+void unput_text(int n) {
+ if (n < 0) ERROR;
+ signed int i; // must be signed! The iterartion may end with -1 when this function is called with n=0 !!
+
+ char *yycopy = strdup( yytext ); /* unput_char() destroys yytext, so we copy it first */
+ for (int i = yyleng-1; i >= n; i--)
+ unput_char(yycopy[i]);
+
+ *current_tracking = previous_tracking;
+ yycopy[n] = '\0';
+ UpdateTracking(yycopy);
- for (i = n; i < strlen(yytext); i++)
- if (yytext[i] == '\n')
- current_tracking->lineNumber--;
- */
- /* now return all the text back to the input stream... */
- yyless(n);
-}
+ free(yycopy);
+}
+
/* return all the text in the current token back to the input stream,
* but first return to the stream an additional character to mark the end of the token.
*/
-void unput_and_mark(const char c) {
- char *yycopy = strdup( yytext ); /* unput() destroys yytext, so we copy it first */
- unput(c);
+void unput_and_mark(const char mark_char) {
+ char *yycopy = strdup( yytext ); /* unput_char() destroys yytext, so we copy it first */
+ unput_char(mark_char);
for (int i = yyleng-1; i >= 0; i--)
- unput(yycopy[i]);
+ unput_char(yycopy[i]);
free(yycopy);
+ *current_tracking = previous_tracking;
}
/* The body_state tries to find a ';' before a END_PROGRAM, END_FUNCTION or END_FUNCTION_BLOCK or END_ACTION
- * To do so, it must ignore comments and pragmas. This means that we cannot do this in a signle lex rule.
- * However, we must store any text we consume in every rule, so we can push it back into the buffer
+ * and ignores ';' inside comments and pragmas. This means that we cannot do this in a signle lex rule.
+ * Body_state therefore stores ALL text we consume in every rule, so we can push it back into the buffer
* once we have decided if we are parsing ST or IL code. The following functions manage that buffer used by
* the body_state.
*/
/* The buffer used by the body_state state */
-char *bodystate_buffer = NULL;
+char *bodystate_buffer = NULL;
+bool bodystate_is_whitespace = 1; // TRUE (1) if buffer is empty, or only contains whitespace.
+tracking_t bodystate_init_tracking;
/* append text to bodystate_buffer */
void append_bodystate_buffer(const char *text) {
- //printf("<<<append_bodystate_buffer>>> %d <%s><%s>\n", bodystate_buffer, text, (NULL != bodystate_buffer)?bodystate_buffer:"NULL");
+ // printf("<<<append_bodystate_buffer>>> %d <%s><%s>\n", bodystate_buffer, text, (NULL != bodystate_buffer)?bodystate_buffer:"NULL");
long int old_len = 0;
+ // make backup of tracking if we are starting off a new body_state_buffer
+ if (NULL == bodystate_buffer) bodystate_init_tracking = *current_tracking;
+
if (NULL != bodystate_buffer) old_len = strlen(bodystate_buffer);
bodystate_buffer = (char *)realloc(bodystate_buffer, old_len + strlen(text) + 1);
if (NULL == bodystate_buffer) ERROR;
@@ -2104,19 +2135,21 @@
/* Return all data in bodystate_buffer back to flex, and empty bodystate_buffer. */
void unput_bodystate_buffer(void) {
if (NULL == bodystate_buffer) ERROR;
- //printf("<<<unput_bodystate_buffer>>>\n%s\n", bodystate_buffer);
+ // printf("<<<unput_bodystate_buffer>>>\n%s\n", bodystate_buffer);
for (long int i = strlen(bodystate_buffer)-1; i >= 0; i--)
- unput(bodystate_buffer[i]);
+ unput_char(bodystate_buffer[i]);
free(bodystate_buffer);
- bodystate_buffer = NULL;
-}
-
-
-/* Return true if bodystate_buffer is empty */
+ bodystate_buffer = NULL;
+ *current_tracking = bodystate_init_tracking;
+}
+
+
+/* Return true if bodystate_buffer is empty or ony contains whitespace!! */
int isempty_bodystate_buffer(void) {
- return (NULL == bodystate_buffer);
+ if (NULL == bodystate_buffer) return 1;
+ return 0;
}