سؤال

First, here are the grammar and lexer files for reference:

grammar.y

%{
#include <stdio.h>
extern int yylineno;
int yylex ();
int yyerror ();
//extern char* yytext;

%}

%union{
    int integer;
    float flt;
    char *str;
}

%token <str> IDENTIFIER 
%token <flt> CONSTANTF
%token <integer> CONSTANTI
%token LEFT_BRACKET RIGHT_BRACKET
%token EQ INC_OP DEC_OP LE_OP GE_OP EQ_OP NE_OP

%token SUB_ASSIGN MUL_ASSIGN ADD_ASSIGN
%token TYPE_NAME
%token INT FLOAT VOID
%token IF ELSE WHILE RETURN FOR
%start program
%%

/*
 * Expression rules.  The nonterminals are layered from the tightest to the
 * loosest binding: primary -> postfix -> unary -> multiplicative ->
 * additive -> comparison -> expression (assignment).
 */

/*
 * Operands: identifiers, integer/float constants, bracketed expressions,
 * calls with and without arguments, and postfix ++ / -- on an identifier.
 * The actions print the semantic value ($1) handed over by the scanner.
 * NOTE(review): strlen() yields a size_t while the format uses %d.
 */
primary_expression
: IDENTIFIER    {printf("use identifier: %s, length is %d\n", $1, strlen($1));}
| CONSTANTI     {printf("use constant Int: %d\n", $1);}
| CONSTANTF     {printf("use constant Float: %f\n", $1);}
| LEFT_BRACKET expression RIGHT_BRACKET
| IDENTIFIER LEFT_BRACKET RIGHT_BRACKET {printf("non-param methodCall identifier is: %s\n", $1);}
| IDENTIFIER LEFT_BRACKET argument_expression_list RIGHT_BRACKET    {printf("param methodCall identifier is: %s\n", $1);}
| IDENTIFIER INC_OP
| IDENTIFIER DEC_OP
;

/* Array subscripting, left-recursive so that a[i][j] nests to the left. */
postfix_expression
: primary_expression
| postfix_expression '[' expression ']'
;

/* One or more comma-separated call arguments. */
argument_expression_list
: expression
| argument_expression_list ',' expression
;

/* Prefix ++ / -- and unary minus. */
unary_expression
: postfix_expression
| INC_OP unary_expression
| DEC_OP unary_expression
| unary_operator unary_expression
;

unary_operator
: '-'
;

multiplicative_expression
: unary_expression
| multiplicative_expression '*' unary_expression
| multiplicative_expression '/' unary_expression
;

additive_expression
: multiplicative_expression
| additive_expression '+' multiplicative_expression
| additive_expression '-' multiplicative_expression
;

/* Both operands are additive_expression, so comparisons cannot be chained. */
comparison_expression
: additive_expression
| additive_expression '<' additive_expression
| additive_expression '>' additive_expression
| additive_expression LE_OP additive_expression
| additive_expression GE_OP additive_expression
| additive_expression EQ_OP additive_expression
| additive_expression NE_OP additive_expression
;

/*
 * Either a single assignment or a plain comparison.  The right-hand side of
 * an assignment is a comparison_expression, so assignments do not nest.
 */
expression
: unary_expression assignment_operator comparison_expression
| comparison_expression
;

assignment_operator
: EQ
| MUL_ASSIGN
| ADD_ASSIGN
| SUB_ASSIGN
;

/*
 * Declaration rules: a type name followed by one or more comma-separated
 * declarators and a terminating ';'.
 */
declaration
: type_name declarator_list ';'
;

declarator_list
: declarator
| declarator_list ',' declarator
;

/* The only type names accepted are void, int and float. */
type_name
: VOID  
| INT   
| FLOAT
;

/*
 * A declarator is an identifier, optionally bracketed, optionally followed
 * by array suffixes ([N] or []) and/or a parameter list.  The action prints
 * the identifier text delivered by the scanner.
 */
declarator
: IDENTIFIER    {printf("declare an identifer: %s\n",$1);}
| LEFT_BRACKET declarator RIGHT_BRACKET
| declarator '[' CONSTANTI ']'
| declarator '[' ']'
| declarator LEFT_BRACKET parameter_list RIGHT_BRACKET
| declarator LEFT_BRACKET RIGHT_BRACKET
;

/* One or more comma-separated parameters, each a type name plus declarator. */
parameter_list
: parameter_declaration
| parameter_list ',' parameter_declaration
;

parameter_declaration
: type_name declarator
;

/* Statement rules, followed by the top-level program structure. */
statement
: compound_statement
| expression_statement 
| selection_statement
| iteration_statement
| jump_statement
;

/* A brace-delimited block; any declarations must precede the statements. */
compound_statement
: '{' '}'
| '{' statement_list '}'
| '{' declaration_list statement_list '}'
;

declaration_list
: declaration
| declaration_list declaration
;

statement_list
: statement
| statement_list statement
;

/* An optional expression terminated by ';'. */
expression_statement
: ';'
| expression ';'
;

/*
 * if, if/else and for.  Note that the FOR form is listed here rather than
 * under iteration_statement.
 * NOTE(review): the two IF alternatives look like the classic dangling-else
 * shift/reduce conflict -- confirm against the yacc conflict report.
 */
selection_statement
: IF '(' expression ')' statement
| IF '(' expression ')' statement ELSE statement
| FOR '(' expression_statement expression_statement expression ')' statement
;

iteration_statement
: WHILE '(' expression ')' statement
;

jump_statement
: RETURN ';'
| RETURN expression ';'
;

/* Start symbol: one or more external declarations. */
program
: external_declaration
| program external_declaration
;

external_declaration
: function_definition
| declaration
;

/* A function is a type name, a declarator and a compound statement body. */
function_definition
: type_name declarator compound_statement
;

%%
#include <stdio.h>
#include <string.h>

extern char yytext[];
extern int column;
extern int yylineno;
extern FILE *yyin;

char *file_name = NULL;

/*
 * Report the message `s` on stderr in the form
 * "file:line:column: message", using file_name, yylineno and column.
 * stdout is flushed first.  Always returns 0.
 */
int yyerror (char *s) {
    (void) fflush (stdout);
    (void) fprintf (stderr,
                    "%s:%d:%d: %s\n",
                    file_name, yylineno, column, s);
    return 0;
}


/*
 * Entry point: parse the single input file named on the command line.
 *
 * Returns 0 when yyparse() reports success.  Returns 1 on a usage error,
 * when the file cannot be opened, when the file name cannot be copied, or
 * when yyparse() reports a failure.
 */
int main (int argc, char *argv[]) {
    FILE *input = NULL;
    int status;

    if (argc != 2) {
        fprintf (stderr, "%s: error: no input file\n", *argv);
        return 1;
    }

    input = fopen (argv[1], "r");
    if (!input) {
        fprintf (stderr, "%s: Could not open %s\n", *argv, argv[1]);
        return 1;
    }

    /* Copy the name only after the open succeeded, so the failure path
       above has nothing to release. */
    file_name = strdup (argv[1]);
    if (!file_name) {
        fprintf (stderr, "%s: error: out of memory\n", *argv);
        fclose (input);
        return 1;
    }

    yyin = input;
    status = yyparse ();

    fclose (input);
    free (file_name);
    file_name = NULL;

    /* Propagate a parse failure to the caller through the exit status. */
    return status == 0 ? 0 : 1;
}

scanner.l

%{
#include <stdio.h>
#include <string.h>
#include "grammar.tab.h"
void count();
int comment ();
int check_type ();
%}
D[0-9]
L[a-zA-Z_]
%option yylineno
%%
"/*"        { comment(); }
"//".*          { count(); }
"float"     { count(); return(FLOAT); }
"if"        { count(); return(IF); }
"else"      { count(); return(ELSE); }
"int"       { count(); return(INT); }
"return"    { count(); return(RETURN); }
"void"      { count(); return(VOID); }
"while"     { count(); return(WHILE); }
"for"       { count(); return(FOR); }
[$]?{L}({L}|{D})*   { count(); yylval.str=yytext; return(IDENTIFIER); printf("recognize identifier");}
{D}+        { count(); yylval.integer=atoi(yytext); return(CONSTANTI); }
{D}+"."{D}*     { count(); yylval.flt=atof(yytext); return(CONSTANTF); }
"("         {count();return(LEFT_BRACKET);}
")"         {count();return(RIGHT_BRACKET);}
"="         {count();return(EQ);}
"+="        { count(); return(ADD_ASSIGN); }
"-="        { count(); return(SUB_ASSIGN); }
"*="        { count(); return(MUL_ASSIGN); }
"++"        { count(); return(INC_OP); }
"--"        { count(); return(DEC_OP); }
"<="        { count(); return(LE_OP); }
">="        { count(); return(GE_OP); }
"=="        { count(); return(EQ_OP); }
"!="        { count(); return(NE_OP); }
";"     { count(); return(';'); }
"{"         { count(); return('{'); }
"}"         { count(); return('}'); }
","     { count(); return(','); }
"/"     { count(); return('/'); }
"["         { count(); return('['); }
"]"         { count(); return(']'); }
"."     { count(); return('.'); }
"!"     { count(); return('!'); }
"-"     { count(); return('-'); }
"+"     { count(); return('+'); }
"*"     { count(); return('*'); }
"<"     { count(); return('<'); }
">"     { count(); return('>'); }
[ \t\v\n\f] { count(); }
.       { /* ignore bad characters */ }

%%
/*
 * End-of-input hook.  Always returns 1, which by lex convention means that
 * there is no further input to scan.
 */
int yywrap() {
    const int no_more_input = 1;

    return no_more_input;
}

/*
 * Skip the remainder of a block comment.  Called from the scanner rule that
 * has just matched the opening delimiter; consumes input up to and including
 * the closing star-slash pair.
 *
 * The characters are kept in int variables so that an end-of-input marker is
 * not confused with a data byte; both EOF and 0 are treated as end of input,
 * so an unterminated comment ends the scan instead of looping forever.
 * The column counter is not updated for the skipped text.
 *
 * Always returns 0.
 */
int comment() {
    int prev = 0;
    int c;

    while ((c = input()) != EOF && c != 0) {
        if (prev == '*' && c == '/')
            break;
        prev = c;
    }
    return 0;
}

/* Column counter maintained by count(). */
int column = 0;

/*
 * Advance `column` over every character of yytext: a newline resets it to 0,
 * a tab moves it to the next multiple of 8, anything else adds 1.
 */
void count() {
    const char *p = yytext;

    while (*p != '\0') {
        switch (*p) {
        case '\n':
            column = 0;
            break;
        case '\t':
            column += 8 - (column % 8);
            break;
        default:
            column++;
            break;
        }
        p++;
    }
}

And the Makefile:

# Builds the `parse` executable from grammar.y and scanner.l.
# Recipe lines must start with a tab character.
LEX=lex
YACC=yacc
CFLAGS=-Wall
CC=gcc

.PHONY: all clean

all:parse

parse:grammar.c scanner.c
	$(CC) $(CFLAGS) -o $@ $^

# Also writes grammar.tab.h, which scanner.l includes.
grammar.c:grammar.y
	$(YACC) -d -o $@ --defines=grammar.tab.h $^

# The scanner includes grammar.tab.h, so the grammar must be generated first.
scanner.c:grammar.c

%.c:%.l
	$(LEX) -o $@ $<

clean:
	rm -f parse grammar.c grammar.tab.h scanner.c

==============================================================================

The problem is this: when I parse an input file like:

int a;
int fhu;
float fs;

int drive(float te){
    int b;
    b=1;
    fhu = drive(fs);
    fs = 0.4;
    return 0;
}

I got the following output:

declare an identifer: a
declare an identifer: fhu
declare an identifer: fs
declare an identifer: drive
declare an identifer: te
declare an identifer: b
use identifier: b=, length is 2
use constant Int: 1
use identifier: fhu =, length is 5
use identifier: fs), length is 3
param methodCall identifier is: drive(fs)
use identifier: fs =, length is 4
use constant Float: 0.400000
use constant Int: 0

I am confused about why "b=", "fhu =", and "fs)" are recognized with the unexpected trailing characters '=', ' =', and ')'. As you can see, identifiers are recognized correctly in the declaration statements, but not in the primary expressions.

Why do they use the same lexical rules but produce different results?

Is there anyone who could help me with this problem? I am new to yacc, and any suggestions would be helpful and welcome. Thanks!

هل كانت مفيدة؟

المحلول

As I noted in my first comment, the problem is related to not storing a copy of the string returned by the lexical scanner. I was able to reproduce the problem pretty much as described (working on Mac OS X 10.9.1 Mavericks rather than Ubuntu, so it isn't platform specific).

The 'trivial' fix is:

[$]?{L}({L}|{D})*   { count(); yylval.str=strdup(yytext); printf("recognize identifier (%s)\n", yylval.str); return(IDENTIFIER);}

Three changes in there:

  1. Move the printf() before the return so it is executed.
  2. Have the printf() print the identifier string.
  3. The critical one: use strdup() to make a copy of the string!

The word 'trivial' is in quotes (twice) because the immediate issue with allocating memory is 'where is it released' and the current answer is 'nowhere — until the program exits', and that is unlikely to be a suitable long-term solution. So, you will need to review how you use the identifiers returned by the IDENTIFIER token type, ensuring that the memory is released. But this will get you back on track.

Output from running parse on your sample file:

recognize identifier (a)
declare an identifer: a
recognize identifier (fhu)
declare an identifer: fhu
recognize identifier (fs)
declare an identifer: fs
recognize identifier (drive)
declare an identifer: drive
recognize identifier (te)
declare an identifer: te
recognize identifier (b)
declare an identifer: b
recognize identifier (b)
use identifier: b, length is 1
use constant Int: 1
recognize identifier (fhu)
use identifier: fhu, length is 3
recognize identifier (drive)
recognize identifier (fs)
use identifier: fs, length is 2
param methodCall identifier is: drive
recognize identifier (fs)
use identifier: fs, length is 2
use constant Float: 0.400000
use constant Int: 0

The last thing I want to know is why, when the code directly assigns yytext to yylval.str, does the grammar parser get "b=" rather than "b"? How does yytext change when doing the grammar parsing?

Try the following addition to your code — specifically, the functions push_identifier() and dump_identifiers() in grammar.y — and use both the 'with strdup()' and 'without strdup()' versions of scanner.l.

%{
#include <stdio.h>
#include <string.h>
extern int yylineno;
int yylex(void);
int yyerror(char *str);
static void push_identifier(char *str);
//extern char* yytext;

%}

%expect 1
%union{
    int integer;
    float flt;
    char *str;
}

%token <str> IDENTIFIER 
%token <flt> CONSTANTF
%token <integer> CONSTANTI
%token LEFT_BRACKET RIGHT_BRACKET
%token EQ INC_OP DEC_OP LE_OP GE_OP EQ_OP NE_OP

%token SUB_ASSIGN MUL_ASSIGN ADD_ASSIGN
%token TYPE_NAME
%token INT FLOAT VOID
%token IF ELSE WHILE RETURN FOR
%start program

%%

/*
 * Expression rules.  The nonterminals are layered from the tightest to the
 * loosest binding: primary -> postfix -> unary -> multiplicative ->
 * additive -> comparison -> expression (assignment).
 */

/*
 * Operands: identifiers, integer/float constants, bracketed expressions,
 * calls with and without arguments, and postfix ++ / -- on an identifier.
 * Every alternative that carries an IDENTIFIER passes its semantic value
 * ($1) to push_identifier(); most of them also print it.
 */
primary_expression
    : IDENTIFIER    {printf("use identifier: %s, length is %zu\n", $1, strlen($1)); push_identifier($1);}
    | CONSTANTI     {printf("use constant Int: %d\n", $1);}
    | CONSTANTF     {printf("use constant Float: %f\n", $1);}
    | LEFT_BRACKET expression RIGHT_BRACKET
    | IDENTIFIER LEFT_BRACKET RIGHT_BRACKET {printf("non-param methodCall identifier is: %s\n", $1); push_identifier($1);}
    | IDENTIFIER LEFT_BRACKET argument_expression_list RIGHT_BRACKET    {printf("param methodCall identifier is: %s\n", $1); push_identifier($1);}
    | IDENTIFIER INC_OP { push_identifier($1); }
    | IDENTIFIER DEC_OP { push_identifier($1); }
    ;

/* Array subscripting, left-recursive so that a[i][j] nests to the left. */
postfix_expression
    : primary_expression
    | postfix_expression '[' expression ']'
    ;

/* One or more comma-separated call arguments. */
argument_expression_list
    : expression
    | argument_expression_list ',' expression
    ;

/* Prefix ++ / -- and unary minus. */
unary_expression
    : postfix_expression
    | INC_OP unary_expression
    | DEC_OP unary_expression
    | unary_operator unary_expression
    ;

unary_operator
    : '-'
    ;

multiplicative_expression
    : unary_expression
    | multiplicative_expression '*' unary_expression
    | multiplicative_expression '/' unary_expression
    ;

additive_expression
    : multiplicative_expression
    | additive_expression '+' multiplicative_expression
    | additive_expression '-' multiplicative_expression
    ;

/* Both operands are additive_expression, so comparisons cannot be chained. */
comparison_expression
    : additive_expression
    | additive_expression '<' additive_expression
    | additive_expression '>' additive_expression
    | additive_expression LE_OP additive_expression
    | additive_expression GE_OP additive_expression
    | additive_expression EQ_OP additive_expression
    | additive_expression NE_OP additive_expression
    ;

/*
 * Either a single assignment or a plain comparison.  The right-hand side of
 * an assignment is a comparison_expression, so assignments do not nest.
 */
expression
    : unary_expression assignment_operator comparison_expression
    | comparison_expression
    ;

assignment_operator
    : EQ
    | MUL_ASSIGN
    | ADD_ASSIGN
    | SUB_ASSIGN
    ;

/*
 * Declaration rules: a type name followed by one or more comma-separated
 * declarators and a terminating ';'.
 */
declaration
    : type_name declarator_list ';'
    ;

declarator_list
    : declarator
    | declarator_list ',' declarator
    ;

/* The only type names accepted are void, int and float. */
type_name
    : VOID  
    | INT   
    | FLOAT
    ;

/*
 * A declarator is an identifier, optionally bracketed, optionally followed
 * by array suffixes ([N] or []) and/or a parameter list.  The action prints
 * the identifier text and records the pointer with push_identifier().
 */
declarator
    : IDENTIFIER    {printf("declare an identifer: %s\n",$1); push_identifier($1); }
    | LEFT_BRACKET declarator RIGHT_BRACKET
    | declarator '[' CONSTANTI ']'
    | declarator '[' ']'
    | declarator LEFT_BRACKET parameter_list RIGHT_BRACKET
    | declarator LEFT_BRACKET RIGHT_BRACKET
    ;

/* One or more comma-separated parameters, each a type name plus declarator. */
parameter_list
    : parameter_declaration
    | parameter_list ',' parameter_declaration
    ;

parameter_declaration
    : type_name declarator
    ;

/* Statement rules, followed by the top-level program structure. */
statement
    : compound_statement
    | expression_statement 
    | selection_statement
    | iteration_statement
    | jump_statement
    ;

/* A brace-delimited block; any declarations must precede the statements. */
compound_statement
    : '{' '}'
    | '{' statement_list '}'
    | '{' declaration_list statement_list '}'
    ;

declaration_list
    : declaration
    | declaration_list declaration
    ;

statement_list
    : statement
    | statement_list statement
    ;

/* An optional expression terminated by ';'. */
expression_statement
    : ';'
    | expression ';'
    ;

/*
 * if, if/else and for.  Note that the FOR form is listed here rather than
 * under iteration_statement.
 * NOTE(review): the two IF alternatives look like the classic dangling-else
 * shift/reduce conflict; presumably that is the one conflict announced by
 * the %expect 1 declaration -- confirm against the conflict report.
 */
selection_statement
    : IF '(' expression ')' statement
    | IF '(' expression ')' statement ELSE statement
    | FOR '(' expression_statement expression_statement expression ')' statement
    ;

iteration_statement
    : WHILE '(' expression ')' statement
    ;

jump_statement
    : RETURN ';'
    | RETURN expression ';'
    ;

/* Start symbol: one or more external declarations. */
program
    : external_declaration
    | program external_declaration
    ;

external_declaration
    : function_definition
    | declaration
    ;

/* A function is a type name, a declarator and a compound statement body. */
function_definition
    : type_name declarator compound_statement
    ;

%%

#include <stdio.h>
#include <string.h>

extern char yytext[];
extern int column;
extern int yylineno;
extern FILE *yyin;

char *file_name = NULL;

/*
 * Report the message `s` on stderr in the form
 * "file:line:column: message", using file_name, yylineno and column.
 * stdout is flushed first.  Always returns 0.
 */
int yyerror(char *s)
{
    (void) fflush(stdout);
    (void) fprintf(stderr,
                   "%s:%d:%d: %s\n",
                   file_name, yylineno, column, s);
    return 0;
}

/* Fixed-size log of every identifier pointer handed to push_identifier(). */
static char *list[20];
static int   sp = 0;    /* number of slots of list[] currently in use */

/*
 * Record `str` in the next free slot of list[].  Only the pointer is
 * stored; the string itself is not copied.
 *
 * When list[] is already full the identifier is dropped and a warning is
 * written to stderr, rather than writing past the end of the array.
 */
static void push_identifier(char *str)
{
    const int capacity = (int)(sizeof list / sizeof list[0]);

    if (sp >= capacity)
    {
        fprintf(stderr, "push_identifier: list full (%d entries); identifier dropped\n", capacity);
        return;
    }
    list[sp++] = str;
}

/*
 * Print the number of recorded identifiers, then one line per entry with
 * its index and the string it points to, wrapped in << >> so that the exact
 * extent of each string is visible.
 */
static void dump_identifiers(void)
{
    int idx = 0;

    printf("Identifiers (%d):\n", sp);
    while (idx < sp)
    {
        printf("[%2d] = <<%s>>\n", idx, list[idx]);
        idx++;
    }
}

/*
 * Entry point: parse the single input file named on the command line, then
 * dump the identifiers recorded during the parse.
 *
 * Returns 0 when yyparse() reports success.  Returns 1 on a usage error,
 * when the file cannot be opened, when the file name cannot be copied, or
 * when yyparse() reports a failure.
 */
int main(int argc, char *argv[])
{
    FILE *input = NULL;
    int status;

    if (argc != 2)
    {
        fprintf(stderr, "%s: error: no input file\n", *argv);
        return 1;
    }

    input = fopen(argv[1], "r");
    if (!input)
    {
        fprintf(stderr, "%s: Could not open %s\n", *argv, argv[1]);
        return 1;
    }

    /* Copy the name only after the open succeeded, so the failure path
       above has nothing to release. */
    file_name = strdup(argv[1]);
    if (!file_name)
    {
        fprintf(stderr, "%s: error: out of memory\n", *argv);
        fclose(input);
        return 1;
    }

    yyin = input;
    status = yyparse();
    dump_identifiers();

    fclose(input);
    free(file_name);
    file_name = NULL;

    /* Propagate a parse failure to the caller through the exit status. */
    return status == 0 ? 0 : 1;
}

Using the 'with strdup()' version, the output ends:

use constant Int: 0
Identifiers (11):
[ 0] = <<a>>
[ 1] = <<fhu>>
[ 2] = <<fs>>
[ 3] = <<drive>>
[ 4] = <<te>>
[ 5] = <<b>>
[ 6] = <<b>>
[ 7] = <<fhu>>
[ 8] = <<fs>>
[ 9] = <<drive>>
[10] = <<fs>>

Using the 'without strdup()' version, the output ends:

use constant Int: 0
Identifiers (11):
[ 0] = <<a;
int fhu;
float fs;

int drive(float te){
    int b;
    b=1;
    fhu = drive(fs);
    fs = 0.4;
    return 0;
}
>>
[ 1] = <<fhu;
float fs;

int drive(float te){
    int b;
    b=1;
    fhu = drive(fs);
    fs = 0.4;
    return 0;
}
>>
[ 2] = <<fs;

int drive(float te){
    int b;
    b=1;
    fhu = drive(fs);
    fs = 0.4;
    return 0;
}
>>
[ 3] = <<drive(float te){
    int b;
    b=1;
    fhu = drive(fs);
    fs = 0.4;
    return 0;
}
>>
[ 4] = <<te){
    int b;
    b=1;
    fhu = drive(fs);
    fs = 0.4;
    return 0;
}
>>
[ 5] = <<b;
    b=1;
    fhu = drive(fs);
    fs = 0.4;
    return 0;
}
>>
[ 6] = <<b=1;
    fhu = drive(fs);
    fs = 0.4;
    return 0;
}
>>
[ 7] = <<fhu = drive(fs);
    fs = 0.4;
    return 0;
}
>>
[ 8] = <<fs);
    fs = 0.4;
    return 0;
}
>>
[ 9] = <<drive(fs);
    fs = 0.4;
    return 0;
}
>>
[10] = <<fs = 0.4;
    return 0;
}
>>

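In other words, the code in the scanner is reusing the buffer that yytext points to for its own purposes. This is what happens at the end; I'm not sure what is going on while the code is being parsed — you'd need to include a call to dump_identifiers() every time push_identifier() is called. Printing addresses in push_identifier() and in dump_identifiers() might also be illuminating.

The likely mechanism is this: yytext points directly into the scanner's input buffer. When a token is matched, the scanner temporarily overwrites the character just after it with '\0'; when the next token is scanned, that character is restored and the terminator moves to the end of the new token. The parser usually has to read one token of lookahead before it can reduce a rule, so by the time the action for IDENTIFIER runs, the saved pointer still addresses the start of the identifier but the string now extends to the end of the lookahead token. That is why you see "b=" (lookahead '='), "fhu =" (whitespace skipped, then '=') and "fs)" (lookahead ')'). In the declarations the output looked right only because no lookahead happened to be needed before those actions ran.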

مرخصة بموجب: CC-BY-SA مع الإسناد
لا تنتمي إلى StackOverflow
scroll top