سؤال

I have a lex file, a yacc file, and a main.cpp file.

My main.cpp looks like this:

// Entry point: with no command-line arguments the parser reads standard
// input; otherwise each argument is opened as a file and parsed in turn.
int main(int argc, char **argv)
{
    if (argc == 1)
    {   int token;  // NOTE(review): not used anywhere in this snippet
        curr_filename = "<stdin>";
        yyin = stdin;
        yyparse();
    }
    else
    {
        for (int i = 1; i < argc; ++i)
        {
            // curr_filename is updated before the open so that the failure
            // branch below and the parser both see the current name.
            curr_filename = argv[i];
            yyin = std::fopen(argv[i], "r");

            if (yyin)
            {    

                yyparse();  

                std::fclose(yyin);
            }
            else
            {
                // A file that cannot be opened is reported and skipped; the
                // loop continues with the next argument.
                utility::print_error(argv[i], "cannot be opened");
            }
        }
    }

    // Checked after the parse(s) above: a non-zero error count prints a
    // message and ends the process with exit status 1.
    if (yynerrs > 0)
    {
        std::cerr << "Compilation halted due to lexical or syntax errors.\n";
        exit(1);
    }

This works for parsing. But now I also want to print the tokens generated by the lex file, so I made a small change and call yylex() as follows:

    // Modified entry point from the question: pulls tokens from yylex() and
    // prints a label for each one.
    int main(int argc, char **argv)
    {
        if (argc == 1)
        {   int token;
            curr_filename = "<stdin>";

       yyin = stdin;
// Read tokens straight from the lexer; the loop stops when yylex() returns 0.
     while(token= yylex())
     {
        switch(token){
        // 258 is the code the bison %token list assigns to CLASS.
        case 258 : 
        std::cout << "class" ;
        // No break follows the statement above, so control falls through to
        // default and "token " is printed as well.
        default : 
        std::cout << "token " ;

                 }   // closes the switch

        // NOTE(review): nothing in this snippet writes a newline or flushes
        // std::cout -- confirm whether buffering is hiding the output.

            // Still inside the while body: the brace below is the one that
            // closes the loop, so this runs after every token read above.
            yyparse();
        }
//rest of the code same

But nothing is printed to the output.

How can I get the tokens printed to standard output or to a file?

flex file

%option noyywrap
%option yylineno

%{

// Scanner prologue: headers, the location hook, and the scanner-wide state
// shared by the comment and string rules.

#include "flexbison.hpp"
#include "tokentable.hpp"
#include "symboltable.hpp"
#include "y.tab.h"
#include <stdio.h>

// Hook that flex runs for each matched rule: records the current yylineno as
// both the first and last line of the token location.
#define YY_USER_ACTION yylloc.first_line = yylloc.last_line = yylineno; 

// Size of string_buf in bytes; the string rules stop appending once
// MAX_STR_CONST - 1 characters are stored.
static const int MAX_STR_CONST = 1025;

char string_buf[MAX_STR_CONST];  // buffer to store string constants encountered in source file
char *string_buf_ptr;            // write cursor into string_buf, reset when a string literal opens



int num_comment = 0;      // current nesting depth of (* ... *) comments
std::size_t curr_lineno = 0;      // incremented by the rules that consume a newline
bool str_too_long = false;   // set when a string constant overflows string_buf; cleared by the \0 recovery rules


%}

%x COMMENT
%x LINECOMMENT
%x STRING

DARROW =>

%%

"(*" {
    /* Opening a block comment: bump the nesting depth and enter COMMENT. */
    ++num_comment;
    BEGIN(COMMENT);
}

"*)" {
    /* A closer seen while no comment is open. */
    if (!(num_comment > 0)) {
        yylval.error_msg = "Unmatched *)";
        return ERROR;
    }
}

<COMMENT>"*)" {
    /* Closing one nesting level; leave COMMENT once the depth is back to 0. */
    if (--num_comment < 0) {
        yylval.error_msg = "Unmatched *)";
        return ERROR;
    }

    if (0 == num_comment) {
        BEGIN(INITIAL);
    }
}

<COMMENT>"(*" {
    /* Nested opener. */
    ++num_comment;
}

<COMMENT>[^\n] {
    /* Comment text is discarded one character at a time. */
}

<COMMENT>\n {
    curr_lineno++;
}

"--"[^\n]* {
    /* Line comment: everything up to (not including) the newline has been
       consumed; LINECOMMENT then swallows the terminating newline. */
    BEGIN(LINECOMMENT);
}

<LINECOMMENT>\n {
    ++curr_lineno;
    BEGIN(INITIAL);
}

<LINECOMMENT><<EOF>> {
    /* Input ended inside a line comment with no trailing newline. Go back to
       INITIAL before finishing, as the other <<EOF>> rules do; otherwise the
       exclusive LINECOMMENT state would still be active when the scanner is
       pointed at the next input file. */
    BEGIN(INITIAL);
    yyterminate();
}

<COMMENT><<EOF>> {
    /* Input ended inside an unterminated block comment. Reset both the start
       condition and the nesting depth: leaving num_comment above zero would
       stop a later input from ever closing a comment back to INITIAL. */
    BEGIN(INITIAL);
    num_comment = 0;
    yylval.error_msg = "EOF in comment";
    return ERROR;
}

"=>"            { /* arrow used in case branches */ return DARROW; }

(?i:class)      { /* keywords: matched case-insensitively */ return CLASS; }
(?i:else)       { return ELSE; }
(?i:in)         { return IN; }
(?i:then)       { return THEN; }
(?i:fi)         { return FI; }
(?i:if)         { return IF; }
(?i:inherits)   { return INHERITS; }
(?i:let)        { return LET; }
(?i:loop)       { return LOOP; }
(?i:pool)       { return POOL; }
(?i:while)      { return WHILE; }
(?i:case)       { return CASE; }
(?i:esac)       { return ESAC; }
(?i:of)         { return OF; }
(?i:new)        { return NEW; }
(?i:isvoid)     { return ISVOID; }
(?i:not)        { return NOT; }

t(?i:rue)       { /* first letter must be lower case */ yylval.boolean = true;  return BOOL_CONST; }
f(?i:alse)      { yylval.boolean = false; return BOOL_CONST; }

[0-9]+          { /* integer literal, stored in the int table */ yylval.symbol = inttable().add(yytext); return INT_CONST; }

"<="            { return LE; }
"<-"            { return ASSIGN; }

[A-Z][a-zA-Z0-9_]*  { /* identifier starting with an upper-case letter */ yylval.symbol = idtable().add(yytext); return TYPEID; }
[a-z][a-zA-Z0-9_]*  { /* identifier starting with a lower-case letter */ yylval.symbol = idtable().add(yytext); return OBJECTID; }

";"|","|"{"|"}"|":"|"("|")"|"+"|"-"|"*"|"/"|"="|"~"|"<"|"."|"@" { /* single-character tokens are returned as their own character code */ return yytext[0]; }

\n              { curr_lineno++; }

[ \f\r\t\v]     { /* whitespace is skipped */ }

 /*
  *  String constants (C syntax)
  *  Escape sequence \c is accepted for all characters c. Except for 
  *  \n \t \b \f, the result is c.
  *
  *  The text is accumulated in string_buf through string_buf_ptr. When the
  *  buffer is full, the rules below set str_too_long and push a NUL back
  *  into the input; the two \0 rules then check str_too_long.
  */

\" {
    // Opening quote: enter STRING and start from an empty, zeroed buffer.
    BEGIN(STRING);
    string_buf_ptr = string_buf;
    memset(string_buf, 0, MAX_STR_CONST);
}

<STRING>\" {
    // Closing quote: add the collected text to the string table and return it.
    BEGIN(INITIAL);
    yylval.symbol = stringtable().add(string_buf);
    return STR_CONST;
}

<STRING>\0[^\n]*\" {
    // A NUL followed by text up to a closing quote on the same line.
    BEGIN(INITIAL);
    if (str_too_long) {
        // The NUL was pushed back by a "too long" rule, whose error has
        // already been returned; just clear the flag.
        str_too_long = false;
    }
    else {
        yylval.error_msg = "String contains null character";
        return ERROR;
    }
}

<STRING>\0[^"]*\n {
    // A NUL followed by text that reaches a newline without any quote.
    if (str_too_long) {
        yyinput(); /* eat quote */
        BEGIN(INITIAL);
        str_too_long = false;
    }
    else {
        // NOTE(review): this pattern always ends in '\n', so
        // yytext[yyleng - 1] is the newline and this test is always true --
        // confirm whether yyleng - 2 (the character before the newline) was
        // intended.
        if (yytext[yyleng - 1] != '\\') {
            BEGIN(INITIAL);
            yylval.error_msg = "String contains null character";
            return ERROR;
        }
    }
}

<STRING><<EOF>> {
    // Input ended before the closing quote.
    BEGIN(INITIAL);
    yylval.error_msg = "EOF in string constant";
    return ERROR;
}

<STRING>\\ {
    // Backslash: refuse to grow a full buffer, otherwise translate the escape.
    if (strlen(string_buf) >= MAX_STR_CONST - 1) {
        str_too_long = true;
        unput('\0');    // pushed back so that the \0 rules match next
        yylval.error_msg = "String constant too long";
        return ERROR;
    }

    // Read the character after the backslash directly from the input.
    char ahead = yyinput();
    switch (ahead) {
        case 'b':
            *string_buf_ptr++ = '\b';
            break;
        case 't':
            *string_buf_ptr++ = '\t';
            break;
        case 'n':
            *string_buf_ptr++ = '\n';
            break;
        case 'f':
            *string_buf_ptr++ = '\f';
            break;
        case '\n':
            // Backslash followed by a real newline: count the line and store
            // a newline character.
            ++curr_lineno;
            *string_buf_ptr++ = '\n';
            break;
        case '\0':
            // Put the NUL back so that the \0 rules above handle it.
            unput(ahead);
            break;
        default:
            // Any other escaped character stands for itself.
            *string_buf_ptr++ = ahead;
    }
}

<STRING>\n {
    // A newline that was not preceded by a backslash ends the string in error.
    ++curr_lineno;
    BEGIN(INITIAL);
    yylval.error_msg = "Unterminated string constant";
    return ERROR;
}

<STRING>. {
    // Ordinary character: same length guard as the backslash rule, then append.
    if (strlen(string_buf) >= MAX_STR_CONST - 1) {
        str_too_long = true;
        unput('\0');
        yylval.error_msg = "String constant too long";
        return ERROR;
    }

    *string_buf_ptr++ = *yytext;
}

. {
    /* Fallback rule: a character that no rule above accepts is reported as an error. */
    yylval.error_msg = std::string(yytext) + " is not a valid character in the current context.";
    return ERROR;
}

%%

bison file

%{

// Parser prologue: headers and the external names used by the grammar actions.

#include "flexbison.hpp"
#include "symboltable.hpp"
#include "tokentable.hpp"
#include "ast.hpp"

#include <iostream>

// convenience macro for setting the location of each ast node: passes the
// first line of the given bison location and the current file name to setloc()
#define SETLOC(lval,node) (lval)->setloc((node).first_line, curr_filename)

// both defined in main.cpp
extern ProgramPtr ast_root;
extern std::string curr_filename;

// both defined in lexer
extern int yylex();
extern int yylineno;

// error-reporting callback; its definition follows the grammar rules
void yyerror(char *);        
%}

/* Terminal symbols. Each token is given an explicit numeric code (258-283). */
%token CLASS 258 ELSE 259 FI 260 IF 261 IN 262 
%token INHERITS 263 LET 264 LOOP 265 POOL 266 THEN 267 WHILE 268
%token CASE 269 ESAC 270 OF 271 DARROW 272 NEW 273 ISVOID 274
/* Tokens that carry a semantic value use the named yylval member. */
%token <symbol>  STR_CONST 275 INT_CONST 276 
%token <boolean> BOOL_CONST 277
%token <symbol>  TYPEID 278 OBJECTID 279 
%token ASSIGN 280 NOT 281 LE 282 ERROR 283

/* Semantic value member used by each non-terminal. */
%type <program> program
%type <clazz> class
%type <classes> class_list
%type <attribute> attribute
%type <attributes> attribute_list
%type <method> method
%type <methods> method_list
%type <expression> expression
%type <expression> let_expr 
%type <expressions> expression_list
%type <expressions> method_expr_list
%type <formal> formal
%type <formals> formal_list
%type <branch> case
%type <cases> case_list

/*
 * Operator precedence, lowest first (bison gives later declarations higher
 * precedence). The order follows the Cool precedence table:
 *
 *     .  @  ~  isvoid  * /  + -  <= < =  not  <-
 *
 * from highest to lowest, with the three comparisons sharing one
 * non-associative level. LET stays lowest, so that a let body (%prec LET)
 * extends as far to the right as possible.
 *
 * The comparisons used to be split, with '=' below LET and LE '<' above '.',
 * which made "1 + 2 < 3" parse as "1 + (2 < 3)" and "not a = b" parse as
 * "(not a) = b".
 */
%left LET
%right ASSIGN
%left NOT
%nonassoc LE '<' '='
%left '+' '-'
%left '*' '/' 
%left ISVOID
%left '~'
%left '@'
%left '.'

%%
/* Start symbol: the class list is wrapped in a Program node stored in ast_root. */
program : class_list    { @$ = @1; ast_root = std::make_shared<Program>($1); }
;

/* One or more classes.
   NOTE(review): the recursive alternatives of every list rule below call
   $$.push_back without first assigning $$ = $1 -- confirm that the parser
   skeleton in use pre-loads $$ with $1. */
class_list : class { $$ = Classes(); $$.push_back($1); }
            | class_list class { $$.push_back($2); }
;

/* A class with or without an INHERITS clause; without one the parent is "Object".
   The error alternative consumes input up to a ';' and calls yyerrok. */
class : CLASS TYPEID '{' attribute_list method_list '}' ';' { $$ = std::make_shared<Class>($2, idtable().add("Object"), $4, $5); SETLOC($$, @1); }
        | CLASS TYPEID INHERITS TYPEID '{' attribute_list method_list '}' ';' { $$ = std::make_shared<Class>($2, $4, $6, $7); SETLOC($$, @1); }
        | error ';' { yyerrok; } 
;

/* One or more ';'-terminated attributes. */
attribute_list : attribute ';' { $$ = Attributes(); $$.push_back($1); }
               | attribute_list attribute ';' { $$.push_back($2); }
               | error ';' { yyerrok; }
;

/* name : Type, optionally followed by "<- expression"; a NoExpr node stands in
   when there is no initializer. */
attribute : OBJECTID ':' TYPEID { $$ = std::make_shared<Attribute>($1, $3, std::make_shared<NoExpr>()); SETLOC($$, @1); }
          | OBJECTID ':' TYPEID ASSIGN expression { $$ = std::make_shared<Attribute>($1, $3, $5); SETLOC($$, @5); }
;

/* One or more ';'-terminated methods. */
method_list : method ';' { $$ = Methods(); $$.push_back($1); }
            | method_list method ';' { $$.push_back($2); }
            | error ';' { yyerrok; }
;

/* name(formals) : ReturnType { body }; the second alternative covers an empty
   parameter list. */
method : OBJECTID '(' formal_list ')' ':' TYPEID '{' expression '}' { $$ = std::make_shared<Method>($1, $6, $3, $8); SETLOC($$, @1); }
       | OBJECTID '(' ')' ':' TYPEID '{' expression '}' { $$ = std::make_shared<Method>($1, $5, Formals(), $7); SETLOC($$, @1); }
;

/* One or more ','-separated formal parameters. */
formal_list : formal { $$ = Formals(); $$.push_back($1); }
            | formal_list ',' formal { $$.push_back($3); } 
;

/* A single formal parameter: name : Type. */
formal : OBJECTID ':' TYPEID { $$ = std::make_shared<Formal>($1, $3); SETLOC($$, @1); }
;

/* One or more case branches. */
case_list : case { $$ = Cases(); $$.push_back($1); }
            | case_list case { $$.push_back($2); }
;

/* A single branch: name : Type => expression ; */
case : OBJECTID ':' TYPEID DARROW expression ';' { $$ = std::make_shared<CaseBranch>($1, $3, $5); SETLOC($$, @5); }
;

/* One or more ','-separated call arguments. */
method_expr_list : expression { $$ = Expressions(); $$.push_back($1); }
                    | method_expr_list ',' expression { $$.push_back($3); }
;

/* One or more ';'-terminated expressions (the body of a { ... } block). */
expression_list : expression ';' { $$ = Expressions(); $$.push_back($1); }
                | expression_list expression ';' { $$.push_back($2); }
                | error ';' { yyerrok; }
;

/* The bindings of a let, after the LET keyword. Each binding is
   "name : Type" with an optional "<- initializer" (NoExpr when absent); a ','
   introduces a further binding, which becomes the body of the current Let
   node, and IN introduces the final body.

   The error alternative skips a malformed binding up to the next ',' and
   continues with the bindings that follow. It passes that remaining let
   ($3) up as its own value: without an explicit $$ the parent rule
   "LET let_expr" would call setloc through an expression pointer that was
   never set. */
let_expr : OBJECTID ':' TYPEID IN expression %prec LET { $$ = std::make_shared<Let>($1, $3, std::make_shared<NoExpr>(), $5); SETLOC($$, @5); }
            | OBJECTID ':' TYPEID ASSIGN expression IN expression %prec LET { $$ = std::make_shared<Let>($1, $3, $5, $7); SETLOC($$, @5); }
            | OBJECTID ':' TYPEID ',' let_expr { $$ = std::make_shared<Let>($1, $3, std::make_shared<NoExpr>(), $5); SETLOC($$, @5); }
            | OBJECTID ':' TYPEID ASSIGN expression ',' let_expr { $$ = std::make_shared<Let>($1, $3, $5, $7); SETLOC($$, @4); }
            | error ',' let_expr { $$ = $3; yyerrok; }
;


/* Expressions. Each alternative builds the matching AST node and stamps it
   with a line number through SETLOC. */
expression : OBJECTID ASSIGN expression { $$ = std::make_shared<Assign>($1, $3); SETLOC($$, @3); }
            /* dispatch on an explicit receiver, with and without arguments */
            | expression '.' OBJECTID '(' method_expr_list ')' { $$ = std::make_shared<DynamicDispatch>($1, $3, $5); SETLOC($$, @1); }
            | expression '.' OBJECTID '(' ')' { $$ = std::make_shared<DynamicDispatch>($1, $3, Expressions()); SETLOC($$, @1); }
            /* dispatch with an explicit type after '@' */
            | expression '@' TYPEID '.' OBJECTID '(' method_expr_list ')' { $$ = std::make_shared<StaticDispatch>($1, $3, $5, $7); SETLOC($$, @1); }
            | expression '@' TYPEID '.' OBJECTID '(' ')' { $$ = std::make_shared<StaticDispatch>($1, $3, $5, Expressions()); SETLOC($$, @1);}
            /* a call with no receiver is built as a dispatch on an Object node named "self" */
            | OBJECTID '(' method_expr_list ')' { $$ = std::make_shared<DynamicDispatch>(std::make_shared<Object>(idtable().add("self")), $1, $3); 
                                                  SETLOC($$, @1); } 
            | OBJECTID '(' ')' { $$ = std::make_shared<DynamicDispatch>(std::make_shared<Object>(idtable().add("self")), $1, Expressions()); 
                                 SETLOC($$, @1); } 
            /* control flow, blocks, let and case */
            | IF expression THEN expression ELSE expression FI { $$ = std::make_shared<If>($2, $4, $6); SETLOC($$, @2); }
            | WHILE expression LOOP expression POOL { $$ = std::make_shared<While>($2, $4); SETLOC($$, @2); }
            | '{' expression_list '}' { $$ = std::make_shared<Block>($2); SETLOC($$, @2); }
            | LET let_expr { $$ = $2; SETLOC($$, @2); }
            | CASE expression OF case_list ESAC { $$ = std::make_shared<Case>($2, $4); SETLOC($$, @2); }
            | NEW TYPEID { $$ = std::make_shared<New>($2); SETLOC($$, @2); }
            | ISVOID expression { $$ = std::make_shared<IsVoid>($2); SETLOC($$, @2); }
            /* arithmetic, comparison and logical operators */
            | expression '+' expression { $$ = std::make_shared<Plus>($1, $3); SETLOC($$, @1); }
            | expression '-' expression { $$ = std::make_shared<Sub>($1, $3); SETLOC($$, @1); }
            | expression '*' expression { $$ = std::make_shared<Mul>($1, $3); SETLOC($$, @1); }
            | expression '/' expression { $$ = std::make_shared<Div>($1, $3); SETLOC($$, @1); }
            | '~' expression { $$ = std::make_shared<Complement>($2); SETLOC($$, @2); }
            | expression '<' expression { $$ = std::make_shared<LessThan>($1, $3); SETLOC($$, @1); }
            | expression LE expression { $$ = std::make_shared<LessThanEqualTo>($1, $3); SETLOC($$, @1); }
            | expression '=' expression { $$ = std::make_shared<EqualTo>($1, $3); SETLOC($$, @1); }
            | NOT expression { $$ = std::make_shared<Not>($2); SETLOC($$, @2); }
            /* parentheses add no node of their own; the inner expression is passed through */
            | '(' expression ')' { $$ = $2; SETLOC($$, @2); } 
            /* identifiers and constants */
            | OBJECTID { $$ = std::make_shared<Object>($1); SETLOC($$, @1); }
            | INT_CONST { $$ = std::make_shared<IntConst>($1); SETLOC($$, @1); }
            | STR_CONST { $$ = std::make_shared<StringConst>($1); SETLOC($$, @1); }
            | BOOL_CONST { $$ = std::make_shared<BoolConst>($1); SETLOC($$, @1); } 
;

%%

// Utility function for converting a bison token code to its string
// representation, for better error reporting.
//
// token: a token code as produced by the lexer (a named token or a single
//        character code).
// Returns the keyword/operator spelling for fixed tokens, "<NAME> = <value>"
// for tokens that carry a value (read from the current yylval), and the
// character itself for any other code.
std::string convert_token(int token)
{
    std::string rep;

    switch (token)
    {
        case CLASS: rep = "class"; break;
        case ELSE: rep = "else"; break;
        case FI: rep = "fi"; break;
        case IF: rep = "if"; break;
        case IN: rep = "in"; break;
        case INHERITS: rep = "inherits"; break;
        case LET: rep = "let"; break;
        case LOOP: rep = "loop"; break;
        case POOL: rep = "pool"; break;
        case THEN: rep = "then"; break;
        case WHILE: rep = "while"; break;
        case CASE: rep = "case"; break;
        case ESAC: rep = "esac"; break;
        case OF: rep = "of"; break;
        case DARROW: rep = "=>"; break;
        case NEW: rep = "new"; break;
        case ISVOID: rep = "isvoid"; break;
        case ASSIGN: rep = "<-"; break;
        case NOT: rep = "not"; break;
        case LE: rep = "<="; break;
        case STR_CONST: rep = "STR_CONST = " + yylval.symbol.get_val(); break;
        case INT_CONST: rep = "INT_CONST = " + yylval.symbol.get_val(); break;
        // Adding the boolean directly to the string literal would be pointer
        // arithmetic (skipping the first character when true), not
        // concatenation, so the value is spelled out explicitly.
        case BOOL_CONST: rep = std::string("BOOL_CONST = ") + (yylval.boolean ? "true" : "false"); break;
        case TYPEID: rep = "TYPEID = " + yylval.symbol.get_val(); break;
        case OBJECTID: rep = "OBJECTID = " + yylval.symbol.get_val(); break;
        // Anything else is treated as a single-character token.
        default: rep = static_cast<char>(token);
    }     

    return rep;
}

void yyerror(char *)
{
    if (yylval.error_msg.length() <= 0)
        std::cerr << curr_filename << ":" << yylineno << ": " << "error: " <<  "syntax error near or at character or token '" << convert_token(yychar) << "'\n";
    else
        std::cerr << curr_filename << ":" << yylineno << ": " << "error: " << yylval.error_msg << "\n";
}
هل كانت مفيدة؟

المحلول

I'm not sure why you don't see any output, but I didn't look through all that code. If you call yylex from main, that will read and effectively discard one token. Then when you call yyparse, yyparse will call yylex itself until yylex returns 0. Presumably (but not certainly), the next time you call yylex from the while loop in main, it will again return 0 and the loop will end. The result should be that one word is printed from the while loop, followed by whatever output is produced by your yyparse (if any), which will possibly signal a syntax error since it never sees the first token from the input.

I doubt that is what you wanted to do, but it's not totally clear.

If you want to see the tokens as they are being lexed, then insert the statement to print the token in each lex action. Or tell flex to call the scanning function something else, like yylex_internal and create your own function called yylex() which calls yylex_internal and then prints the result before returning it.

If, as seems likely, you are only interested in this for debugging purposes, then you'd probably be better off using the -d command line option to flex, which will generate debugging output automatically. It might not be exactly the debugging format you want, but it's a lot easier to do and undo :)

To change the name of the yylex function generated by flex, insert something like the following in the code block at the top of the .l file:

#define YY_DECL int yylex_internal()

The flex-generated file declares the scanning function as follows:

YY_DECL {
  /* body of function */
}

So you can rename the function or add arguments, or even change the return type by defining the YY_DECL macro. See the Generated Scanner section of the flex manual.

By the way, it's not generally considered good style to manually number all the terminal tokens, even though bison allows you to do it. You should just let bison number them, and include the definitions in a source file by #include "y.tab.h" (or whatever you've called the bison header file; you can easily change the name by using the -o option).

مرخصة بموجب: CC-BY-SA مع الإسناد
لا تنتمي إلى StackOverflow
scroll top