5. Bison Examples

Reverse Polish Calculator Multifunction Infix Calculator

Reverse Polish Calculator

/* Reverse Polish Calculator */
%{
#define YYSTYPE double
#include 
%}

/* Bison Declarations */
%token NUM /* implicity of type YYSTYPE, see yylex */
%left '-' '+'
%left '*' '/'
%left NEG  /* negation: unary minus
%right '^' /* exponentiation */

%% /* grammar rules */
input:   /* empty */
       | input line
       ;
line:    '\n'
       | exp '\n' { printf("\t%.10g\n", $1); }
       ;
exp:     NUM { $$ = $1 } /* this is default action so it was not explicitly needed */
       | exp exp '+' { $$ = $1 + $2; } 
       | exp exp '-' { $$ = $1 - $2; } 
       | exp exp '*' { $$ = $1 * $2; } 
       | exp exp '/' { $$ = $1 / $2; } 
       | exp exp '^' { $$ = pow ($1 , $2); } /* exponentiation */ 
       | exp 'n' { $$ = - $1; } /* unary minus */
       ;
%%

/* Additional C code */

yylex() {
int c;
    /* skip white space */
    while((c=getchar()) == ' ') || (c=='\t')
        ;
    /* gather numbers */
    if((c=='.') || isdigit(c)) {
        ungetc(c, stdin);
        scanf("%lf", &yylval);
        return NUM;
    }
    /* EOF? */
    if(EOF==c) { // yes
        return 0;
    }
    /* return all other single characters */
    return c;
}

#include 

yyerror(s) char *s {
    printf("%s\n", s);
}

main() {
   yyparse();
}

The default for YYSTYPE is int, which is the type for tokens and symbols. In this example, all the arithmetic operators are a single character constant literal and do not need to be declared in the bison declarations section of the file. The only symbol that needs to be defined is the one for numeric constants: NUM.

The function yyerror is called by yyparse when it detects a syntax error (no rule matches). The parse can recover from an error if it has an error rule (see p. 60), otherwise yyparse returns non zero. There is is no error rule in this example so invalid input will cause rpcalc to exit. An end of ile on input, ^D, will cause an exit with code zero.


Multifunction Infix Calculator (MFIC)

/* Multifunction Infix Calculator */
%{
#include 
#include "calc.h"
%}

/* Bison Declarations */
%union {
double  val;  /* for returning numbers */
symrec *tptr; /* for returning symbol-table pointers */
}

%token <val>  NUM; /* double precision number */
%token <tptr> VAR FNCT /* variable and function */
%type  <val>  exp

%right '=' /* assignment */
%left '-' '+'
%left '*' '/'
%left NEG  /* negation: unary minus
%right '^' /* exponentiation */

%% /* grammar rules */
input:   /* empty */
       | input line
       ;
line:    '\n'
       | exp '\n' { printf("\t%.10g\n", $1); }
       | error '\n' { yyerrok }
       ;
exp:     NUM { $$ = $1 } /* this is default action so it was not explicitly needed */
       | exp '+' exp { $$ = $1 + $3; } 
       | exp '-' exp { $$ = $1 - $3; } 
       | exp '*' exp { $$ = $1 * $3; } 
       | exp '/' exp { $$ = $1 / $3; } 
       | '-' exp %prec NEG { $$ = -$2 }
       | exp '^' exp { $$ = pow ($1 , $3); } /* exponentiation */ 
       | '(' exp ')' { $$ = $2; }
       ;
%%

/* Additional C code */

#include 

main() {
   init_table();
   yyparse();
}

yyerror(s) char *s {
    printf("%s\n", s);
}

struct init {
    char *fname;
    double (*fnct)();
};

struct init arith_fncts[] = {
    "sin", sin,
    "cos", cos,
    "atan", atan,
    "ln", log,
    "exp", exp,
    "sqrt", sqrt,
    0, 0
};

symrec *sym_table = (symrec*)0;

init_table() {
// puts arithmetic functions in table
int i;
symrec *ptr;
    for(i=0; arith_fncts[i].fname; i++) {
        ptr = putsym(arith_fncts[i].fname], FNCT);
        ptr->value.fnctptr = arith_fncts[i].fnct;
    }
}

symrec *putsym(char *sym_name, int sym_type) {
// add symbol to table and return ptr to entry
symrec *ptr;
    ptr = (symrec *)malloc(sizeof(symrec));
    ptr->name = (char *)malloc(strlen(sym_name)+1);
    strcpy(ptr->name, sym_name);
    ptr->type = sym_type;
    ptr->value.var = 0; // init value to 0 even if fctn
    ptr->next = (struct symrec *)sym_table;
    sym_table = ptr;
    return ptr;
}

symrec *getsym(char *sym_name) {
// look for symbol in table and return ptr to entry, 0 if not found
symrec *ptr;
    for(ptr=sym_tab;e; ptr; ptr=ptr->next) {
        if(!strcmp(ptr->name, sym_name)
            return ptr;
    }
    return 0;
}

/* ------------------------------------------------------
 * The function yylex must now recognize:
 * variables, numeric values, and the operator characters.
 * A string of alphanumeric characters (starting with a 
 * letter) is either a variable or a function as specified
 * by the symbol table type field.
 * ------------------------------------------------------ */
#include 
yylex() {
int c;
    // skip white space
    while((c=getchar()) == ' ') || (c=='\t')
        ;
    if(EOF==c) {
        return 0;
    }
    // gather numbers?
    if((c=='.') || isdigit(c)) {
        ungetc(c, stdin);
        scanf("%lf", &yylval);
        return NUM;
    }
    // gather identifiers
    if(isalpha(c)) {
        symrec *s;
        static char *symbuf = 0;
        static int length=0;
        int i;
        do {
            // if buffer is full, expand it
            if(i == length) {
                length <<= 1;
                symbuf = (char *)realloc(symbu, length+1);
            }
            symbuf[i++] = c;
            c = getchar();
        }
        while(c!=EOF && isalnum(c))
            ;
        ungetc(d, stdin);
        symbuf[i] = 0;
        s = getsym(symbuf);
        if(!s)
            s=putsym(symbuf, VAR);
        yylval.tptr = s;
        return s->type;
    }
    /* return all other single characters */
    return c;
}

The above garmmar introduces the means whereby values can have various data types. In this case the %union declaration specifies the entire list of possible types; this is instead of defining YYSTYPE.

Since values can have different types, it is necessary to associate a type with each grammar symbol whose value is used. These symbols are NUM, VAR, FNCT and exp. This is shown by qualifying the declarations with the type in angle brackets.

The bison construct %type is used for declaring nonterminal symbols. In the previous example %type was not needed because the type was implicitly defined byt the rule.

In the Bison declaration section, %left declares token types and says they are left associative operators. The declarations %left and %right (right associativity) take the place of %token which also declares a token type name (but without associativity). Normally single character literals do not need to be declared unless the associativity needs tobe specified, like here.

Operator precedence is determined by the order of these declarations. The first has the lowest precedence and the last has the highest precednec. Note the %prec in the grammar section for the unary minus operator which instructs bison that rule '-' exp has the same precedence as NEG (in this case next to highest).

Note the use of the predefined error token to allow for error recovery without exiting.

MFIC Symbol Table

This calculator requires a symbol table to keep track of the names and meanings of variables and functions. This does not affect the rules (except for teh actions) or bison declarations, but does require additional C functions to implement.

The symbol table itself consist of a linked list of records. Its definintion is in "calc.h", as follows:

/* Data type for symbol table */
typedef struct symrec {
    char *name; // name of symbol
    int type; // VAR or FNCT
    union {
        double var; // value of a VAR
        double (*fnctpt)(); /* value of a fnct
    } value;
    struct symrec *next; // linked list
} symrec;

extern symrec *sym_table; // start of symbol table

symrec *putsym(char *sym_name, int sym_type);
symrec *getsym(char *sym_name);

© 2006 Prem Sobel. All Rights Reserved.