Debugging Bison/Yacc Grammars

Inevitably when you write Bison/Yacc grammars you will run into a grammatical errors. In this case the grammar was my LBNF grammar (Fortran.cf, Fortran.y, Fortran.l) for Fortran and I'm running the terminal.for module from the old Galaxy program through my 'go' script (current files: debugging.7z). Here's the terminal output from my script run. Interspersed with the output from my echo statements are the outputs from the various programs run (including Bison/Yacc):

alan@Midnight$ ./go terminal.for  -d
--- Arguments ---
fpgm='terminal.for'
flag='-d'
extn='for'
fn='terminal'
--- Clean up ---
--- Compiling LBNF grammar with BNFC ---

148 rules accepted

no change to file ./Absyn.h
no change to file ./Absyn.c
writing file ./Fortran.l (saving old file as ./Fortran.l.bak)
writing file ./Fortran.y (saving old file as ./Fortran.y.bak)
writing file ./Parser.h (saving old file as ./Parser.h.bak)
no change to file ./Skeleton.h
no change to file ./Skeleton.c
no change to file ./Printer.h
no change to file ./Printer.c
no change to file ./Test.c
writing file ./Makefile (saving old file as ./Makefile.bak)
--- Cleaning symbols ---
--- Turning on Debug in Makefile ---
--- Makefile ---
    5c5
    < FLEX_OPTS = -PFortran --debug
    ---
    > FLEX_OPTS = -PFortran
    8c8
    < BISON_OPTS = -t -pFortran --debug -r all -g
    ---
    > BISON_OPTS = -t -pFortran
--- Fixup generated BNF Lexical Analyser ---
--- Show differences: Fortran.l ---
    34c34,35
    < "\n"       { ++yy_mylinenumber; return T_NEWLINE; };
    ---
    > "
    > "        return T_NEWLINE;
    91c92,93
    < [ \t\r]+        /* ignore white space. */;
    ---
    > \n ++yy_mylinenumber ;
    > [ \t\r\n\f]        /* ignore white space. */;
--- Making ---
flex -PFortran --debug -oLexer.c Fortran.l
gcc -g -W -Wall -c Lexer.c
Lexer.c:2053:16: warning: ‘input’ defined but not used [-Wunused-function]
     static int input  (void)
                ^~~~~
Lexer.c:2002:17: warning: ‘yyunput’ defined but not used [-Wunused-function]
     static void yyunput (int c, char * yy_bp )
                 ^~~~~~~
bison -t -pFortran --debug -r all -g Fortran.y -o Parser.c
Fortran.y: warning: 2 shift/reduce conflicts [-Wconflicts-sr]
Fortran.y: warning: 12 reduce/reduce conflicts [-Wconflicts-rr]
gcc -g -W -Wall -c Parser.c
gcc -g -W -Wall -c Test.c
Linking TestFortran...
gcc -g -W -Wall Absyn.o Lexer.o Parser.o Printer.o Test.o -o TestFortran
=========================================
--- Cleaning of input Fortran program ---
Counts:
  Comments:         45
  Continuations:     0
--- Compiling input Fortran program ---
--- Output in TestFortran.out ---
--(end of buffer or a NUL)
--accepting rule at line 86 ("// ESCAPE CHARACTER
")
--accepting rule at line 86 ("//
")
--accepting rule at line 86 ("//*********  TERMINAL CONTROL ROUTINES  **************
")
--accepting rule at line 86 ("//
")
--accepting rule at line 86 ("//  A TERMINAL WITH CURSOR POSITIONING AND CLEAR SCREEN IS REQUIRED
")
--accepting rule at line 86 ("//
")
--accepting rule at line 86 ("//  MODIFY GTCHAR, TPOS, AND CLEAR FOR YOUR TERMINAL(S)
")
--accepting rule at line 86 ("//
")
--accepting rule at line 86 ("//****************************************************
")
--accepting rule at line 86 ("//
")
--accepting rule at line 86 ("//  BY WILLIAM WOOD, SEPTEMBER 1980
")
--accepting rule at line 86 ("//
")
--accepting rule at line 86 ("//
")
--accepting rule at line 86 ("// Modified by Stuart Renes, WeCo, July 20th, 1981
")
--accepting rule at line 86 ("// to add Vt52 support and make ADM-3A /FT1 type.
")
--accepting rule at line 86 ("//
")
--accepting rule at line 86 ("// Added ADDS 520/580 support on August 14th, 1981 as
")
--accepting rule at line 86 ("// /FT2 type.
")
--accepting rule at line 86 ("//
")
--accepting rule at line 86 ("//   TPOS - PUT CHARS IN BUF TO POSITION CURSOR AT IROW, ICOL
")
--accepting rule at line 86 ("// WPW 9/19/80
")
--accepting rule at line 91 ("      ")
--accepting rule at line 82 ("SUBROUTINE")
--accepting rule at line 91 (" ")
--accepting rule at line 87 ("TPOS")
--accepting rule at line 35 ("(")
--accepting rule at line 87 ("IROW")
--accepting rule at line 39 (",")
--accepting rule at line 91 (" ")
--accepting rule at line 87 ("ICOL")
--accepting rule at line 37 (")")
")accepting rule at line 91 ("
--accepting rule at line 34 ("
")
--accepting rule at line 91 ("      ")
--accepting rule at line 61 ("COMMON")
--accepting rule at line 42 ("/")
--accepting rule at line 87 ("CURSOR")
--accepting rule at line 42 ("/")
--accepting rule at line 87 ("TTYPE")
")accepting rule at line 91 ("
--accepting rule at line 34 ("
")
--accepting rule at line 91 ("      ")
--accepting rule at line 74 ("INTEGER")
--accepting rule at line 91 (" ")
--accepting rule at line 87 ("TTYPE")
")accepting rule at line 91 ("
--accepting rule at line 34 ("
")
--accepting rule at line 91 ("      ")
--accepting rule at line 57 ("BYTE")
--accepting rule at line 91 (" ")
--accepting rule at line 87 ("ADMV")
--accepting rule at line 35 ("(")
--accepting rule at line 90 ("2")
--accepting rule at line 37 (")")
--accepting rule at line 39 (",")
--accepting rule at line 91 (" ")
--accepting rule at line 87 ("VT100V")
--accepting rule at line 35 ("(")
--accepting rule at line 90 ("2")
--accepting rule at line 37 (")")
--accepting rule at line 39 (",")
--accepting rule at line 91 (" ")
--accepting rule at line 87 ("vt52v")
--accepting rule at line 35 ("(")
--accepting rule at line 90 ("2")
--accepting rule at line 37 (")")
--accepting rule at line 39 (",")
--accepting rule at line 91 (" ")
--accepting rule at line 87 ("addsv")
--accepting rule at line 35 ("(")
--accepting rule at line 90 ("2")
--accepting rule at line 37 (")")
")accepting rule at line 91 ("
--accepting rule at line 34 ("
")
--accepting rule at line 91 ("      ")
--accepting rule at line 77 ("PARAMETER")
--accepting rule at line 91 (" ")
--accepting rule at line 87 ("ADM3A")
--accepting rule at line 91 (" ")
--accepting rule at line 40 ("=")
--accepting rule at line 91 (" ")
--accepting rule at line 90 ("1")
")accepting rule at line 91 ("
--accepting rule at line 34 ("
")
--accepting rule at line 91 ("      ")
--accepting rule at line 77 ("PARAMETER")
--accepting rule at line 91 (" ")
--accepting rule at line 87 ("VT100")
--accepting rule at line 91 (" ")
--accepting rule at line 40 ("=")
--accepting rule at line 91 (" ")
--accepting rule at line 90 ("2")
")accepting rule at line 91 ("
--accepting rule at line 34 ("
")
--accepting rule at line 91 (" ")
--accepting rule at line 87 ("parameter")
--accepting rule at line 91 (" ")
--accepting rule at line 87 ("vt52")
error: line 28: syntax error at vt52
alan@Midnight$

The important part for this post are the lines:

bison -t -pFortran --debug -r all -g Fortran.y -o Parser.c
Fortran.y: warning: 2 shift/reduce conflicts [-Wconflicts-sr]
Fortran.y: warning: 12 reduce/reduce conflicts [-Wconflicts-rr]

This tells us there are two types of errors occuring shift/reduce and reduce/reduce which prevents Bison/Yacc from creating a parser. There can be other types of errors, such as gramatical errors, which will only be discovered by thorough testing.

Understanding the errors

To understand exactly what shift/reduce and reduce/reduce errors a compiler course which includes the theory of LR parsers would be handy. However, the quick explanation is that as Bison/Yacc tries to find a complete rule in your grammar it shifts (lexical and rule) tokens onto the stack until it finds the last token in the rule then Bison/Yacc reduces (removes) all the tokens on the stack corresponding to that rule and replaces them with a single token of that rule type. Essentially it is building the AST (Abstract Synatx Tree) representation of the program being compiled.

To solve these types of errors we look at the Parser.output file which contains the output of the run of Bison/Yacc. This file contains a number of sections:

  • Error list
  • Abbreviated grammar
  • Terminal (leaf) nodes in the AST
  • Non-Terminal (internal) nodes in the AST
  • The generated grammar states.

The Error list just lists all the shift/reduce and reduce/reduce errors that need to be fixed before a working parser is generated. The Abbreviated grammar shows the raw grammar without any adornments that go along with the grammar (like your C-code). This abbreviated grammar is usually easier to read once the parser file becomes filled in.

The Terminal (leaf) node list gives a cross reference to the rules in the Abbreviated grammar where that terminal node appears. The first number in each line, in brackets (), is the token ID of the token (the lexer and parser use this number to communicate the token).

The Non-Terminal node list again has a node ID in brackets() and then lists all the grammar rules that contain that node on either the right-hand-side(output) or the left-hand-side(input).

The Generated grammar states are created corresponding to a NFA (Non-deterministic Finite Atomaton) which is like a DFA (Deterministic FA) but will take into account the Bison/Yacc grammar doesn't always have a terminal token at each location. For instance in the following grammar line when the parser tries to parse a 'statement' it has three (in this case) statements that it can follow. In a DFA it needs a terminal-token (e.g. 'a','b','c', ... , '(', ')' etc to make a decision on which rule to follow. What a NFA does is just follows all the possibilities until it finds a terminal token :

statement := dimension_statement | for_statement | assignment_statement

The gist of this is that the generated states in the Parser.output file will look very dissimilar to any one of your rules (usually). They will be, instead, a collection of your rules. Here is the generated state 2 from my grammar:

State 2

    1 Program: ListLblStm .  [$end]
    3 ListLblStm: ListLblStm . LblStm T_NEWLINE
    4 LblStm: . Labeled_stm
    5       | . Simple_stm
    6       | . %empty  [T_NEWLINE]
    7 Labeled_stm: . _INTEGER_ Simple_stm
    8 Simple_stm: . T_IMPL Type_Spec Type_Qual T_LPAREN T_NAME T_MINUS T_NAME T_RPAREN
    9           | . T_PARM ListNameValue
   10           | . T_DIMS ListNameDim
   11           | . Type_Spec Type_Qual ListNameDim
   12           | . Type_Spec ListNameDim
   13           | . T_DATA ListDataSeg
   14           | . T_COMM T_DIV T_NAME T_DIV ListName
   15           | . T_WRITE T_LPAREN ListAssignName T_RPAREN
   16           | . T_WRITE T_LPAREN ListAssignName T_RPAREN ListNameOrArray
   17           | . T_FMT T_LPAREN ListFmtSpecs T_RPAREN
   18           | . T_READ T_LPAREN ListAssignName T_RPAREN ListNameOrArray
   19           | . T_READ T_EQUALS LExp
   20           | . T_IF T_LPAREN LExp T_RPAREN IfThenPart
   21           | . T_NAME T_EQUALS LExp
   22           | . T_NAME T_LPAREN ListLExp T_RPAREN T_EQUALS LExp
   23           | . T_CALL T_NAME T_LPAREN ListSpecLExp T_RPAREN
   24           | . T_CALL T_NAME
   25           | . T_GO T_TO _INTEGER_
   26           | . T_OPEN T_LPAREN ListAssignName T_RPAREN
   27           | . T_CLOSE T_LPAREN ListAssignName T_RPAREN
   28           | . T_DO _INTEGER_ DoRangePart
   29           | . T_STOP
   30           | . T_STOP T_SQSTR
   31           | . T_END
   32           | . T_SUBR T_NAME T_LPAREN ListSpecLExp T_RPAREN
   33           | . T_SUBR T_NAME
   34           | . T_FUNC T_NAME T_LPAREN ListSpecLExp T_RPAREN
   35           | . T_FUNC T_NAME
   36           | . T_CONT
   37           | . T_RTN
   38           | . T_EQU T_LPAREN T_NAME T_COMMA NameOrArrRef T_RPAREN
  143 Type_Spec: . T_INT
  144          | . T_REAL
  145          | . T_DBL
  146          | . T_CHAR
  147          | . T_BYTE
  148          | . T_LOGI

    T_BYTE     shift, and go to state 4
    T_CALL     shift, and go to state 5
    T_CHAR     shift, and go to state 6
    T_CLOSE    shift, and go to state 7
    T_COMM     shift, and go to state 8
    T_CONT     shift, and go to state 9
    T_DATA     shift, and go to state 10
    T_DIMS     shift, and go to state 11
    T_DO       shift, and go to state 12
    T_DBL      shift, and go to state 13
    T_END      shift, and go to state 14
    T_EQU      shift, and go to state 15
    T_FMT      shift, and go to state 16
    T_FUNC     shift, and go to state 17
    T_GO       shift, and go to state 18
    T_IF       shift, and go to state 19
    T_IMPL     shift, and go to state 20
    T_INT      shift, and go to state 21
    T_LOGI     shift, and go to state 22
    T_OPEN     shift, and go to state 23
    T_PARM     shift, and go to state 24
    T_READ     shift, and go to state 25
    T_REAL     shift, and go to state 26
    T_RTN      shift, and go to state 27
    T_STOP     shift, and go to state 28
    T_SUBR     shift, and go to state 29
    T_WRITE    shift, and go to state 30
    T_NAME     shift, and go to state 31
    _INTEGER_  shift, and go to state 32

    T_NEWLINE  reduce using rule 6 (LblStm)
    $default   reduce using rule 1 (Program)

    LblStm       go to state 33
    Labeled_stm  go to state 34
    Simple_stm   go to state 35
    Type_Spec    go to state 36

In the numbered lines at the top corresponding to our grammar the period '.' corresponds to the location in all those rules that is the current position of parsing. Thus in lines 8-32 we are in the Simple_stm: part of our grammar waiting to get the first token of a new statement. If we get a T_WRITE token then two rules (15,16) will correspond. If we then go down to the next section which shows what we do when we get various tokens we see that a T_WRITE leads us to state 30 where we will process the next tokens from a write statement (e.g "WRITE(1, 13) (ISCORE(I), I = 1, 8)" ).

Shift/Reduce and Reduce/Reduce errors

So now lets look at the two types of errors shift/reduce and reduce/reduce errors in my program and go through the debugging process.

The shift/reduce error then is when Bison/Yacc can't decide whether to shift the next token onto the stack (for instance when we are in the middle of a statement) or remove a set of tokens and replace them with a single token (like when a statement has been completely parsed). Here is the top of my Parser.output file:

Terminals unused in grammar

   _ERROR_


State 174 conflicts: 1 shift/reduce
State 178 conflicts: 3 reduce/reduce
State 226 conflicts: 3 reduce/reduce
State 227 conflicts: 3 reduce/reduce
State 228 conflicts: 1 shift/reduce, 3 reduce/reduce

Error "State 174 conflicts: 1 shift/reduce"

First lets look at the "State 174 conflicts: 1 shift/reduce" error. Going down to "State 174" in this file we find:

State 174

   90 LExp: . LExp T_OR LExp2
   91     | . LExp T_AND LExp2
   92     | . LExp2
   93 LExp2: . LExp2 T_EQ LExp3
   94      | . LExp2 T_NE LExp3
   95      | . LExp3
   96 LExp3: . LExp3 T_LT LExp4
   97      | . LExp3 T_GT LExp4
   98      | . LExp3 T_LE LExp4
   99      | . LExp3 T_GE LExp4
  100      | . LExp4
  101 LExp4: . LExp4 T_PLUS LExp5
  102      | . LExp4 T_MINUS LExp5
  103      | . LExp5
  104 LExp5: . LExp5 T_MULT LExp6
  105      | . LExp5 T_DIV LExp6
  106      | . LExp6
  107 LExp6: . Unary_operator LExp7
  108      | . LExp7
  109 LExp8: . LExp5 T_POW LExp8
  110      | . LExp8 T_LPAREN T_RPAREN
  110      | LExp8 T_LPAREN . T_RPAREN
  111      | . LExp8 T_LPAREN ListSpecLExp T_RPAREN
  111      | LExp8 T_LPAREN . ListSpecLExp T_RPAREN
  112      | . LExp9
  113 LExp9: . TIntVar RangePart
  114      | . T_SQSTR
  115      | . LExp10
  118 TIntVar: . _INTEGER_
  119        | . T_TRUE
  120        | . T_FALSE
  121        | . T_NAME
  122        | . T_READ
  125 LExp7: . LExp8
  126 LExp10: . LExp11
  127 LExp11: . T_LPAREN LExp T_RPAREN
  128 Unary_operator: . T_PLUS
  129               | . T_MINUS
  130               | . T_NOT
  131 ListSpecLExp: . SpecLExp
  132             | . SpecLExp T_COMMA ListSpecLExp
  133 SpecLExp: . %empty  [T_RPAREN, T_COMMA]
  134         | . LExp

    T_LPAREN   shift, and go to state 93
    T_MINUS    shift, and go to state 94
    T_RPAREN   shift, and go to state 229
    T_PLUS     shift, and go to state 95
    T_TRUE     shift, and go to state 96
    T_FALSE    shift, and go to state 97
    T_NOT      shift, and go to state 98
    T_READ     shift, and go to state 99
    T_NAME     shift, and go to state 100
    T_SQSTR    shift, and go to state 101
    _INTEGER_  shift, and go to state 102

    T_RPAREN  [reduce using rule 133 (SpecLExp)]
    $default  reduce using rule 133 (SpecLExp)

    LExp            go to state 129
    LExp2           go to state 104
    LExp3           go to state 105
    LExp4           go to state 106
    LExp5           go to state 107
    LExp6           go to state 108
    LExp8           go to state 109
    LExp9           go to state 110
    TIntVar         go to state 111
    LExp7           go to state 112
    LExp10          go to state 113
    LExp11          go to state 114
    Unary_operator  go to state 115
    ListSpecLExp    go to state 230
    SpecLExp        go to state 131

The second section were Bison/Yacc show the SHIFTs and REDUCEs it wants to do you can see two lines that define the problem:

    T_RPAREN   shift, and go to state 229
              ...
    T_RPAREN  [reduce using rule 133 (SpecLExp)]

The parser wants to both SHIFT and REDUCE at this state when it gets a T_RPAREN or ')'. So now we have to go through our grammar (at the top of this state) to determine where we see a period '.' followed directly by a T_RPAREN. There are two places (grammar lines 110 and 133). In line 110 :

  110      | LExp8 T_LPAREN . T_RPAREN
  133 SpecLExp: . %empty  [T_RPAREN, T_COMMA]

This is a good time to go back to our original grammar (Fortran.cf) and look a the code corresponding to these two rules:

Epower.      LExp8 ::= LExp5 "**" LExp8;
Efunk.       LExp8 ::= LExp8 "(" ")";
Efunkpar.    LExp8 ::= LExp8 "(" [SpecLExp] ")";
...
(:[]).   [SpecLExp] ::= SpecLExp ;
(:).     [SpecLExp] ::= SpecLExp "," [SpecLExp];

SpLExpNil. SpecLExp ::= ;
SpLExpNot. SpecLExp ::= LExp;

You will notice that the Efunk. and Efunkpar. rules are almost identical except the second one has a list of SpecLExp tokens. Going further down the grammar file we notice that the SpecLExp can be either NILL (SpLExpNil.) or an LExp (SpLExpNot.). This essentially means there is two ways to have an empty function call in this grammar so the parser doesn't know if it should shift a "SpecLExp" token onto the stack or to reduce the LExp8 "(" ")" token sequence into a LExp8 token.

The solution, that I decided upon (there may be many ways to change the grammar to fix this problem) was to eliminate the 110 rule (Ffunk.). So I did that and recompiled and the "State 174" error disappeared (NOTE: The state names may change over compiles).

Error "State 178 conflicts: 3 reduce/reduce"

The solution to reduce/reduce errors is similar. In this case state 178 shows us:

State 178

  107 LExp6: Unary_operator LExp7 .  [T_NEWLINE, T_MINUS, T_RPAREN, T_MULT, T_COMMA, T_PLUS, T_DIV, T_OR, T_AND, T_EQ, T_NE, T_LT, T_GT, T_LE, T_GE, T_POW]
  108      | LExp7 .  [T_MULT, T_DIV, T_POW]

    T_MULT    reduce using rule 107 (LExp6)
    T_MULT    [reduce using rule 108 (LExp6)]
    T_DIV     reduce using rule 107 (LExp6)
    T_DIV     [reduce using rule 108 (LExp6)]
    T_POW     reduce using rule 107 (LExp6)
    T_POW     [reduce using rule 108 (LExp6)]
    $default  reduce using rule 107 (LExp6)

You will note here that the terminals T_MULT, T_DIV, and T_POW are all mentioned on two REDUCE lines each. These are the 3 reduce/reduce errors for this state. Looking at the BNFC grammar file (Fortran.cf) again we see what the problem is:

Elor.        LExp  ::= LExp ".OR." LExp2;
Eland.       LExp  ::= LExp ".AND." LExp2;
Eeq.         LExp2 ::= LExp2 ".EQ." LExp3;
Eneq.        LExp2 ::= LExp2 ".NE." LExp3;
Elthen.      LExp3 ::= LExp3 ".LT." LExp4;
Egrthen.     LExp3 ::= LExp3 ".GT." LExp4;
Ele.         LExp3 ::= LExp3 ".LE." LExp4;
Ege.         LExp3 ::= LExp3 ".GE." LExp4;
Eplus.       LExp4 ::= LExp4 "+" LExp5;
Eminus.      LExp4 ::= LExp4 "-" LExp5;
Etimes.      LExp5 ::= LExp5 "*" LExp6;
Ediv.        LExp5 ::= LExp5 "/" LExp6;
Epreop.      LExp6 ::= Unary_operator LExp7;
Epower.      LExp8 ::= LExp5 "**" LExp8;
Efunkpar.    LExp8 ::= LExp8 "(" [SpecLExp] ")";
Evar.        LExp9 ::= TIntVar RangePart ;
Estr.        LExp9 ::= SQString ;

ERangeNull. RangePart ::= ;
ERange.     RangePart ::= ":" TIntVar ;

ETInt.       TIntVar ::= Integer;
ETTrue.      TIntVar ::= ".TRUE.";
ETFalse.     TIntVar ::= ".FALSE.";
ETNameVar.   TIntVar ::= Name;
ETRead.      TIntVar ::= "READ";

(:[]).   [LExp] ::= LExp ;
(:).     [LExp] ::= LExp "," [LExp];

_. LExp   ::= LExp2 ;
_. LExp2  ::= LExp3 ;
_. LExp3  ::= LExp4 ;
_. LExp4  ::= LExp5 ;
_. LExp5  ::= LExp6 ;
_. LExp6  ::= LExp7 ;
_. LExp7  ::= LExp8 ;
_. LExp8  ::= LExp9 ;
_. LExp9  ::= LExp10 ;
_. LExp10 ::= LExp11 ;
_. LExp11 ::= "(" LExp ")" ;

OUnaryPlus.   Unary_operator ::= "+" ;
OUnaryMinus.  Unary_operator ::= "-" ;
OUnaryNot.    Unary_operator ::= ".NOT." ;

The chunk at the top of this listing shows the problem (the unary operators '+', '-' and '.NOT.' are the simple reason that this causes a problem in the lines "LExp? ::= LExp5 {*|/|** } ... " where Bison/Yacc doesn't know whether to reduce as a Epreop. token or as a LExp7 token. The overarching problem is not the unary operator it is me being lazy about thinking how logical and arithmetic expressions should be expressed in the grammar ... instead I just mushed everything together.

What this version of the grammar will do is create another type of error (for instance a coding error where logical expression like ".NOT. 6" is valid. This type of error would have to be detected with tests. For now I will just remove the unary operators and will fix the Logical/Arithmetic grammar error later.

Recompiling we see that removing the unary operator does fix the reduce/reduce error but also flags a number of tokens as unused:

Nonterminals useless in grammar

   Unary_operator

Terminals unused in grammar

   _ERROR_
   T_NOT

Rules useless in grammar

  143 Unary_operator: T_PLUS
  144               | T_MINUS
  145               | T_NOT

The rest of the error correction process is similar. In fact, looking at the remaining reduce/reduce and shift/reduce errors points to the same chunk of grammar. Therefore it appears my next job is to correct my grammar laziness.

So until I fix my grammar...

BNFC Quirks

BNFC is a great tool but it has some quirks that have slowed down the process of building a front-end for my fortran2c translator. However, the code generation, especially if you haven't built a compiler before moves you quite a distance forward. Here's what I've found so far...

Position dependent code:

First, in the lexical analyzer (the part that converts from a character stream to a token stream) BNFC is fairly inflexible. In the case of Fortran and other older languages there are position dependent tokens. Some examples come to mind: The comment, the continuation line, label-numbers and the sequence column. These are illustrated in the file segment (Maze.for) below:

C
C   MAZE DESCRIPTION
C
  180   WRITE(6,190) HEIGHT,WIDTH,DEPTH
  190   FORMAT('0',' YOUR MAZE HAS A HEIGHT OF',I5,/,
    1  '             AND A WIDTH OF',I5,/,
    1  '            WITH A DEPTH OF',I5,//,
    2  '  THE DIRECTION COMMANDS FOR MAZE ARE SINGLE LETTERS',/,
    2  '    N(ORTH), U(P),    OR 8 IS UP',/,
    2  '    E(AST) , R(IGHT), OR 6 IS RIGHT',/,
    2  '    S(OUTH), D(OWN),  OR 2 IS DOWN',/,
    2  '    W(EST) , L(EFT),  OR 4 IS LEFT',/,
    2  '    I(N)   ,          OR 9 IS IN TO SCREEN',/,
    2  '    O(UT)  ,          OR 7 IS OUT OF SCREEN',/,

Comments start with a "C" in column one and go to the end-of-line. Continuation lines start at the beginning with a tab or 5 spaces, have a continuation mark (usually 0-9 or '+') then a space and the body of the continuation. Labels are numbers preceded with spaces and ending at column 5.

Unfortunately I don't have a example of code with a sequence column. These were basically 8-digit numbers in columns 73-80. Code including the above continuation/label-number preface to statements was from column 1 to 72 with 73-80 being left over for the sequence number. I believe this sequence number was a holdover from the old punched card days when people would do a 52-card pickup with a program deck (which usually went far beyond 52 cards) and needed a way to sort it back into a program (by machine).

All these things need Flex code that allows for multiple states (i.e. <prefix>, <seqno>, <statement>) which doesn't seem to be a possibility in BNFC.

What I ended up doing was writing a small state-machine program (in C) that converted comments to C-like '//' comments and just joined continuation lines into one long line (since we aren't working on 80-column punch cards anymore). The code below can be compiled with the command (I'm running Ubuntu/linux with the GNU gcc compiler):

g++ -std=c++11 -g fixup.c -o fixup

/*
 * Program to do some preprocessing on a Fortran file to deal with:
 *     "\nC" ==> "\n//"                   -- Comments, and
 *     "[ \t]*\n     [0-9+][ \t]*" ==> "" -- Continuation lines
 *     "[ \t]*\n\t[0-9+][ \t]*"    ==> "" -- Continuation lines
 *
 */


#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdarg.h>
   
#define BUF_SZ 1000

#define DEBUG false

char buf[BUF_SZ];
int bufidx=-1;
int state=0;

int CntComments=0;
int CntContinue=0;

void save(int chr){
    buf[++bufidx]=(char)chr;

    if(bufidx==BUF_SZ){
        fprintf(stderr,"ERROR: Buffer Overflow\n");
        exit(1);
    }
}

void unsave(){
    buf[bufidx--]=0;

    if(bufidx<-1){
        fprintf(stderr,"ERROR: Buffer Underflow\n");
        exit(2);
    }
}

void reset(){
    memset(buf,0,BUF_SZ);
    bufidx=-1;
    state=0;
}

void purge(){
    printf("%s",buf);
    reset();
}


void asaprintf( const char * format, ... )
{
    va_list args;
    va_start (args, format);
    if(DEBUG) vprintf (format, args);
    va_end (args);
}

void newstate(int ns){
    state=ns;
    //asaprintf("<%d>",state);
}


int main(int argc,char* argv[]){

    int chr=0;
    int idx=0;

    reset();

    state=2; // Start in state 2 because first line in file may be a comment

    while((chr=getchar())!=EOF){
        asaprintf("%6d) state=%d chr='%c'(0x%02x)\n",idx++,state,chr,chr);
        save(chr);
        if(chr==0){
            reset();
        }else{
            switch(state){
                case 0:
                    switch(chr){
                        case ' ': break;
                        case '\t': break;
                        case '\n': newstate(2); break;
                        default: purge(); break;
                    };
                    break;

                case 2:
                    switch(chr){
                        case ' ': newstate(6); break;
                        case 'C': case 'c':
                            unsave(); purge(); printf("//"); CntComments++; break;
                        case '\t': newstate(10); break;
                        default: purge(); break;
                    };
                    break;
           
                case 6:
                    switch(chr){
                        case ' ': newstate(7); break;
                        default: purge(); break;
                    };
                    break;
           
                case 7:
                    switch(chr){
                        case ' ': newstate(8); break;
                        default: purge(); break;
                    };
                    break;
           
                case 8:
                    switch(chr){
                        case ' ': newstate(9); break;
                        default: purge(); break;
                    };
                    break;
           
                case 9:
                    switch(chr){
                        case ' ': newstate(10); break;
                        default: purge(); break;
                    };
                    break;
           
                case 10:
                    switch(chr){
                        case '0':
                        case '1':
                        case '2':
                        case '3':
                        case '4':
                        case '5':
                        case '6':
                        case '7':
                        case '8':
                        case '9':
                        case '+':
                            newstate(11); break;
                        default: purge(); break;
                    };
                    break;
           
                case 11:
                    switch(chr){
                        case ' ': break;
                        case '\t': break;
                        default: reset(); putchar(chr); CntContinue++; break;
                    };
                    break;
           
            }
        }
    }
    printf("\n");

    fprintf(stderr, "Counts:\n");
    fprintf(stderr, "  Comments:      %5d\n",CntComments);
    fprintf(stderr, "  Continuations: %5d\n",CntContinue);
    return(0);
}

The Maze.for program then became (Maze_pp.for):

//
//  MAZE -  USES A VT100 TO WANDER AROUND.
//      THE VT100 MUST HAVE ADVANCED VIDEO OPTION.
//      ANSI VT100 ESCAPE SEQUENCES ARE USED.
//
//  WRITTEN BY DON MCLEAN
//  OF THE MACNEAL-SCHWENDLER CORP.
//
//  THE PURPOSE OF THIS PROGRAM WAS TO
//      1. LEARN SOMETHING ABOUT THE VT100 GRAPHICS.
//      2. KEEP MY KIDS BUSY ON WEEKENDS. WHILE I TRIED
//         TO GET SOMETHING ELSE DONE.
//
//  USE OF THIS PROGRAM FOR ANY PURPOSE OTHER THAN FUN
//  IS PROHIBITED.
//
    IMPLICIT INTEGER*4 (A-Z)
//
//  MAZE DIMENSIONS
//  HMAX AND WMAX SHOULD NOT BE LARGER THAN 22 AND 80 RESP.
//
    PARAMETER HMAX=22, WMAX=80, DMAX=4
//
    DIMENSION SLEEP(2)
//
//  DIMENSION IS HMAX*WMAX*DMAX
    INTEGER*2  EXIT(HMAX*WMAX*DMAX), MAT(HMAX*WMAX*DMAX)
    INTEGER*2  LCOUNT(DMAX)
//
    BYTE CLEAR(2)
//
    CHARACTER*200 INPUT
//
    COMMON /MAZECM/ STARTH,STARTW,STARTD,ENDH,ENDW,ENDD,NOBELL
//
//  CLEAR IS A VT100 RESET
//
    DATA CLEAR / 27, 'c' /
//
//  START - SEE IF AN OLD GAME IS TO BE USED.
//
    WRITE(6,10)
   10   FORMAT(' WELCOME TO MAZE')
//
   20   WRITE(6,30)
   30   FORMAT(' ARE YOU GOING TO PLAY A SAVED GAME? ',$)
    READ(5,40) NC,INPUT
   40   FORMAT(Q,A)
    IF(INDEX(INPUT(1:NC),'Y').NE.0) GO TO 120
    SAVE = 0
//
//  INPUT DIMENSION OF MAZE
//
   50   WRITE(6,60) HMAX
   60   FORMAT(' PLEASE INPUT HEIGHT OF MAZE - DEFAULT = ',I2,' ',$)
    READ(5,40) NC,INPUT
    READ(INPUT,70,ERR=50) HEIGHT
   70   FORMAT(BNI2)
    IF(HEIGHT.EQ.0) HEIGHT=HMAX
    IF(HEIGHT.LT.2) HEIGHT=2
    IF(HEIGHT.GT.HMAX) HEIGHT=HMAX
   80   WRITE(6,90) WMAX
   90   FORMAT(' PLEASE INPUT WIDTH  OF MAZE - DEFAULT = ',I2,' ',$)
    READ(5,40) NC,INPUT
    READ(INPUT,70,ERR=80) WIDTH
    IF(WIDTH.EQ.0) WIDTH = WMAX
    IF(WIDTH.LT.2) WIDTH=2
    IF(WIDTH.GT.WMAX) WIDTH=WMAX
  100   WRITE(6,110)
  110   FORMAT(' PLEASE INPUT DEPTH  OF MAZE - DEFAULT =  1 ',$)
    READ(5,40) NC,INPUT
    READ(INPUT,70,ERR=100) DEPTH
    IF(DEPTH.LE.0) DEPTH = 1
    IF(DEPTH.GT.DMAX) DEPTH = DMAX
    NTERMS = HEIGHT * WIDTH * DEPTH
...

Symbols:

The next problem that encountered were the symbols in the BNFC lexer/parser, _SYMB_43, for example. Yuck! I would have been shot at a code review for that. Here's what the lexer looked like (Fortran.l.bkp):

/* -*- c -*- This FLex file was machine-generated by the BNF converter */
%option noyywrap
%{
#define yylval Fortranlval
#define YY_BUFFER_APPEND Fortran_BUFFER_APPEND
#define YY_BUFFER_RESET Fortran_BUFFER_RESET
#define initialize_lexer Fortran_initialize_lexer
#include <string.h>
#include "Parser.h"
#define YY_BUFFER_LENGTH 4096
extern int yy_mylinenumber ;
char YY_PARSED_STRING[YY_BUFFER_LENGTH];
void YY_BUFFER_APPEND(char *s)
{
  strcat(YY_PARSED_STRING, s); //Do something better here!
}
void YY_BUFFER_RESET(void)
{
  int x;
  for(x = 0; x < YY_BUFFER_LENGTH; x++)
    YY_PARSED_STRING[x] = 0;
}

%}

LETTER [a-zA-Z]
CAPITAL [A-Z]
SMALL [a-z]
DIGIT [0-9]
IDENT [a-zA-Z0-9'_]
%START YYINITIAL COMMENT CHAR CHARESC CHAREND STRING ESCAPED
%%

<YYINITIAL>"
"        return _SYMB_0;
<YYINITIAL>"("           return _SYMB_1;
<YYINITIAL>"-"           return _SYMB_2;
<YYINITIAL>")"           return _SYMB_3;
<YYINITIAL>"*"           return _SYMB_4;
<YYINITIAL>","           return _SYMB_5;
<YYINITIAL>"="           return _SYMB_6;
<YYINITIAL>"+"           return _SYMB_7;
<YYINITIAL>"/"           return _SYMB_8;
<YYINITIAL>"$"           return _SYMB_9;
<YYINITIAL>".OR."        return _SYMB_10;
<YYINITIAL>".AND."           return _SYMB_11;
<YYINITIAL>".EQ."        return _SYMB_12;
<YYINITIAL>".NE."        return _SYMB_13;
<YYINITIAL>".LT."        return _SYMB_14;
<YYINITIAL>".GT."        return _SYMB_15;
<YYINITIAL>".LE."        return _SYMB_16;
<YYINITIAL>".GE."        return _SYMB_17;
<YYINITIAL>"**"          return _SYMB_18;
<YYINITIAL>":"           return _SYMB_19;
<YYINITIAL>".TRUE."          return _SYMB_20;
<YYINITIAL>".FALSE."         return _SYMB_21;
<YYINITIAL>".NOT."           return _SYMB_22;
<YYINITIAL>"BYTE"        return _SYMB_23;
<YYINITIAL>"CALL"        return _SYMB_24;
<YYINITIAL>"CHARACTER"           return _SYMB_25;
<YYINITIAL>"CLOSE"           return _SYMB_26;
<YYINITIAL>"COMMON"          return _SYMB_27;
<YYINITIAL>"CONTINUE"        return _SYMB_28;
<YYINITIAL>"DATA"        return _SYMB_29;
<YYINITIAL>"DIMENSION"           return _SYMB_30;
<YYINITIAL>"DO"          return _SYMB_31;
<YYINITIAL>"DOUBLE"          return _SYMB_32;
<YYINITIAL>"END"         return _SYMB_33;
<YYINITIAL>"EQUIVALENCE"         return _SYMB_34;
<YYINITIAL>"FORMAT"          return _SYMB_35;
<YYINITIAL>"FUNCTION"        return _SYMB_36;
<YYINITIAL>"GO"          return _SYMB_37;
<YYINITIAL>"IF"          return _SYMB_38;
<YYINITIAL>"IMPLICIT"        return _SYMB_39;
<YYINITIAL>"INTEGER"         return _SYMB_40;
<YYINITIAL>"LOGICAL"         return _SYMB_41;
<YYINITIAL>"OPEN"        return _SYMB_42;
<YYINITIAL>"PARAMETER"           return _SYMB_43;
<YYINITIAL>"READ"        return _SYMB_44;
<YYINITIAL>"REAL"        return _SYMB_45;
<YYINITIAL>"RETURN"          return _SYMB_46;
<YYINITIAL>"STOP"        return _SYMB_47;
<YYINITIAL>"SUBROUTINE"          return _SYMB_48;
<YYINITIAL>"TO"          return _SYMB_49;
<YYINITIAL>"WRITE"           return _SYMB_50;

<YYINITIAL>"//"[^\n]*\n     ++yy_mylinenumber;   /* BNFC single-line comment */;
<YYINITIAL>\%*{CAPITAL}({CAPITAL}|{DIGIT}|\$|\_)*        yylval.string_ = strdup(yytext); return _SYMB_51;
<YYINITIAL>'.+'          yylval.string_ = strdup(yytext); return _SYMB_52;
<YYINITIAL>{DIGIT}+\.{DIGIT}+((e|E)\-?{DIGIT}+)?(f|F)|{DIGIT}+(e|E)\-?{DIGIT}+(f|F)          yylval.string_ = strdup(yytext); return _SYMB_53;
<YYINITIAL>{DIGIT}+          yylval.int_ = atoi(yytext); return _INTEGER_;
\n ++yy_mylinenumber ;
<YYINITIAL>[ \t\r\n\f]           /* ignore white space. */;
<YYINITIAL>.         return _ERROR_;
%%
void initialize_lexer(FILE *inp) { yyrestart(inp); BEGIN YYINITIAL; }

The parser(Fortran.y.bkp) was worse ... I wish they had some way of converting these symbols to something more human readable:

%start Program
%%
Program : ListLblStm { $$ = make_Progr(reverseListLblStm($1)); YY_RESULT_Program_= $$; }
;
ListLblStm : /* empty */ { $$ = 0;  }
  | ListLblStm LblStm _SYMB_0 { $$ = make_ListLblStm($2, $1);  }
;
LblStm : Labeled_stm { $$ = make_SLabel($1); YY_RESULT_LblStm_= $$; }
  | Simple_stm { $$ = make_SSimple($1); YY_RESULT_LblStm_= $$; }
  | /* empty */ { $$ = make_SNill(); YY_RESULT_LblStm_= $$; }
;
Labeled_stm : _INTEGER_ Simple_stm { $$ = make_SLabelOne($1, $2);  }
;
Simple_stm : _SYMB_39 Type_Spec Type_Qual _SYMB_1 _SYMB_51 _SYMB_2 _SYMB_51 _SYMB_3 { $$ = make_SImplicit($2, $3, $5, $7);  }
  | _SYMB_43 ListNameValue { $$ = make_SParameter($2);  }
  | _SYMB_30 ListNameDim { $$ = make_SDiment($2);  }
  | Type_Spec Type_Qual ListNameDim { $$ = make_SDeclQual($1, $2, $3);  }
  | Type_Spec ListNameDim { $$ = make_SDecl($1, $2);  }
  | _SYMB_29 ListDataSeg { $$ = make_SData($2);  }
  | _SYMB_27 _SYMB_8 _SYMB_51 _SYMB_8 ListName { $$ = make_SCommon($3, $5);  }
  | _SYMB_50 _SYMB_1 ListAssignName _SYMB_3 { $$ = make_SWrtEmp($3);  }
  | _SYMB_50 _SYMB_1 ListAssignName _SYMB_3 ListNameOrArray { $$ = make_SWrite($3, $5);  }
  | _SYMB_35 _SYMB_1 ListFmtSpecs _SYMB_3 { $$ = make_SFormat($3);  }
  | _SYMB_44 _SYMB_1 ListAssignName _SYMB_3 ListNameOrArray { $$ = make_SRead($3, $5);  }
  | _SYMB_44 _SYMB_6 LExp { $$ = make_SAsignRead($3);  }
  | _SYMB_38 _SYMB_1 LExp _SYMB_3 IfThenPart { $$ = make_SIf($3, $5);  }
  | _SYMB_51 _SYMB_6 LExp { $$ = make_SAssign($1, $3);  }
  | _SYMB_51 _SYMB_1 ListLExp _SYMB_3 _SYMB_6 LExp { $$ = make_SAsnArr($1, $3, $6);  }
  | _SYMB_24 _SYMB_51 _SYMB_1 ListSpecLExp _SYMB_3 { $$ = make_SFunCall($2, $4);  }
  | _SYMB_24 _SYMB_51 { $$ = make_SFunCallNil($2);  }
  | _SYMB_37 _SYMB_49 _INTEGER_ { $$ = make_SGoto($3);  }
  | _SYMB_42 _SYMB_1 ListAssignName _SYMB_3 { $$ = make_SOpen($3);  }
  | _SYMB_26 _SYMB_1 ListAssignName _SYMB_3 { $$ = make_SClose($3);  }
  | _SYMB_31 _INTEGER_ DoRangePart { $$ = make_SDo($2, $3);  }
  | _SYMB_47 { $$ = make_SStop();  }
  | _SYMB_47 _SYMB_52 { $$ = make_SStopMsg($2);  }
  | _SYMB_33 { $$ = make_SEnd();  }
  | _SYMB_48 _SYMB_51 _SYMB_1 ListSpecLExp _SYMB_3 { $$ = make_SSubr($2, $4);  }
  | _SYMB_48 _SYMB_51 { $$ = make_SSubrNil($2);  }
  | _SYMB_36 _SYMB_51 _SYMB_1 ListSpecLExp _SYMB_3 { $$ = make_SFunct($2, $4);  }
  | _SYMB_36 _SYMB_51 { $$ = make_SFunctNil($2);  }
  | _SYMB_28 { $$ = make_SContinue();  }
  | _SYMB_46 { $$ = make_SReturn();  }
  | _SYMB_34 _SYMB_1 _SYMB_51 _SYMB_5 NameOrArrRef _SYMB_3 { $$ = make_SEquiv($3, $5);  }
;
Type_Qual : _SYMB_4 _INTEGER_ { $$ = make_QType($2);  }
;
ListNameValue : NameValue { $$ = make_ListNameValue($1, 0);  }
  | NameValue _SYMB_5 ListNameValue { $$ = make_ListNameValue($1, $3);  }
;
NameValue : _SYMB_51 _SYMB_6 _INTEGER_ { $$ = make_NVPair($1, $3);  }
;
ListNameDim : NameDim { $$ = make_ListNameDim($1, 0);  }
  | NameDim _SYMB_5 ListNameDim { $$ = make_ListNameDim($1, $3);  }
;
NameDim : _SYMB_51 _SYMB_1 ListDExp _SYMB_3 { $$ = make_PNameDim($1, $3);  }
  | _SYMB_51 { $$ = make_PNameDim2($1);  }
;
ListDExp : DExp { $$ = make_ListDExp($1, 0);  }
  | DExp _SYMB_5 ListDExp { $$ = make_ListDExp($1, $3);  }
;
DExp : DExp _SYMB_7 DExp1 { $$ = make_EDplus($1, $3);  }
  | DExp _SYMB_2 DExp1 { $$ = make_EDminus($1, $3);  }
  | DExp1 { $$ = $1;  }
;
DExp1 : DExp1 _SYMB_4 DExp2 { $$ = make_EDtimes($1, $3);  }
  | DExp1 _SYMB_8 DExp2 { $$ = make_EDdiv($1, $3);  }
  | DExp2 { $$ = $1;  }
;
DExp2 : _SYMB_1 DExp _SYMB_3 { $$ = $2;  }
  | _INTEGER_ { $$ = make_EDInt($1);  }
  | _SYMB_51 { $$ = make_EDName($1);  }
;
ListDataSeg : DataSeg { $$ = make_ListDataSeg($1, 0);  }
  | DataSeg _SYMB_5 ListDataSeg { $$ = make_ListDataSeg($1, $3);  }
;
DataSeg : ListVars _SYMB_8 ListDataVal _SYMB_8 { $$ = make_PDSeg($1, $3);  }
;
ListVars : Vars { $$ = make_ListVars($1, 0);  }
  | Vars _SYMB_5 ListVars { $$ = make_ListVars($1, $3);  }
;
Vars : _SYMB_51 { $$ = make_PVars($1);  }
;
ListDataVal : DataVal { $$ = make_ListDataVal($1, 0);  }
  | DataVal _SYMB_5 ListDataVal { $$ = make_ListDataVal($1, $3);  }
;
DataVal : _SYMB_7 DataValType { $$ = make_PDValPls($2);  }
  | _SYMB_2 DataValType { $$ = make_PDValNeg($2);  }
  | DataValType { $$ = make_PDValNil($1);  }
;
DataValType : _INTEGER_ { $$ = make_PDVInt($1);  }
  | _SYMB_53 { $$ = make_PDVFloat($1);  }
  | _SYMB_52 { $$ = make_PDVChar($1);  }
;
ListName : _SYMB_51 { $$ = make_ListName($1, 0);  }
  | _SYMB_51 _SYMB_5 ListName { $$ = make_ListName($1, $3);  }
;
ListFmtSpecs : FmtSpecs { $$ = make_ListFmtSpecs($1, 0);  }
  | FmtSpecs _SYMB_5 ListFmtSpecs { $$ = make_ListFmtSpecs($1, $3);  }
;
FmtSpecs : _SYMB_52 { $$ = make_FSString($1);  }
  | _SYMB_51 { $$ = make_FSName($1);  }
  | _SYMB_9 { $$ = make_FSINNL();  }
  | _SYMB_8 { $$ = make_FSSlash();  }
;
ListNameOrArray : NameOrArray { $$ = make_ListNameOrArray($1, 0);  }
  | NameOrArray _SYMB_5 ListNameOrArray { $$ = make_ListNameOrArray($1, $3);  }
;
NameOrArray : _SYMB_51 { $$ = make_PNALName($1);  }
  | _SYMB_1 _SYMB_51 _SYMB_1 ListName _SYMB_3 _SYMB_5 DoRangePart _SYMB_3 { $$ = make_PNALArry($2, $4, $7);  }
;
IfThenPart : _SYMB_37 _SYMB_49 _INTEGER_ { $$ = make_PIfGoto($3);  }
  | _SYMB_51 _SYMB_6 LExp { $$ = make_PIfAsgn($1, $3);  }
  | _SYMB_51 _SYMB_1 ListLExp _SYMB_3 _SYMB_6 LExp { $$ = make_PIFAsnArr($1, $3, $6);  }
  | _SYMB_46 { $$ = make_PIfRetn();  }
  | _SYMB_24 _SYMB_51 _SYMB_1 ListSpecLExp _SYMB_3 { $$ = make_PIfCall($2, $4);  }
  | _SYMB_24 _SYMB_51 { $$ = make_PIfCallNil($2);  }
;
LExp : LExp _SYMB_10 LExp2 { $$ = make_Elor($1, $3);  }
  | LExp _SYMB_11 LExp2 { $$ = make_Eland($1, $3);  }
  | LExp2 { $$ = $1;  }
;
LExp2 : LExp2 _SYMB_12 LExp3 { $$ = make_Eeq($1, $3);  }
  | LExp2 _SYMB_13 LExp3 { $$ = make_Eneq($1, $3);  }
  | LExp3 { $$ = $1;  }
;
LExp3 : LExp3 _SYMB_14 LExp4 { $$ = make_Elthen($1, $3);  }
  | LExp3 _SYMB_15 LExp4 { $$ = make_Egrthen($1, $3);  }
  | LExp3 _SYMB_16 LExp4 { $$ = make_Ele($1, $3);  }
  | LExp3 _SYMB_17 LExp4 { $$ = make_Ege($1, $3);  }
  | LExp4 { $$ = $1;  }
;
LExp4 : LExp4 _SYMB_7 LExp5 { $$ = make_Eplus($1, $3);  }
  | LExp4 _SYMB_2 LExp5 { $$ = make_Eminus($1, $3);  }
  | LExp5 { $$ = $1;  }
;
LExp5 : LExp5 _SYMB_4 LExp6 { $$ = make_Etimes($1, $3);  }
  | LExp5 _SYMB_8 LExp6 { $$ = make_Ediv($1, $3);  }
  | LExp6 { $$ = $1;  }
;
LExp6 : Unary_operator LExp8 { $$ = make_Epreop($1, $2);  }
  | LExp8 { $$ = $1;  }
;
LExp8 : LExp5 _SYMB_18 LExp8 { $$ = make_Epower($1, $3);  }
  | LExp8 _SYMB_1 _SYMB_3 { $$ = make_Efunk($1);  }
  | LExp8 _SYMB_1 ListSpecLExp _SYMB_3 { $$ = make_Efunkpar($1, $3);  }
  | LExp9 { $$ = $1;  }
;
LExp9 : TIntVar RangePart { $$ = make_Evar($1, $2);  }
  | _SYMB_52 { $$ = make_Estr($1);  }
  | LExp10 { $$ = $1;  }
;
RangePart : /* empty */ { $$ = make_ERangeNull();  }
  | _SYMB_19 TIntVar { $$ = make_ERange($2);  }
;
TIntVar : _INTEGER_ { $$ = make_ETInt($1);  }
  | _SYMB_20 { $$ = make_ETTrue();  }
  | _SYMB_21 { $$ = make_ETFalse();  }
  | _SYMB_51 { $$ = make_ETNameVar($1);  }
  | _SYMB_44 { $$ = make_ETRead();  }
;
ListLExp : LExp { $$ = make_ListLExp($1, 0);  }
  | LExp _SYMB_5 ListLExp { $$ = make_ListLExp($1, $3);  }
;
LExp10 : LExp11 { $$ = $1;  }
;
LExp11 : _SYMB_1 LExp _SYMB_3 { $$ = $2;  }
;
Unary_operator : _SYMB_7 { $$ = make_OUnaryPlus();  }
  | _SYMB_2 { $$ = make_OUnaryMinus();  }
  | _SYMB_22 { $$ = make_OUnaryNot();  }
;
ListSpecLExp : SpecLExp { $$ = make_ListSpecLExp($1, 0);  }
  | SpecLExp _SYMB_5 ListSpecLExp { $$ = make_ListSpecLExp($1, $3);  }
;
SpecLExp : LExp { $$ = make_SpLExpNot($1);  }
;
ListAssignName : AssignName { $$ = make_ListAssignName($1, 0);  }
  | AssignName _SYMB_5 ListAssignName { $$ = make_ListAssignName($1, $3);  }
;
AssignName : _SYMB_51 { $$ = make_PAsgnNm($1);  }
  | _INTEGER_ { $$ = make_PAsgnInt($1);  }
  | _SYMB_51 _SYMB_6 LExp { $$ = make_PAssign($1, $3);  }
;
DoRangePart : _SYMB_51 _SYMB_6 LExp _SYMB_5 LExp { $$ = make_PDoRange($1, $3, $5);  }
;
NameOrArrRef : _SYMB_51 { $$ = make_PNOAName($1);  }
  | _SYMB_51 _SYMB_1 ListLExp _SYMB_3 { $$ = make_PNOAArr($1, $3);  }
;
Type_Spec : _SYMB_40 { $$ = make_TInt();  }
  | _SYMB_45 { $$ = make_TFloat();  }
  | _SYMB_32 { $$ = make_TDouble();  }
  | _SYMB_25 { $$ = make_TChar();  }
  | _SYMB_23 { $$ = make_TByte();  }
  | _SYMB_41 { $$ = make_TLogi();  }
;

Fortunately, linux helps with that. I built a little script file that changes these cryptic symbols for something a little more tolerable:

# Clean up the symbols in both the parser and lexor
sed -f symbols Fortran.y &gt;Fortran.yy
cp Fortran.y Fortran.y.bkp
cp Fortran.yy Fortran.y

sed -f symbols Fortran.l &gt;Fortran.ll
cp Fortran.l Fortran.l.bkp
cp Fortran.ll Fortran.l

... and the associated 'symbols' file. NOTE the order of the 'symbols' commands. The _SYMB_1 was changed after the _SYMB_1? symbols otherwise the sed editor would have changed partial symbols:

s/_SYMB_10/T_OR/g
s/_SYMB_11/T_AND/g
s/_SYMB_12/T_EQ/g
s/_SYMB_13/T_NE/g
s/_SYMB_14/T_LT/g
s/_SYMB_15/T_GT/g
s/_SYMB_16/T_LE/g
s/_SYMB_17/T_GE/g
s/_SYMB_18/T_POW/g
s/_SYMB_19/T_COLON/g
s/_SYMB_20/T_TRUE/g
s/_SYMB_21/T_FALSE/g
s/_SYMB_22/T_NOT/g
s/_SYMB_23/T_BYTE/g
s/_SYMB_24/T_CALL/g
s/_SYMB_25/T_CHAR/g
s/_SYMB_26/T_CLOSE/g
s/_SYMB_27/T_COMM/g
s/_SYMB_28/T_CONT/g
s/_SYMB_29/T_DATA/g
s/_SYMB_30/T_DIMS/g
s/_SYMB_31/T_DO/g
s/_SYMB_32/T_DBL/g
s/_SYMB_33/T_END/g
s/_SYMB_34/T_EQU/g
s/_SYMB_35/T_FMT/g
s/_SYMB_36/T_FUNC/g
s/_SYMB_37/T_GO/g
s/_SYMB_38/T_IF/g
s/_SYMB_39/T_IMPL/g
s/_SYMB_40/T_INT/g
s/_SYMB_41/T_LOGI/g
s/_SYMB_42/T_OPEN/g
s/_SYMB_43/T_PARM/g
s/_SYMB_44/T_READ/g
s/_SYMB_45/T_REAL/g
s/_SYMB_46/T_RTN/g
s/_SYMB_47/T_STOP/g
s/_SYMB_48/T_SUBR/g
s/_SYMB_49/T_TO/g
s/_SYMB_50/T_WRITE/g
s/_SYMB_51/T_NAME/g
s/_SYMB_52/T_SQSTR/g
s/_SYMB_53/T_CFLT/g

s/_SYMB_0/T_NEWLINE/g
s/_SYMB_1/T_LPAREN/g
s/_SYMB_2/T_MINUS/g
s/_SYMB_3/T_RPAREN/g
s/_SYMB_4/T_MULT/g
s/_SYMB_5/T_COMMA/g
s/_SYMB_6/T_EQUALS/g
s/_SYMB_7/T_PLUS/g
s/_SYMB_8/T_DIV/g
s/_SYMB_9/T_DOLLAR/g

Makefile:

The Makefile was also a problem (Makefile.old). It is auto generated by BNFC which is great but it had no way to turn on parser debugging which is rather important when creating a frontend from scratch. Bison will often complain about shift/reduce and reduce/reduce errors and you need Bison's output file to debug these. So my next little bit of bash script dealt with that ... it just rewrites the Makefile with the appropriate flags set.

if [ "$flag" == "-d" ]; then
    # ----
    # Modify the Makefile to add debug flags so output is more verbose.
    cp Makefile Makefile.old
    cat Makefile \
        | sed "s/-PFortran$/-PFortran --debug/g" \
        | sed "s/-pFortran$/-pFortran --debug -r all -g/g" \
        &gt; Makefile.new
    cp Makefile.new Makefile

    # Show user the difference in the Makefiles
    echo "--- Makefile ---"
    diff Makefile Makefile.old | sed "s/^/    /g"
fi

This then produces the Parser.output file which we use to debug the BNFC input grammar. I'll explain the process of debugging the grammar when there are reduce/reduce and shift/reduce errors in another post:

Terminals unused in grammar

   _ERROR_


State 177 conflicts: 3 reduce/reduce
State 225 conflicts: 3 reduce/reduce
State 226 conflicts: 3 reduce/reduce
State 227 conflicts: 1 shift/reduce, 3 reduce/reduce


Grammar

    0 $accept: Program $end

    1 Program: ListLblStm

    2 ListLblStm: %empty
    3           | ListLblStm LblStm _SYMB_0

    4 LblStm: Labeled_stm
    5       | Simple_stm
    6       | %empty

    7 Labeled_stm: _INTEGER_ Simple_stm

    8 Simple_stm: _SYMB_39 Type_Spec Type_Qual _SYMB_1 _SYMB_51 _SYMB_2 _SYMB_51 _SYMB_3
    9           | _SYMB_43 ListNameValue
   10           | _SYMB_30 ListNameDim
   11           | Type_Spec Type_Qual ListNameDim
   12           | Type_Spec ListNameDim
   13           | _SYMB_29 ListDataSeg
   14           | _SYMB_27 _SYMB_8 _SYMB_51 _SYMB_8 ListName
   15           | _SYMB_50 _SYMB_1 ListAssignName _SYMB_3
   16           | _SYMB_50 _SYMB_1 ListAssignName _SYMB_3 ListNameOrArray
   17           | _SYMB_35 _SYMB_1 ListFmtSpecs _SYMB_3
   18           | _SYMB_44 _SYMB_1 ListAssignName _SYMB_3 ListNameOrArray
   19           | _SYMB_44 _SYMB_6 LExp
...

Generated Lexer fixups:

The generated lexer had a few changes that were needed because I was trying to make it conform to what I wanted to do instead of letting it do it's thing:

<YYINITIAL>"
"        return T_NEWLINE;
<YYINITIAL>"("           return T_LPAREN;
<YYINITIAL>"-"           return T_MINUS;
<YYINITIAL>")"           return T_RPAREN;
...
<YYINITIAL>"TO"          return T_TO;
<YYINITIAL>"WRITE"           return T_WRITE;

<YYINITIAL>"//"[^\n]*\n     ++yy_mylinenumber;   /* BNFC single-line comment */;
<YYINITIAL>\%*{CAPITAL}({CAPITAL}|{DIGIT}|\$|\_)*        yylval.string_ = strdup(yytext); return T_NAME;
<YYINITIAL>'.+'          yylval.string_ = strdup(yytext); return T_SQSTR;
<YYINITIAL>{DIGIT}+\.{DIGIT}+((e|E)\-?{DIGIT}+)?(f|F)|{DIGIT}+(e|E)\-?{DIGIT}+(f|F)          yylval.string_ = strdup(yytext); return T_CFLT;
<YYINITIAL>{DIGIT}+          yylval.int_ = atoi(yytext); return _INTEGER_;
\n ++yy_mylinenumber ;
<YYINITIAL>[ \t\r\n\f]           /* ignore white space. */;
<YYINITIAL>.         return _ERROR_;
%%
void initialize_lexer(FILE *inp) { yyrestart(inp); BEGIN YYINITIAL; }

First BNFC doesn't know what to do with a newline character as a token. So you see in the first two lines a broken Flex statement. These two lines were replaced with a line derived from the file 'l1':

"\n" { ++yy_mylinenumber; return T_NEWLINE; };

which treats the newline better. The other change is to get rid of the line that counts the line numbers (the ++yy_mylinenumber; line) and remove newlines and form-feeds from the "ignore white space" line. The following bash script segment does this:

# Do the other modifications to Fortran.l to fix the above problem.
cp Fortran.l Fortran.l.old
cat Fortran.l \
    | grep -v "^<YYINITIAL>"$" \
   | grep -v "
^..\ ++yy_mylinenumber.;$" \
   | sed "
s/^"[ \t]*return T_NEWLINE;$/${l1}/g" \
    | sed "s/\\\n\\\f]/]+/g" \
    > Fortran.l.new
cp Fortran.l.new Fortran.l
touch Fortran.y

# Show changes to user.
echo "--- Fortran.l ---"
diff Fortran.l Fortran.l.old | sed "s/^/    /g"

This produces the following lexer:

"\n"       { ++yy_mylinenumber; return T_NEWLINE; };
<YYINITIAL>"("           return T_LPAREN;
<YYINITIAL>"-"           return T_MINUS;
<YYINITIAL>")"           return T_RPAREN;
...
<YYINITIAL>"TO"          return T_TO;
<YYINITIAL>"WRITE"           return T_WRITE;

<YYINITIAL>"//"[^\n]*\n     ++yy_mylinenumber;   /* BNFC single-line comment */;
<YYINITIAL>\%*{CAPITAL}({CAPITAL}|{DIGIT}|\$|\_)*        yylval.string_ = strdup(yytext); return T_NAME;
<YYINITIAL>'.+'          yylval.string_ = strdup(yytext); return T_SQSTR;
<YYINITIAL>{DIGIT}+\.{DIGIT}+((e|E)\-?{DIGIT}+)?(f|F)|{DIGIT}+(e|E)\-?{DIGIT}+(f|F)          yylval.string_ = strdup(yytext); return T_CFLT;
<YYINITIAL>{DIGIT}+          yylval.int_ = atoi(yytext); return _INTEGER_;
<YYINITIAL>[ \t\r]+          /* ignore white space. */;
<YYINITIAL>.         return _ERROR_;
%%

Code Generation:

The generation of all the Flex/Bison and the pretty printer code is the saving grace. BNFC gets you most of the way to producing a front-end for your compiler and makes life a lot easier if, like me, you tend to forget how to write a Flex and Bison driver file in between doing other coding.

However, if you are new to building a compiler then BNFC may just make things worse by adding another level of complexity to an already complex problem. For this reason it might be best for novice compiler writers to ignore BNFC and read a good book on Flex/Bison and maybe a text book on compiler writing.

In my next post I will describe the code that is generated by BNFC.

Here are all the files (complete) that I've talked about above.

Fortran Translator Progress

After typing up my intro page for my idea of a Fortran2C translator and reading some of the documentation I mentioned I was chomping at the bit to try it out to see how far I could get. Well I've had some progress already!

Creating the front-end actually worked out easier than I was expecting. BNFC isn't perfect but, once you get used to it's quirks and adjust for it's shortcomings, it is a real boon to creating a language front-end. Mind you it still requires some fore-knowledge of the workings and use of Flex/Bison (or lex/yacc). What follows is my progress in the creation of a Fortran to C translator. I'll update this list as I get parts of the translator working:

Fortran 2 C Translator

I've had some success with a Basic 2 C translator in the past. In that project I used hand coded Flex and Bison but came at the project from a rather naive perspective. While the project worked and I produced working code from old Basic programs it bogged down when I tried to convert my parse tree into structured code and do some optimizations on it.

Recently I tripped across the BNFC (BNF Converter) which translates a BNF grammer to a lexical analyzer and parser with code to generate a parse tree. Thus I can now scrap my hobbled together lexor/parser and convert it to a much more uniform version. This should be much easier to keep up-to-date and should facilitate improvements such as structuring.

I need to read their book Implementing Programming Languages first. Well, actually, I just need to understand LBNF (Labelled BNF grammar) and its use in BNFC. On the BNFC website they give an example LBNF grammar for C (which I know quite well -- at least compared to Haskell). Hopefully that will detail how to deal with Fortran's position dependent code. This is going to take a while so stay tuned ...

Here's a snippet of code that I want to translate. This is the GALAXY.FOR file from the DECUS Galaxy project. This game is a multi-player game (somewhat of a rarity at the time) which was played on a mainframe computer (Dec-10) .

This file is actually in Ratfor. Ratfor is pre-processor for Fortran66 (and here) which adds (badly needed) structure to the Fortran source code:

C ASCII "0"
      PROGRAM PLAYER
C
C     MAY 1980  BILL CAEL AND BILL WOOD RECODED IN RATFOR
C     MAY 1980  BILL CAEL AND BILL WOOD ADDED DEFAULT SHIP AND DIRECTION
C     MAY 1980          BILL WOOD       ADDED ENERGY NETS
C     MAY 1980  BILL CAEL AND BILL WOOD RECODED OUTPUT
C     MAY 1980  BILL CAEL, BILL WOOD, AND BOB STODOLA
C                                       RECODED COMMAND ARG PROMPTING
C     NOV 1980  BILL WOOD               CONVERTED TO RUN ON VAX
C
C       July/August 1981        Stuart Renes, Western Electric Co.
C       Mixed Case messages, added sound effects
C       Improved Driver controls
C       ^C Intercept AST, Exit handler
C       Play periods (including weekend control)
C       VT100 advanced video features - renamed GALAXY V1.0
C       Lowered Refresh rates in half
C
C       Version 1.1     September 8th, 1981
C       Added purge of type-ahead when destroyed or at games end.
C
C       Version 1.2     September 10th, 1981
C       Added silent play option.
C
C       Version 1.3     September 17th, 1981
C       Added Shared Command Display and control.
C       More code cleanup.
C
C       Version 1.4     September 23rd, 1981
C       Tighten window in which users can beat the "SHARED" feature.
C
C       Version 1.5     September 27th, 1981
C       Fix bugs that creeped in from V1.4.
C
C       Version 1.6     October 2nd, 1981
C       Changed Initialization to force active players off.
C       Slight esthetic change at close of game period.
C       Describe previously un-documented features in HELP file.
C
     TEXT COMMON
      COMMON/TORPE/TLOCS(8, 10, 2), TDIR(8, 10), IT(8)
      COMMON/LEDFOR/ENERGY(8), SHIELD(8), XCORD(8), YCORD(8), TORPS(8),
     *HX, HY, HW, CDRAIN, NDRAIN, TDRAIN, SCAN(8), PHA(8), I1, I2, HYPER
     *(8), ISENT(8, 10), XPOD(8), YPOD(8), DPOD(8), IPOD(8), WPOD(8), XH
     *OM(8, 4), YHOM(8, 4), WHOM(8, 4), NHOM(8), TRBEAM(8), SCORE(8), CR
     *EW(8), DIR(8), WARP(8), LAUNCH(8), UNIV(100, 100), MESSAG(480), TH
     *RU, XSHIP(8), CLOAK(8), NET(8), BHOLE, EMPTY
      REAL LAUNCH, NDRAIN
      INTEGER SCAN, WHOM, CREW, HYPER, TORPS, TRBEAM
      LOGICAL*1THRU, XSHIP, CLOAK, NET
C NOTE: BHOLE AND EMPTY ARE THE CHARACTERS FOR THE BLACK HOLE
C       AND EMPTY SPACE AND ARE SET IN MTREKINI.
      BYTE UNIV, MESSAG, BHOLE, EMPTY
C     END COMMON
        common/local/shrflg(8)
        common/double/share(8),busy(8)
        logical*1 busy
        integer share,shrflg,day
        character*9 today
        character*80 header
        character*4 V
        data V /'V1.6'/
        common/signal/playtm
        integer timcnt
        data timcnt/4/
        logical*1 playtm,wizard,myplay,shpflg,yesshr
      LOGICAL*1OK, DONE, YES, WARN, REFRES, REFTOG
      LOGICAL QUIKUP,supflg
        common done
        common/silent/belflg
        logical*1 belflg
        character*19 paswrd,answer
        character*8 timbf
        byte pasbuf(19)
        data paswrd/'starship enterprise'/
      REAL SC(9), R(9)
      INTEGER DEFSHP, OLDSHP
      REAL DEFDIR, OLDDIR
      COMMON/DEFLTS/DEFSHP, DEFDIR, OLDDIR, OLDSHP, DEFSHD
      LOGICAL CLEARF, VERBOS
      COMMON/MESS/CLEARF, VERBOS
      BYTE BLANK(80), ALPHA, MESBUF(60)
      REAL D1(4)
      BYTE BLUNK(2)
      BYTE BLUNK2(2)
      BYTE OBUFF( - 9: + 9,  - 9: + 9)
      BYTE JUNK, NBUFF
      COMMON/BNDRY/IXX, IYY, ID, MINID8, JUNK, NBUFF( - 9: + 9,  - 9: +
     *9)
      INTEGER COMMND
        common/me/who
      INTEGER WHO
      EQUIVALENCE(BLUNK(2), NBUFF( - 9,  - 9))
      EQUIVALENCE(BLUNK2(2), OBUFF( - 9,  - 9))
      DATA BLANK/80*' '/
      DATA BLUNK, BLUNK2/4*' '/
      DATA SC/9* - 9999./
      DATA R/9* - 9999./
      DATA DONE/.FALSE./
      DATA OK/.FALSE./
        wizard=.false.
      DATA DEFDIR/0.0/, DEFSHD/0.0/
C
C  QUIKUP IS SET TRUE IF TERMINAL SPEED EXCEEDS A THRESHOLD
C  DETERMINED IN GTCHAR.
C  IF QUIKUP IS TRUE, A FULL SCREEN UPDATE OCCURS EVERY 1/2 SECOND;
C  AT SLOWER SPEEDS, 1/2 THE SCREEN IS UPDATED EVERY SECOND ON THE
C  HALF SECOND, THE OTHER 1/2 IS UPDATED EVERY SECOND ON THE SECOND.
C  THIS ALLOWS ENJOYABLE GAMES ON TERMINALS AS SLOW AS 1200 BAUD.
C
C GET QUIKUP, INITIALIZE TERMINAL IO
C
C If supflg is .FALSE. then terminal type is NOT supported and we will
C exit and give him an explaination.
C If MYPLAY is .FALSE. then it must be OFFHOURS and we won't let him in!
C
      CALL GTCHAR(QUIKUP,supflg)
        If (supflg) goto 1000
        write(5,1999)
1999    format(' You obviously didn''t read the instructions
        1 about terminal setup.'
)
        call exit
1000    call daytim(myplay)
        call video('<')
        If (myplay) goto 1998
        call clear
        call outch(0,0)
        write(5,1995)
1995    format(' I''m sorry but the Galaxy is CLOSED now.',//,
        1' Entrance is allowed only to those who can',/,
        1'$demonstrate their worthiness. ')
        call inchar(pasbuf,19,.false.,3,nc,ierr)
        call clear
        call outch(0,0)
        if (ierr .ge. 0) goto 1993
        If (ierr .eq. -2) then
        call video('5')
        write(5,1992)
1992    format(' I''m sorry but you are clearly unworthy!',/
        1,' Come back during regular hours of play.')
        call video('0')
        call exit
        endif
1993    encode(19,9934,answer)pasbuf
9934    format(19a1)
        If (answer .ne. paswrd) goto 1996
        wizard=.true.
        goto 1998
1996    call video('5')
        write(5,1992)
        call video('0')
        call exit
1998    call clear
        call outch(0,0)
        call weekday(day)
        call date(today)
        If (day .eq. 0)header='Welcome to GALAXY '//V//' on Wednesday, '//today
        If (day .eq. 1) header='Welcome to GALAXY '//V//' on Thursday, '//today
        If (day .eq. 2) header='Welcome to GALAXY '//V//' on Friday, '//today
        If (day .eq. 3) header='Welcome to GALAXY '//V//' on Saturday, '//today
        If (day .eq. 4) header='Welcome to GALAXY '//V//' on Sunday, '//today
        If (day .eq. 5) header='Welcome to GALAXY '//V//' on Monday, '//today
        If (day .eq. 6) header='Welcome to GALAXY '//V//' on Tuesday, '//today
        If (day .gt. 6) header='Welcome to GALAXY '//V//', '//today
        ih=index(header,'-')
      WRITE(5, 1001)header(1:ih+6)
1001    format(' ',a)
2000  CONTINUE
        WRITE(5, 1011)
1011    FORMAT('0The following Vessels are available:'/)
        shpflg=.false.
        DO 2030 I = 1, 8
          IF (.NOT.(.NOT.XSHIP(I))) GOTO 2050
        If (busy(i)) goto 2050
            WRITE(5, 1021) I
1021        FORMAT(' Ship ', I1)
        shpflg=.true.
2050      CONTINUE
2030    CONTINUE
2040    CONTINUE
        if (shpflg) goto 1030
        write(5,1032)
1032    format(' I''m sorry but all Ships have launched...',/,
        1' It may be possible to share a Command...')
1030        WRITE(5, 1031)
1031    FORMAT(/'$Pick a Ship from our starfleet: ')
        CALL GETINT(0, IW, OK, 1, 8, 0)
        IF (.NOT.(OK)) GOTO 2070
          IF (.NOT.(IW .EQ. 0)) GOTO 2090
            OK = .FALSE.
            GOTO 2010
2090      CONTINUE
          WHO = IW
        If (.not.(busy(who))) goto 1039
        Write(5,9699)who
9699    format('0Sorry, ship ',i1,' is temporarily disabled!')
        goto 1042
1039          IF (.NOT.(XSHIP(WHO))) GOTO 2110
        If (share(who).eq.-1.or.share(who).eq.1) then
9918            WRITE(5, 1041)who
1041        FORMAT('0Ship ',i1' already has a full crew. Sorry!')
1042    call wait(60,1,m)
        goto 1998
        endif
            WRITE(5, 1051)who
1051        FORMAT('$Ship ',i1,' already has a Commander.',
        1' Do you wish to share this Command? ')
            CALL YESNO(0, OK)
        If ((ok) .and. (share(who) .eq. 1 .or. share(who) .eq. -1)) goto 9918
        If (ok) share(who)=1
2110      CONTINUE
          GOTO 2080
2070     CONTINUE
        call clear
        call outch(0,0)
        write(5,4554)
4554    format(' Those who cannot follow instructions
        1 cannot become Starfleet Commanders!'
)
          CALL EXIT
2080    CONTINUE
2010    IF (.NOT.(OK)) GOTO 1998
2020  CONTINUE
        If (.not.(xship(who))) then
        xship(who)=.true.
                If (share(who) .eq. 0) then
                header='Allow Sharing? '
                call respnd(header,yesshr)
                If (share(who) .eq. 0 .and. (.not.(yesshr))) share(who)=-1
                If (share(who) .eq. 1 .and. (.not.(yesshr))) then
                write(5,901)
901     format(' Oops, someone else has slipped into the cockpit!')
                endif
                else
                yesshr=.true.
                endif
        endif
        header='Silent Game? '
        call respnd(header,belflg)
        call video('h')
      REFRES = .TRUE.
      REFTOG = .FALSE.
      CREW(WHO) = CREW(WHO) + 1
      DEFSHP = WHO
      CALL STRMOV(BLUNK2, 1, 361, OBUFF, 1)
      CALL RBUFF
      IF (.NOT.(.NOT.QUIKUP)) GOTO 2130
C START 1 SECOND TIMER IF SLOW UPDATE
        CALL MARK(3, 30, 1, IDS)
2130  CONTINUE
C START 1/2 SECOND TIMER
      CALL MARK(2, 15, 1, IDS)
2150  CONTINUE
C
C       THE FOLLOWING CALL SPAWNS THE MTREKD UNIVERSE MANAGER TASK ON TT0:.
C       IF YOU CANNOT DO SOMETHING SIMILAR, YOU MUST START MTREKD YOURSELF
C       BEFORE PLAYING EACH GAME.
C
        IF (.NOT.(THRU)) GOTO 2180
C THEN MTREKD ISN'T RUNNING
c          CALL RUNMTR
C
C     PLACE LOCAL SCAN ON TERMINAL
C
2180    CONTINUE
        timcnt=timcnt+1
        if (timcnt .lt. 4) goto 9078
        timcnt=0
        call time(timbf)
        call tpos(12,70)
        encode(8,9009,mesbuf)timbf
9009    format (a8)
        call outch(mesbuf,8)
9078    call video('7')
        REFTOG = (.NOT.REFTOG) .OR. QUIKUP
        IF (.NOT.((XSHIP(WHO) .AND. REFTOG) .OR. REFRES)) GOTO 5555
C DON'T REFRESH IF BLOWN UP!
          CALL STRMOV(BLUNK, 1, 361, NBUFF, 1)
          ID = SCAN(WHO)
          MINID8 = MIN(ID, 8)
          IXX = XCORD(WHO)
          IYY = YCORD(WHO)
          IXLOW = MAX(2, IXX - ID)
          IXHI = MIN(99, IXX + ID)
          IYLOW = MAX(2, IYY - ID)
          IYHI = MIN(99, IYY + MINID8)
          DO 2220 IX1 = IXLOW, IXHI
            IX = IX1 - IXX
            DO 2240 IY1 = IYLOW, IYHI
              IY = IY1 - IYY
              ALPHA = UNIV(IX1, IY1)
              IF (.NOT.((ALPHA .GE. 48+1) .AND. (ALPHA .LE. 48+8) .AND.
     *(CLOAK(ALPHA-48)))) GOTO 2260
                NBUFF(IX, IY) = EMPTY
                GOTO 2270
2260           CONTINUE
                IF (.NOT.(ALPHA .LT. 0)) GOTO 2280
                  NBUFF(IX, IY) = '%'
                  GOTO 2290
2280             CONTINUE
                  NBUFF(IX, IY) = ALPHA
2290            CONTINUE
2270          CONTINUE
2240        CONTINUE
2250        CONTINUE
2220      CONTINUE
2230      CONTINUE
          IF (.NOT.(IXX-ID .LE. 1)) GOTO 2300
            CALL BNDRY(1, 1, MAX(1, IYY - ID), MIN(100, IYY + MINID8))
            GOTO 2310
2300       CONTINUE
            IF (.NOT.(IXX+ID .GE. 100)) GOTO 2320
              CALL BNDRY(100, 100, MAX(1, IYY - ID), MIN(100, IYY + MINI
     *D8))
2320        CONTINUE
2310      CONTINUE
          IF (.NOT.(IYY-ID .LE. 1)) GOTO 2340
            CALL BNDRY(MAX(1, IXX - ID), MIN(99, IXX + ID), 1, 1)
            GOTO 2350
2340       CONTINUE
            IF (.NOT.(IYY+ID .GE. 100)) GOTO 2360
              CALL BNDRY(MAX(1, IXX - ID), MIN(99, IXX + ID), 100, 100)
2360        CONTINUE
2350      CONTINUE
          IF (.NOT.(NBUFF(-ID, -ID) .EQ. EMPTY)) GOTO 2380
            NBUFF( - ID,  - ID) = '.'
2380      CONTINUE
          IF (.NOT.(NBUFF(-ID, MINID8) .EQ. EMPTY)) GOTO 2400
            NBUFF( - ID, MINID8) = '.'
2400      CONTINUE
          IF (.NOT.(NBUFF(ID, MINID8) .EQ. EMPTY)) GOTO 2420
            NBUFF(ID, MINID8) = '.'
2420      CONTINUE
          IF (.NOT.(NBUFF(ID, -ID) .EQ. EMPTY)) GOTO 2440
            NBUFF(ID,  - ID) = '.'
2440      CONTINUE
          DO 2460 IY =  - 9,  + 8
            ICURSX =  - 999
            DO 2480 IX =  - 9,  + 9
              IF (.NOT.(NBUFF(IX, IY) .NE. OBUFF(IX, IY))) GOTO 2500
                IF (.NOT.(ICURSX .GE. IX-2)) GOTO 2520
                  DO 2540 III = ICURSX + 1, IX
                    CALL OUTCH(' ', 1)
                    CALL OUTCH(NBUFF(III, IY), 1)
2540              CONTINUE
2550              CONTINUE
                  GOTO 2530
2520             CONTINUE
                  CALL TPOS(9 - IY, 2*IX + 43)
                  CALL OUTCH(NBUFF(IX, IY), 1)
2530            CONTINUE
                ICURSX = IX
                OBUFF(IX, IY) = NBUFF(IX, IY)
2500          CONTINUE
2480        CONTINUE
2490        CONTINUE
2460      CONTINUE
2470      CONTINUE
          CALL OUTCH(0,  - 1)
5555    call video('0')
C
C     THE FOLLOWING WAIT CONTROLS THE UPDATE RATE, WHICH IS NORMALLY
C     SET TO A HALF SECOND (30 CLOCK TICKS).
C
2200    CONTINUE
        IF (.NOT.(COMMND .NE. '  ' .AND. XSHIP(WHO))) GOTO 2560
          IF (.NOT.(REFTOG)) GOTO 2580
            CALL WAITFR(2, IDS)
            IF (.NOT.(QUIKUP)) GOTO 2600
C RESTART 1/2 SECOND TIMER
              CALL MARK(2, 15, 1, IDS)
2600        CONTINUE
            GOTO 2590
2580       CONTINUE
            CALL WAITFR(3, IDS)
C RESTART 1 SECOND TIMER
            CALL MARK(3, 30, 1, IDS)
C RESTART 1/2 SECOND TIMER
            CALL MARK(2, 15, 1, IDS)
2590      CONTINUE
2560    CONTINUE
        Do 7666 ibb=1,8
        If (share(ibb) .ne. -1) then
                If (shrflg(ibb) .ne. share(ibb)) then
        if (share(ibb) .eq. 1) then
        if (ibb .eq. who) then
        call video('1')
        else
        call video('7')
        endif
                call buffil((ibb+1),63,'Shr''d',5)
        If (ibb .eq. who) then
        call video('0')
        endif
        endif
        if (share(ibb) .eq. 0) then
                call buffil((ibb+1),63,'     ',5)
        endif
        shrflg(ibb)=share(ibb)
        endif
        endif
7666    continue
        If (playtm) goto 2998
        If (wizard) goto 2998
        crew(who)=crew(who)-1
        if (.not.(crew(who) .le. 400)) goto 9979
        xship(who)=.false.
        goto 9969
9979    xship(who)=.true.
9969    call outch(0,0)
        call purge
        call beeper(7)
        call tpos(23,20)
        call video('1')
        call outstr(.false.,'The GALAXY is now closed.  Thanks for playing
        1!'
,.false.)
        call video('0')
        call outch(0,0)
        If (share(who) .eq. 1) share(who)=0
        If (.not.(yesshr)) share(who)=0
        call exit
2998        NC = 1
        COMMND = '0 '
        IF (.NOT.(XSHIP(WHO))) GOTO 2620
          CALL TREAD(COMMND, NC)
C
C     CHECK FOR NO INPUT
C
2620    CONTINUE
        IF (.NOT.(COMMND .EQ. '0 ')) GOTO 2640
C
C     LONG RANGE SCAN COMMAND
C
          GOTO 2650
2640     CONTINUE
          IF (.NOT.(COMMND .EQ. 'L ')) GOTO 2660
            CALL GETINT('Target Ship? ', II, OK, 1, 8, DEFSHP)
            IF (.NOT.(OK)) GOTO 2680
              DEFSHP = II
              IX = XCORD(II)/10.
              IY = YCORD(II)/10.
              CALL TPOS(18, 75)
              ENCODE(5, 1071, MESBUF) IX, IY
1071          FORMAT(I2, ',', I2)
              CALL OUTCH(MESBUF, 5)
              XX = XCORD(WHO)
              YY = YCORD(WHO)
              X1 = XCORD(II)
              IF (.NOT.(X1 .LT. 51.)) GOTO 2700
                X2 = X1 + 100.
                GOTO 2710
2700           CONTINUE
                X2 = X1 - 100.
2710          CONTINUE
              Y1 = YCORD(II)
              IF (.NOT.(Y1 .LT. 51.)) GOTO 2720
                Y2 = Y1 + 100.
                GOTO 2730
2720           CONTINUE
                Y2 = Y1 - 100.
2730          CONTINUE
              D1(1) = ((XX - X1)**2 + (YY - Y1)**2)**.5
              D1(2) = ((XX - X1)**2 + (YY - Y2)**2)**.5
              D1(3) = ((XX - X2)**2 + (YY - Y1)**2)**.5
              D1(4) = ((XX - X2)**2 + (YY - Y2)**2)**.5
              IIT = 1
              DO 2740 J = 2, 4
                IF (.NOT.(D1(J) .LT. D1(IIT))) GOTO 2760
                  IIT = J
2760            CONTINUE
2740          CONTINUE
2750          CONTINUE
              D = D1(IIT)
              IF (.NOT.(IIT .EQ. 1)) GOTO 2780
                YD = Y1
                XD = X1
                GOTO 2790
2780           CONTINUE
                IF (.NOT.(IIT .EQ. 2)) GOTO 2800
                  YD = Y2
                  XD = X1
                  GOTO 2810
2800             CONTINUE
                  IF (.NOT.(IIT .EQ. 3)) GOTO 2820
                    YD = Y1
                    XD = X2
                    GOTO 2830
2820               CONTINUE
                    YD = Y2
                    XD = X2
2830              CONTINUE
2810            CONTINUE
2790          CONTINUE
              EDIS = D
              EDIR = ATAN3((YD - YY), (XD - XX))*57.29577951
              IF (.NOT.(EDIR .LT. 0.)) GOTO 2840
                EDIR = EDIR + 360.
2840          CONTINUE
              IF (.NOT.(EDIR .GT. 90.)) GOTO 2860
                EDIR = (450. - EDIR)/30.
                GOTO 2870
2860           CONTINUE
                EDIR = (90. - EDIR)/30.
2870          CONTINUE
              DEFDIR = EDIR
              CALL TPOS(17, 75)
              ENCODE(5, 1081, MESBUF) EDIS
1081          FORMAT(F5.2)
              CALL OUTCH(MESBUF, 5)
              CALL TPOS(16, 75)
              ENCODE(5, 1091, MESBUF) EDIR
1091          FORMAT(F5.2)
              CALL OUTCH(MESBUF, 5)
              CALL TPOS(15, 79)
              ENCODE(1, 1101, MESBUF) DEFSHP
1101          FORMAT(I1)
              CALL OUTCH(MESBUF, 1)
              CALL OUTCH(0,  - 1)
2680        CONTINUE