The Code Generator

Assembler language programs produced by the code generator are represented by ground terms having the following syntax:

t.Code =
     (Seq { t.Code } ) |
     (Instr s.Instr s.Operand) |
     (Label s.Label) |
     (Block s.Value).
s.Operand = s.Label | s.Value.
s.Label = s.Box.
s.Value = s.Int.
$
s.Instr =
     ADD  | SUB  | DIV  | MUL  | LOAD  | STORE |
     ADDC | SUBC | DIVC | MULC | LOADC |
     JUMPEQ | JUMPNE | JUMPLT | JUMPGT | JUMPLE | JUMPGE
     JUMP | READ | WRITE | HALT |

Assembler language programs may contain labels to be replaced with absolute addresses by the assembler. Assembling a program proceeds in two steps. First, the assembler determines the addresses associated with instructions and variables, and puts each address associated with a label into the box referred to by the label. Second, all labels are replaced with the addresses associated with them, i.e. each reference to a box is replaced with the contents of the box.

The module CmpGen has the following implementation: // // File: CmpGen.rf // $use StdIO Class Arithm Box; $use CmpDic; $func EncProgram t.Program s.Dic = t.Code; $func EncSt t.St s.Dic = t.Code; $func EncTest t.Test s.Label s.Dic = t.TestC; $func UnlessOp s.Op = s.JumpIf; $func EncExpr t.Expr s.Dic = t.ExprC; $func EncSubExpr t.Expr sN s.Dic = t.ExprC; $func LiteralOp s.Op = s.OpCode; $func MemoryOp s.Op = s.OpCode; $func Assemble t.Code s.StartAddr = s.FreeAddr; $func AssembleSeq e.CodeSeq s.Addr = s.FreeAddr; $func Dereference t.Code = t.Target; $func DereferenceSeq e.CodeSeq = e.CodeSeqD; $func WriteCodeSeq e.CodeSeq = ; // Generates an assembler language program // from an abstract program. GenCode t.Program = // Creating an empty dictionary. <MakeDic> :: s.Dic, // Generating the abstract program. <EncProgram t.Program s.Dic> :: t.Code, // Allocating memory for the program's instructions. <Assemble t.Code 1> :: s.FreeAddr, // Allocating memory for the program's variables. <AllocateDic s.Dic s.FreeAddr> :: s.EndAddr, // Replacing the labels with their addresses. <Dereference t.Code> :: t.CodeD, // Generating the directive BLOCK. <Sub s.EndAddr s.FreeAddr> :: s.BlockLength, (Seq t.CodeD (Block s.BlockLength)) :: t.Target, = t.Target; // Encodes a program. EncProgram (Program t.St) s.Dic = <EncSt t.St s.Dic> :: t.StC, <Box> :: s.L, = (Seq t.StC (Instr HALT 0) (Label s.L)); // Encodes a statement. EncSt (s.KeyWord e.Info) s.Dic = (s.KeyWord e.Info) : { (Assign sX t.Expr) = <LookupDic sX s.Dic> :: s.Addr, <EncExpr t.Expr s.Dic> :: t.ExprC, = (Seq t.ExprC (Instr STORE s.Addr)); (If t.Test t.Then t.Else) = <Box> :: s.L1, <Box> :: s.L2, <EncTest t.Test s.L1 s.Dic> :: t.TestC, <EncSt t.Then s.Dic> :: t.ThenC, <EncSt t.Else s.Dic> :: t.ElseC, = (Seq t.TestC t.ThenC (Instr JUMP s.L2) (Label s.L1) t.ElseC (Label s.L2) ); (While t.Test t.Do) = <Box> :: s.L1, <Box> :: s.L2, <EncTest t.Test s.L2 s.Dic> :: t.TestC, <EncSt t.Do s.Dic> :: t.DoC, = (Seq (Label s.L1) t.TestC t.DoC (Instr JUMP s.L1) (Label s.L2) ); (Read s.X) = <LookupDic s.X s.Dic> :: s.Addr, = (Instr READ s.Addr); (Write t.Expr) = <EncExpr t.Expr s.Dic> :: t.ExprC, = (Seq t.ExprC (Instr WRITE 0)); (Seq t.St1 t.St2) = <EncSt t.St1 s.Dic> :: t.StC1, <EncSt t.St2 s.Dic> :: t.StC2, = (Seq t.StC1 t.StC2); (Skip) = = (Seq ); }; // Encodes a test. EncTest (Test s.Op t.Arg1 t.Arg2) s.Label s.Dic = <EncExpr (Op Sub t.Arg1 t.Arg2) s.Dic> :: t.ExprC, <UnlessOp s.Op> :: s.JumpIf, = (Seq t.ExprC (Instr s.JumpIf s.Label)); UnlessOp // Generates a jump. { Eq = JUMPNE; Ne = JUMPEQ; Lt = JUMPGE; Gt = JUMPLE; Le = JUMPGT; Ge = JUMPLT; }; // This function compiles an arithmetic expression. // Auxiliary variables are created to keep // the values obtained by evaluating subexpressions. // The evaluation order of the subexpressions is chosen in // such a way as to reduce the number of auxiliary variables. EncExpr t.Expr s.Dic = <EncSubExpr t.Expr 0 s.Dic>; EncSubExpr (s.KeyWord e.Info) sN s.Dic = (s.KeyWord e.Info) : { (Const sC) = = (Instr LOADC sC); (Name sX) = <LookupDic sX s.Dic> :: s.Addr, = (Instr LOAD s.Addr); (Op s.Op t.Expr1 t.Expr2) = t.Expr2 : { (Const sC2) = <EncSubExpr t.Expr1 sN s.Dic> :: t.Expr1C, <LiteralOp s.Op> :: s.OpCode, = (Seq t.Expr1C (Instr s.OpCode sC2)); (Name sX2) = <EncSubExpr t.Expr1 sN s.Dic> :: t.Expr1C, <MemoryOp s.Op> :: s.OpCode, <LookupDic sX2 s.Dic> :: s.Addr, = (Seq t.Expr1C (Instr s.OpCode s.Addr)); (Op e) = <LookupDic sN s.Dic> :: s.Addr, <EncSubExpr t.Expr2 sN s.Dic> :: t.Expr2C, <Add sN 1> :: sN1, <EncSubExpr t.Expr1 sN1 s.Dic> :: t.Expr1C, <MemoryOp s.Op> :: s.OpCode, = (Seq t.Expr2C (Instr STORE s.Addr) t.Expr1C (Instr s.OpCode s.Addr) ); }; }; LiteralOp // Generates the names of { // the instructions with Add = ADDC; Sub = SUBC; // literal operands. Mult = MULTC; Div = DIVC; }; MemoryOp // Generates the names of { // the instructions with Add = ADD; Sub = SUB; // address operands. Mult = MULT; Div = DIV; }; // Allocates memory for the instructions. Assemble t.Code s.A0 = t.Code : { (Seq e.CodeSeq) = = <AssembleSeq e.CodeSeq s.A0>; (Instr s s) = = <Add s.A0 1>; (Label s.Label) = <Store s.Label s.A0> = s.A0; }; AssembleSeq e.CodeSeq s.A0 = e.CodeSeq : { t.Code e.Rest = <Assemble t.Code s.A0> :: s.A1, = <AssembleSeq e.Rest s.A1>; = = s.A0; }; // Replaces the labels with their addresses. Dereference t.Code = t.Code : { (Seq e.CodeSeq) = (Seq <DereferenceSeq e.CodeSeq>); (Instr s.Instr s.Value) = { <IsInt s.Value> = t.Code; //<IsBox s.Value> = (Instr s.Instr <Get s.Value>); }; (Label s.Label) = (Label <Get s.Label>); }; DereferenceSeq { t.Code e.CodeSeq = <Dereference t.Code><DereferenceSeq e.CodeSeq>; = ; }; // Converts the assembler language program to // the character sequence, and outputs it to // the standard output device. WriteCode { (Seq e.CodeSeq) = <WriteCodeSeq e.CodeSeq>; (Instr s.Instr s.Value) = <Print " "><Print s.Instr><Print ","> <Print s.Value><Print ";\n">; (Label s.Label) = <Print s.Label><Print ":\n">; (Block s.Value) = <Print " BLOCK,"><Print s.Value><Print ";\n">; }; WriteCodeSeq { t.Code e.CodeSeq = <WriteCode t.Code><WriteCodeSeq e.CodeSeq>; = ; };