Code Generator Prototypes

It can be useful to use really high-level languages, especially ones from the ML family to quickly write code generators for little languages that directly translate abstract syntax trees into VM or assembly language lists.

A First Example

Here is a compiler for a little language of plus-expressions over numbers and identifiers, with assignments and while statements.

compiler1.sml
(*
 *  A little compiler from a trivial imperative language to a small
 *  single-accumulator machine.
 *
 *  Source Language
 *  ---------------
 *  EXP  ->  NML  |  ID  |  EXP  "+"  ID
 *  BEX  ->  EXP  "="  ID  |  "not"  BEX
 *  COM  ->  "skip"
 *       |   ID  ":="  EXP
 *       |   COM  ";"  COM
 *       |   "while"  BEX  "do"  COM  "end"
 *)

type num = int
type ide = string

datatype exp =               (* Arithmetic Expressions *)
  Lit of num
| Ident of ide
| Plus of exp * ide

datatype bex =               (* Boolean Expressions *)
  Eq of exp * ide
| Not of bex

datatype com =               (* Commands *)
  Skip
| Assn of ide * exp
| Seq of com list
| While of bex * com

(*
 *  Target Language
 *  ---------------
 *  The target language is an assembly language for a machine with a
 *  single accumulator and eight simple instructions.
 *)

datatype inst =
  LDI of num                 (* Load Immediate *)
| LD of ide                  (* Load *)
| ST of ide                  (* Store *)
| NOT                        (* Not *)
| EQ of ide                  (* Equal *)
| ADD of ide                 (* Add *)
| JMP of num                 (* Jump *)
| JZR of num                 (* Jump if zero *)

(*
 *  The Compiler
 *  ------------
 *  KE e loc   the code from compiling expression e at address loc
 *  KB b loc   the code from compiling boolexp b at address loc
 *  KC c loc   the code from compiling command c at address loc
 *)

fun KE (Lit n) loc = [LDI n]
  | KE (Ident x) loc = [LD x]
  | KE (Plus(e,x)) loc = KE e loc @ [ADD x]

fun KB (Eq(e,x)) loc = KE e loc @ [EQ x]
  | KB (Not b) loc = KB b loc @ [NOT]

fun KC (Skip) loc = []
  | KC (Assn(x,e)) loc = KE e loc @ [ST x]
  | KC (Seq []) loc = []
  | KC (Seq (c::cs)) loc =
      let val f = KC c loc in
        f @ KC (Seq cs) (loc + length f)
      end
  | KC (While(b,c)) loc =
      let val f1 = KB b loc in
        let val f2 = KC c (loc + length f1 + 1) in
          f1 @ [JZR (loc + length f1 + 1 + length f2 + 1)] @ f2 @ [JMP loc]
        end
      end;

(*
 *  Pretty-Printing a target program
 *)

fun mkstr (LDI n) = "LDI  " ^ makestring n
  | mkstr (LD x)  = "LD   " ^ x
  | mkstr (ST x)  = "ST   " ^ x
  | mkstr (NOT)   = "NOT  "
  | mkstr (EQ x)  = "EQ   " ^ x
  | mkstr (ADD x) = "ADD  " ^ x
  | mkstr (JMP n) = "JMP  " ^ makestring n
  | mkstr (JZR n) = "JZR  " ^ makestring n;

fun printinst h loc =
  TextIO.output(TextIO.stdOut, makestring loc ^ "\t" ^ mkstr h ^ "\n");

fun pprint [] loc = ()
  | pprint (h::t) loc = (printinst h loc; pprint t (loc + 1));

(*
 *  An Example Program
 *  ------------------
 *  x := 5;
 *  skip;
 *  while not 8 = y do
 *    z := (19 + x) + y;
 *  end;
 *  x := z;
 *)

val prog1 =
  Seq [
    Assn("x", Lit 5),
    Skip,
    While(
      Not(Eq(Lit 8, "y")),
      Assn("z",Plus(Plus(Lit 19, "x"), "y"))),
    Assn("x", Ident "z")];

(*
 * Top-level call to invoke the compiler
 *)

fun compile c loc = pprint (KC c loc) loc

A Second Example

Now lets add multiplication and input/output.

compiler2.sml
(*
 * A little compiler from a trivial imperative language to a small
 * single-accumulator machine.  The source language has addition
 * and multiplication operators, assignments, while loops, and a
 * couple I/O statements.
 *
 * Source Language
 * ---------------
 * EXP  ->  NML  |  ID  |  EXP  "+"  ID  |  EXP  "*"  ID  |  "read"
 * BEX  ->  EXP  "="  ID  |  "not"  BEX
 * COM  ->  "skip"
 *      |   "write"  EXP
 *      |   ID  ":="  EXP
 *      |   COM  ";"  COM
 *      |   "while"  BEX  "do"  COM  "end"
 *)

type num = int
type ide = string

datatype exp =
  Lit of num
| Ident of ide
| Plus of exp * ide
| Times of exp * ide
| Read

datatype bex =
  Eq of exp * ide
| Not of bex

datatype com =
  Skip
| Write of exp
| Assn of ide * exp
| Seq of com * com
| While of bex * com

(*
 * Target Language
 * ---------------
 * The target language is an assembly language for a machine with a
 * single accumulator and eleven simple instructions.
 *)

datatype inst =
  LDI of num                 (* Load Immediate *)
| LD of ide                  (* Load *)
| ST of ide                  (* Store *)
| NOT                        (* Not *)
| EQ of ide                  (* Equal *)
| ADD of ide                 (* Add *)
| MUL of ide                 (* Multiply *)
| JMP of num                 (* Jump *)
| JZR of num                 (* Jump if zero *)
| IN                         (* Input *)
| OUT                        (* Output *)

(*
 * The Compiler
 * ------------
 * KE e loc   the code from compiling expression e at address loc
 * KB b loc   the code from compiling boolexp b at address loc
 * KC c loc   the code from compiling command c at address loc
 *)

fun KE (Lit n) loc = [LDI n]
  | KE (Ident x) loc = [LD x]
  | KE (Plus(e,x)) loc = KE e loc @ [ADD x]
  | KE (Times(e,x)) loc = KE e loc @ [MUL x]
  | KE (Read) loc = [IN]

fun KB (Eq(e,x)) loc = KE e loc @ [EQ x]
  | KB (Not b) loc = KB b loc @ [NOT]

fun KC (Skip) loc = []
  | KC (Write e) loc = KE e loc @ [OUT]
  | KC (Assn(x,e)) loc = KE e loc @ [ST x]
  | KC (Seq(c1,c2)) loc =
      let val f = KC c1 loc in
        f @ KC c2 (loc + length f)
      end
  | KC (While(b,c)) loc =
      let val f1 = KB b loc in
        let val f2 = KC c (loc + length f1 + 1) in
          f1 @ [JZR (loc + length f1 + 1 + length f2 + 1)] @ f2 @ [JMP loc]
        end
      end;

(*
 * Pretty-Printing a target program
 *)

fun mkstr (LDI n) = "LDI  " ^ makestring n
  | mkstr (LD x)  = "LD   " ^ x
  | mkstr (ST x)  = "ST   " ^ x
  | mkstr (NOT)   = "NOT  "
  | mkstr (EQ x)  = "EQ   " ^ x
  | mkstr (ADD x) = "ADD  " ^ x
  | mkstr (MUL x) = "MUL  " ^ x
  | mkstr (JMP n) = "JMP  " ^ makestring n
  | mkstr (JZR n) = "JZR  " ^ makestring n
  | mkstr (IN)    = "IN   "
  | mkstr (OUT)   = "OUT  "

fun printinst h loc =
  TextIO.output(TextIO.stdOut, makestring loc ^ "\t" ^ mkstr h ^ "\n");

fun pprint [] loc = ()
  | pprint (h::t) loc = (printinst h loc; pprint t (loc + 1))

(*
 * An Example Program
 * ------------------
 * i := read;
 * product := 1;
 * while not 0 = i do
 *   product := product * i;
 *   i := i - 1
 * end;
 * write product
 *)

val prog1 =
  Seq (
    Assn("i",Read),
    Seq (
      Assn("product",Lit 1),
      Seq (
        While(
          Not(Eq(Lit 0,"i")),
          Seq (
  	    Assn("product",Times(Ident "product","i")),
            Assn("i",Plus(Lit(~1),"i")))),
        Write(Ident "product"))));

(*
 * Top-level call to invoke the compiler
 *)

fun compile c loc = pprint (KC c loc) loc

A Third Example

This is the same source language as before, but now the target architecture uses relative jumps.

compiler3.sml
(*
 * A little compiler from a trivial imperative language to a small
 * single-accumulator machine.  The source language has addition
 * and multiplication operators, assignments, while loops, and a
 * couple I/O statements.
 *
 * Source Language
 * ---------------
 * EXP  ->  NML  |  ID  |  EXP  "+"  ID  |  EXP  "*"  ID  |  "read"
 * BEX  ->  EXP  "="  ID  |  "not"  BEX
 * COM  ->  "skip"
 *      |   "write"  EXP
 *      |   ID  ":="  EXP
 *      |   COM  ";"  COM
 *      |   "while"  BEX  "do"  COM  "end"
 *)

type num = int
type ide = string

datatype exp =
  Lit of num
| Ident of ide
| Plus of exp * ide
| Times of exp * ide
| Read

datatype bex =
  Eq of exp * ide
| Not of bex

datatype com =
  Skip
| Write of exp
| Assn of ide * exp
| Seq of com * com
| While of bex * com

(*
 * Target Language
 * ---------------
 * The target language is an assembly language for a machine with a
 * single accumulator and eleven simple instructions.
 *)

datatype inst =
  LDI of num                 (* Load Immediate *)
| LD of ide                  (* Load *)
| ST of ide                  (* Store *)
| NOT                        (* Not *)
| EQ of ide                  (* Equal *)
| ADD of ide                 (* Add *)
| MUL of ide                 (* Multiply *)
| JMP of num                 (* Jump *)
| JZR of num                 (* Jump if zero *)
| IN                         (* Input *)
| OUT                        (* Output *)

(*
 * The Compiler
 * ------------
 * KE e loc   the code from compiling expression e at address loc
 * KB b loc   the code from compiling boolexp b at address loc
 * KC c loc   the code from compiling command c at address loc
 *)

fun KE (Lit n) = [LDI n]
  | KE (Ident x) = [LD x]
  | KE (Plus(e,x)) = (KE e) @ [ADD x]
  | KE (Times(e,x)) = (KE e) @ [MUL x]
  | KE (Read) = [IN]

fun KB (Eq(e,x)) = (KE e) @ [EQ x]
  | KB (Not b) = (KB b) @ [NOT]

fun KC (Skip) = []
  | KC (Assn(x,e)) = (KE e) @ [ST x]
  | KC (Write e) = (KE e) @ [OUT]
  | KC (Seq(c1,c2)) = (KC c1) @ (KC c2)
  | KC (While(b,c)) =
      let val test = (KB b) and body = (KC c) in
	test @ [JZR (length body + 2)] @
	body @ [JMP (~(length test + length body + 1))]
      end

(*
 * Pretty-Printing a target program
 *)

fun mkstr (LDI n) = "LDI  " ^ makestring n
  | mkstr (LD x)  = "LD   " ^ x
  | mkstr (ST x)  = "ST   " ^ x
  | mkstr (NOT)   = "NOT  "
  | mkstr (EQ x)  = "EQ   " ^ x
  | mkstr (ADD x) = "ADD  " ^ x
  | mkstr (MUL x) = "MUL  " ^ x
  | mkstr (JMP n) = "JMP  " ^ makestring n
  | mkstr (JZR n) = "JZR  " ^ makestring n
  | mkstr (IN)    = "IN   "
  | mkstr (OUT)   = "OUT  "

fun printinst h loc =
  TextIO.output(TextIO.stdOut, makestring loc ^ "\t" ^ mkstr h ^ "\n");

fun pprint [] loc = ()
  | pprint (h::t) loc = (printinst h loc; pprint t (loc + 1))

(*
 * An Example Program
 * ------------------
 * i := read;
 * product := 1;
 * while not 0 = i do
 *   product := product * i;
 *   i := i - 1
 * end;
 * write product
 *)

val prog1 =
  Seq (
    Assn("i",Read),
    Seq (
      Assn("product",Lit 1),
      Seq (
        While(
          Not(Eq(Lit 0,"i")),
          Seq (
  	    Assn("product",Times(Ident "product","i")),
            Assn("i",Plus(Lit(~1),"i")))),
        Write(Ident "product"))));

(*
 * Top-level call to invoke the compiler
 *)

fun compile c loc = pprint (KC c) loc

Let’s Try Haskell

I wrote those Standard ML scripts in the early 1990s, maybe even earlier. For fun, let’s port the first example to Haskell.

compiler1.hs
--  A little Compiler from a trivial imperative language to a small
--  single-accumulator machine.

--  Source Language
--
--  Exp  =  numlit  |  id  |  Exp  "+"  id
--  Bex  =  Exp  "="  id  |  "not"  Bex
--  Com  =  "skip"
--       |   id  ":="  Exp
--       |   Com  ";"  Com
--       |   "while"  Bex  "do"  Com  "end"

data Exp                 -- Arithmetic Expressions can be
    = Lit Int            --   literals
    | Ident String       --   identifiers
    | Plus Exp String    --   binary plus expressions, left associative

data Bex                 -- Boolean Expressions can be
    = Eq Exp String      --   equality comparisons btw exp and identifiers
    | Not Bex            --   unary not applied to boolean expressions

data Com                 -- Commands can be
    = Skip               --   skips (no-ops)
    | Assn String Exp    --   assignment of an arithmetic expr to an id
    | Seq [Com]          --   a sequence (list) of commands
    | While Bex Com      --   a while-command with a boolean condition

--  Target Language
--
--  The target language is an assembly language for a machine with a
--  single accumulator and eight simple instructions.

data Inst
    = LDI Int            -- Load Immediate
    | LD String          -- Load
    | ST String          -- Store
    | NOT                -- Not
    | EQL String         -- Equal
    | ADD String         -- Add
    | JMP Int            -- Jump
    | JZR Int            -- Jump if zero
    deriving (Show)

--  The Compiler
--
--  ke e loc   the code from compiling expression e at address loc
--  kb b loc   the code from compiling boolexp b at address loc
--  kc c loc   the code from compiling command c at address loc

ke :: Exp -> Int -> [Inst]
ke (Lit n) loc = [LDI n]
ke (Ident x) loc = [LD x]
ke (Plus e x) loc = ke e loc ++ [ADD x]

kb :: Bex -> Int -> [Inst]
kb (Eq e x) loc = ke e loc ++ [EQL x]
kb (Not b) loc = kb b loc ++ [NOT]

kc :: Com -> Int -> [Inst]
kc (Skip) loc = []
kc (Assn x e) loc = ke e loc ++ [ST x]
kc (Seq []) loc = []
kc (Seq (c:cs)) loc =
    let f = kc c loc in f ++ kc (Seq cs) (loc + length f)
kc (While b c) loc =
    let f1 = kb b loc in
        let f2 = kc c (loc + length f1 + 1) in
            f1 ++ [JZR (loc + length f1 + 1 + length f2 + 1)] ++ f2 ++ [JMP loc]

--  Pretty-Printing a target program

pprint [] loc = putStrLn ""
pprint (h:t) loc = printInst h loc >> pprint t (loc + 1)
    where printInst h loc = putStrLn (show loc ++ "\t" ++ show h)

--  An Example Program
--
--  x := 5;
--  skip;
--  while not 8 = y do
--    z := (19 + x) + y;
--  end;
--  x := z;

prog1 =
  Seq [
    (Assn "x" (Lit 5)),
    Skip,
    (While
      (Not (Eq (Lit 8) "y"))
      (Assn "z" (Plus (Plus (Lit 19) "x") "y"))),
    (Assn "x" (Ident "z"))]

-- Top-level call to invoke the Compiler
compile c loc = pprint (kc c loc) loc

-- Since this is just a prototype, run this file as a script
main = compile prog1 0
Exercise: Port the other two examples to Haskell.
Exercise: Port all three examples to some other language of your choice. Perhaps OCaml or F#. In fact, try a non-ML language like Python!
Exercise: Enhance the sample programming language and extend any of the compiler prototypes (in any language) accordingly.