Here is a compiler for a little language of plus-expressions over numbers and identifiers, with assignments and while statements.
(* * A little compiler from a trivial imperative language to a small * single-accumulator machine. * * Source Language * --------------- * EXP -> NML | ID | EXP "+" ID * BEX -> EXP "=" ID | "not" BEX * COM -> "skip" * | ID ":=" EXP * | COM ";" COM * | "while" BEX "do" COM "end" *) type num = int type ide = string datatype exp = (* Arithmetic Expressions *) Lit of num | Ident of ide | Plus of exp * ide datatype bex = (* Boolean Expressions *) Eq of exp * ide | Not of bex datatype com = (* Commands *) Skip | Assn of ide * exp | Seq of com list | While of bex * com (* * Target Language * --------------- * The target language is an assembly language for a machine with a * single accumulator and eight simple instructions. *) datatype inst = LDI of num (* Load Immediate *) | LD of ide (* Load *) | ST of ide (* Store *) | NOT (* Not *) | EQ of ide (* Equal *) | ADD of ide (* Add *) | JMP of num (* Jump *) | JZR of num (* Jump if zero *) (* * The Compiler * ------------ * KE e loc the code from compiling expression e at address loc * KB b loc the code from compiling boolexp b at address loc * KC c loc the code from compiling command c at address loc *) fun KE (Lit n) loc = [LDI n] | KE (Ident x) loc = [LD x] | KE (Plus(e,x)) loc = KE e loc @ [ADD x] fun KB (Eq(e,x)) loc = KE e loc @ [EQ x] | KB (Not b) loc = KB b loc @ [NOT] fun KC (Skip) loc = [] | KC (Assn(x,e)) loc = KE e loc @ [ST x] | KC (Seq []) loc = [] | KC (Seq (c::cs)) loc = let val f = KC c loc in f @ KC (Seq cs) (loc + length f) end | KC (While(b,c)) loc = let val f1 = KB b loc in let val f2 = KC c (loc + length f1 + 1) in f1 @ [JZR (loc + length f1 + 1 + length f2 + 1)] @ f2 @ [JMP loc] end end; (* * Pretty-Printing a target program *) fun mkstr (LDI n) = "LDI " ^ makestring n | mkstr (LD x) = "LD " ^ x | mkstr (ST x) = "ST " ^ x | mkstr (NOT) = "NOT " | mkstr (EQ x) = "EQ " ^ x | mkstr (ADD x) = "ADD " ^ x | mkstr (JMP n) = "JMP " ^ makestring n | mkstr (JZR n) = "JZR " ^ makestring n; fun printinst h loc = TextIO.output(TextIO.stdOut, makestring loc ^ "\t" ^ mkstr h ^ "\n"); fun pprint [] loc = () | pprint (h::t) loc = (printinst h loc; pprint t (loc + 1)); (* * An Example Program * ------------------ * x := 5; * skip; * while not 8 = y do * z := (19 + x) + y; * end; * x := z; *) val prog1 = Seq [ Assn("x", Lit 5), Skip, While( Not(Eq(Lit 8, "y")), Assn("z",Plus(Plus(Lit 19, "x"), "y"))), Assn("x", Ident "z")]; (* * Top-level call to invoke the compiler *) fun compile c loc = pprint (KC c loc) loc
Now lets add multiplication and input/output.
(* * A little compiler from a trivial imperative language to a small * single-accumulator machine. The source language has addition * and multiplication operators, assignments, while loops, and a * couple I/O statements. * * Source Language * --------------- * EXP -> NML | ID | EXP "+" ID | EXP "*" ID | "read" * BEX -> EXP "=" ID | "not" BEX * COM -> "skip" * | "write" EXP * | ID ":=" EXP * | COM ";" COM * | "while" BEX "do" COM "end" *) type num = int type ide = string datatype exp = Lit of num | Ident of ide | Plus of exp * ide | Times of exp * ide | Read datatype bex = Eq of exp * ide | Not of bex datatype com = Skip | Write of exp | Assn of ide * exp | Seq of com * com | While of bex * com (* * Target Language * --------------- * The target language is an assembly language for a machine with a * single accumulator and eleven simple instructions. *) datatype inst = LDI of num (* Load Immediate *) | LD of ide (* Load *) | ST of ide (* Store *) | NOT (* Not *) | EQ of ide (* Equal *) | ADD of ide (* Add *) | MUL of ide (* Multiply *) | JMP of num (* Jump *) | JZR of num (* Jump if zero *) | IN (* Input *) | OUT (* Output *) (* * The Compiler * ------------ * KE e loc the code from compiling expression e at address loc * KB b loc the code from compiling boolexp b at address loc * KC c loc the code from compiling command c at address loc *) fun KE (Lit n) loc = [LDI n] | KE (Ident x) loc = [LD x] | KE (Plus(e,x)) loc = KE e loc @ [ADD x] | KE (Times(e,x)) loc = KE e loc @ [MUL x] | KE (Read) loc = [IN] fun KB (Eq(e,x)) loc = KE e loc @ [EQ x] | KB (Not b) loc = KB b loc @ [NOT] fun KC (Skip) loc = [] | KC (Write e) loc = KE e loc @ [OUT] | KC (Assn(x,e)) loc = KE e loc @ [ST x] | KC (Seq(c1,c2)) loc = let val f = KC c1 loc in f @ KC c2 (loc + length f) end | KC (While(b,c)) loc = let val f1 = KB b loc in let val f2 = KC c (loc + length f1 + 1) in f1 @ [JZR (loc + length f1 + 1 + length f2 + 1)] @ f2 @ [JMP loc] end end; (* * Pretty-Printing a target program *) fun mkstr (LDI n) = "LDI " ^ makestring n | mkstr (LD x) = "LD " ^ x | mkstr (ST x) = "ST " ^ x | mkstr (NOT) = "NOT " | mkstr (EQ x) = "EQ " ^ x | mkstr (ADD x) = "ADD " ^ x | mkstr (MUL x) = "MUL " ^ x | mkstr (JMP n) = "JMP " ^ makestring n | mkstr (JZR n) = "JZR " ^ makestring n | mkstr (IN) = "IN " | mkstr (OUT) = "OUT " fun printinst h loc = TextIO.output(TextIO.stdOut, makestring loc ^ "\t" ^ mkstr h ^ "\n"); fun pprint [] loc = () | pprint (h::t) loc = (printinst h loc; pprint t (loc + 1)) (* * An Example Program * ------------------ * i := read; * product := 1; * while not 0 = i do * product := product * i; * i := i - 1 * end; * write product *) val prog1 = Seq ( Assn("i",Read), Seq ( Assn("product",Lit 1), Seq ( While( Not(Eq(Lit 0,"i")), Seq ( Assn("product",Times(Ident "product","i")), Assn("i",Plus(Lit(~1),"i")))), Write(Ident "product")))); (* * Top-level call to invoke the compiler *) fun compile c loc = pprint (KC c loc) loc
This is the same source language as before, but now the target architecture uses relative jumps.
(* * A little compiler from a trivial imperative language to a small * single-accumulator machine. The source language has addition * and multiplication operators, assignments, while loops, and a * couple I/O statements. * * Source Language * --------------- * EXP -> NML | ID | EXP "+" ID | EXP "*" ID | "read" * BEX -> EXP "=" ID | "not" BEX * COM -> "skip" * | "write" EXP * | ID ":=" EXP * | COM ";" COM * | "while" BEX "do" COM "end" *) type num = int type ide = string datatype exp = Lit of num | Ident of ide | Plus of exp * ide | Times of exp * ide | Read datatype bex = Eq of exp * ide | Not of bex datatype com = Skip | Write of exp | Assn of ide * exp | Seq of com * com | While of bex * com (* * Target Language * --------------- * The target language is an assembly language for a machine with a * single accumulator and eleven simple instructions. *) datatype inst = LDI of num (* Load Immediate *) | LD of ide (* Load *) | ST of ide (* Store *) | NOT (* Not *) | EQ of ide (* Equal *) | ADD of ide (* Add *) | MUL of ide (* Multiply *) | JMP of num (* Jump *) | JZR of num (* Jump if zero *) | IN (* Input *) | OUT (* Output *) (* * The Compiler * ------------ * KE e loc the code from compiling expression e at address loc * KB b loc the code from compiling boolexp b at address loc * KC c loc the code from compiling command c at address loc *) fun KE (Lit n) = [LDI n] | KE (Ident x) = [LD x] | KE (Plus(e,x)) = (KE e) @ [ADD x] | KE (Times(e,x)) = (KE e) @ [MUL x] | KE (Read) = [IN] fun KB (Eq(e,x)) = (KE e) @ [EQ x] | KB (Not b) = (KB b) @ [NOT] fun KC (Skip) = [] | KC (Assn(x,e)) = (KE e) @ [ST x] | KC (Write e) = (KE e) @ [OUT] | KC (Seq(c1,c2)) = (KC c1) @ (KC c2) | KC (While(b,c)) = let val test = (KB b) and body = (KC c) in test @ [JZR (length body + 2)] @ body @ [JMP (~(length test + length body + 1))] end (* * Pretty-Printing a target program *) fun mkstr (LDI n) = "LDI " ^ makestring n | mkstr (LD x) = "LD " ^ x | mkstr (ST x) = "ST " ^ x | mkstr (NOT) = "NOT " | mkstr (EQ x) = "EQ " ^ x | mkstr (ADD x) = "ADD " ^ x | mkstr (MUL x) = "MUL " ^ x | mkstr (JMP n) = "JMP " ^ makestring n | mkstr (JZR n) = "JZR " ^ makestring n | mkstr (IN) = "IN " | mkstr (OUT) = "OUT " fun printinst h loc = TextIO.output(TextIO.stdOut, makestring loc ^ "\t" ^ mkstr h ^ "\n"); fun pprint [] loc = () | pprint (h::t) loc = (printinst h loc; pprint t (loc + 1)) (* * An Example Program * ------------------ * i := read; * product := 1; * while not 0 = i do * product := product * i; * i := i - 1 * end; * write product *) val prog1 = Seq ( Assn("i",Read), Seq ( Assn("product",Lit 1), Seq ( While( Not(Eq(Lit 0,"i")), Seq ( Assn("product",Times(Ident "product","i")), Assn("i",Plus(Lit(~1),"i")))), Write(Ident "product")))); (* * Top-level call to invoke the compiler *) fun compile c loc = pprint (KC c) loc
I wrote those Standard ML scripts in the early 1990s, maybe even earlier. For fun, let’s port the first example to Haskell.
-- A little Compiler from a trivial imperative language to a small -- single-accumulator machine. -- Source Language -- -- Exp = numlit | id | Exp "+" id -- Bex = Exp "=" id | "not" Bex -- Com = "skip" -- | id ":=" Exp -- | Com ";" Com -- | "while" Bex "do" Com "end" data Exp -- Arithmetic Expressions can be = Lit Int -- literals | Ident String -- identifiers | Plus Exp String -- binary plus expressions, left associative data Bex -- Boolean Expressions can be = Eq Exp String -- equality comparisons btw exp and identifiers | Not Bex -- unary not applied to boolean expressions data Com -- Commands can be = Skip -- skips (no-ops) | Assn String Exp -- assignment of an arithmetic expr to an id | Seq [Com] -- a sequence (list) of commands | While Bex Com -- a while-command with a boolean condition -- Target Language -- -- The target language is an assembly language for a machine with a -- single accumulator and eight simple instructions. data Inst = LDI Int -- Load Immediate | LD String -- Load | ST String -- Store | NOT -- Not | EQL String -- Equal | ADD String -- Add | JMP Int -- Jump | JZR Int -- Jump if zero deriving (Show) -- The Compiler -- -- ke e loc the code from compiling expression e at address loc -- kb b loc the code from compiling boolexp b at address loc -- kc c loc the code from compiling command c at address loc ke :: Exp -> Int -> [Inst] ke (Lit n) loc = [LDI n] ke (Ident x) loc = [LD x] ke (Plus e x) loc = ke e loc ++ [ADD x] kb :: Bex -> Int -> [Inst] kb (Eq e x) loc = ke e loc ++ [EQL x] kb (Not b) loc = kb b loc ++ [NOT] kc :: Com -> Int -> [Inst] kc (Skip) loc = [] kc (Assn x e) loc = ke e loc ++ [ST x] kc (Seq []) loc = [] kc (Seq (c:cs)) loc = let f = kc c loc in f ++ kc (Seq cs) (loc + length f) kc (While b c) loc = let f1 = kb b loc in let f2 = kc c (loc + length f1 + 1) in f1 ++ [JZR (loc + length f1 + 1 + length f2 + 1)] ++ f2 ++ [JMP loc] -- Pretty-Printing a target program pprint [] loc = putStrLn "" pprint (h:t) loc = printInst h loc >> pprint t (loc + 1) where printInst h loc = putStrLn (show loc ++ "\t" ++ show h) -- An Example Program -- -- x := 5; -- skip; -- while not 8 = y do -- z := (19 + x) + y; -- end; -- x := z; prog1 = Seq [ (Assn "x" (Lit 5)), Skip, (While (Not (Eq (Lit 8) "y")) (Assn "z" (Plus (Plus (Lit 19) "x") "y"))), (Assn "x" (Ident "z"))] -- Top-level call to invoke the Compiler compile c loc = pprint (kc c loc) loc -- Since this is just a prototype, run this file as a script main = compile prog1 0