Here is a compiler for a little language of plus-expressions over numbers and identifiers, with assignments and while statements.
(*
* A little compiler from a trivial imperative language to a small
* single-accumulator machine.
*
* Source Language
* ---------------
* EXP -> NML | ID | EXP "+" ID
* BEX -> EXP "=" ID | "not" BEX
* COM -> "skip"
* | ID ":=" EXP
* | COM ";" COM
* | "while" BEX "do" COM "end"
*)
type num = int
type ide = string
datatype exp = (* Arithmetic Expressions *)
Lit of num
| Ident of ide
| Plus of exp * ide
datatype bex = (* Boolean Expressions *)
Eq of exp * ide
| Not of bex
datatype com = (* Commands *)
Skip
| Assn of ide * exp
| Seq of com list
| While of bex * com
(*
* Target Language
* ---------------
* The target language is an assembly language for a machine with a
* single accumulator and eight simple instructions.
*)
datatype inst =
LDI of num (* Load Immediate *)
| LD of ide (* Load *)
| ST of ide (* Store *)
| NOT (* Not *)
| EQ of ide (* Equal *)
| ADD of ide (* Add *)
| JMP of num (* Jump *)
| JZR of num (* Jump if zero *)
(*
* The Compiler
* ------------
* KE e loc the code from compiling expression e at address loc
* KB b loc the code from compiling boolexp b at address loc
* KC c loc the code from compiling command c at address loc
*)
fun KE (Lit n) loc = [LDI n]
| KE (Ident x) loc = [LD x]
| KE (Plus(e,x)) loc = KE e loc @ [ADD x]
fun KB (Eq(e,x)) loc = KE e loc @ [EQ x]
| KB (Not b) loc = KB b loc @ [NOT]
fun KC (Skip) loc = []
| KC (Assn(x,e)) loc = KE e loc @ [ST x]
| KC (Seq []) loc = []
| KC (Seq (c::cs)) loc =
let val f = KC c loc in
f @ KC (Seq cs) (loc + length f)
end
| KC (While(b,c)) loc =
let val f1 = KB b loc in
let val f2 = KC c (loc + length f1 + 1) in
f1 @ [JZR (loc + length f1 + 1 + length f2 + 1)] @ f2 @ [JMP loc]
end
end;
(*
* Pretty-Printing a target program
*)
fun mkstr (LDI n) = "LDI " ^ makestring n
| mkstr (LD x) = "LD " ^ x
| mkstr (ST x) = "ST " ^ x
| mkstr (NOT) = "NOT "
| mkstr (EQ x) = "EQ " ^ x
| mkstr (ADD x) = "ADD " ^ x
| mkstr (JMP n) = "JMP " ^ makestring n
| mkstr (JZR n) = "JZR " ^ makestring n;
fun printinst h loc =
TextIO.output(TextIO.stdOut, makestring loc ^ "\t" ^ mkstr h ^ "\n");
fun pprint [] loc = ()
| pprint (h::t) loc = (printinst h loc; pprint t (loc + 1));
(*
* An Example Program
* ------------------
* x := 5;
* skip;
* while not 8 = y do
* z := (19 + x) + y;
* end;
* x := z;
*)
val prog1 =
Seq [
Assn("x", Lit 5),
Skip,
While(
Not(Eq(Lit 8, "y")),
Assn("z",Plus(Plus(Lit 19, "x"), "y"))),
Assn("x", Ident "z")];
(*
* Top-level call to invoke the compiler
*)
fun compile c loc = pprint (KC c loc) loc
Now lets add multiplication and input/output.
(*
* A little compiler from a trivial imperative language to a small
* single-accumulator machine. The source language has addition
* and multiplication operators, assignments, while loops, and a
* couple I/O statements.
*
* Source Language
* ---------------
* EXP -> NML | ID | EXP "+" ID | EXP "*" ID | "read"
* BEX -> EXP "=" ID | "not" BEX
* COM -> "skip"
* | "write" EXP
* | ID ":=" EXP
* | COM ";" COM
* | "while" BEX "do" COM "end"
*)
type num = int
type ide = string
datatype exp =
Lit of num
| Ident of ide
| Plus of exp * ide
| Times of exp * ide
| Read
datatype bex =
Eq of exp * ide
| Not of bex
datatype com =
Skip
| Write of exp
| Assn of ide * exp
| Seq of com * com
| While of bex * com
(*
* Target Language
* ---------------
* The target language is an assembly language for a machine with a
* single accumulator and eleven simple instructions.
*)
datatype inst =
LDI of num (* Load Immediate *)
| LD of ide (* Load *)
| ST of ide (* Store *)
| NOT (* Not *)
| EQ of ide (* Equal *)
| ADD of ide (* Add *)
| MUL of ide (* Multiply *)
| JMP of num (* Jump *)
| JZR of num (* Jump if zero *)
| IN (* Input *)
| OUT (* Output *)
(*
* The Compiler
* ------------
* KE e loc the code from compiling expression e at address loc
* KB b loc the code from compiling boolexp b at address loc
* KC c loc the code from compiling command c at address loc
*)
fun KE (Lit n) loc = [LDI n]
| KE (Ident x) loc = [LD x]
| KE (Plus(e,x)) loc = KE e loc @ [ADD x]
| KE (Times(e,x)) loc = KE e loc @ [MUL x]
| KE (Read) loc = [IN]
fun KB (Eq(e,x)) loc = KE e loc @ [EQ x]
| KB (Not b) loc = KB b loc @ [NOT]
fun KC (Skip) loc = []
| KC (Write e) loc = KE e loc @ [OUT]
| KC (Assn(x,e)) loc = KE e loc @ [ST x]
| KC (Seq(c1,c2)) loc =
let val f = KC c1 loc in
f @ KC c2 (loc + length f)
end
| KC (While(b,c)) loc =
let val f1 = KB b loc in
let val f2 = KC c (loc + length f1 + 1) in
f1 @ [JZR (loc + length f1 + 1 + length f2 + 1)] @ f2 @ [JMP loc]
end
end;
(*
* Pretty-Printing a target program
*)
fun mkstr (LDI n) = "LDI " ^ makestring n
| mkstr (LD x) = "LD " ^ x
| mkstr (ST x) = "ST " ^ x
| mkstr (NOT) = "NOT "
| mkstr (EQ x) = "EQ " ^ x
| mkstr (ADD x) = "ADD " ^ x
| mkstr (MUL x) = "MUL " ^ x
| mkstr (JMP n) = "JMP " ^ makestring n
| mkstr (JZR n) = "JZR " ^ makestring n
| mkstr (IN) = "IN "
| mkstr (OUT) = "OUT "
fun printinst h loc =
TextIO.output(TextIO.stdOut, makestring loc ^ "\t" ^ mkstr h ^ "\n");
fun pprint [] loc = ()
| pprint (h::t) loc = (printinst h loc; pprint t (loc + 1))
(*
* An Example Program
* ------------------
* i := read;
* product := 1;
* while not 0 = i do
* product := product * i;
* i := i - 1
* end;
* write product
*)
val prog1 =
Seq (
Assn("i",Read),
Seq (
Assn("product",Lit 1),
Seq (
While(
Not(Eq(Lit 0,"i")),
Seq (
Assn("product",Times(Ident "product","i")),
Assn("i",Plus(Lit(~1),"i")))),
Write(Ident "product"))));
(*
* Top-level call to invoke the compiler
*)
fun compile c loc = pprint (KC c loc) loc
This is the same source language as before, but now the target architecture uses relative jumps.
(*
* A little compiler from a trivial imperative language to a small
* single-accumulator machine. The source language has addition
* and multiplication operators, assignments, while loops, and a
* couple I/O statements.
*
* Source Language
* ---------------
* EXP -> NML | ID | EXP "+" ID | EXP "*" ID | "read"
* BEX -> EXP "=" ID | "not" BEX
* COM -> "skip"
* | "write" EXP
* | ID ":=" EXP
* | COM ";" COM
* | "while" BEX "do" COM "end"
*)
type num = int
type ide = string
datatype exp =
Lit of num
| Ident of ide
| Plus of exp * ide
| Times of exp * ide
| Read
datatype bex =
Eq of exp * ide
| Not of bex
datatype com =
Skip
| Write of exp
| Assn of ide * exp
| Seq of com * com
| While of bex * com
(*
* Target Language
* ---------------
* The target language is an assembly language for a machine with a
* single accumulator and eleven simple instructions.
*)
datatype inst =
LDI of num (* Load Immediate *)
| LD of ide (* Load *)
| ST of ide (* Store *)
| NOT (* Not *)
| EQ of ide (* Equal *)
| ADD of ide (* Add *)
| MUL of ide (* Multiply *)
| JMP of num (* Jump *)
| JZR of num (* Jump if zero *)
| IN (* Input *)
| OUT (* Output *)
(*
* The Compiler
* ------------
* KE e loc the code from compiling expression e at address loc
* KB b loc the code from compiling boolexp b at address loc
* KC c loc the code from compiling command c at address loc
*)
fun KE (Lit n) = [LDI n]
| KE (Ident x) = [LD x]
| KE (Plus(e,x)) = (KE e) @ [ADD x]
| KE (Times(e,x)) = (KE e) @ [MUL x]
| KE (Read) = [IN]
fun KB (Eq(e,x)) = (KE e) @ [EQ x]
| KB (Not b) = (KB b) @ [NOT]
fun KC (Skip) = []
| KC (Assn(x,e)) = (KE e) @ [ST x]
| KC (Write e) = (KE e) @ [OUT]
| KC (Seq(c1,c2)) = (KC c1) @ (KC c2)
| KC (While(b,c)) =
let val test = (KB b) and body = (KC c) in
test @ [JZR (length body + 2)] @
body @ [JMP (~(length test + length body + 1))]
end
(*
* Pretty-Printing a target program
*)
fun mkstr (LDI n) = "LDI " ^ makestring n
| mkstr (LD x) = "LD " ^ x
| mkstr (ST x) = "ST " ^ x
| mkstr (NOT) = "NOT "
| mkstr (EQ x) = "EQ " ^ x
| mkstr (ADD x) = "ADD " ^ x
| mkstr (MUL x) = "MUL " ^ x
| mkstr (JMP n) = "JMP " ^ makestring n
| mkstr (JZR n) = "JZR " ^ makestring n
| mkstr (IN) = "IN "
| mkstr (OUT) = "OUT "
fun printinst h loc =
TextIO.output(TextIO.stdOut, makestring loc ^ "\t" ^ mkstr h ^ "\n");
fun pprint [] loc = ()
| pprint (h::t) loc = (printinst h loc; pprint t (loc + 1))
(*
* An Example Program
* ------------------
* i := read;
* product := 1;
* while not 0 = i do
* product := product * i;
* i := i - 1
* end;
* write product
*)
val prog1 =
Seq (
Assn("i",Read),
Seq (
Assn("product",Lit 1),
Seq (
While(
Not(Eq(Lit 0,"i")),
Seq (
Assn("product",Times(Ident "product","i")),
Assn("i",Plus(Lit(~1),"i")))),
Write(Ident "product"))));
(*
* Top-level call to invoke the compiler
*)
fun compile c loc = pprint (KC c) loc
I wrote those Standard ML scripts in the early 1990s, maybe even earlier. For fun, let’s port the first example to Haskell.
-- A little Compiler from a trivial imperative language to a small
-- single-accumulator machine.
-- Source Language
--
-- Exp = numlit | id | Exp "+" id
-- Bex = Exp "=" id | "not" Bex
-- Com = "skip"
-- | id ":=" Exp
-- | Com ";" Com
-- | "while" Bex "do" Com "end"
data Exp -- Arithmetic Expressions can be
= Lit Int -- literals
| Ident String -- identifiers
| Plus Exp String -- binary plus expressions, left associative
data Bex -- Boolean Expressions can be
= Eq Exp String -- equality comparisons btw exp and identifiers
| Not Bex -- unary not applied to boolean expressions
data Com -- Commands can be
= Skip -- skips (no-ops)
| Assn String Exp -- assignment of an arithmetic expr to an id
| Seq [Com] -- a sequence (list) of commands
| While Bex Com -- a while-command with a boolean condition
-- Target Language
--
-- The target language is an assembly language for a machine with a
-- single accumulator and eight simple instructions.
data Inst
= LDI Int -- Load Immediate
| LD String -- Load
| ST String -- Store
| NOT -- Not
| EQL String -- Equal
| ADD String -- Add
| JMP Int -- Jump
| JZR Int -- Jump if zero
deriving (Show)
-- The Compiler
--
-- ke e loc the code from compiling expression e at address loc
-- kb b loc the code from compiling boolexp b at address loc
-- kc c loc the code from compiling command c at address loc
ke :: Exp -> Int -> [Inst]
ke (Lit n) loc = [LDI n]
ke (Ident x) loc = [LD x]
ke (Plus e x) loc = ke e loc ++ [ADD x]
kb :: Bex -> Int -> [Inst]
kb (Eq e x) loc = ke e loc ++ [EQL x]
kb (Not b) loc = kb b loc ++ [NOT]
kc :: Com -> Int -> [Inst]
kc (Skip) loc = []
kc (Assn x e) loc = ke e loc ++ [ST x]
kc (Seq []) loc = []
kc (Seq (c:cs)) loc =
let f = kc c loc in f ++ kc (Seq cs) (loc + length f)
kc (While b c) loc =
let f1 = kb b loc in
let f2 = kc c (loc + length f1 + 1) in
f1 ++ [JZR (loc + length f1 + 1 + length f2 + 1)] ++ f2 ++ [JMP loc]
-- Pretty-Printing a target program
pprint [] loc = putStrLn ""
pprint (h:t) loc = printInst h loc >> pprint t (loc + 1)
where printInst h loc = putStrLn (show loc ++ "\t" ++ show h)
-- An Example Program
--
-- x := 5;
-- skip;
-- while not 8 = y do
-- z := (19 + x) + y;
-- end;
-- x := z;
prog1 =
Seq [
(Assn "x" (Lit 5)),
Skip,
(While
(Not (Eq (Lit 8) "y"))
(Assn "z" (Plus (Plus (Lit 19) "x") "y"))),
(Assn "x" (Ident "z"))]
-- Top-level call to invoke the Compiler
compile c loc = pprint (kc c loc) loc
-- Since this is just a prototype, run this file as a script
main = compile prog1 0