Require Import Coq.Lists.List.
Require Import PL.Imp.
Import ListNotations.
(* ################################################################# *)
(** * Static single assignment form *)
(** SSA form is a special kind of intermediate language used in modern
compilers. The most famous intermediate language in SSA form is LLVM IR, and
the well-known clang compiler uses LLVM as its backend.
SSA means that each variable is the target of exactly one assignment command
in the program text. This is not the case for ordinary programs. For example,
[[
X ::= Y + 1;;
X ::= X + 1
]]
in the program above, the program variable [X] appears as the target of
two assignment commands. We may turn this program into the following form:
[[
X1 ::= Y + 1;;
X2 ::= X1 + 1
]]
Then it is in SSA form since we distinguish the [X] after the first
assignment and the [X] after the second assignment.
Sometimes, you may find this kind of transformation impossible. For example,
[[
If (X == 42)
Then Y ::= 0
Else Y ::= X + 1
EndIf;;
Z ::= Y + 1
]]
it is impossible to rename the program variable [Y] properly to meet our
restriction. Thus, SSA form adds _phi commands_ to represent merging values
of program variables. Here is an example:
[[
If (X == 42)
Then Y1 ::= 0
Else Y2 ::= X + 1
EndIf;;
Y3 ::= PHI(Y1, Y2);;
Z ::= Y3 + 1
]]
The phi command [Y3 ::= PHI(Y1, Y2)] says [Y1] will be assigned to [Y3] if
the control flow comes from the if-then branch, and [Y2] will be assigned to
[Y3] if the control flow comes from the if-else branch.
Since every variable appears as the target of exactly one assignment, this
assignment is also called the variable's _definition_. SSA form was introduced
for efficient data flow analysis and compiler optimization. *)
(* ################################################################# *)
(** * Control flow graph *)
(** Usually, we will represent SSA programs using control flow graphs. In a
control flow graph, vertices are program points and edges are assignments,
jumps, and conditional jumps. We can describe them in Coq as follows. *)
(** A [label] identifies a vertex (program point) of a control flow graph;
    labels are integers. *)
Definition label: Type := Z.
Module CFG.
(** [VCom] is the command attached to a vertex. Every constructor carries the
    label(s) of the vertex's successor(s). *)
Inductive VCom: Type :=
(* Assignment of [a] to [X]; [o1] is the label of the successor vertex. *)
| CAss (X: var) (a: aexp) (o1: label)
(* No-op; [o1] is the label of the successor vertex. *)
| CSkip (o1: label)
(* Conditional jump on [b]. [Imp2CFG.translate0] passes the label of the first
   sub-command (then-branch or loop body, presumably taken when [b] holds) as
   [o1], and the label of the else-branch or loop exit as [o2]. *)
| CCond (b: bexp) (o1 o2: label).
(** A vertex: its own label [vid] and the command [vcom] attached to it. *)
Record vertex := {
vid: label;
vcom: VCom;
}.
(** A control flow graph: the [entry] label, the list of vertices [graph],
    and the [exit] label. *)
Record com: Type := {
entry: label;
graph: list vertex;
exit: label
}.
End CFG.
(** Sometimes, we would prefer to represent the graph in a more compact form. In
other words,
- [CSkip] is meaningless and thus should be removed;
- Consecutive assignments should be combined together in one edge.
We can define it in Coq as follows. *)
Module CFGBlock.
(** [VCom] is the command attached to a vertex of the compact graph. Each
    constructor carries a list [asg] of (variable, expression) pairs, i.e. the
    consecutive assignments that have been combined into this vertex. *)
Inductive VCom: Type :=
(* The assignments in [asg] followed by a jump to label [o]. *)
| CJump (asg: list (var * aexp)) (o: label)
(* The assignments in [asg] followed by a conditional jump on [b] with targets
   [o1] and [o2]. NOTE(review): presumably [o1] is taken when [b] holds, by
   analogy with [CFG.CCond] -- confirm against the semantics. *)
| CCond (asg: list (var * aexp)) (b: bexp) (o1 o2: label).
(** A vertex: its own label [vid] and the command [vcom] attached to it. *)
Record vertex := {
vid: label;
vcom: VCom;
}.
(** A compact control flow graph: the [entry] label, the list of vertices
    [graph], and the [exit] label. *)
Record com: Type := {
entry: label;
graph: list vertex;
exit: label
}.
End CFGBlock.
(* ################################################################# *)
(** * Constructing control flow graphs *)
(** We can easily build a CFG based on the program's syntax tree. We may
accomplish that either by a recursive construction or by a traversal of the
syntax tree. We define it using Coq's recursive functions. *)
Module Imp2CFG.

(** Imp commands in which every node of the syntax tree carries a label [l]. *)
Inductive labeled_com: Type :=
  | CSkip (l: label)
  | CAss (X: var) (a: aexp) (l: label)
  | CSeq (c1 c2 : labeled_com) (l: label)
  | CIf (b : bexp) (c1 c2 : labeled_com) (l: label)
  | CWhile (b : bexp) (c : labeled_com) (l: label).

(** [add_label c entry_label] decorates [c] with labels, giving [entry_label]
    to the root of [c]. It returns the labeled command paired with the first
    label that was not handed out, so the caller can continue numbering from
    there. *)
Fixpoint add_label (c: Imp.com) (entry_label: label): labeled_com * Z :=
  match c with
  | Imp.CSkip =>
      (CSkip entry_label, entry_label + 1)
  | Imp.CAss X a =>
      (CAss X a entry_label, entry_label + 1)
  | Imp.CSeq c1 c2 =>
      (* A sequence produces no vertex of its own (see [translate0]), so it
         simply shares [entry_label] with its first sub-command. *)
      let '(lc1, next1) := add_label c1 entry_label in
      let '(lc2, next2) := add_label c2 next1 in
      (CSeq lc1 lc2 entry_label, next2)
  | Imp.CIf b c1 c2 =>
      (* [entry_label] is consumed by the test; the branches are numbered
         after it, first [c1] and then [c2]. *)
      let '(lc1, next1) := add_label c1 (entry_label + 1) in
      let '(lc2, next2) := add_label c2 next1 in
      (CIf b lc1 lc2 entry_label, next2)
  | Imp.CWhile b c1 =>
      (* [entry_label] is consumed by the loop test; the body follows. *)
      let '(lbody, next1) := add_label c1 (entry_label + 1) in
      (CWhile b lbody entry_label, next1)
  end.

(** The label stored at the root of a labeled command. *)
Definition get_label (c: labeled_com): label :=
  match c with
  | CSkip l => l
  | CAss _ _ l => l
  | CSeq _ _ l => l
  | CIf _ _ _ l => l
  | CWhile _ _ l => l
  end.

(** [translate0 c exit_label] lists the CFG vertices generated by [c], where
    [exit_label] is the label control flows to once [c] has finished. *)
Fixpoint translate0 (c: labeled_com) (exit_label: Z): list CFG.vertex :=
  match c with
  | CSkip l =>
      [{| CFG.vid := l; CFG.vcom := CFG.CSkip exit_label |}]
  | CAss X a l =>
      [{| CFG.vid := l; CFG.vcom := CFG.CAss X a exit_label |}]
  | CSeq c1 c2 _ =>
      (* The first part exits into the entry of the second part. *)
      translate0 c1 (get_label c2) ++ translate0 c2 exit_label
  | CIf b c1 c2 l =>
      (* One test vertex; both branches leave through [exit_label]. *)
      [{| CFG.vid := l;
          CFG.vcom := CFG.CCond b (get_label c1) (get_label c2) |}]
      ++ translate0 c1 exit_label ++ translate0 c2 exit_label
  | CWhile b c1 l =>
      (* One test vertex; the body jumps back to the test at [l]. *)
      [{| CFG.vid := l;
          CFG.vcom := CFG.CCond b (get_label c1) exit_label |}]
      ++ translate0 c1 l
  end.

(** [translate c] builds the control flow graph of [c]. Labels are numbered
    from [0], which is also the entry; the exit is the first label that
    [add_label] left unused. *)
Definition translate (c: Imp.com): CFG.com :=
  let '(labeled, exit_label) := add_label c 0 in
  {| CFG.entry := 0;
     CFG.graph := translate0 labeled exit_label;
     CFG.exit := exit_label |}.

End Imp2CFG.
(** We could also define constructions of compact control flow graphs. We leave
them as additional readings. *)
(* ################################################################# *)
(** * SSA construction, inserting PHI commands *)
(** The main idea of PHI command insertion is to find join nodes of assignments.
Specifically,
- In a CFG, basic block [n1] _dominates_ basic block [n2] if every path in
the CFG from the entry point to [n2] includes [n1]. By convention, every
basic block in a CFG dominates itself.
- Basic block [n1] strictly dominates [n2] if [n1] dominates [n2] and
[n1 <> n2].
- The _dominance frontier_ of a node [n], [DF(n)], is the border of the CFG
region that is dominated by [n]. More formally, the set of nodes [DF(n)]
contains all nodes [x] such that [n] dominates a predecessor of [x] but
[n] does not strictly dominate [x].
- We lift the definition of [DF] to CFG node sets, i.e. [DF(n1, n2, ...)] is
defined as the union of [DF(n1)], [DF(n2)], ...
- Let [omega_DF(S)] (iterated dominance frontier) be the limit of the
following sets of nodes:
[[
iter_DF(0, S) = empty-set
iter_DF(1, S) = DF(S)
iter_DF(n + 1, S) = DF(the union of S and iter_DF(n, S))
]]
We should add PHI nodes for variable [X] on [omega_DF(defs(X))], where
[defs(X)] is the set of nodes that contain definitions of [X]. *)
(* 2021-06-02 08:46 *)