LLVM Code Generation

Felix Dietze & Richard Musiol

2014-07-21

  • allow questions
1

Agenda

  • What is LLVM?
  • LLVM by Simple Example
  • Instruction Selection
  • Scheduling
  • Conclusion
2

LLVM

  • Started 2000 at University of Illinois
  • Low Level Virtual Machine
  • Compiler framework
  • Multiple frontends & backends
  • Many LLVM based compilers
  • Clang replaced GCC on current OS X
3

see handout

4

LLVM by Simple Example

5
int add(int x, int y) {
  return x + y;
}
6

Unoptimized LLVM code

define i64 @add(i64 %x, i64 %y) #0 {
  %x.addr = alloca i64, align 4
  %y.addr = alloca i64, align 4
  store i64 %x, i64* %x.addr, align 4
  store i64 %y, i64* %y.addr, align 4
  %0 = load i64* %x.addr, align 4
  %1 = load i64* %y.addr, align 4
  %add = add nsw i64 %0, %1
  ret i64 %add
}
7

Optimized LLVM code

define i64 @add(i64 %x, i64 %y) #0 {
  %add = add nsw i64 %y, %x
  ret i64 %add
}
  • more than 50 passes
8
9
10

Result of scheduling

Arguments: %RDI, %RSI, %RDX, %RCX, %R8, %R9
Return values: %RAX, %RDX
%vreg1 = COPY %RSI
%vreg2 = COPY %RDI
%vreg2 = ADD64rr %vreg2, %vreg1
%RAX = COPY %vreg2
RET %RAX
11

Register allocation

%vreg1%RSI
%vreg2%RDI

%RDI = ADD64rr %RDI, %RSI
%RAX = COPY %RDI
RET %RAX
12

Assembly

_add:
  addq  %rsi, %rdi
  movq  %rdi, %rax
  ret
13

One more addition?

long add(long x, long y) {
  return x + y + 42;
}
14

Surprise!

_add:
  leaq  42(%rdi,%rsi), %rax
  ret
15

Instruction Selection

16
void test() {
  volatile int x;
  volatile long y;
  volatile long z;

  x = 42;             // 32 bit ← 32 bit
  y = 42;             // 64 bit ← 32 bit
  z = 42000000000000; // 64 bit ← 64 bit
}
17
MOV32mi <fi#0>, 1, %noreg, 0, %noreg, 42;
MOV64mi32 <fi#1>, 1, %noreg, 0, %noreg, 42;
%vreg0 = MOV64ri 42000000000000;
MOV64mr <fi#2>, 1, %noreg, 0, %noreg, %vreg0;
RET
  • volatile
18
memory ← instruction:
MOV32mi
MOV64mi32
MOV64mi

memory ← register ← instruction:
MOV64ri
MOV64mr

MOV32mi <fi#0>, 1, %noreg, 0, %noreg, 42;
MOV64mi32 <fi#1>, 1, %noreg, 0, %noreg, 42;
%vreg0 = MOV64ri 42000000000000;
MOV64mr <fi#2>, 1, %noreg, 0, %noreg, %vreg0;
RET
19

Definition of MOV32mi

def MOV32mi:
  Ii32<
    0xC7, MRM0m,
    (outs), (ins i32mem:$dst, i32imm:$src),
    "mov{l}\t{$src, $dst|$dst, $src}",
    [(store (i32 imm:$src), addr:$dst)], IIC_MOV_MEM
  >, OpSize32;
20
static const unsigned char MatcherTable[] = {
OPC_SwitchOpcode, 40|128, 104, TARGET_VAL(ISD::STORE),
  [...]
  OPC_SwitchType, 26, MVT::i64,
    [...]
  /*SwitchType*/ 24, MVT::i8,
    [...]
  /*SwitchType*/ 24, MVT::i16,
    [...]
  /*SwitchType*/ 24, MVT::i32,
    OPC_MoveParent,
    OPC_RecordChild2,
    OPC_CheckPredicate, 4,
    [...]
    OPC_MorphNodeTo, TARGET_VAL(X86::MOV32mi),
[...]
  • OPC_SwitchOpcode: 240 cases
21

Scheduling

22
  • Determine order of instructions
  • Optimize for
    • Register Pressure
    • Pipelining
  • Only relevant for complex code
  • Pipelining: interleaves instructions
23
void test(int* out, int* in, int* a0, int* a1, int* a2,
          int*  a3, int* a4, int* a5, int* a6) {
  for(int i = 0; i < 10; i++) {
    int v0 = a0[i];
    int v1 = a1[i];
    int v2 = a2[i];
    int v3 = a3[i];
    int v4 = a4[i];
    int v5 = a5[i];
    // int v6 = a6[i];
    int in0 = in[0];
    int in1 = in[1];
    int in2 = in[2];
    int in3 = in[3];
    int in4 = in[4];
    int in5 = in[5];
    // int in6 = in[6];
    int sum = 0;
    sum += v0 * in0;
    sum += v1 * in1;
    sum += v2 * in2;
    sum += v3 * in3;
    sum += v4 * in4;
    sum += v5 * in5;
    // sum += v6 * in6;
    out[i] = sum;
  }
}
24
movl    (%rsi), %r14d
movl    4(%rsi), %ebp
movl    8(%rsi), %ebx
movl    12(%rsi), %r15d
movl    16(%rsi), %r12d
movl    20(%rsi), %r13d
imull   (%rdx,%rax), %r14d
imull   (%rcx,%rax), %ebp
addl    %r14d, %ebp
imull   (%r8,%rax), %ebx
addl    %ebp, %ebx
imull   (%r9,%rax), %r15d
addl    %ebx, %r15d
imull   (%r11,%rax), %r12d
addl    %r15d, %r12d
imull   (%r10,%rax), %r13d
addl    %r12d, %r13d
movl    %r13d, (%rdi,%rax)
  • we are looking at the final result of machine code, after reg allocation
  • uses many registers
25
void test(int* out, int* in, int* a0, int* a1, int* a2,
          int*  a3, int* a4, int* a5, int* a6) {
  for(int i = 0; i < 10; i++) {
    int v0 = a0[i];
    int v1 = a1[i];
    int v2 = a2[i];
    int v3 = a3[i];
    int v4 = a4[i];
    int v5 = a5[i];
    int v6 = a6[i];
    int in0 = in[0];
    int in1 = in[1];
    int in2 = in[2];
    int in3 = in[3];
    int in4 = in[4];
    int in5 = in[5];
    int in6 = in[6];
    int sum = 0;
    sum += v0 * in0;
    sum += v1 * in1;
    sum += v2 * in2;
    sum += v3 * in3;
    sum += v4 * in4;
    sum += v5 * in5;
    sum += v6 * in6;
    out[i] = sum;
  }
}
26
movl    (%rsi), %ebp
movl    4(%rsi), %ebx
imull   (%rdx,%rax), %ebp
imull   (%rcx,%rax), %ebx
addl    %ebp, %ebx
movl    8(%rsi), %ebp
imull   (%r8,%rax), %ebp
addl    %ebx, %ebp
movl    12(%rsi), %ebx
imull   (%r9,%rax), %ebx
addl    %ebp, %ebx
movl    16(%rsi), %ebp
imull   (%r14,%rax), %ebp
addl    %ebx, %ebp
movl    20(%rsi), %ebx
imull   (%r11,%rax), %ebx
addl    %ebp, %ebx
movl    24(%rsi), %ebp
imull   (%r10,%rax), %ebp
addl    %ebx, %ebp
movl    %ebp, (%rdi,%rax)
27

Conclusion

28
  • LLVM is a rich framework
  • Many optimizations at various stages
  • Very modular = good reusability
  • High quality codebase
  • Try it!
29

References

30
SpaceForward
Left, Down, Page DownNext slide
Right, Up, Page UpPrevious slide
POpen presenter console
HToggle this help