Frontend Optimization 1 Optimization n Backend 100101010 010001011
Frontend Optimization 1 … Optimization n Backend 100101010 010001011 1001101010111 001010110
Applications OS Compiler Hardware
gcc:
2015: Academics introduce backdoor in “sudo” through a known bug in LLVM. 1984: 1st compiler backdoor by Ken Thompson. 2006: 1st CVE report on a compiler introducing a security vulnerability. 1974: idea of using compilers to introduce backdoors - US Air Force report on Multics. Timeline: 1970 1980 1990 2000 2010 Backdoor in Windows/Linux? 2020
Pressure to Improve Compilers → More Bugs in Compilers → Potential Crashes and Vulnerabilities in Compiled Software
Since ever? Since 2000s (SLAM, PREfast, SAGE, Z3…)?
Since ever? Since 2000s? Csmith – industry standard [PLDI’11]?
ERROR: Domain of definedness of Target is smaller than Source's for i4 %b Example: %X i4 = 0x0 (0) c i4 = 0x3 (3) d i4 = 0x7 (7) %a i4 = 0x0 (0) (1 << c) i4 = 0x8 (8, -8) %t i4 = 0x0 (0) Source value: 0x0 (0) Target value: undef
{ Value *Op1C = Op1; BinaryOperator *BO = dyn_cast<BinaryOperator>(Op0); if (!BO || (BO->getOpcode() != Instruction::UDiv && BO->getOpcode() != Instruction::SDiv)) { Op1C = Op0; BO = dyn_cast<BinaryOperator>(Op1); } Value *Neg = dyn_castNegVal(Op1C); if (BO && BO->hasOneUse() && (BO->getOperand(1) == Op1C || BO->getOperand(1) == Neg) && (BO->getOpcode() == Instruction::UDiv || BO->getOpcode() == Instruction::SDiv)) { Value *Op0BO = BO->getOperand(0), *Op1BO = BO->getOperand(1); // If the division is exact, X % Y is zero, so we end up with X or -X. if (PossiblyExactOperator *SDiv = dyn_cast<PossiblyExactOperator>(BO)) if (SDiv->isExact()) { if (Op1BO == Op1C) return ReplaceInstUsesWith(I, Op0BO); return BinaryOperator::CreateNeg(Op0BO); } Value *Rem; if (BO->getOpcode() == Instruction::UDiv) Rem = Builder->CreateURem(Op0BO, Op1BO); else Rem = Builder->CreateSRem(Op0BO, Op1BO); Rem->takeName(BO); if (Op1BO == Op1C) return BinaryOperator::CreateSub(Op0BO, Rem); return BinaryOperator::CreateSub(Rem, Op0BO); } }
{ Value *Op1C = Op1; BinaryOperator *BO = dyn_cast<BinaryOperator>(Op0); if (!BO || (BO->getOpcode() != Instruction::UDiv && BO->getOpcode() != Instruction::SDiv)) { Op1C = Op0; BO = dyn_cast<BinaryOperator>(Op1); } Value *Neg = dyn_castNegVal(Op1C); if (BO && BO->hasOneUse() && (BO->getOperand(1) == Op1C || BO->getOperand(1) == Neg) && (BO->getOpcode() == Instruction::UDiv || BO->getOpcode() == Instruction::SDiv)) { Value *Op0BO = BO->getOperand(0), *Op1BO = BO->getOperand(1); // If the division is exact, X % Y is zero, so we end up with X or -X. if (PossiblyExactOperator *SDiv = dyn_cast<PossiblyExactOperator>(BO)) if (SDiv->isExact()) { if (Op1BO == Op1C) return ReplaceInstUsesWith(I, Op0BO); return BinaryOperator::CreateNeg(Op0BO); } Value *Rem; if (BO->getOpcode() == Instruction::UDiv) Rem = Builder->CreateURem(Op0BO, Op1BO); else Rem = Builder->CreateSRem(Op0BO, Op1BO); Rem->takeName(BO); if (Op1BO == Op1C) return BinaryOperator::CreateSub(Op0BO, Rem); return BinaryOperator::CreateSub(Rem, Op0BO); } } int f(int x, int y) { return (x / y) * y; } Compile to LLVM IR define i32 @f(i32 %x, i32 %y) { %1 = sdiv i32 %x, %y %2 = mul i32 %1, %y ret i32 %2 } Optimize define i32 @f(i32 %x, i32 %y) { %1 = srem i32 %x, %y %2 = sub i32 %x, %1 ret i32 %2 }
{ Value *Op1C = Op1; BinaryOperator *BO = dyn_cast<BinaryOperator>(Op0); if (!BO || (BO->getOpcode() != Instruction::UDiv && BO->getOpcode() != Instruction::SDiv)) { Op1C = Op0; BO = dyn_cast<BinaryOperator>(Op1); } Value *Neg = dyn_castNegVal(Op1C); if (BO && BO->hasOneUse() && (BO->getOperand(1) == Op1C || BO->getOperand(1) == Neg) && (BO->getOpcode() == Instruction::UDiv || BO->getOpcode() == Instruction::SDiv)) { Value *Op0BO = BO->getOperand(0), *Op1BO = BO->getOperand(1); // If the division is exact, X % Y is zero, so we end up with X or -X. if (PossiblyExactOperator *SDiv = dyn_cast<PossiblyExactOperator>(BO)) if (SDiv->isExact()) { if (Op1BO == Op1C) return ReplaceInstUsesWith(I, Op0BO); return BinaryOperator::CreateNeg(Op0BO); } Value *Rem; if (BO->getOpcode() == Instruction::UDiv) Rem = Builder->CreateURem(Op0BO, Op1BO); else Rem = Builder->CreateSRem(Op0BO, Op1BO); Rem->takeName(BO); if (Op1BO == Op1C) return BinaryOperator::CreateSub(Op0BO, Rem); return BinaryOperator::CreateSub(Rem, Op0BO); } } define i32 @f(i32 %x, i32 %y) { %1 = sdiv i32 %x, %y %2 = mul i32 %1, %y ret i32 %2 } => Optimize define i32 @f(i32 %x, i32 %y) { %1 = srem i32 %x, %y %2 = sub i32 %x, %1 ret i32 %2 }
{ Value *Op1C = Op1; BinaryOperator *BO = dyn_cast<BinaryOperator>(Op0); if (!BO || (BO->getOpcode() != Instruction::UDiv && BO->getOpcode() != Instruction::SDiv)) { Op1C = Op0; BO = dyn_cast<BinaryOperator>(Op1); } Value *Neg = dyn_castNegVal(Op1C); if (BO && BO->hasOneUse() && (BO->getOperand(1) == Op1C || BO->getOperand(1) == Neg) && (BO->getOpcode() == Instruction::UDiv || BO->getOpcode() == Instruction::SDiv)) { Value *Op0BO = BO->getOperand(0), *Op1BO = BO->getOperand(1); // If the division is exact, X % Y is zero, so we end up with X or -X. if (PossiblyExactOperator *SDiv = dyn_cast<PossiblyExactOperator>(BO)) if (SDiv->isExact()) { if (Op1BO == Op1C) return ReplaceInstUsesWith(I, Op0BO); return BinaryOperator::CreateNeg(Op0BO); } Value *Rem; if (BO->getOpcode() == Instruction::UDiv) Rem = Builder->CreateURem(Op0BO, Op1BO); else Rem = Builder->CreateSRem(Op0BO, Op1BO); Rem->takeName(BO); if (Op1BO == Op1C) return BinaryOperator::CreateSub(Op0BO, Rem); return BinaryOperator::CreateSub(Rem, Op0BO); } } %1 = sdiv i32 %x, %y %2 = mul i32 %1, %y => %t = srem i32 %x, %y %2 = sub i32 %x, %t
{ Value *Op1C = Op1; BinaryOperator *BO = dyn_cast<BinaryOperator>(Op0); if (!BO || (BO->getOpcode() != Instruction::UDiv && BO->getOpcode() != Instruction::SDiv)) { Op1C = Op0; BO = dyn_cast<BinaryOperator>(Op1); } Value *Neg = dyn_castNegVal(Op1C); if (BO && BO->hasOneUse() && (BO->getOperand(1) == Op1C || BO->getOperand(1) == Neg) && (BO->getOpcode() == Instruction::UDiv || BO->getOpcode() == Instruction::SDiv)) { Value *Op0BO = BO->getOperand(0), *Op1BO = BO->getOperand(1); // If the division is exact, X % Y is zero, so we end up with X or -X. if (PossiblyExactOperator *SDiv = dyn_cast<PossiblyExactOperator>(BO)) if (SDiv->isExact()) { if (Op1BO == Op1C) return ReplaceInstUsesWith(I, Op0BO); return BinaryOperator::CreateNeg(Op0BO); } Value *Rem; if (BO->getOpcode() == Instruction::UDiv) Rem = Builder->CreateURem(Op0BO, Op1BO); else Rem = Builder->CreateSRem(Op0BO, Op1BO); Rem->takeName(BO); if (Op1BO == Op1C) return BinaryOperator::CreateSub(Op0BO, Rem); return BinaryOperator::CreateSub(Rem, Op0BO); } } %1 = sdiv i32 %x, %y %2 = mul i32 %1, %y => %t = srem i32 %x, %y %2 = sub i32 %x, %t
{ Value *Op1C = Op1; BinaryOperator *BO = dyn_cast<BinaryOperator>(Op0); if (!BO || (BO->getOpcode() != Instruction::UDiv && BO->getOpcode() != Instruction::SDiv)) { Op1C = Op0; BO = dyn_cast<BinaryOperator>(Op1); } Value *Neg = dyn_castNegVal(Op1C); if (BO && BO->hasOneUse() && (BO->getOperand(1) == Op1C || BO->getOperand(1) == Neg) && (BO->getOpcode() == Instruction::UDiv || BO->getOpcode() == Instruction::SDiv)) { Value *Op0BO = BO->getOperand(0), *Op1BO = BO->getOperand(1); // If the division is exact, X % Y is zero, so we end up with X or -X. if (PossiblyExactOperator *SDiv = dyn_cast<PossiblyExactOperator>(BO)) if (SDiv->isExact()) { if (Op1BO == Op1C) return ReplaceInstUsesWith(I, Op0BO); return BinaryOperator::CreateNeg(Op0BO); } Value *Rem; if (BO->getOpcode() == Instruction::UDiv) Rem = Builder->CreateURem(Op0BO, Op1BO); else Rem = Builder->CreateSRem(Op0BO, Op1BO); Rem->takeName(BO); if (Op1BO == Op1C) return BinaryOperator::CreateSub(Op0BO, Rem); return BinaryOperator::CreateSub(Rem, Op0BO); } } %1 = sdiv %x, %y %2 = mul %1, %y => %t = srem %x, %y %2 = sub %x, %t
{ Value *Op1C = Op1; BinaryOperator *BO = dyn_cast<BinaryOperator>(Op0); if (!BO || (BO->getOpcode() != Instruction::UDiv && BO->getOpcode() != Instruction::SDiv)) { Op1C = Op0; BO = dyn_cast<BinaryOperator>(Op1); } Value *Neg = dyn_castNegVal(Op1C); if (BO && BO->hasOneUse() && (BO->getOperand(1) == Op1C || BO->getOperand(1) == Neg) && (BO->getOpcode() == Instruction::UDiv || BO->getOpcode() == Instruction::SDiv)) { Value *Op0BO = BO->getOperand(0), *Op1BO = BO->getOperand(1); // If the division is exact, X % Y is zero, so we end up with X or -X. if (PossiblyExactOperator *SDiv = dyn_cast<PossiblyExactOperator>(BO)) if (SDiv->isExact()) { if (Op1BO == Op1C) return ReplaceInstUsesWith(I, Op0BO); return BinaryOperator::CreateNeg(Op0BO); } Value *Rem; if (BO->getOpcode() == Instruction::UDiv) Rem = Builder->CreateURem(Op0BO, Op1BO); else Rem = Builder->CreateSRem(Op0BO, Op1BO); Rem->takeName(BO); if (Op1BO == Op1C) return BinaryOperator::CreateSub(Op0BO, Rem); return BinaryOperator::CreateSub(Rem, Op0BO); } } Name: sdiv general %1 = sdiv %x, %y %2 = mul %1, %y => %t = srem %x, %y %2 = sub %x, %t Name: sdiv exact %1 = sdiv exact %x, %y %2 = mul %1, %y => %2 = %x
Precondition Pre: C2 % (1<<C1) == 0 %s = shl nsw %X, C1 %r = sdiv %s, C2 => %r = sdiv %X, C2/(1<<C1) Source template Target template Predicates in preconditions may be the result of a dataflow analysis.
Pre: C2 % (1<<C1) == 0 %s = shl nsw %X, C1 %r = sdiv %s, C2 => %r = sdiv %X, C2/(1<<C1) Generalized from LLVM IR: • Symbolic constants • Implicit types Constants
Typing Constraints Transformation Alive Refinement Constraints C++
LLVM has 3 types of UB: • Poison values • Undef values • True UB
Pre: isPowerOf2(C1 ^ C2) %x = add %A, C1 %i = icmp ult %x, C3 %y = add %A, C2 %j = icmp ult %y, C3 %r = or %i, %j => %and = and %A, ~(C1 ^ C2) %lhs = add %and, umax(C1, C2) %r = icmp ult %lhs, C3 ERROR: Mismatch in values of %r Example: %A i4 = 0x0 (0) C1 i4 = 0xA (10, -6) C3 i4 = 0x5 (5) C2 i4 = 0x2 (2) %x i4 = 0xA (10, -6) %i i1 = 0x0 (0) %y i4 = 0x2 (2) %j i1 = 0x1 (1, -1) %and i4 = 0x0 (0) %lhs i4 = 0xA (10, -6) Source value: 0x1 (1, -1) Target value: 0x0 (0)
Pre: C1 u> C3 && C2 u> C3 && isPowerOf2(C1 ^ C2) && isPowerOf2(-C1 ^ -C2) && (-C1 ^ -C2) == ((C3-C1) ^ (C3-C2)) && abs(C1-C2) u> C3 %x = add %A, C1 %i = icmp ult %x, C3 %y = add %A, C2 %j = icmp ult %y, C3 %r = or %i, %j => %and = and %A, ~(C1^C2) %lhs = add %and, umax(C1, C2) %r = icmp ult %lhs, C3
File | # opts. | # translated | # bugs — AddSub 67 49 2; AndOrXor 165 131 0; Calls 80 - -; Casts 77 - -; Combining 63 - -; Compares 245 - -; LoadStoreAlloca 28 17 0; MulDivRem 65 44 6; PHI 12 - -; Select 74 52 0; Shifts 43 41 0; SimplifyDemanded 75 - -; VectorOps 34 - -; Total 1,028 334 8. 14% wrong!
Pre: C1 % C2 == 0 %m = mul nsw %X, C1 %r = sdiv %m, C2 => %r = mul nsw %X, C1/C2 nsw: States that the operation will not result in a signed overflow
Frontend Optimization 1 … utcTV Optimization n OK / Bug Backend 100101010 010001011 1001101010111 001010110
Benchmark | Lines of code | Compile with /d2Verify | Slowdown — bzip2 7k 5 min 106x; gcc 754k 8 hours 186x; gzip 9k 2 min 70x; sqlite3 189k 1h 20 min 234x; Z3 500k 17 hours 32x. Note: 32-bits, single-threaded compiler
- Slides: 45