Premature optimization is the root of all evil

$일반적인 방법 1. 알고리즘 개선 void sort_bubble(std: : vector<int> &data) { for(int i=data. size()$

일반적인 방법 2. Parallel Computing(Multi Thread : CPU) for (int i=0; i<30; ++i) func(args.

일반적인 방법 3. Trade with Memory for (int i = 0; i < calc_size;

하드웨어(CPU) 동작 이해하기 1. Well cache hit ullong array[1000][1000] = {. . . };

하드웨어(CPU) 동작 이해하기 2. Predictable condition statement std: : vector<int> data; for(int i=0; i<10000000;

그외 1. Vectorization int int int A[64] B[64] C[64] D[64] E[64] = = =

$그외 2. Tail Recursion _size sum_no_tail(_size n){ if(n == 0) return n; return sum_no_tail(n-1)$

그외 for(int i=0; i<10000; ++i){ switch(value){ case 3: sum += 7; break; case 7:

그외 4. Function Parameter count 5. Pass as reference / pointer 6. Function inlining

Slides: 12

Download presentation

서론 Premature optimization is the root of all evil ◦ “Programmers waste enormous amounts of time thinking about, or worrying about, the speed of noncritical parts of their programs, and these attempts at efficiency actually have a strong negative impact when debugging and maintenance are considered. ” Know exactly when and what to optimize. ◦ Always instrument the system to discover its actual impact on the system. 2

$일반적인 방법 1. 알고리즘 개선 void sort_bubble(std: : vector<int> &data) { for(int i=data. size()$

일반적인 방법 1. 알고리즘 개선 void sort_bubble(std: : vector<int> &data) { for(int i=data. size() - 1; i>0; --i) for(int j=0; j<i; ++j) if(data[j] > data[j + 1]){ int t = data[j]; data[j] = data[j + 1]; data[j + 1] = t; } } void sort_quick(std: : vector<T> &data, int start, int end) { if(start >= end) return; int pivot = start, i = start + 1, j = end; int temp, d_pivot; while (i <= j) { d_pivot = data[pivot]; while (i <= end && data[i] <= d_pivot) i++; while (j > start && data[j] >= d_pivot) j--; if (i > j) { temp = data[j]; data[j] = data[pivot]; data[pivot] = temp; } else { temp = data[j]; data[j] = data[i]; data[i] = temp; } } sort_quick(data, start, j-1); sort_quick(data, j+1, end); } 4

일반적인 방법 2. Parallel Computing(Multi Thread : CPU) for (int i=0; i<30; ++i) func(args. . . ); std: : vector<std: : thread> threads; threads. reserve(30); for (int i=0; i<30; ++i) threads. emplace_back(func, args. . . ); for (auto &th : threads) if(th. joinable()) th. join(); threads. clear(); 5

일반적인 방법 3. Trade with Memory for (int i = 0; i < calc_size; ++i) sum += std: : cos((double) i * 0. 001); const double LUT_COSINE_360[360] = { 1. 00000000, 0. 999847695156391, 0. 999390827019096, . . . } double rad 2 deg(double rad){ return rad * 57. 295779513082320; } for (int i = 0; i < calc_size; ++i) sum += LUT_COSINE_360[ (int)(std: : round(rad 2 deg(i*0. 001)) %360]; 6

하드웨어(CPU) 동작 이해하기 1. Well cache hit ullong array[1000][1000] = {. . . }; ullong sum=0; for(int r=0; r<1000; ++r){ for(int c=0; c<1000; ++c){ sum += array[c][r]; } } for(int r=0; r<1000; ++r){ for(int c=0; c<1000; ++c){ sum += array[r][c]; } } return sum; 7

하드웨어(CPU) 동작 이해하기 2. Predictable condition statement std: : vector<int> data; for(int i=0; i<10000000; ++i) data. push_back(random()); sort_quick(sorted. Vector, 0, 10000000 - 1); ullong sum=0; for(const auto &v : sorted. Vector){ if(v > x) sum += v; } return sum; 8

그외 1. Vectorization int int int A[64] B[64] C[64] D[64] E[64] = = = {201, {489, {. . . 112 , . . . }; 973 , . . . }; }; llong sum=0; for(int i=0; i<64; ++i) sum += A[i]+B[i]+C[i]+D[i]+E[i]; int int int A[64] B[64] C[64] D[64] E[64] = = = {201, {489, {. . . 112 , . . . }; 973 , . . . }; }; int SUM[64] = {0}; llong sum=0; /** Compile With Vectorize flag */ for(int i=0; i<64; ++i) SUM[i] = A[i]+B[i]+C[i]+D[i]+E[i]; for(int i=0; i<64; ++i) sum += SUM[i]; 9

$그외 2. Tail Recursion _size sum_no_tail(_size n){ if(n == 0) return n; return sum_no_tail(n-1)$

그외 2. Tail Recursion _size sum_no_tail(_size n){ if(n == 0) return n; return sum_no_tail(n-1) + n*n; } _size sum_tail(_size n, _size sum=0){ if(n == 0) return sum; sum += n*n; return sum_tail(--n, sum); } 10

그외 for(int i=0; i<10000; ++i){ switch(value){ case 3: sum += 7; break; case 7: sum += 9; break; case 13: sum += 13; break; default: sum += 1; } } switch(value){ case 3: for(int i=0; i<10000; sum += 7; break; case 7: for(int i=0; i<10000; sum += 9; break; case 13: for(int i=0; i<10000; sum += 13; break; default: for(int i=0; i<10000; sum += 1; } ++i) 11

그외 4. Function Parameter count 5. Pass as reference / pointer 6. Function inlining 7. Mod / Multiplication optimize 12