I have the following fragment of code. It contains 3 sections where I measure memory access runtime. The first is a plain iteration over the array. The second is almost the same, except that the array address is obtained from a function call. The third is the same as the second but manually optimized.
#include <chrono>
#include <cstdlib>
#include <iostream>
#include <map>
#include <new>
// Maps the address of an object to the heap buffer that find_in_cache()
// hands out in its place.
std::map<void*, void*> cache;
// Number of elements in the benchmark array.
constexpr int elems = 1000000;
// Benchmark array; value-initialized, so every element starts at 0.0.
double x[elems] = {};

/// Returns the heap buffer associated with `var`, allocating it on first use.
///
/// The buffer is keyed by the address of `var`, is `sizeof(T)` bytes large and
/// is zero-filled when created, so reading from it before the first write is
/// well defined for arithmetic element types. No constructor of `T` is run.
/// Buffers are never released; they live in `cache` until the program exits.
///
/// @param var  Object whose address identifies the buffer.
/// @return     Reference to the buffer, viewed as a `T`. Repeated calls with
///             the same object return the same reference.
/// @throws std::bad_alloc if the buffer cannot be allocated.
template <typename T>
T& find_in_cache(T& var) {
    void* key = &var;

    // One lookup on the hot (hit) path instead of count() followed by operator[].
    auto it = cache.find(key);
    if (it == cache.end()) {
        // calloc rather than malloc: callers read elements before writing them.
        void* buffer = std::calloc(1, sizeof(T));
        if (buffer == nullptr) {
            throw std::bad_alloc{};
        }
        it = cache.emplace(key, buffer).first;
    }
    return *static_cast<T*>(it->second);
}
// Benchmark driver: times three variants of the same recurrence over an array
// of `elems` doubles and prints the duration of each variant.
//
// The whole measurement is repeated 100 times and every repetition overwrites
// the previous timings, so only the last repetition is reported.
int main() {
    std::chrono::duration<double> elapsed_seconds1{}, elapsed_seconds2{}, elapsed_seconds3{};
    for (int k = 0; k < 100; k++) { // account for cache effects
        // first section: direct access to the global array
        auto start = std::chrono::steady_clock::now();
        for (int i = 1; i < elems; i++) {
            x[i] = (x[i-1] + 1.0) * 1.001;
        }
        auto end = std::chrono::steady_clock::now();
        elapsed_seconds1 = end-start;

        // second section: the array is looked up through find_in_cache() on
        // every access (two calls per iteration)
        start = std::chrono::steady_clock::now();
        for (int i = 1; i < elems; i++) {
            find_in_cache(x)[i] = (find_in_cache(x)[i-1] + 1.0) * 1.001;
        }
        end = std::chrono::steady_clock::now();
        elapsed_seconds2 = end-start;

        // third section: same as the second, with the lookup hoisted out of
        // the loop (the returned array reference decays to a pointer)
        start = std::chrono::steady_clock::now();
        double* y = find_in_cache(x);
        for (int i = 1; i < elems; i++) {
            y[i] = (y[i-1] + 1.0) * 1.001;
        }
        end = std::chrono::steady_clock::now();
        elapsed_seconds3 = end-start;
    }
    std::cout << "elapsed time 1: " << elapsed_seconds1.count() << "s\n";
    std::cout << "elapsed time 2: " << elapsed_seconds2.count() << "s\n";
    std::cout << "elapsed time 3: " << elapsed_seconds3.count() << "s\n";

    // Prevent optimizing away: consume the last element through a volatile
    // store. It must not be returned as the exit status, because the recurrence
    // grows geometrically and overflows to +inf for this element count, and
    // converting an out-of-range double to int is undefined behaviour.
    volatile double sink = x[elems - 1];
    (void)sink;
    return 0;
}
The timings of these sections are as follows:
elapsed time 1: 0.0018678s
elapsed time 2: 0.00423903s
elapsed time 3: 0.00189678s
Is it possible to change the interface of find_in_cache() without changing the body of the second iteration section to make its performance the same as section 3?
Compiled and run with: g++ test.cpp -O3 && ./a.out
Suggested attribute for find_in_cache(): [[gnu::pure]].