No idea about multi-core parallelization. But for simple loops, it might modify little. For the following example, how to make simple loops with multi-core computation in VC++?
#include <iostream>
#include <vector>
#include <ctime>
using namespace std;
void foo(int n, double* a, double* b, double *c, double*d, double* e, double* f, double* g)
{
for (int i = 0; i < n; ++i)
{
a[i] = b[i] * a[i] + c[i] * (d[i] + e[i] + f[i] + g[i]);
}
}
int main()
{
int m = 1001001;
vector<double> a(m), b(m), c(m), d(m), f(m);
std::clock_t startcputime = std::clock();
for (int i = 0; i < 1000; ++i)
foo(1000000, &a[0], &b[0], &c[0], &d[0], &d[1], &f[0], &f[1000]);
double cpu_duration = (std::clock() - startcputime) / (double)CLOCKS_PER_SEC;
std::cout << "Finished in " << cpu_duration << " seconds [CPU Clock] " << std::endl;
}