1

I want to perform integral calculations using rectangles and trapezoids method using multiple threads to get faster results. Unfortunately, in my case, executing multi-threaded code turns out to be slower than standard sequential code. Using multiple threads is much slower than a single thread - after all, shouldn't it be the other way around? It feels like the more threads, the slower the code executes.

In addition, I noticed that the more threads, the less precise the result of the integral is. This is especially noticeable when calculating the integral using the trapezoid method.

Here's my code: https://dotnetfiddle.net/jEPURO

using System;
using System.Diagnostics;
using System.Threading;
using System.Threading.Tasks;

namespace ParallelProgramming.ConsoleApp
{
    class Program
    {
        public static string IntegrationMethod { get; set; }
        public static double IntervalBegin { get; set; }
        public static double IntervalEnd { get; set; }
        public static int NPrecisionValue { get; set; }
        public static bool IsParallel { get; set; }
        public static int ThreadValue { get; set; }
        public static Stopwatch Stopwatch { get; set; }
        public static double Result { get; set; }

        static void Main(string[] args)
        {
            Console.WriteLine("Function                                  | Elapsed Time     | Estimated Integral");
            Console.WriteLine("-----------------------------------------------------------------");

            IntervalBegin = 5;
            IntervalEnd = -2;
            NPrecisionValue = 100000000;

            //RectangularIntegration – Sequential
            NumericalIntegrationMethods integral = new();
            Stopwatch = Stopwatch.StartNew();
            Result = integral.RectangularIntegration(IntervalBegin, IntervalEnd, NPrecisionValue);
            Stopwatch.Stop();

            Console.WriteLine($"{nameof(integral.RectangularIntegration)} – Sequential | {Stopwatch.Elapsed} | {Result}");

            //RectangularIntegrationParallel - 1 thread
            integral = new();
            Stopwatch = Stopwatch.StartNew();
            Result = integral.RectangularIntegrationParallel(IntervalBegin, IntervalEnd, NPrecisionValue, 1);
            Stopwatch.Stop();

            Console.WriteLine($"{nameof(integral.RectangularIntegrationParallel)} – 1 Thread | {Stopwatch.Elapsed} | {Result}");

            //RectangularIntegrationParallel - 2 threads
            integral = new();
            Stopwatch = Stopwatch.StartNew();
            Result = integral.RectangularIntegrationParallel(IntervalBegin, IntervalEnd, NPrecisionValue, 2);
            Stopwatch.Stop();

            Console.WriteLine($"{nameof(integral.RectangularIntegrationParallel)} – 2 Threads | {Stopwatch.Elapsed} | {Result}");

            //RectangularIntegrationParallel - 3 threads
            integral = new();
            Stopwatch = Stopwatch.StartNew();
            Result = integral.RectangularIntegrationParallel(IntervalBegin, IntervalEnd, NPrecisionValue, 3);
            Stopwatch.Stop();

            Console.WriteLine($"{nameof(integral.RectangularIntegrationParallel)} – 3 Threads | {Stopwatch.Elapsed} | {Result}");

            //RectangularIntegrationParallel - 4 threads
            integral = new();
            Stopwatch = Stopwatch.StartNew();
            Result = integral.RectangularIntegrationParallel(IntervalBegin, IntervalEnd, NPrecisionValue, 4);
            Stopwatch.Stop();

            Console.WriteLine($"{nameof(integral.RectangularIntegrationParallel)} – 4 Threads | {Stopwatch.Elapsed} | {Result}");

            //TrapezoidalIntegration - Sequential 
            integral = new();
            Stopwatch = Stopwatch.StartNew();
            Result = integral.TrapezoidalIntegration(IntervalBegin, IntervalEnd, NPrecisionValue);
            Stopwatch.Stop();

            Console.WriteLine($"{nameof(integral.TrapezoidalIntegration)} – Sequential | {Stopwatch.Elapsed} | {Result}");

            //TrapezoidalIntegrationParallel – 1 Thread
            integral = new();
            Stopwatch = Stopwatch.StartNew();
            Result = integral.TrapezoidalIntegrationParallel(IntervalBegin, IntervalEnd, NPrecisionValue, 1);
            Stopwatch.Stop();

            Console.WriteLine($"{nameof(integral.TrapezoidalIntegrationParallel)} – 1 Thread | {Stopwatch.Elapsed} | {Result}");

            //TrapezoidalIntegrationParallel – 2 Threads
            integral = new();
            Stopwatch = Stopwatch.StartNew();
            Result = integral.TrapezoidalIntegrationParallel(IntervalBegin, IntervalEnd, NPrecisionValue, 2);
            Stopwatch.Stop();

            Console.WriteLine($"{nameof(integral.TrapezoidalIntegrationParallel)} – 2 Threads | {Stopwatch.Elapsed} | {Result}");
            
            //TrapezoidalIntegrationParallel – 3 Threads
            integral = new();
            Stopwatch = Stopwatch.StartNew();
            Result = integral.TrapezoidalIntegrationParallel(IntervalBegin, IntervalEnd, NPrecisionValue, 3);
            Stopwatch.Stop();

            Console.WriteLine($"{nameof(integral.TrapezoidalIntegrationParallel)} – 3 Threads | {Stopwatch.Elapsed} | {Result}");

            //TrapezoidalIntegrationParallel – 4 Threads
            integral = new();
            Stopwatch = Stopwatch.StartNew();
            Result = integral.TrapezoidalIntegrationParallel(IntervalBegin, IntervalEnd, NPrecisionValue, 4);
            Stopwatch.Stop();

            Console.WriteLine($"{nameof(integral.TrapezoidalIntegrationParallel)} – 4 Threads | {Stopwatch.Elapsed} | {Result}");

            Console.WriteLine("Press any key to continue...");
            Console.ReadLine();
        }
    }
    public class NumericalIntegrationMethods
    {
        double Function(double x)
        {
            return x * x + 2 * x;
        }

        public double RectangularIntegration(double xp, double xk, int n)
        {
            double dx, integral = 0;
            dx = (xk - xp) / n;

            for (int i = 1; i <= n; i++)
            {
                integral += dx * Function(xp + i * dx);
                //Console.WriteLine("Sekwencyjnie - iteracja {0} wątek ID: {1}", i, Thread.CurrentThread.ManagedThreadId);
            }

            return integral;
        }

        public double TrapezoidalIntegration(double xp, double xk, int n)
        {
            double dx, integral = 0;
            dx = (xk - xp) / n;

            for (int i = 1; i <= n; i++)
            {
                integral += Function(xp + i * dx);
                //Console.WriteLine("Sekwencyjnie - iteracja {0} wątek ID: {1}", i, Thread.CurrentThread.ManagedThreadId);
            }

            integral += (Function(xp) + Function(xk)) / 2;
            integral *= dx;

            return integral;
        }

        public double RectangularIntegrationParallel(double xp, double xk, int n, int maxThreads)
        {
            double dx, integral = 0;
            dx = (xk - xp) / n;

            Parallel.For(1, n + 1, new ParallelOptions { MaxDegreeOfParallelism = maxThreads }, i =>
            {
                integral += dx * Function(xp + i * dx);
                //Console.WriteLine("Równolegle - iteracja {0} wątek ID: {1}", i, Thread.CurrentThread.ManagedThreadId);
            });

            return integral;
        }

        public double TrapezoidalIntegrationParallel(double xp, double xk, int n, int maxThreads)
        {
            double dx, integral = 0;
            dx = (xk - xp) / n;

            Parallel.For(1, n + 1, new ParallelOptions { MaxDegreeOfParallelism = maxThreads }, i =>
            {
                integral += Function(xp + i * dx);
                //Console.WriteLine("Równolegle - iteracja {0} wątek ID: {1}", i, Thread.CurrentThread.ManagedThreadId);
            });

            integral += (Function(xp) + Function(xk)) / 2;
            integral *= dx;

            return integral;
        }
    }
}


And here's output:

Function                                  | Elapsed Time     | Estimated Integral
-----------------------------------------------------------------
RectangularIntegration – Sequential | 00:00:00.9284260 | -65.33333210831276
RectangularIntegrationParallel – 1 Thread | 00:00:01.7040507 | -65.33333210831276
RectangularIntegrationParallel – 2 Threads | 00:00:01.7191484 | -65.33333210831276
RectangularIntegrationParallel – 3 Threads | 00:00:01.6888398 | -57.73164823448317
RectangularIntegrationParallel – 4 Threads | 00:00:01.5530828 | -65.33333210831276
TrapezoidalIntegration – Sequential | 00:00:00.7278303 | -65.33333333332568
TrapezoidalIntegrationParallel – 1 Thread | 00:00:01.4265208 | -65.33333333332568
TrapezoidalIntegrationParallel – 2 Threads | 00:00:02.3009881 | -33.110522448239216
TrapezoidalIntegrationParallel – 3 Threads | 00:00:01.6062253 | -57.02137898750542
TrapezoidalIntegrationParallel – 4 Threads | 00:00:01.9967140 | -18.120285251376426

Why is this happening? What am I doing wrong? After all, the more threads used, the faster the results should be. 4 threads should be faster than 3, and 3 threads should be faster than 2 and so on. How can I get faster results using more threads?

18
  • 1
    Honestly, I wouldn't trust these numbers at all - there's no warmup for JIT and thread-pool growth. If you care about perf, benchmarkdotnet (freely available on NuGet) is highly recommended (it is pretty easy to get started with, there's examples on the GitHub page). I'd also be interested to see whether SIMD can be used here (via spans and vectors) Commented Mar 21, 2021 at 19:31
  • 1
    @Neil the C++ vs C# thing is not as obvious as that - for example large chunks of the CLR internals have recently been moved from C++ to C# and seen increases in performance. It is contextual, of course, but the current JIT and other capabilities: mean it isn't a simple race to bet on. Commented Mar 21, 2021 at 19:49
  • 2
    Unless Parallel.For works some magic that I'm not aware of, it looks like your parallel implementation might generate a TON of contention around the "integral" variable. Each loop is very simple and short, but they all compete read/writes of this shared variable. I would try to sum up everything in non-shared variables, and then sum those up after the individual iterations are done. Also, IF parallel for spawns up new threads (not sure if that's the case), that would incur a solid amount of overhead. And, as pointed out, make sure everything is warmed up etc. Commented Mar 21, 2021 at 20:03
  • 3
    ps: Is what you're doing even thread-safe? "+=" is not an atomic operation, unless there's some further synchronization magic going on behind the scenes, you might end up with invalid results Commented Mar 21, 2021 at 20:05
  • 3
    @Bogey there is no magic. The OP's parallel code is not thread-safe, and produces incorrect results. Commented Mar 21, 2021 at 20:13

1 Answer 1

5

Here is a way to parallelize the calculation in a way that allows each thread to work independently, with minimal interference from the other threads, using thread-local state (the accumulator argument). In general the less each thread has to step on each other's toes, the more efficient your parallel code becomes.

public double RectangularIntegrationParallel(double xp, double xk, int n, int maxThreads)
{
    double dx, integral = 0;
    dx = (xk - xp) / n;
    var locker = new object();

    Parallel.ForEach(Partitioner.Create(0, n + 1), new ParallelOptions
    {
        MaxDegreeOfParallelism = maxThreads
    }, () => 0.0D, (range, state, accumulator) =>
    {
        for (int i = range.Item1; i < range.Item2; i++)
        {
            accumulator += dx * Function(xp + i * dx);
        }
        return accumulator;
    }, accumulator =>
    {
        lock (locker) { integral += accumulator; }
    });
    return integral;
}

The intention of using the Partitioner.Create method is to chunkify the workload. Instead of invoking the lambda for each tiny loop of the calculation, you can use the Partitioner to split the total range of the calculation into sub-ranges, and invoke the lambda once for each sub-range. Lambda invocations cannot be inlined by the compiler, so in general you would like to avoid calling a lambda millions of times per second.

The Parallel.ForEach overload that is used in this example has this signature:

public static ParallelLoopResult ForEach<TSource, TLocal>(
    Partitioner<TSource> source,
    ParallelOptions parallelOptions,
    Func<TLocal> localInit,
    Func<TSource, ParallelLoopState, TLocal, TLocal> body,
    Action<TLocal> localFinally);

An alternative to the Parallel class is to use PLINQ. In general PLINQ produces more concise and easier to understand code, usually at the cost of some additional overhead.

public double RectangularIntegrationParallel(double xp, double xk, int n, int maxThreads)
{
    double dx = (xk - xp) / n;

    return Partitioner.Create(0, n + 1)
        .AsParallel()
        .WithDegreeOfParallelism(maxThreads)
        .Select(range =>
        {
            double integral = 0.0;
            for (int i = range.Item1; i < range.Item2; i++)
            {
                integral += dx * Function(xp + i * dx);
            }
            return integral;
        })
        .Sum();
}
Sign up to request clarification or add additional context in comments.

1 Comment

Greatly explained and I do see significant performance improve in parallel code execution. Thanks!

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.