2

Have a look at this snippet:

/// <summary>
/// Abbreviated sketch: a class that reads a shared static lookup table through
/// a per-instance field instead of through the static field directly.
/// Bodies marked "blah blah" are elided in this snippet.
/// </summary>
public class StringToggler
{
    // Lookup table shared by all instances; indexed with a char cast to byte in Toggle.
    static readonly bool[] ToggleableLatinChars = new[]
    {
        // 256 bools here
    };

    // Instance field holding a reference to the same array object as the static
    // field above (the array itself is not copied).
    readonly bool[] LocalToggleableLatinChars = ToggleableLatinChars;

    /// <summary>Elided method; only the table lookup is shown.</summary>
    public string Toggle(string s)
    {
        // blah blah

        // The lookup goes through the instance field, not the static field.
        // NOTE(review): `ch` is declared in the elided code - presumably the
        // current character of `s`; confirm against the full source.
        if (LocalToggleableLatinChars[(byte) ch])
        {
            // blah blah
        }

        // blah blah
    }

    // blah blah
}

This code is demonstrably quicker (7% ish) in testing than if I used ToggleableLatinChars directly. (Using a local reference to ToggleableLatinChars in the method is also quicker by the same amount).

This effect is noticed only when compiling for .NET 4. When compiling for .NET 3.5, I see the opposite effect - using the static array is noticeably quicker. (My machine is an Intel i5 running Windows 7 64-bit and is compiling for x86)

Any idea why?

Update: Here is a complete code sample which is more akin to Marc's testing sample. Note I am now using static and local variable versions (not member variable any more). Although the difference I see with this is less than I was seeing with my original test code, when compiled for .NET 4, the local version is always faster. You can swap the running order around but Local always wins for me. (Compiling for .NET 3.5 does not do this: it is much faster overall than .NET 4 and static is either faster or the same)

using System;
using System.Diagnostics;
using System.Globalization;

/// <summary>
/// Console benchmark that times two otherwise identical case-toggling methods:
/// one indexes the lookup table through a static field inside its loop, the
/// other copies that static reference into a local variable first.
/// </summary>
internal class Program
{
    // Number of outer-loop iterations per benchmark; each iteration toggles all four test strings.
    const int RepeatCount = 500000;

    // Fixed benchmark inputs. The names suggest the mix of characters each one is meant to exercise.
    const string TestString1_Unicode =          @"?=3.1415926?!! ?a??!#!%# ÜBERGRößEN!!?????? ??????@!e=2.71828182?#!!$@\^i^/!@$";
    const string TestString2_Numbers =          @"p=3.14159265358979323846264338327950288419716939937510....!!!!";
    const string TestString3_LowerCase =        @"nevr un-den-erstimate ze pauer of stoopid piplz in larg grupp!\*^*/";
    const string TestString4_UpperCase =        @"DUDE, WHY U R HERE?? U SHOULDA BE IN THE MEETING (BLAH-BLAH) $\*o*/$!";

    /// <summary>
    /// Runs the static-access benchmark, then the local-access benchmark, and
    /// finally blocks on Console.ReadLine so the console output stays visible.
    /// </summary>
    static void Main()
    {

        RunTestsStaticAccess();
        RunTestsLocalAccess();

        Console.ReadLine();
    }

    /// <summary>
    /// Times RepeatCount rounds of ToggleCase_LocalAccess over the four test
    /// strings and prints the elapsed milliseconds.
    /// </summary>
    public static void RunTestsLocalAccess()
    {
        StringToggler st = new StringToggler();

        // No untimed warm-up call is made, so any one-time cost of the first
        // call is presumably part of the measurement.
        var watch = Stopwatch.StartNew();
        for (int i = 0; i < RepeatCount; i++)
        {
            // Return values are discarded; only elapsed time is observed.
            st.ToggleCase_LocalAccess(TestString1_Unicode);
            st.ToggleCase_LocalAccess(TestString2_Numbers);
            st.ToggleCase_LocalAccess(TestString3_LowerCase);
            st.ToggleCase_LocalAccess(TestString4_UpperCase);
        }
        watch.Stop();
        Console.WriteLine("{0}: {1}ms", "RunTestsLocalAccess", watch.ElapsedMilliseconds);
    }

    /// <summary>
    /// Times RepeatCount rounds of ToggleCase_StaticAccess over the four test
    /// strings and prints the elapsed milliseconds.
    /// </summary>
    public static void RunTestsStaticAccess()
    {
        StringToggler st = new StringToggler();

        // Same structure as RunTestsLocalAccess; only the method under test differs.
        var watch = Stopwatch.StartNew();
        for (int i = 0; i < RepeatCount; i++)
        {
            // Return values are discarded; only elapsed time is observed.
            st.ToggleCase_StaticAccess(TestString1_Unicode);
            st.ToggleCase_StaticAccess(TestString2_Numbers);
            st.ToggleCase_StaticAccess(TestString3_LowerCase);
            st.ToggleCase_StaticAccess(TestString4_UpperCase);
        }
        watch.Stop();
        Console.WriteLine("{0}: {1}ms", "RunTestsStaticAccess", watch.ElapsedMilliseconds);
    }

    /// <summary>
    /// Toggles the case of every letter in a string. Characters up to 0xFF are
    /// handled with a lookup table and a bit flip; all others go through
    /// CharUnicodeInfo and the TextInfo chosen at construction.
    /// </summary>
    public class StringToggler
    {
        // 256-entry table, one entry per char value 0x00-0xFF, 16 entries per row.
        // Entries are true for 0x41-0x5A, 0x61-0x7A, 0xC0-0xD6, 0xD8-0xDE,
        // 0xE0-0xF6 and 0xF8-0xFE; every other entry (including 0xD7, 0xDF,
        // 0xF7 and 0xFF) is false. A true entry means the toggle methods XOR
        // that character with 0x20.
        static readonly bool[] ToggleableLatinChars = new[]
        {
            false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false,
            false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false,
            false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false,
            false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false,
            false,  true,  true,  true,  true,  true,  true,  true,  true,  true,  true,  true,  true,  true,  true,  true,
             true,  true,  true,  true,  true,  true,  true,  true,  true,  true,  true, false, false, false, false, false,
            false,  true,  true,  true,  true,  true,  true,  true,  true,  true,  true,  true,  true,  true,  true,  true,
             true,  true,  true,  true,  true,  true,  true,  true,  true,  true,  true, false, false, false, false, false,
            false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false,
            false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false,
            false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false,
            false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false,
             true,  true,  true,  true,  true,  true,  true,  true,  true,  true,  true,  true,  true,  true,  true,  true,
             true,  true,  true,  true,  true,  true,  true, false,  true,  true,  true,  true,  true,  true,  true, false,
             true,  true,  true,  true,  true,  true,  true,  true,  true,  true,  true,  true,  true,  true,  true,  true,
             true,  true,  true,  true,  true,  true,  true, false,  true,  true,  true,  true,  true,  true,  true, false
        };

        // Culture-specific casing used for characters above 0xFF.
        readonly TextInfo textInfo;

        /// <summary>Creates a toggler that uses the TextInfo of CultureInfo.CurrentCulture.</summary>
        public StringToggler()
        {
            textInfo = CultureInfo.CurrentCulture.TextInfo;
        }

        /// <summary>Creates a toggler that uses the TextInfo of the given culture.</summary>
        /// <param name="cultureInfo">Culture whose TextInfo is used; dereferenced without a null check.</param>
        public StringToggler(CultureInfo cultureInfo)
        {
            textInfo = cultureInfo.TextInfo;
        }

        /// <summary>
        /// Returns a case-toggled copy of <paramref name="s"/>, indexing the
        /// lookup table through the static field inside the loop.
        /// </summary>
        /// <param name="s">Input string; it is copied before being modified.</param>
        /// <returns>The modified copy.</returns>
        public unsafe string ToggleCase_StaticAccess(string s)
        {
            // Work on a fresh string instance so the caller's string is not the one written to below.
            s = string.Copy(s);

            // Pin the copy and overwrite its characters in place through the pointer.
            fixed(char* p = s)
            {
                for (int i = 0; i < s.Length; i++)
                {
                    char ch = p[i];

                    if (ch <= 0xff)
                    {
                        // Variant under test: the table is named via the static field here.
                        // The range check above means the (byte) cast does not lose information.
                        if (ToggleableLatinChars[(byte) ch])
                        {
                            // Flip bit 0x20 of the character.
                            p[i] = (char) (ch ^ 0x20);
                        }
                    }
                    else
                    {
                        // Above 0xFF: classify the character and convert with the configured TextInfo.
                        // Categories other than upper/lowercase letter are left unchanged.
                        switch (CharUnicodeInfo.GetUnicodeCategory(ch))
                        {
                            case UnicodeCategory.UppercaseLetter:
                                p[i] = textInfo.ToLower(ch);
                                break;

                            case UnicodeCategory.LowercaseLetter:
                                p[i] = textInfo.ToUpper(ch);
                                break;
                        }
                    }
                }
            }

            return s;
        }

        /// <summary>
        /// Returns a case-toggled copy of <paramref name="s"/>. Same statements as
        /// ToggleCase_StaticAccess, except the table reference is copied into a
        /// local variable before the loop and the loop indexes that local.
        /// </summary>
        /// <param name="s">Input string; it is copied before being modified.</param>
        /// <returns>The modified copy.</returns>
        public unsafe string ToggleCase_LocalAccess(string s)
        {
            // Work on a fresh string instance so the caller's string is not the one written to below.
            s = string.Copy(s);

            // Variant under test: read the static field once, outside the loop.
            var toggleableLatinChars = ToggleableLatinChars;

            // Pin the copy and overwrite its characters in place through the pointer.
            fixed(char* p = s)
            {
                for (int i = 0; i < s.Length; i++)
                {
                    char ch = p[i];

                    if (ch <= 0xff)
                    {
                        // The range check above means the (byte) cast does not lose information.
                        if (toggleableLatinChars[(byte) ch])
                        {
                            // Flip bit 0x20 of the character.
                            p[i] = (char) (ch ^ 0x20);
                        }
                    }
                    else
                    {
                        // Above 0xFF: classify the character and convert with the configured TextInfo.
                        // Categories other than upper/lowercase letter are left unchanged.
                        switch (CharUnicodeInfo.GetUnicodeCategory(ch))
                        {
                            case UnicodeCategory.UppercaseLetter:
                                p[i] = textInfo.ToLower(ch);
                                break;

                            case UnicodeCategory.LowercaseLetter:
                                p[i] = textInfo.ToUpper(ch);
                                break;
                        }
                    }
                }
            }

            return s;
        }
    }
}
16
  • 2
    Can you show how you measured this? Commented Jun 12, 2011 at 8:15
  • btw, a char is 16-bits; and an array indexer wants 32 bits. A (int)ch is more appropriate here. Commented Jun 12, 2011 at 8:16
  • Can you include the method you used to test? Commented Jun 12, 2011 at 8:29
  • a). Are you measuring a sufficient number of iterations to be sure that any cache-warming effects are eliminated? Run tests for extended periods. b). Can you disassemble the code and have a look? Could it be that somehow different pointer sizes are being used in the two cases? Commented Jun 12, 2011 at 8:29
  • 1
    @Simon you need 256 bools. An effective way to store that would be to use the bits of 4 x 64-bit integers. Then bool index 17, say, is ((long0 >> 17)&1)!=0. These 4 longs could be direct fields rather than a de-reference to an array Commented Jun 12, 2011 at 17:25

1 Answer 1

6

Simply: it isn't. I don't trust your (not provided) test:

My results:

InstanceField: 6035ms
LocalVariable: 5373ms
StaticFieldStaticInitializer: 5364ms
StaticFieldNoInitializer: 5388ms

which ties in with what I would expect from the additional ldarg.0 and ldfld (to get the value from an instance field) rather than the simpler ldsfld (to get the value from a static field) or ldloc.0 (to get the value from a local variable).

My code:

/// <summary>
/// Console benchmark comparing how long the same counting loop takes when the
/// array it scans is reached through an instance field, a local variable, a
/// static field with an initializer, or a static field assigned in the method.
/// The snippet uses Console and Stopwatch unqualified, so it needs
/// using System; and using System.Diagnostics; which are not shown here.
/// </summary>
class Program
{
    /// <summary>Runs the four variants in a fixed order, then waits on Console.ReadLine.</summary>
    static void Main()
    {
        new InstanceField().RunTests();
        new LocalVariable().RunTests();
        new StaticFieldStaticInitializer().RunTests();
        new StaticFieldNoInitializer().RunTests();
        Console.ReadLine();
    }
    /// <summary>Variant where the array is an instance field.</summary>
    class InstanceField
    {
        // Nothing in this snippet writes to the array, so its elements keep their default value (false).
        public bool[] arr= new bool[1024];
        /// <summary>Times 500000 full scans of the array and prints the elapsed milliseconds.</summary>
        public void RunTests()
        {
            var watch = Stopwatch.StartNew();
            // Incremented for each true element; never read after the loops.
            int count = 0;
            for (int i = 0; i < 500000; i++)
            {
                // The instance field is named in both the loop condition and the loop body.
                for (int j = 0; j < arr.Length; j++)
                {
                    if (arr[j]) count++;
                }
            }
            watch.Stop();
            // GetType().Name labels the output line with this variant's class name.
            Console.WriteLine("{0}: {1}ms", GetType().Name, watch.ElapsedMilliseconds);
        }
    }
    /// <summary>Variant where the array is a local variable of the test method.</summary>
    class LocalVariable
    {
        /// <summary>Times 500000 full scans of the array and prints the elapsed milliseconds.</summary>
        public void RunTests()
        {
            // Allocated before the stopwatch starts, so allocation is not timed.
            bool[] arr = new bool[1024];
            var watch = Stopwatch.StartNew();
            // Incremented for each true element; never read after the loops.
            int count = 0;
            for (int i = 0; i < 500000; i++)
            {
                // The local variable is named in both the loop condition and the loop body.
                for (int j = 0; j < arr.Length; j++)
                {
                    if (arr[j]) count++;
                }
            }
            watch.Stop();
            // GetType().Name labels the output line with this variant's class name.
            Console.WriteLine("{0}: {1}ms", GetType().Name, watch.ElapsedMilliseconds);
        }
    }
    /// <summary>Variant where the array is a static field set by a field initializer.</summary>
    class StaticFieldStaticInitializer
    {
        // Nothing in this snippet writes to the array, so its elements keep their default value (false).
        public static bool[] arr = new bool[1024];
        /// <summary>Times 500000 full scans of the array and prints the elapsed milliseconds.</summary>
        public void RunTests()
        {
            var watch = Stopwatch.StartNew();
            // Incremented for each true element; never read after the loops.
            int count = 0;
            for (int i = 0; i < 500000; i++)
            {
                // The static field is named in both the loop condition and the loop body.
                for (int j = 0; j < arr.Length; j++)
                {
                    if (arr[j]) count++;
                }
            }
            watch.Stop();
            // GetType().Name labels the output line with this variant's class name.
            Console.WriteLine("{0}: {1}ms", GetType().Name, watch.ElapsedMilliseconds);
        }
    }
    /// <summary>Variant where the array is a static field assigned inside the test method.</summary>
    class StaticFieldNoInitializer
    {
        // No initializer: the field is null until RunTests assigns it.
        public static bool[] arr;
        /// <summary>Times 500000 full scans of the array and prints the elapsed milliseconds.</summary>
        public void RunTests()
        {                
            // Assigned before the stopwatch starts, so allocation is not timed.
            arr = new bool[1024];
            var watch = Stopwatch.StartNew();
            // Incremented for each true element; never read after the loops.
            int count = 0;
            for (int i = 0; i < 500000; i++)
            {
                // The static field is named in both the loop condition and the loop body.
                for (int j = 0; j < arr.Length; j++)
                {
                    if (arr[j]) count++;
                }
            }
            watch.Stop();
            // GetType().Name labels the output line with this variant's class name.
            Console.WriteLine("{0}: {1}ms", GetType().Name, watch.ElapsedMilliseconds);
        }
    }
}
Sign up to request clarification or add additional context in comments.

5 Comments

Which in assembly is "mov ecx, dword ptr ds:[xxxx]" to get the static array and "mov eax, dword ptr [ebp-3Ch]" plus "mov ecx, dword ptr [eax+4]" to get the instance array...
I have just changed the question to clarify the .NET version I was compiling for (.NET 4). When I repeat the test for 3.5 I get the opposite effect. At first try, I can't reproduce the effect using your code (under either .NET version) but I will try to find out what the difference is.
I think your code is not near enough to my code to show the effect. Your tests are basically just iterating over the array length, which I think is being optimized since it is a recognizable pattern. My code is using each char in the input string as an index into the array. Also, your test cases do not replicate the important point: I always have a pre-created static array, but accessing it directly proves slower than having a local var (or member variable originally) pointing to the same array (for .NET 4). When I change your code to pass in a string and use its chars as the index, static is ~6% slower.
@Simon which is exactly why it is hard to talk about performance unless you post an example in the first place
Fair enough :-). I did think there was just enough information in the original snippet about the static and the 'extra' reference to it but I do find I misjudge the information to provide. Complete example now provided.

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.