0

I am trying to deserialize JSON data from a web feed that is ~1.7 GB in size. I started with the following code:

/// <summary>
/// Fetches the impressions feed from the web service and adds one row per
/// impression to each of the four output buffers (sheet, id, params, time).
/// </summary>
/// <remarks>
/// Exceptions raised while fetching or mapping the data are caught and reported
/// through <c>FailComponent</c>.
/// </remarks>
public override void CreateNewOutputRows()
{
    //Set Webservice URL
    string wUrl = "webserviceURLgoeshere";

    try
    {
        RootObject outPutResponse = GetWebServiceResult(wUrl);

        // GetWebServiceResult returns null when the request or the deserialization
        // fails, and the impressions list may itself be null. Report that with a
        // clear message instead of letting the foreach below throw a
        // NullReferenceException whose stack trace hides the real cause.
        if (outPutResponse == null || outPutResponse.impressions == null)
        {
            FailComponent("Web service returned no impressions to process.");
            return;
        }

        foreach (Impression imp in outPutResponse.impressions)
        {
            // Flat, top-level fields of the impression.
            ImpressionsSheetOutputBuffer.AddRow();
            ImpressionsSheetOutputBuffer.token = imp.token;
            ImpressionsSheetOutputBuffer.userid = imp.userid;
            ImpressionsSheetOutputBuffer.itemid = imp.itemid;
            ImpressionsSheetOutputBuffer.view = imp.view;
            ImpressionsSheetOutputBuffer.imageguid = imp.imageguid;
            ImpressionsSheetOutputBuffer.bytes = imp.bytes;
            ImpressionsSheetOutputBuffer.format = imp.format;

            // Nested objects each go to their own output buffer.
            ImpressionIDBuffer.AddRow();
            ImpressionIDBuffer.oid = imp.imId.oid;

            ImpressionParamsBuffer.AddRow();
            ImpressionParamsBuffer.origformat = imp.imParams.origFormat;
            ImpressionParamsBuffer.size = imp.imParams.size;

            ImpressionTimeBuffer.AddRow();
            // NOTE(review): the sample feed carries values such as "1458748200000",
            // which do not fit in an Int32 and would make this throw an
            // OverflowException. Confirm the output column type and switch to a
            // 64-bit parse/column if the values really are that large.
            ImpressionTimeBuffer.numLong = Int32.Parse(imp.imTime.numLong);
        }
    }
    catch (Exception e)
    {
        FailComponent(e.ToString());
    }
}

/// <summary>
/// Requests <paramref name="wUrl"/>, reads the whole response body as text and
/// deserializes it into a <see cref="RootObject"/>.
/// </summary>
/// <param name="wUrl">URL of the web service to call.</param>
/// <returns>
/// The deserialized response, or <c>null</c> when the status code is not OK or
/// when reading/deserializing throws (both cases are reported through
/// <c>FailComponent</c>).
/// </returns>
private RootObject GetWebServiceResult(string wUrl)
{
    HttpWebRequest httpWReq = (HttpWebRequest)WebRequest.Create(wUrl);
    RootObject jsonResponse = null;

    // Dispose the response so its connection is released even if reading or
    // deserializing fails. GetResponse is deliberately kept outside the try
    // block so that its exceptions still propagate to the caller.
    using (HttpWebResponse httpWResp = (HttpWebResponse)httpWReq.GetResponse())
    {
        try
        {
            if (httpWResp.StatusCode == HttpStatusCode.OK)
            {
                string jsonString;

                // Disposing the reader also closes the underlying response stream.
                // NOTE(review): ReadToEnd buffers the entire payload in one string,
                // so very large feeds can still exhaust memory here; avoiding that
                // needs a streaming JSON reader rather than JavaScriptSerializer.
                using (StreamReader reader = new StreamReader(httpWResp.GetResponseStream()))
                {
                    jsonString = reader.ReadToEnd();
                }

                JavaScriptSerializer sr = new JavaScriptSerializer();
                // The default MaxJsonLength is 2,097,152 characters; anything
                // longer is rejected before parsing starts. Raise the cap so that
                // payload size alone does not cause a failure.
                sr.MaxJsonLength = int.MaxValue;
                jsonResponse = sr.Deserialize<RootObject>(jsonString);
            }
            else
            {
                FailComponent(httpWResp.StatusCode.ToString());
            }
        }
        catch (Exception e)
        {
            FailComponent(e.ToString());
        }
    }

    return jsonResponse;
}

/// <summary>
/// Fires an error event on this component's metadata, using
/// <paramref name="errorMsg"/> as the error text.
/// </summary>
/// <param name="errorMsg">Text forwarded to <c>FireError</c>.</param>
private void FailComponent(string errorMsg)
{
    // FireError writes to this flag through its out parameter; the value is not used here.
    bool cancelFlag = false;
    this.ComponentMetaData.FireError(
        1,
        "Error Getting Data From Webservice!",
        errorMsg,
        "",
        0,
        out cancelFlag);
}

}

/// <summary>
/// Identifier object nested inside an <c>Impression</c> (see its <c>imId</c> property).
/// </summary>
public class Id
{
    /// <summary>Identifier value.</summary>
    // NOTE(review): the sample feed spells this key "$oid"; confirm that the
    // deserializer in use actually maps that key onto this property.
    public string oid { get; set; }
}

/// <summary>
/// Parameter object nested inside an <c>Impression</c> (see its <c>imParams</c> property).
/// </summary>
public class Params
{
    /// <summary>Original format value.</summary>
    // NOTE(review): the sample feed names this key "originalFormat", not
    // "origFormat"; confirm the mapping.
    public string origFormat { get; set; }

    /// <summary>Size value as text (the sample feed shows "50x50").</summary>
    public string size { get; set; }
}

/// <summary>
/// Time object nested inside an <c>Impression</c> (see its <c>imTime</c> property).
/// </summary>
public class Time
{
    /// <summary>Numeric time value, kept as text.</summary>
    // NOTE(review): the sample feed spells this key "$numberLong" and carries
    // values like "1458748200000"; confirm both the mapping and the numeric
    // range expected by consumers.
    public string numLong { get; set; }
}

/// <summary>
/// One impression record from the feed: flat scalar fields plus three nested
/// objects (<see cref="Id"/>, <see cref="Params"/>, <see cref="Time"/>).
/// </summary>
// NOTE(review): the sample feed names the nested objects "_id", "params" and
// "time", whereas the properties here are imId, imParams and imTime; confirm
// that the deserializer in use maps those keys.
public class Impression
{
    /// <summary>Nested identifier object.</summary>
    public Id imId { get; set; }

    /// <summary>Token value.</summary>
    public string token { get; set; }

    /// <summary>User identifier.</summary>
    public string userid { get; set; }

    /// <summary>Item identifier.</summary>
    public string itemid { get; set; }

    /// <summary>View name.</summary>
    public string view { get; set; }

    /// <summary>Image GUID as text.</summary>
    public string imageguid { get; set; }

    /// <summary>Byte count.</summary>
    public int bytes { get; set; }

    /// <summary>Format name (the sample feed shows "PNG").</summary>
    public string format { get; set; }

    /// <summary>Nested parameter object.</summary>
    public Params imParams { get; set; }

    /// <summary>Nested time object.</summary>
    public Time imTime { get; set; }
}

/// <summary>
/// Top-level deserialization target: a wrapper around the list of impressions.
/// </summary>
// NOTE(review): the sample feed shows bare impression objects one after another
// with no enclosing "impressions" array; confirm that the real payload matches
// this shape.
public class RootObject
{
    /// <summary>Impressions contained in the response.</summary>
    public List<Impression> impressions { get; set; }
}

However, the call to StreamReader.ReadToEnd throws an OutOfMemory exception, because the data is too large to be read into a single string.

I tried changing that code to the following:

// Second attempt: hand the response stream to a JsonTextReader and let the
// serializer read from it, instead of first calling ReadToEnd into a string.
Stream responseStream = httpWResp.GetResponseStream();

StreamReader reader = new StreamReader(responseStream);

using (var myjson = new JsonTextReader(reader))
{
    JsonSerializer myserialization = new JsonSerializer();
    // NOTE(review): this returns a List<RootObject>. If the fragment sits inside
    // GetWebServiceResult, which is declared to return a single RootObject, that
    // mismatch is presumably what triggers the conversion error.
    return (List<RootObject>)myserialization.Deserialize(myjson, typeof(List<RootObject>));
}

This gives me an error that I cannot implicitly convert type List<RootObject> to RootObject. Does anyone see what I might be doing wrong that I can't make this conversion? I used this question to get around the OutOfMemory exception, but now it returns no deserialized items. Any advice would be much appreciated.

EDIT: Json data looks like the following:

{
"_id": {
    "$oid": "000000000000000000000000"
    },
"token": "00000000-0000-0000-0000-000000000000",
"userId": "username",
"itemId": "00000000-0000-0000-0000-000000000000",
"view": "view1",
"imageguid": "00000000-0000-0000-0000-000000000000",
"bytes": 1000,
"format": "PNG",
"params": {
    "originalFormat": "tif",
    "size": "50x50"
    },
"time": {
    "$numberLong": "1458748200000"
    }
}
{
"_id": {
    "$oid": "100000000000000000000000"
     },
"token": "00000000-0000-0000-0000-000000000000",
"userId": "username",
"itemId": "00000000-0000-0000-0000-000000000000",
"view": "view1",
"imageguid": "00000000-0000-0000-0000-000000000000",
"bytes": 1000,
"format": "PNG",
"params": {
    "originalFormat": "tif",
    "size": "50x50"
    },
"time": {
    "$numberLong": "1458748200000"
    }
}
12
  • 64bit app should work fine. Please make sure you are not running as x86. Commented Jul 22, 2016 at 20:10
  • 1
    ~1.7 GB it appears you're running out of memory. Solution? Don't do that. How? Better design. Like what? Tons of things. Give me one. Process the file incrementally, don't try to load it all into memory. How. Depends. That's why people hire developers :/ Commented Jul 22, 2016 at 20:11
  • 1
    Don't try to read the whole thing at once. The whole point of a StreamReader is that you can stream the data. You don't have to get the whole thing at once. Do ReadLine or something. Consider using async/await if you need to keep your application responsive while it gets the massive file. Commented Jul 22, 2016 at 20:18
  • 1
    Possible duplicate of Parsing large json file in .NET Commented Jul 24, 2016 at 21:47
  • 1
    Anthony, your example JSON is not a valid JSON because it cannot have multiple root objects. See here. Commented Jul 25, 2016 at 15:48

1 Answer 1

0

You should define a rule for how to separate each object, and deserialize them separately.

Basically, you could call stream.ReadLine() 18 times and append the results (assuming all objects are written exactly as you posted).

If they aren't, you should use stream.ReadLine() and count the opening and closing curly brackets until you reach the end of each object, then deserialize each object separately that way.

I am guessing there are better ways, but these are quite simple and should solve your problem...

Sign up to request clarification or add additional context in comments.

Comments

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.