I am trying to deserialize JSON data from a web feed that is ~1.7 GB in size. I started with the following code:
/// <summary>
/// Fetches the impressions feed and, for every impression, adds one row to each of the
/// ImpressionsSheet, ImpressionID, ImpressionParams and ImpressionTime output buffers.
/// </summary>
/// <remarks>
/// Exceptions raised while fetching or copying are reported through FailComponent
/// instead of being rethrown.
/// </remarks>
public override void CreateNewOutputRows()
{
    // Set web service URL
    string wUrl = "webserviceURLgoeshere";
    try
    {
        RootObject outPutResponse = GetWebServiceResult(wUrl);

        // GetWebServiceResult returns null once it has already reported a failure.
        // Stop here so that failure is not followed by a second, misleading
        // NullReferenceException error from the loop below.
        if (outPutResponse == null)
        {
            return;
        }

        foreach (Impression imp in outPutResponse.impressions)
        {
            ImpressionsSheetOutputBuffer.AddRow();
            ImpressionsSheetOutputBuffer.token = imp.token;
            ImpressionsSheetOutputBuffer.userid = imp.userid;
            ImpressionsSheetOutputBuffer.itemid = imp.itemid;
            ImpressionsSheetOutputBuffer.view = imp.view;
            ImpressionsSheetOutputBuffer.imageguid = imp.imageguid;
            ImpressionsSheetOutputBuffer.bytes = imp.bytes;
            ImpressionsSheetOutputBuffer.format = imp.format;

            ImpressionIDBuffer.AddRow();
            ImpressionIDBuffer.oid = imp.imId.oid;

            ImpressionParamsBuffer.AddRow();
            ImpressionParamsBuffer.origformat = imp.imParams.origFormat;
            ImpressionParamsBuffer.size = imp.imParams.size;

            ImpressionTimeBuffer.AddRow();
            // NOTE(review): epoch-millisecond values such as "1458748200000" exceed
            // Int32.MaxValue and make Int32.Parse throw OverflowException. Switching to
            // Int64.Parse requires the numLong output column to be 64-bit - confirm the
            // column type before changing this.
            ImpressionTimeBuffer.numLong = Int32.Parse(imp.imTime.numLong);
        }
    }
    catch (Exception e)
    {
        FailComponent(e.ToString());
    }
}
/// <summary>
/// Requests <paramref name="wUrl"/> and deserializes the JSON response body into a
/// <see cref="RootObject"/>.
/// </summary>
/// <param name="wUrl">URL of the web service to call.</param>
/// <returns>
/// The deserialized payload, or null when the status code is not OK or when reading or
/// deserializing the body fails (the failure is reported through FailComponent).
/// </returns>
private RootObject GetWebServiceResult(string wUrl)
{
    HttpWebRequest httpWReq = (HttpWebRequest)WebRequest.Create(wUrl);
    RootObject jsonResponse = null;

    // Dispose the response so the underlying connection is released even when reading
    // or deserializing fails. GetResponse itself stays outside the try block, so a
    // request failure still propagates to the caller exactly as before.
    using (HttpWebResponse httpWResp = (HttpWebResponse)httpWReq.GetResponse())
    {
        try
        {
            if (httpWResp.StatusCode == HttpStatusCode.OK)
            {
                string jsonString;
                using (Stream responseStream = httpWResp.GetResponseStream())
                using (StreamReader reader = new StreamReader(responseStream))
                {
                    // NOTE: this buffers the entire body in memory as one string; a very
                    // large feed needs a streaming parser instead.
                    jsonString = reader.ReadToEnd();
                }

                JavaScriptSerializer sr = new JavaScriptSerializer();
                // The default MaxJsonLength (2,097,152 characters) rejects larger payloads.
                sr.MaxJsonLength = int.MaxValue;
                jsonResponse = sr.Deserialize<RootObject>(jsonString);
            }
            else
            {
                FailComponent(httpWResp.StatusCode.ToString());
            }
        }
        catch (Exception e)
        {
            FailComponent(e.ToString());
        }
    }

    return jsonResponse;
}
/// <summary>
/// Reports <paramref name="errorMsg"/> by calling FireError on this component's metadata.
/// </summary>
/// <param name="errorMsg">Text passed to FireError as the error message.</param>
private void FailComponent(string errorMsg)
{
    IDTSComponentMetaData100 metadata = this.ComponentMetaData;

    // FireError requires an out flag; its value is not inspected afterwards.
    bool unusedFlag = false;
    metadata.FireError(
        1,
        "Error Getting Data From Webservice!",
        errorMsg,
        "",
        0,
        out unusedFlag);
}
}
/// <summary>
/// Carries a single identifier string.
/// </summary>
/// <remarks>
/// NOTE(review): if this is filled from a JSON key named "$oid", the key and the property
/// name differ - confirm the deserializer in use actually maps it.
/// </remarks>
public class Id
{
    /// <summary>Identifier value.</summary>
    public string oid
    {
        get;
        set;
    }
}
/// <summary>
/// Holds two string parameters: an original format and a size.
/// </summary>
/// <remarks>
/// NOTE(review): if the source JSON names the first key "originalFormat", it does not match
/// <c>origFormat</c> - confirm the mapping.
/// </remarks>
public class Params
{
    /// <summary>Original format value.</summary>
    public string origFormat
    {
        get;
        set;
    }

    /// <summary>Size value, kept as text.</summary>
    public string size
    {
        get;
        set;
    }
}
/// <summary>
/// Carries a numeric time value stored as text.
/// </summary>
/// <remarks>
/// NOTE(review): if this is filled from a JSON key named "$numberLong", the key and the
/// property name differ - confirm the deserializer in use actually maps it.
/// </remarks>
public class Time
{
    /// <summary>The value as a string; callers parse it themselves.</summary>
    public string numLong
    {
        get;
        set;
    }
}
/// <summary>
/// One impression record: scalar fields plus nested identifier, parameter and time objects.
/// </summary>
/// <remarks>
/// NOTE(review): if the source JSON uses the keys "_id", "params" and "time" for the nested
/// objects, they do not match <c>imId</c>, <c>imParams</c> and <c>imTime</c> - confirm the
/// mapping, otherwise these properties may stay null.
/// </remarks>
public class Impression
{
    /// <summary>Nested identifier object.</summary>
    public Id imId
    {
        get;
        set;
    }

    /// <summary>Token string.</summary>
    public string token
    {
        get;
        set;
    }

    /// <summary>User identifier string.</summary>
    public string userid
    {
        get;
        set;
    }

    /// <summary>Item identifier string.</summary>
    public string itemid
    {
        get;
        set;
    }

    /// <summary>View name.</summary>
    public string view
    {
        get;
        set;
    }

    /// <summary>Image GUID, kept as text.</summary>
    public string imageguid
    {
        get;
        set;
    }

    /// <summary>Byte count.</summary>
    public int bytes
    {
        get;
        set;
    }

    /// <summary>Format string.</summary>
    public string format
    {
        get;
        set;
    }

    /// <summary>Nested parameters object.</summary>
    public Params imParams
    {
        get;
        set;
    }

    /// <summary>Nested time object.</summary>
    public Time imTime
    {
        get;
        set;
    }
}
/// <summary>
/// Top-level deserialization target exposing the list of impressions.
/// </summary>
/// <remarks>
/// NOTE(review): this expects the payload to be an object with an "impressions" array;
/// confirm the feed really has that wrapper rather than a bare sequence of objects.
/// </remarks>
public class RootObject
{
    /// <summary>Impression records contained in the payload.</summary>
    public List<Impression> impressions
    {
        get;
        set;
    }
}
However, the StreamReader ReadToEnd method is where the exception gets thrown, as the size of the data is too large.
I tried changing that code to the following:
// Read the response through a streaming JSON reader instead of buffering the whole body
// into a string first.
Stream responseStream = httpWResp.GetResponseStream();
StreamReader reader = new StreamReader(responseStream);
using (var myjson = new JsonTextReader(reader))
{
JsonSerializer myserialization = new JsonSerializer();
// NOTE(review): this returns a List<RootObject>. If the enclosing method is declared to
// return a single RootObject, the return type and the deserialization target disagree -
// deserialize to the type the method actually returns, or change the method's signature.
return (List<RootObject>)myserialization.Deserialize(myjson, typeof(List<RootObject>));
}
This gives me an error that I cannot implicitly convert type List<RootObject> to RootObject. Does anyone see what I might be doing wrong that I can't make this conversion? I used this question to get around the OutOfMemory exception, but now it returns no deserialized items. Any advice would be much appreciated.
EDIT: The JSON data looks like the following:
{
"_id": {
"$oid": "000000000000000000000000"
},
"token": "00000000-0000-0000-0000-000000000000",
"userId": "username",
"itemId": "00000000-0000-0000-0000-000000000000",
"view": "view1",
"imageguid": "00000000-0000-0000-0000-000000000000",
"bytes": 1000,
"format": "PNG",
"params": {
"originalFormat": "tif",
"size": "50x50"
},
"time": {
"$numberLong": "1458748200000"
}
}
{
"_id": {
"$oid": "100000000000000000000000"
},
"token": "00000000-0000-0000-0000-000000000000",
"userId": "username",
"itemId": "00000000-0000-0000-0000-000000000000",
"view": "view1",
"imageguid": "00000000-0000-0000-0000-000000000000",
"bytes": 1000,
"format": "PNG",
"params": {
"originalFormat": "tif",
"size": "50x50"
},
"time": {
"$numberLong": "1458748200000"
}
}
"~1.7 GB" — it appears you're running out of memory. Solution? Don't do that. How? Better design. Like what? Tons of things. Give me one: process the file incrementally; don't try to load it all into memory. How? Depends. That's why people hire developers :/