I have troubles with my .NET web scraping software for http://mydataprovider.com/ service due to Memory Leak. How my app works: it checks 10000 proxy servers for LIVE status. Many proxies are broken so I have to filter them and to leave only active proxies (timeout response for live proxy is 3 seconds).
And I have to do it quickly (1 process starts ~80 threads).
I used WebClient class Firstly, but Timeout property does not effect right when I set it. I used HttpWebRequest Timeout, but it also did not help me with timeout.
I discovered at SO that I could use ThreadPool.RegisterWaitForSingleObject class for right Timeout processing (find below class HttpWebRequest_BeginGetResponse what I developed ) but it has troubles with memory leak and I did not find way how to fix it,
I tested in with .net 4.0 & 4.6.2 - behaviours are the same....
If any idea, help me, please.
Here is Code of class that is responsible for proxy activities:
using System;
using System.Net;
using System.IO;
using System.Text;
using System.Threading;
using System.Collections.Generic;
namespace ECommercePriceWebTaskManager
{
//read this http://stackoverflow.com/questions/1783031/c-sharp-asynchronous-operation
/*
BeginInvoke You tell the program what you need to be done (the delegate), what to call when it's done (callback), and what to do it with (state). You get back an IAsyncResult, which is the object that you need to give it back in order to receive your result. You can then do other stuff, or use the WaitHandle in the IAsyncResult to block until the operation's done.
Callback: When the asynchronous operation finishes, it will call this method, giving you the same IAsyncResult as before. At this point, you can retrieve your state object from it, or pass the IAsyncResult to EndInvoke.
EndInvoke: This function takes the IAsyncResult and finds the result of the operation. If it hasn't finished yet, it'll block until it does, which is why you usually call it inside the callback.
This is a pattern that's often used all over the framework, not just on function delegates. Things like database connections, sockets, etc. all often have Begin/End pairs.
*/
public class HttpWebRequest_BeginGetResponse_RequestState
{
public ManualResetEvent allDone = new ManualResetEvent(false);
public byte[] BufferRead;
public HttpWebRequest request;
public HttpWebResponse response;
public Stream responseStream;
public string Html;
public IAsyncResult ResponseIAsyncResult = null;
public IAsyncResult ReadIAsyncResult = null;
public List<Exception> Exceptions = new List<Exception>();
}
public class HttpWebRequest_BeginGetResponse
{
const int BUFFER_SIZE = 10240;
const int DefaultTimeout = 5 * 1000;
List<byte> _bytes = new List<byte>();
Encoding _encoding = Encoding.UTF8;
HttpWebRequest_BeginGetResponse_RequestState _requestState = new HttpWebRequest_BeginGetResponse_RequestState();
RegisteredWaitHandle RWH_GetResponse = null;
RegisteredWaitHandle RWH_Read = null;
public string Load(string url, WebProxy wp, Encoding en)
{
HttpWebRequest httpWebRequest = (HttpWebRequest)WebRequest.Create(url);
httpWebRequest.Proxy = wp;
string respUrl;
return Load(httpWebRequest, en, out respUrl);
}
public string Load(HttpWebRequest httpWebRequest, Encoding en, out string respUrl)
{
respUrl = "";
_encoding = en;
try
{
_requestState.request = httpWebRequest;
_requestState.ResponseIAsyncResult = (IAsyncResult)httpWebRequest.BeginGetResponse(new AsyncCallback(GetResponse), _requestState);
RWH_GetResponse = ThreadPool.RegisterWaitForSingleObject(_requestState.ResponseIAsyncResult.AsyncWaitHandle, new WaitOrTimerCallback(GetResponseTimeout), _requestState, DefaultTimeout, true);
_requestState.allDone.WaitOne();
if (_requestState.response != null)
{
if (_requestState.response.ResponseUri != null)
{
respUrl = _requestState.response.ResponseUri.AbsolutePath;
}
}
}
catch (Exception e)
{
AddException(e);
}
AbortAll();
if (_requestState.Exceptions.Count > 0)
{
throw new Exception("BeginGetResponse .... ");
//throw new AggregateException(_requestState.Exceptions);
}
return _requestState.Html;
}
private void GetResponseTimeout(object state, bool timedOut)
{
lock (this)
{
if (timedOut)
{
AbortAll();
AddException(new Exception("BeginGetResponse timeout (Internal)"));
_requestState.allDone.Set();
}
}
}
private void GetResponse(IAsyncResult asynchronousResult)
{
lock (this)
{
try
{
_requestState.response = (HttpWebResponse)_requestState.request.EndGetResponse(asynchronousResult);
if (_requestState.allDone.WaitOne(0, false))
{
AbortAll();
return;
}
_requestState.responseStream = _requestState.response.GetResponseStream();
_requestState.BufferRead = new byte[BUFFER_SIZE];
_requestState.ReadIAsyncResult = _requestState.responseStream.BeginRead(_requestState.BufferRead, 0, BUFFER_SIZE, new AsyncCallback(Read), _requestState);
RWH_Read = ThreadPool.RegisterWaitForSingleObject(_requestState.ReadIAsyncResult.AsyncWaitHandle, new WaitOrTimerCallback(ReadTimeout), _requestState, 1000, true);
return;
}
catch (Exception e)
{
AddException(e);
}
AbortAll();
_requestState.allDone.Set();
}
}
private void ReadTimeout(object state, bool timedOut)
{
lock (this)
{
if (timedOut)
{
AbortAll();
AddException(new Exception("ReadTimeoutCallback timeout (Internal)"));
_requestState.allDone.Set();
}
}
}
private void AbortAll()
{
try
{
if (_requestState.responseStream != null)
{
_requestState.responseStream.Close();
}
}
catch { }
try
{
if (_requestState.response != null)
{
_requestState.response.Close();
}
}
catch { }
try
{
if (_requestState.request != null)
{
_requestState.request.Abort();
}
}
catch { }
if (RWH_GetResponse != null)
RWH_GetResponse.Unregister(_requestState.ResponseIAsyncResult.AsyncWaitHandle);
if (RWH_Read != null)
RWH_Read.Unregister(_requestState.ReadIAsyncResult.AsyncWaitHandle);
}
void AddException(Exception ex)
{
_requestState.Exceptions.Add(ex);
}
private void Read(IAsyncResult asyncResult)
{
lock (this)
{
try
{
int read = _requestState.responseStream.EndRead(asyncResult);
if (_requestState.allDone.WaitOne(0, false))
{
AbortAll();
return;
}
if (read > 0)
{
for (var i = 0; i < read; i++)
{
_bytes.Add(_requestState.BufferRead[i]);
}
if (RWH_Read != null)
{
RWH_Read.Unregister(_requestState.ReadIAsyncResult.AsyncWaitHandle);
}
_requestState.ReadIAsyncResult = _requestState.responseStream.BeginRead(_requestState.BufferRead, 0, BUFFER_SIZE, new AsyncCallback(Read), _requestState);
RWH_Read = ThreadPool.RegisterWaitForSingleObject(_requestState.ReadIAsyncResult.AsyncWaitHandle, new WaitOrTimerCallback(ReadTimeout), _requestState, 1000, true);
return;
}
else
{
_requestState.Html = _encoding.GetString(_bytes.ToArray());
}
}
catch (Exception e)
{
AddException(e);
}
AbortAll();
_requestState.allDone.Set();
}
}
}
}
Sometimes I can get a strange exception, look at the next image, please:
How I use HttpWebRequest_BeginGetResponse class :
var hb = new HttpWebRequest_BeginGetResponse ();
hb.Load("http://your_url_here.com");
That code was called from ~80 threads in 1 process.

