I have written a console app that gets the top 50 records from Emp_Raw (which contains empname, empdetaillink, inserteddate, parsedstatus). For each record it should follow the employee's link on the client website, crawl through the HTML, get the employee's details, and save them to our DB. I run this job using Task Scheduler every minute. I was able to do that successfully, but my manager wants it to be multi-threaded to make it faster. Below is what I tried; the problem is that I am inserting the last record 50 times instead of 50 unique records.
namespace Emp_Detail
{
/// <summary>
/// One employee work item: the employee's name and the link to that
/// employee's detail page.
/// </summary>
/// <remarks>
/// This is a mutable reference type. Adding the same instance to a collection
/// several times and then changing it changes every one of those entries,
/// because they all refer to the same object. Create a new instance per record.
/// </remarks>
class detEmp
{
    /// <summary>Creates an instance whose name and link are both <c>null</c>.</summary>
    public detEmp()
    {
    }

    /// <summary>Creates an instance with the given name and detail link.</summary>
    /// <param name="empname">Employee name; stored as given.</param>
    /// <param name="empdetlink">Link to the employee's detail page; stored as given.</param>
    public detEmp(string empname, string empdetlink)
    {
        EmpName = empname;
        EmpDetLink = empdetlink;
    }

    /// <summary>Employee name.</summary>
    public string EmpName { get; set; }

    /// <summary>Link to the employee's detail page.</summary>
    public string EmpDetLink { get; set; }

    // The accessor methods below are kept so existing callers keep compiling;
    // they simply forward to the properties above.

    /// <summary>Returns <see cref="EmpName"/>.</summary>
    public string getempname()
    {
        return EmpName;
    }

    /// <summary>Sets <see cref="EmpName"/>.</summary>
    /// <param name="empname">New employee name.</param>
    public void setempname(string empname)
    {
        EmpName = empname;
    }

    /// <summary>Returns <see cref="EmpDetLink"/>.</summary>
    public string getempdetlink()
    {
        return EmpDetLink;
    }

    /// <summary>Sets <see cref="EmpDetLink"/>.</summary>
    /// <param name="empdetlink">New detail-page link.</param>
    public void setempdetlink(string empdetlink)
    {
        EmpDetLink = empdetlink;
    }
}
class Program
{
static void Main(string[] args)
{
detEmp detail = new detEmp() { };
List<detEmp> d = new List<detEmp>();
Logger.info("Start Emp details at: "+ DateTime.Now);
try
{
using (SqlConnection connection = new SqlConnection(GetConnectionString()))
{
connection.Open();
using (SqlCommand sqlCommandReader = connection.CreateCommand())
{
sqlCommandReader.CommandText = @"SELECT top 50 empname, empDetailLink, ParsedStatus,InsertedDate FROM [dbo].[Emp_Raw] (nolock)
Where ParsedStatus = 0 and InsertedDate between '2014-07-23 08:30:30.000' and '2014-07-23 08:35:30.000'
order by InsertedDate";
SqlDataReader sqlDataReader = sqlCommandReader.ExecuteReader();
if (sqlDataReader.HasRows)//completed
{
int count = 0;
while (sqlDataReader.Read())
{
string DetailLink = sqlDataReader["EmpDetailLink"] == null ? string.Empty : sqlDataReader["EmpDetailLink"].ToString();
string empname = sqlDataReader["empname"] == null ? string.Empty : sqlDataReader["empname"].ToString();
if (!string.IsNullOrEmpty(empname))
{
detail.setempname(empname);
detail.setempdetlink(DetailLink);
d.Add(detail);
}
}
}
}
}
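//When I print DetailLink in a for loop, I see only the last record, repeated 50 times -- not sure why.
//NOTE(review): the single detEmp instance `detail` is created once before the loop and then mutated and re-added on every row, so the list holds many references to the same object; creating a new detEmp per row gives distinct entries.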
var options = new ParallelOptions { MaxDegreeOfParallelism = Environment.ProcessorCount * 1 }; //Number of cores you have, limits the thread count
Parallel.ForEach(d, options, i =>
{
//This function pulls the employee info from the client website and puts it into our DB using HtmlAgilityPack -- tested thoroughly, works fine.
InsertDetails(i.getempname(), i.getempdetlink());
Thread.Sleep(50);
});
using (SqlConnection connection = new SqlConnection(GetConnectionString()))
{
connection.Open();
using (SqlCommand sqlCommandReader = connection.CreateCommand())
{
sqlCommandReader.CommandText = ";WITH CTE AS (SELECT TOP 50 * FROM [dbo].[Emp_Raw] (nolock) where parsedstatus=0 and InsertedDate between '2014-07-23 08:30:30.000' and '2014-07-23 08:35:30.000' ORDER BY InsertedDate) UPDATE CTE SET ParsedStatus=1";
sqlCommandReader.ExecuteNonQuery();
}
}
}
Thread.Sleep.