0

I have written a console app that gets the top 50 records from Emp_Raw (which contains empname, empdetaillink, inserteddate, parsedstatus). For each record, it should go to the employee's link on the client website, crawl through the HTML to get the employee's details, and save them to our DB. I run this job using Task Scheduler every minute. I was able to do that successfully, but my manager wants it to be multi-threaded to make it faster. Below is what I tried; the problem is that I am inserting the last record 50 times instead of 50 unique records.

namespace Emp_Detail
{
    /// <summary>
    /// Simple mutable holder pairing an employee name with that employee's detail link.
    /// Both values are exposed through explicit get/set methods.
    /// </summary>
    class detEmp
    {
        // Backing storage for the accessor methods below.
        private string _name;
        private string _detailLink;

        /// <summary>Creates an instance with both values left unset (null).</summary>
        public detEmp()
        {
        }

        /// <summary>Creates an instance with both values supplied up front.</summary>
        /// <param name="empname">Employee name to store.</param>
        /// <param name="empdetlink">Employee detail link to store.</param>
        public detEmp(string empname, string empdetlink)
        {
            _name = empname;
            _detailLink = empdetlink;
        }

        /// <summary>Returns the stored employee name.</summary>
        public string getempname()
        {
            return _name;
        }

        /// <summary>Replaces the stored employee name.</summary>
        public void setempname(string empname)
        {
            _name = empname;
        }

        /// <summary>Returns the stored employee detail link.</summary>
        public string getempdetlink()
        {
            return _detailLink;
        }

        /// <summary>Replaces the stored employee detail link.</summary>
        public void setempdetlink(string empdetlink)
        {
            _detailLink = empdetlink;
        }
    }
    class Program
    {
       static void Main(string[] args)
        {
            detEmp detail = new detEmp() { };
            List<detEmp> d = new List<detEmp>();
            Logger.info("Start Emp details at: "+ DateTime.Now);
            try
            {
                using (SqlConnection connection = new SqlConnection(GetConnectionString()))
                {
                    connection.Open();
                    using (SqlCommand sqlCommandReader = connection.CreateCommand())
                    {
                        sqlCommandReader.CommandText = @"SELECT top 50 empname, empDetailLink, ParsedStatus,InsertedDate  FROM [dbo].[Emp_Raw] (nolock) 
   Where ParsedStatus = 0 and InsertedDate between '2014-07-23 08:30:30.000' and '2014-07-23 08:35:30.000'
                                                     order by InsertedDate";
                        SqlDataReader sqlDataReader = sqlCommandReader.ExecuteReader();
                        if (sqlDataReader.HasRows)//completed
                        {
                            int count = 0;
                            while (sqlDataReader.Read())
                            {
   string DetailLink = sqlDataReader["EmpDetailLink"] == null ? string.Empty : sqlDataReader["EmpDetailLink"].ToString();
                                string empname = sqlDataReader["empname"] == null ? string.Empty : sqlDataReader["empname"].ToString();

                                if (!string.IsNullOrEmpty(empname))
                                {
                                   detail.setempname(empname);
                                   detail.setempdetlink(DetailLink);
                                    d.Add(detail);              
                                }                                         
                           }
                        }
                      }
                }
               //when I print DetailLink using for loop I can see only last record inserted 50 times--not sure why      

                var options = new ParallelOptions { MaxDegreeOfParallelism = Environment.ProcessorCount * 1 }; //Number of cores you have, limits the thread count
                Parallel.ForEach(d, options, i =>
                {
                //THis is a function that pulls out emp info from client website and puts into our db using htmlagilitypack--tested thoroughly works fine
                    InsertDetails(i.getempname(), i.getempdetlink());
                    Thread.Sleep(50);
                });

                using (SqlConnection connection = new SqlConnection(GetConnectionString()))
                {
                    connection.Open();
                    using (SqlCommand sqlCommandReader = connection.CreateCommand())
                    {
                        sqlCommandReader.CommandText = ";WITH CTE AS (SELECT TOP 50 * FROM [dbo].[Emp_Raw] (nolock) where parsedstatus=0  and InsertedDate between '2014-07-23 08:30:30.000' and '2014-07-23 08:35:30.000' ORDER BY InsertedDate) UPDATE CTE SET ParsedStatus=1";
                        sqlCommandReader.ExecuteNonQuery();

                    }

                }
            }
6
  • 2
    What version of C# are you using? And BTW, by convention, class names, getters, setters, and methods start with an upper-case letter. Commented Jul 26, 2014 at 8:28
  • Can you show the original version that worked correctly, for comparison? Commented Jul 26, 2014 at 8:28
  • In the original code, I didn't have any threading code, and the loop body was simply: if (!string.IsNullOrEmpty(empname)) { InsertDetails(empname,DetailLink); } Commented Jul 26, 2014 at 8:30
  • I am using the .NET 4.5 framework. Commented Jul 26, 2014 at 8:31
  • 1
    You want to make your code faster? Get rid of Thread.Sleep. Commented Jul 26, 2014 at 8:41

1 Answer 1

2

You're creating a single detail object;

detEmp detail = new detEmp() { };

...and then reassigning the contents in the SQL reader loop;

while (sqlDataReader.Read())
{
...
    if (!string.IsNullOrEmpty(empname))
    {
       detail.setempname(empname);
       detail.setempdetlink(DetailLink);
        d.Add(detail);              
    }                                         

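In other words, you assign data to detail, add it to the collection, change the content of detail, add it again... Because detEmp is a class (a reference type), the list stores a reference to the object rather than a copy of it. The problem is that you add the same object to the list 50 times, so in the end every entry in the list shows the last row read.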

What you could do is simply create a new detail object for each iteration;

if (!string.IsNullOrEmpty(empname))
{
   detEmp detail = new detEmp() { };
   detail.setempname(empname);
   detail.setempdetlink(DetailLink);
    d.Add(detail);              
}                                         

...which will give you 50 different objects in the list, each with their separate content.

Sign up to request clarification or add additional context in comments.

Comments

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.