I need to insert about 500k - 1 million data lines from a CSV file. The time of getting the reports list and insert to database by using SqlBulkCopy is very good (10k ~200ms). However, the time to check data validation and add elements to DataTable is very slow (10k data ~ 2 minute). I tried many ways but it didn't get any good results...
What should I do to improve my performance in this situation?
public ActionResult ConvertFile(HttpPostedFileBase postedFile, FormCollection formCollection)
{
Stopwatch st = new Stopwatch();
st.Start();
string message = "";
ConcurrentBag<ImportFailed> listFailed = new ConcurrentBag<ImportFailed>();
var semesterName = formCollection["SemesterName"];
var semester = SemestersDAO.GetSemesterByName(semesterName);
var campusName = formCollection["CampusName"];
var campus = CampusDAO.GetCampusByName(campusName);
try
{
if (postedFile != null)
{
string path = Server.MapPath("~/Uploads/");
if (!Directory.Exists(path))
{
Directory.CreateDirectory(path);
}
string filePath = path + Path.GetFileName(postedFile.FileName);
postedFile.SaveAs(filePath);
// Check Header of the file csv if it valid
var validateHeadersRs = ImportHelper.IsValidHeader(filePath, new CSVStudent().Headers);
if (validateHeadersRs != null)
{
message = $"Error: Missing headers: {validateHeadersRs}";
ViewBag.Error = message;
return View();
}
string conString = string.Empty;
conString = string.Format(conString, filePath);
conString = ConfigurationManager.ConnectionStrings["ConnectionString"].ConnectionString;
// Read CSV
var reports = ImportHelper.ReadData<CSVStudent>(filePath, modified: false).ToList();
st.Stop();
var timeToGetList = st.ElapsedMilliseconds;
st.Restart();
DataTable studentDt = new DataTable();
studentDt.Columns.AddRange(new DataColumn[5]
{
new DataColumn("Email", typeof(string)),
new DataColumn("Roll", typeof(string)),
new DataColumn("Full_Name", typeof(string)),
new DataColumn("Campus_ID", typeof(string)),
new DataColumn("Semester_ID", typeof(string))
});
DataTable subjectStudentDt = new DataTable();
subjectStudentDt.Columns.AddRange(new DataColumn[3]
{
new DataColumn("Subject_ID", typeof(string)),
new DataColumn("Roll", typeof(string)),
new DataColumn("Semester_ID", typeof(string))
});
int countLine = 1;
// ProcessData(reports, studentDt, subjectStudentDt, listFailed, semester, campus, countLine);
# region Parallel
Parallel.ForEach(reports, report =>
{
var (isValid, errorMessage) = validateEmail(report.Email);
if (!isValid)
{
listFailed.Add(new ImportFailed
{
Row = countLine.ToString(),
Error = errorMessage
});
}
else if (report.IsAnyEmpty())
{
listFailed.Add(new ImportFailed
{
Row = countLine.ToString(),
Error = "Data can't be empty!"
});
}
else if (SubjectsDAO.IsExistsSubjectStudent(report.Subject_ID, report.Roll,
semester.Semester_ID))
{
listFailed.Add(new ImportFailed
{
Row = countLine.ToString(),
Error =
$"Student {report.Roll} and subject {report.Subject_ID} is already exist"
});
}
else
{
lock (studentDt)
{
if (studentDt.AsEnumerable().All(row => report.Roll != row.Field<string>("Roll")))
{
DataRow studentRow = studentDt.NewRow();
studentRow["Email"] = report.Email;
studentRow["Roll"] = report.Roll;
studentRow["Full_Name"] = report.Full_Name;
studentRow["Campus_ID"] = campus.Campus_ID;
studentRow["Semester_ID"] = semester.Semester_ID;
studentDt.Rows.Add(studentRow);
}
}
lock (subjectStudentDt)
{
DataRow row2 = subjectStudentDt.NewRow();
row2["Subject_ID"] = report.Subject_ID;
row2["Roll"] = report.Roll;
row2["Semester_ID"] = semester.Semester_ID;
subjectStudentDt.Rows.Add(row2);
}
}
Interlocked.Increment(ref countLine);
});
#endregion
int subStuBefore = Subject_StudentDAO.CountSubject_Student();
int countBefore = StudentsDAO.CountNoStudent();
st.Stop();
var timeBeforeBulk = st.ElapsedMilliseconds;
st.Restart();
if (listFailed.Count <= 0)
{
using (var bulkCopy = new SqlBulkCopy(conString, SqlBulkCopyOptions.Default))
{
// Set options for SqlBulkCopy
bulkCopy.BatchSize = 10000;
bulkCopy.BulkCopyTimeout = 0;
bulkCopy.DestinationTableName = "Student";
foreach (DataColumn column in studentDt.Columns)
{
bulkCopy.ColumnMappings.Add(column.ColumnName, column.ColumnName);
}
bulkCopy.WriteToServer(studentDt);
bulkCopy.DestinationTableName = "Subject_Student";
foreach (DataColumn column in subjectStudentDt.Columns)
{
bulkCopy.ColumnMappings.Add(column.ColumnName, column.ColumnName);
}
bulkCopy.WriteToServer(subjectStudentDt);
}
int subStuAfter = Subject_StudentDAO.CountSubject_Student();
int countAfter = StudentsDAO.CountNoStudent();
int countAdd = countAfter - countBefore;
int countFail = studentDt.Rows.Count - countAdd;
int addSubStu = subStuAfter - subStuBefore;
st.Stop();
message = $"Import success {countAdd} student(s), fail {countFail} student(s) and success {addSubStu} subject student(s) in {timeToGetList} - {timeBeforeBulk} - {st.ElapsedMilliseconds} ms";
ViewBag.Message = message;
}
else
{
ViewBag.Error = "There's some error in your file. Please check errors below, correct your file and import again!";
}
}
}
catch (Exception e)
{
message = e.Message;
ViewBag.Error = message;
}
return View(listFailed);
}