I would like to "clean" a CSV file:
- deleting empty rows
- deleting empty columns
The rows or columns are not completely empty, they have, for example: "","","","","","","","","","","","","","", (in a row form) OR "","","","","","","","","","", (in a row form) OR
"",
"",
"",
"",
"",
"",
"",
(in a columns form)
These rows or columns can be anywhere in the CSV file.
What I have so far:
private void button1_Click(object sender, EventArgs e)
{
string sourceFile = @"XXXXX.xlsx";
string worksheetName = "Sample";
string targetFile = @"C:\Users\xxxx\xls_test\XXXX.csv";
// Creates the CSV file based on the XLS file
ExcelToCSVCoversion(sourceFile, worksheetName, targetFile);
// Manipulate the CSV: Clean empty rows
DeleteEmptyRoadFromCSV(targetFile);
}
static void ExcelToCSVCoversion(string sourceFile, string worksheetName,
string targetFile)
{
string connectionString = @"Provider =Microsoft.ACE.OLEDB.12.0;Data Source=" + sourceFile
+ @";Extended Properties=""Excel 12.0 Xml;HDR=YES""";
OleDbConnection connection = null;
StreamWriter writer = null;
OleDbCommand command = null;
OleDbDataAdapter dataAdapter = null;
try
{
// Represents an open connection to a data source.
connection = new OleDbConnection(connectionString);
connection.Open();
// Represents a SQL statement or stored procedure to execute
// against a data source.
command = new OleDbCommand("SELECT * FROM [" + worksheetName + "$]",
connection);
// Specifies how a command string is interpreted.
command.CommandType = CommandType.Text;
// Implements a TextWriter for writing characters to the output stream
// in a particular encoding.
writer = new StreamWriter(targetFile);
// Represents a set of data commands and a database connection that are
// used to fill the DataSet and update the data source.
dataAdapter = new OleDbDataAdapter(command);
DataTable dataTable = new DataTable();
dataAdapter.Fill(dataTable);
for (int row = 0; row < dataTable.Rows.Count; row++)
{
string rowString = "";
for (int column = 0; column < dataTable.Columns.Count; column++)
{
rowString += "\"" + dataTable.Rows[row][column].ToString() + "\",";
}
writer.WriteLine(rowString);
}
Console.WriteLine();
Console.WriteLine("The excel file " + sourceFile + " has been converted " +
"into " + targetFile + " (CSV format).");
Console.WriteLine();
}
catch (Exception exception)
{
Console.WriteLine(exception.ToString());
Console.ReadLine();
}
finally
{
if (connection.State == ConnectionState.Open)
{
connection.Close();
}
connection.Dispose();
command.Dispose();
dataAdapter.Dispose();
writer.Close();
writer.Dispose();
}
}
static void DeleteEmptyRoadFromCSV(string fileName)
{
//string nonEmptyLines = @"XXXX.csv";
var nonEmptyLines = File.ReadAllLines(fileName)
.Where(x => !x.Split(',')
.Take(2)
.Any(cell => string.IsNullOrWhiteSpace(cell))
// use `All` if you want to ignore only if both columns are empty.
).ToList();
File.WriteAllLines(fileName, nonEmptyLines);
}
Finally, I tried to use the ideas from: Remove Blank rows from csv c# . But my ouput is not changing at all.
Any help is welcome!
Thank you.
File.ReadAllLinesis probably dangerous unless you know for sure you are dealing with small files.