Code development platform for open source projects from the European Union institutions

Skip to content
Snippets Groups Projects
Commit 9b12caef authored by Michael KRISPER's avatar Michael KRISPER
Browse files

correction of CSV File comment lines

parent 5058a7a0
No related branches found
No related tags found
No related merge requests found
......@@ -44,7 +44,7 @@ using TUGraz.VectoCommon.Utils;
namespace TUGraz.VectoCore.Utils
{
/// <summary>
/// Class for Reading and Writing VECTO CSV Files.
/// Class for Reading and Writing VECTO CSV Files.
/// </summary>
/// <remarks>
/// The following format applies to all CSV (Comma-separated values) Input Files used in VECTO:
......@@ -56,141 +56,139 @@ namespace TUGraz.VectoCore.Utils
/// max: id (name) [unit], id (name) [unit], ...
/// min: id,id,...
/// </remarks>
public class VectoCSVFile : LoggingObject
public static class VectoCSVFile
{
private static readonly Regex HeaderFilter = new Regex(@"\[.*?\]|\<|\>", RegexOptions.Compiled);
private const char Delimiter = ',';
private const char Comment = '#';
/// <summary>
/// Reads a CSV file which is stored in Vecto-CSV-Format.
/// Reads a CSV file which is stored in Vecto-CSV-Format.
/// </summary>
/// <param name="fileName"></param>
/// <param name="ignoreEmptyColumns"></param>
/// <param name="fullHeader"></param>
/// <exception cref="FileIOException"></exception>
/// <param name="fileName">the filename</param>
/// <param name="ignoreEmptyColumns">set true, if empty columns should be ignored. default: false.</param>
/// <param name="fullHeader">set true if column names should be preserved. Otherwise units are trimmed away. default: false.</param>
/// <returns>A DataTable which represents the CSV File.</returns>
public static DataTable Read(string fileName, bool ignoreEmptyColumns = false, bool fullHeader = false)
{
try {
return ReadData(File.ReadAllLines(fileName, Encoding.UTF8), ignoreEmptyColumns, fullHeader);
using (var fs = new FileStream(fileName, FileMode.Open)) {
return ReadStream(fs, ignoreEmptyColumns, fullHeader);
}
} catch (Exception e) {
Logger<VectoCSVFile>().Error(e);
throw new VectoException("File {0}: {1}", fileName, e.Message);
LogManager.GetLogger(typeof(VectoCSVFile).FullName).Error(e);
throw new VectoException("Error File {0}: {1}", fileName, e.Message);
}
}
/// <summary>
/// Reads a CSV file which is stored in Vecto-CSV-Format.
/// Reads a CSV file which is stored in Vecto-CSV-Format.
/// </summary>
/// <param name="stream"></param>
/// <param name="ignoreEmptyColumns"></param>
/// <exception cref="FileIOException"></exception>
/// <param name="stream">the stream to read</param>
/// <param name="ignoreEmptyColumns">set true, if empty columns should be ignored. default: false.</param>
/// <param name="fullHeader">set true if column names should be preserved. Otherwise units are trimmed away. default: false.</param>
/// <returns>A DataTable which represents the CSV File.</returns>
public static DataTable ReadStream(Stream stream, bool ignoreEmptyColumns = false)
public static DataTable ReadStream(Stream stream, bool ignoreEmptyColumns = false, bool fullHeader = false)
{
try {
return ReadData(ReadAllLines(stream), ignoreEmptyColumns);
return ReadData(ReadLines(stream), ignoreEmptyColumns, fullHeader);
} catch (Exception e) {
Logger<VectoCSVFile>().Error(e);
LogManager.GetLogger(typeof(VectoCSVFile).FullName).Error(e);
throw new VectoException("Failed to read stream: " + e.Message, e);
}
}
private static IEnumerable<string> ReadAllLines(Stream stream)
private static IEnumerable<string> ReadLines(Stream stream)
{
using (var reader = new StreamReader(stream)) {
string line;
while ((line = reader.ReadLine()) != null) {
yield return line;
}
using (var reader = new StreamReader(stream, Encoding.UTF8)) {
while (!reader.EndOfStream)
yield return reader.ReadLine();
}
}
private static DataTable ReadData(IEnumerable<string> data, bool ignoreEmptyColumns = false, bool fullHeader = false)
/// <summary>
///
/// </summary>
/// <param name="allLines"></param>
/// <param name="ignoreEmptyColumns"></param>
/// <param name="fullHeader"></param>
/// <returns></returns>
private static DataTable ReadData(IEnumerable<string> allLines, bool ignoreEmptyColumns = false,
bool fullHeader = false)
{
var linesEnumerable = RemoveComments(data);
var lines = linesEnumerable.GetEnumerator();
// trim, remove comments and filter empty lines
var lines = allLines
.Select(l => l.Trim())
.Select(l => l.Contains(Comment) ? l.Substring(0, l.IndexOf(Comment)) : l)
.Where(l => !string.IsNullOrWhiteSpace(l))
.GetEnumerator();
// start the enumerable
lines.MoveNext();
var validColumns = GetValidHeaderColumns(lines.Current, fullHeader).ToArray();
// add columns
var line = lines.Current;
if (!fullHeader) {
line = HeaderFilter.Replace(line, "");
}
double tmp;
var splittedColumns = line
.Split(Delimiter);
var columns = splittedColumns
.Select(col => col.Trim())
.Where(col => !double.TryParse(col, NumberStyles.Any, CultureInfo.InvariantCulture, out tmp))
.ToList();
if (validColumns.Length > 0) {
// Valid Columns found => header was valid => skip header line
if (columns.Count > 0) {
// first line was a valid header: advance to first data line
lines.MoveNext();
} else {
Logger<VectoCSVFile>().Warn("No valid Data Header found. Interpreting the first line as data line.");
LogManager.GetLogger(typeof(VectoCSVFile).FullName)
.Warn("No valid Data Header found. Interpreting the first line as data line.");
// set the validColumns to: {"0", "1", "2", "3", ...} for all columns in first line.
validColumns = GetColumns(lines.Current).Select((_, index) => index.ToString()).ToArray();
columns = splittedColumns.Select((_, index) => index.ToString()).ToList();
}
var table = new DataTable();
foreach (var col in validColumns) {
foreach (var col in columns) {
table.Columns.Add(col);
}
var i = 1;
// read data into table
var i = 0;
do {
var line = lines.Current;
i++;
line = lines.Current;
var cells = line.Split(Delimiter);
if (!ignoreEmptyColumns && cells.Length != table.Columns.Count) {
var cells = line.Split(Delimiter).Select(s => s.Trim()).ToArray();
if (cells.Length != table.Columns.Count && !ignoreEmptyColumns) {
throw new CSVReadException(
string.Format("Line {0}: The number of values is not correct. Expected {1} Columns, Got {2} Columns", i,
table.Columns.Count, cells.Length));
}
try {
// ReSharper disable once CoVariantArrayConversion
table.Rows.Add(cells);
} catch (InvalidCastException e) {
throw new CSVReadException(
string.Format("Line {0}: The data format of a value is not correct. {1}", i, e.Message), e);
}
i++;
} while (lines.MoveNext());
return table;
}
/// <summary>
/// Returns the header columns of the given line which are valid column names.
/// A cell counts as a valid column name if it cannot be parsed as a plain number
/// (a line of numbers is treated as data, not as a header).
/// </summary>
/// <param name="line">the first non-comment line of the file</param>
/// <param name="fullHeader">if true, units and angle brackets are kept in the column names</param>
private static IEnumerable<string> GetValidHeaderColumns(string line, bool fullHeader = false)
{
	double parsed;
	var nonNumericColumns = GetColumns(line, fullHeader)
		.Where(col => !double.TryParse(col, NumberStyles.Any, CultureInfo.InvariantCulture, out parsed))
		.ToArray();
	return nonNumericColumns;
}
/// <summary>
/// Splits a header line into its trimmed column cells.
/// Unless <paramref name="fullHeader"/> is set, bracketed units ("[...]") are removed
/// and angle brackets are stripped, e.g. "v [km/h]" becomes "v" and "&lt;t&gt;" becomes "t".
/// </summary>
/// <param name="line">the raw header line</param>
/// <param name="fullHeader">if true, the cells are returned as-is (only trimmed)</param>
private static IEnumerable<string> GetColumns(string line, bool fullHeader = false)
{
	if (!fullHeader) {
		// strip the unit part and the angle-bracket markers from each column name
		line = Regex.Replace(line, @"\[.*?\]", "").Replace("<", "").Replace(">", "");
	}
	return line.Split(Delimiter).Select(cell => cell.Trim());
}
/// <summary>
/// Lazily filters the input lines: everything from the comment character ('#') to the
/// end of a line is removed, and lines that are empty (or become empty after comment
/// removal) are dropped entirely.
/// </summary>
/// <param name="lines">the raw lines of the CSV file</param>
/// <returns>the lines with comments and blank lines removed</returns>
private static IEnumerable<string> RemoveComments(IEnumerable<string> lines)
{
	foreach (var line in lines) {
		var index = line.IndexOf(Comment);
		// BUGFIX: cut BEFORE the comment character. The previous code used
		// Substring(0, index + 1), which kept the '#' itself, so a pure comment
		// line was yielded as "#" instead of being dropped as empty.
		var result = index == -1 ? line : line.Substring(0, index);
		if (!string.IsNullOrWhiteSpace(result)) {
			yield return result;
		}
	}
}
/// <summary>
/// Writes the datatable to the csv file.
/// Uses the column caption as header (with fallback to column name) for the csv header.
/// Writes the datatable to the csv file.
/// Uses the column caption as header (with fallback to column name) for the csv header.
/// </summary>
/// <param name="fileName">Path to the file.</param>
/// <param name="table">The Datatable.</param>
public static void Write(string fileName, DataTable table)
{
var stream = new StreamWriter(new FileStream(fileName, FileMode.Create), Encoding.UTF8);
Write(stream, table);
stream.Close();
using (var sw = new StreamWriter(new FileStream(fileName, FileMode.Create), Encoding.UTF8))
Write(sw, table);
}
/// <summary>
......@@ -217,9 +215,9 @@ namespace TUGraz.VectoCore.Utils
var showUnit = (bool?)col.ExtendedProperties["showUnit"];
var si = item as SI;
return (si != null
return si != null
? si.ToOutputFormat(decimals, outputFactor, showUnit)
: string.Format(CultureInfo.InvariantCulture, "{0}", item));
: string.Format(CultureInfo.InvariantCulture, "{0}", item);
});
writer.WriteLine(Delimiter.ToString().Join(formattedList));
......
using System.Data;
using System.IO;
using System.Linq;
using NUnit.Framework;
using TUGraz.VectoCommon.Utils;
using TUGraz.VectoCore.Utils;
namespace TUGraz.VectoCore.Tests.FileIO
{
[TestFixture]
public class VectoCSVFileTest
{
	/// <summary>Asserts that the table has exactly the expected column names, in order.</summary>
	private static void AssertColumns(DataTable table, params string[] expected)
	{
		Assert.AreEqual(expected.Length, table.Columns.Count);
		Assert.IsTrue(expected.SequenceEqual(table.Columns.Cast<DataColumn>().Select(c => c.ColumnName)));
	}

	/// <summary>Asserts that a single row contains exactly the expected cell values.</summary>
	private static void AssertRow(DataRow row, params string[] expected)
	{
		Assert.IsTrue(expected.SequenceEqual(row.ItemArray));
	}

	[Test]
	public void VectoCSVFile_Read()
	{
		var result = VectoCSVFile.Read(@"TestData\test.csv");

		AssertColumns(result, "a", "b", "c");
		Assert.AreEqual(2, result.Rows.Count);
		AssertRow(result.Rows[0], "1", "2", "3");
		AssertRow(result.Rows[1], "4", "5", "6");
	}

	[Test]
	public void VectoCSVFile_Read_RealLossMap()
	{
		// header units ("[rpm]", "[Nm]") must be trimmed away from the column names
		var result = VectoCSVFile.Read(@"TestData\Components\Axle.vtlm");

		AssertColumns(result, "Input Speed", "Input Torque", "Torque Loss");
		Assert.AreEqual(285, result.Rows.Count);
	}

	[Test]
	public void VectoCSVFile_ReadStream_Normal()
	{
		var input = "a,b,c\n1,2,3\n4,5,6".GetStream();

		var result = VectoCSVFile.ReadStream(input);

		AssertColumns(result, "a", "b", "c");
		Assert.AreEqual(2, result.Rows.Count);
		AssertRow(result.Rows[0], "1", "2", "3");
		AssertRow(result.Rows[1], "4", "5", "6");
	}

	[Test]
	public void VectoCSVFile_ReadStream_No_Header()
	{
		// without a header line, columns are auto-named "0", "1", "2", ...
		// and the first line is interpreted as data
		var input = "1,2,3\n4,5,6".GetStream();

		var result = VectoCSVFile.ReadStream(input);

		AssertColumns(result, "0", "1", "2");
		Assert.AreEqual(2, result.Rows.Count);
		AssertRow(result.Rows[0], "1", "2", "3");
		AssertRow(result.Rows[1], "4", "5", "6");
	}

	[Test]
	public void VectoCSVFile_ReadStream_Comments()
	{
		// full-line and inline comments (starting with '#') must be ignored
		var input = @"#a,b,c
#21,22,23
#674,95,96
a,b,c
#9,8,7
1,2,3
4,5,6".GetStream();

		var result = VectoCSVFile.ReadStream(input);

		AssertColumns(result, "a", "b", "c");
		Assert.AreEqual(2, result.Rows.Count);
		AssertRow(result.Rows[0], "1", "2", "3");
		AssertRow(result.Rows[1], "4", "5", "6");
	}

	[Test]
	public void VectoCSVFile_Write_Filename()
	{
		const string fileName = "out_test.csv";
		if (File.Exists(fileName)) {
			File.Delete(fileName);
		}

		var data = new DataTable();
		data.Columns.Add("a");
		data.Columns.Add("b");
		data.Rows.Add("1", "2");

		VectoCSVFile.Write(fileName, data);

		// NOTE(review): expects CRLF line endings — assumes the writer always emits "\r\n"
		Assert.AreEqual("a,b\r\n1,2\r\n", File.ReadAllText(fileName));
	}

	[Test]
	public void VectoCSVFile_Write_StreamWriter()
	{
		var data = new DataTable();
		data.Columns.Add("a");
		data.Columns.Add("b");
		var newRow = data.NewRow();
		newRow.ItemArray = new[] { "1", "2" };
		data.Rows.Add(newRow);

		using (var buffer = new MemoryStream()) {
			using (var writer = new StreamWriter(buffer)) {
				VectoCSVFile.Write(writer, data);
				writer.Flush();

				buffer.Position = 0;
				using (var reader = new StreamReader(buffer))
					Assert.AreEqual("a,b\r\n1,2\r\n", reader.ReadToEnd());
			}
		}
	}
}
}
\ No newline at end of file
Input Speed [rpm],Input Torque [Nm],Torque Loss [Nm],Eff [-]
0,-2500,77.5
Input Speed [rpm],Input Torque [Nm],Torque Loss [Nm] # this is a comment
# rpm, Nm, Nm
# this is a comment
0,-2500,77.5 # this is a comment
0,-1500,62.5
0,-500,47.5
0,500,47.5
......@@ -7,6 +9,7 @@ Input Speed [rpm],Input Torque [Nm],Torque Loss [Nm],Eff [-]
0,2500,77.5
0,3500,92.5
0,4500,107.5
# this is a comment
0,5500,122.5
0,6500,137.5
0,7500,152.5
......
a,b,c
1,2,3
4,5,6
\ No newline at end of file
......@@ -77,6 +77,7 @@
<ItemGroup>
<Compile Include="Dummy\EngineFLDTest.cs" />
<Compile Include="Exceptions\ExceptionTests.cs" />
<Compile Include="FileIO\VectoCSVFileTest.cs" />
<Compile Include="FileIO\JsonTest.cs" />
<Compile Include="FileIO\SimulationDataReaderTest.cs" />
<Compile Include="GraphProgram.cs" />
......@@ -1312,6 +1313,9 @@
<None Include="TestData\Results\Integration\job_1-Gear-Test-dist.vmod">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
<None Include="TestData\test.csv">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
</ItemGroup>
<ItemGroup>
<None Include="TestData\Cycles\EngineOnly_FullLoad.vdri">
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment