From 07e9257ceb5c87adca35e5f9c788432bad01e2d4 Mon Sep 17 00:00:00 2001 From: MiuMiuMiue Date: Wed, 15 Jan 2025 18:28:34 -0800 Subject: [PATCH 1/3] make dummy headers when csv file is empty and hasHeader set to True --- .../amber/operator/source/scan/csv/CSVScanSourceOpDesc.scala | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/core/workflow-operator/src/main/scala/edu/uci/ics/amber/operator/source/scan/csv/CSVScanSourceOpDesc.scala b/core/workflow-operator/src/main/scala/edu/uci/ics/amber/operator/source/scan/csv/CSVScanSourceOpDesc.scala index d4e1c2ef15..2e268c031c 100644 --- a/core/workflow-operator/src/main/scala/edu/uci/ics/amber/operator/source/scan/csv/CSVScanSourceOpDesc.scala +++ b/core/workflow-operator/src/main/scala/edu/uci/ics/amber/operator/source/scan/csv/CSVScanSourceOpDesc.scala @@ -90,10 +90,12 @@ class CSVScanSourceOpDesc extends ScanSourceOpDesc { val attributeTypeList: Array[AttributeType] = inferSchemaFromRows( data.iterator.asInstanceOf[Iterator[Array[Any]]] ) - val header: Array[String] = + var header: Array[String] = if (hasHeader) parser.getContext.headers() else (1 to attributeTypeList.length).map(i => "column-" + i).toArray + if (header == null) header = (1 to attributeTypeList.length).map(i => "column-" + i).toArray + header.indices.foldLeft(Schema()) { (schema, i) => schema.add(header(i), attributeTypeList(i)) } From 2447a379b177c4ad674c8b2b537e0a78ba754bb3 Mon Sep 17 00:00:00 2001 From: MiuMiuMiue Date: Thu, 16 Jan 2025 14:42:48 -0800 Subject: [PATCH 2/3] code simplification --- .../source/scan/csv/CSVScanSourceOpDesc.scala | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/core/workflow-operator/src/main/scala/edu/uci/ics/amber/operator/source/scan/csv/CSVScanSourceOpDesc.scala b/core/workflow-operator/src/main/scala/edu/uci/ics/amber/operator/source/scan/csv/CSVScanSourceOpDesc.scala index 2e268c031c..4edbea3774 100644 --- a/core/workflow-operator/src/main/scala/edu/uci/ics/amber/operator/source/scan/csv/CSVScanSourceOpDesc.scala +++ b/core/workflow-operator/src/main/scala/edu/uci/ics/amber/operator/source/scan/csv/CSVScanSourceOpDesc.scala @@ -90,11 +90,14 @@ class CSVScanSourceOpDesc extends ScanSourceOpDesc { val attributeTypeList: Array[AttributeType] = inferSchemaFromRows( data.iterator.asInstanceOf[Iterator[Array[Any]]] ) - var header: Array[String] = - if (hasHeader) parser.getContext.headers() - else (1 to attributeTypeList.length).map(i => "column-" + i).toArray - if (header == null) header = (1 to attributeTypeList.length).map(i => "column-" + i).toArray + val header: Array[String] = Option(parser.getContext.headers()) + .getOrElse((1 to attributeTypeList.length).map(i => s"column-$i").toArray) +// var header: Array[String] = +// if (hasHeader) parser.getContext.headers() +// else (1 to attributeTypeList.length).map(i => "column-" + i).toArray +// +// if (header == null) header = (1 to attributeTypeList.length).map(i => "column-" + i).toArray header.indices.foldLeft(Schema()) { (schema, i) => schema.add(header(i), attributeTypeList(i)) From 746388c281af3d178689e4f3917c8c8d38f1d1ac Mon Sep 17 00:00:00 2001 From: MiuMiuMiue Date: Thu, 16 Jan 2025 18:19:19 -0800 Subject: [PATCH 3/3] condition added --- .../source/scan/csv/CSVScanSourceOpDesc.scala | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/core/workflow-operator/src/main/scala/edu/uci/ics/amber/operator/source/scan/csv/CSVScanSourceOpDesc.scala b/core/workflow-operator/src/main/scala/edu/uci/ics/amber/operator/source/scan/csv/CSVScanSourceOpDesc.scala index 4edbea3774..ac9538c25b 100644 --- a/core/workflow-operator/src/main/scala/edu/uci/ics/amber/operator/source/scan/csv/CSVScanSourceOpDesc.scala +++ b/core/workflow-operator/src/main/scala/edu/uci/ics/amber/operator/source/scan/csv/CSVScanSourceOpDesc.scala @@ -91,13 +91,11 @@ class CSVScanSourceOpDesc extends ScanSourceOpDesc { data.iterator.asInstanceOf[Iterator[Array[Any]]] ) - val header: Array[String] = Option(parser.getContext.headers()) - .getOrElse((1 to attributeTypeList.length).map(i => s"column-$i").toArray) -// var header: Array[String] = -// if (hasHeader) parser.getContext.headers() -// else (1 to attributeTypeList.length).map(i => "column-" + i).toArray -// -// if (header == null) header = (1 to attributeTypeList.length).map(i => "column-" + i).toArray + val header: Array[String] = + if (hasHeader) + Option(parser.getContext.headers()) + .getOrElse((1 to attributeTypeList.length).map(i => "column-" + i).toArray) + else (1 to attributeTypeList.length).map(i => "column-" + i).toArray header.indices.foldLeft(Schema()) { (schema, i) => schema.add(header(i), attributeTypeList(i))