Skip to content

Commit

Permalink
Don't create a GeoParquet metadata string if the SFT has no geometries
Browse files Browse the repository at this point in the history
  • Loading branch information
adeet1 committed Mar 28, 2024
1 parent 137dcb5 commit 09588e8
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import org.locationtech.geomesa.fs.storage.common.observer.BoundsObserver
import org.locationtech.geomesa.fs.storage.parquet.io.SimpleFeatureParquetSchema.{GeoParquetSchemaKey, SchemaVersionKey, geoParquetMetadata}
import org.locationtech.geomesa.utils.geotools.ObjectType
import org.locationtech.geomesa.utils.geotools.ObjectType.ObjectType
import org.locationtech.geomesa.utils.geotools.RichSimpleFeatureType.RichSimpleFeatureType
import org.locationtech.geomesa.utils.text.WKBUtils
import org.locationtech.jts.geom._

Expand Down Expand Up @@ -81,9 +82,14 @@ class SimpleFeatureWriteSupport extends WriteSupport[SimpleFeature] {

// called once at the end after all SimpleFeatures are written
override def finalizeWrite(): FinalizedWriteContext = {
// Get the bounding box that spans all the SimpleFeatures' geometries
// Get the bounding boxes that span each geometry type
val bboxes = observer.getBoundingBoxes

// If the SFT has no geometries, then there's no need to create GeoParquet metadata
if (bboxes.isEmpty) {
return new FinalizedWriteContext(schema.metadata)
}

// TODO: not an elegant way to do it
// somehow trying to mutate the map, e.g. by calling metadata.put(GeoParquetSchemaKey, result), causes empty parquet files to be written
val newMetadata: java.util.Map[String, String] = Map(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,36 @@ class ParquetReadWriteTest extends Specification with AllExpectations with LazyL
metadata.contains(polygonsBboxString) must beTrue
}

// Verifies that a feature type with no geometry attributes can still be written to parquet,
// and that no GeoParquet metadata entry is added to the file footer in that case.
"write parquet files with no geometries" >> {
  // This temp file is local to the example, so it is removed in the `finally` below
  val f = Files.createTempFile("geomesa", ".parquet")
  try {
    val sft = SimpleFeatureTypes.createType("test", "name:String,age:Int,dtg:Date")
    val sftConf = {
      val c = new Configuration()
      StorageConfiguration.setSft(c, sft)
      // Use GZIP in tests but snappy in prod due to license issues
      c.set(ParquetCompressionOpt, CompressionCodecName.GZIP.toString)
      c
    }

    // Includes one feature with a null attribute value
    val features = Seq(
      ScalaSimpleFeature.create(sft, "1", "first", 100, "2017-01-01T00:00:00Z"),
      ScalaSimpleFeature.create(sft, "2", null, 200, "2017-01-02T00:00:00Z"),
      ScalaSimpleFeature.create(sft, "3", "third", 300, "2017-01-03T00:00:00Z")
    )

    val writer = SimpleFeatureParquetWriter.builder(new Path(f.toUri), sftConf).build()
    // The writer is closed by WithClose before the footer is inspected below
    WithClose(writer) { w =>
      features.foreach(w.write)
    }

    Files.size(f) must beGreaterThan(0L)

    // The footer is read from the (already closed) writer, not re-read from disk
    val metadata = writer.getFooter.getFileMetaData.getKeyValueMetaData.get(GeoParquetSchemaKey)
    metadata must beNull
  } finally {
    // Clean up the temp file regardless of whether the expectations passed
    Files.deleteIfExists(f)
  }
}

"read geoparquet files" >> {
val result = readFile(FilterCompat.NOOP, sftConf)
result mustEqual features
Expand Down

0 comments on commit 09588e8

Please sign in to comment.