Add Scalafix rule for LogicalTypeSupplier removal
clairemcginty committed Jan 17, 2024
1 parent 41c8df7 commit 1523f24
Showing 5 changed files with 165 additions and 1 deletion.
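
For context, this is roughly the user-facing rewrite the new rule performs, sketched from the input/output fixtures added below (before and after running FixLogicalTypeSupplier):

// Before: a logical-type supplier is wired in explicitly via the Parquet configuration
sc.parquetAvroFile[SpecificRecordBase](
  "input",
  conf = ParquetConfiguration.of(AvroReadSupport.AVRO_DATA_SUPPLIER -> classOf[LogicalTypeSupplier])
)

// After: the supplier entry is removed; a configuration left empty is dropped entirely
sc.parquetAvroFile[SpecificRecordBase]("input")
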
3 changes: 2 additions & 1 deletion scalafix/build.sbt
@@ -62,7 +62,8 @@ def scio(version: String): List[ModuleID] = {
     "scio-parquet",
     "scio-test",
     "scio-jdbc",
-    "scio-tensorflow"
+    "scio-tensorflow",
+    "scio-smb"
   ) ++ (VersionNumber(version).numbers match {
     case Seq(0, minor, _) if minor < 10 =>
       List(
@@ -0,0 +1,70 @@
/*
rule = FixLogicalTypeSupplier
*/
package fix.v0_14_0

import com.spotify.scio.ScioContext
import com.spotify.scio.parquet.ParquetConfiguration
import com.spotify.scio.parquet.avro._
import com.spotify.scio.values.SCollection
import org.apache.avro.specific.SpecificRecordBase
import org.apache.beam.sdk.extensions.smb.AvroLogicalTypeSupplier
import org.apache.hadoop.conf.Configuration
import org.apache.parquet.avro.{AvroDataSupplier, AvroReadSupport, AvroWriteSupport}

object FixLogicalTypeSuppliers {
  val sc = ScioContext()

  sc.parquetAvroFile[SpecificRecordBase](
    "input",
    conf = ParquetConfiguration.of(
      AvroReadSupport.AVRO_DATA_SUPPLIER -> classOf[LogicalTypeSupplier]
    ))

  sc.parquetAvroFile[SpecificRecordBase](
    "input",
    null,
    null,
    ParquetConfiguration.of(
      AvroReadSupport.AVRO_DATA_SUPPLIER -> classOf[LogicalTypeSupplier]
    ))

  sc.parquetAvroFile[SpecificRecordBase](
    "input",
    conf = ParquetConfiguration.of(
      AvroReadSupport.AVRO_DATA_SUPPLIER -> classOf[LogicalTypeSupplier],
      "foo" -> "bar"
    ))

  sc.parquetAvroFile[SpecificRecordBase](
    "input",
    null,
    null,
    ParquetConfiguration.of(
      AvroReadSupport.AVRO_DATA_SUPPLIER -> classOf[LogicalTypeSupplier],
      "foo" -> "bar"
    ))

  val data: SCollection[SpecificRecordBase] = ???
  data.saveAsParquetAvroFile(
    "output",
    conf = ParquetConfiguration.of(
      AvroWriteSupport.AVRO_DATA_SUPPLIER -> classOf[LogicalTypeSupplier]
    )
  )

  data.saveAsParquetAvroFile(
    "output",
    conf = ParquetConfiguration.of(
      AvroWriteSupport.AVRO_DATA_SUPPLIER -> classOf[LogicalTypeSupplier],
      "foo" -> "bar"
    )
  )

  val conf = new Configuration()
  conf.setClass(AvroReadSupport.AVRO_DATA_SUPPLIER, classOf[LogicalTypeSupplier], classOf[AvroDataSupplier])
  conf.setClass(AvroWriteSupport.AVRO_DATA_SUPPLIER, classOf[LogicalTypeSupplier], classOf[AvroDataSupplier])
  conf.setClass(AvroReadSupport.AVRO_DATA_SUPPLIER, classOf[AvroLogicalTypeSupplier], classOf[AvroDataSupplier])
  conf.setClass(AvroWriteSupport.AVRO_DATA_SUPPLIER, classOf[AvroLogicalTypeSupplier], classOf[AvroDataSupplier])
  conf.setClass("someClass", classOf[String], classOf[CharSequence])
}
@@ -0,0 +1,30 @@
package fix.v0_14_0

import com.spotify.scio.ScioContext
import com.spotify.scio.parquet.ParquetConfiguration
import com.spotify.scio.parquet.avro._
import com.spotify.scio.values.SCollection
import org.apache.avro.specific.SpecificRecordBase
import org.apache.beam.sdk.extensions.smb.AvroLogicalTypeSupplier
import org.apache.hadoop.conf.Configuration
import org.apache.parquet.avro.{AvroDataSupplier, AvroReadSupport, AvroWriteSupport}

object FixLogicalTypeSuppliers {
  val sc = ScioContext()

  sc.parquetAvroFile[SpecificRecordBase]("input")

  sc.parquetAvroFile[SpecificRecordBase]("input", null, null)

  sc.parquetAvroFile[SpecificRecordBase]("input", conf = ParquetConfiguration.of("foo" -> "bar"))

  sc.parquetAvroFile[SpecificRecordBase]("input", null, null, ParquetConfiguration.of("foo" -> "bar"))

  val data: SCollection[SpecificRecordBase] = ???
  data.saveAsParquetAvroFile("output")

  data.saveAsParquetAvroFile("output", conf = ParquetConfiguration.of("foo" -> "bar"))

  val conf = new Configuration()
  conf.setClass("someClass", classOf[String], classOf[CharSequence])
}
@@ -18,4 +18,5 @@ fix.v0_13_0.FixTaps
 fix.v0_14_0.FixAvroSchemasPackage
 fix.v0_14_0.FixAvroCoder
 fix.v0_14_0.FixDynamicAvro
+fix.v0_14_0.FixLogicalTypeSupplier

@@ -0,0 +1,62 @@
package fix.v0_14_0

import scalafix.v1._
import scala.meta._

object FixLogicalTypeSupplier {
  val ParquetConfigurationMatcher: SymbolMatcher = SymbolMatcher.normalized(
    "com/spotify/scio/parquet/package/ParquetConfiguration#of"
  )

  val SetClassMatcher: SymbolMatcher = SymbolMatcher.normalized(
    "org/apache/hadoop/conf/Configuration#setClass"
  )

  private val ParquetAvroPrefix = "com/spotify/scio/parquet/avro/syntax"
  private val ParquetAvroReadMatcher = SymbolMatcher.normalized(s"$ParquetAvroPrefix/ScioContextOps#parquetAvroFile")
  private val ParquetAvroWriteMatcher = SymbolMatcher.normalized(s"$ParquetAvroPrefix/SCollectionOps#saveAsParquetAvroFile")
}

class FixLogicalTypeSupplier extends SemanticRule("FixLogicalTypeSupplier") {
  import FixLogicalTypeSupplier._

  /** Rewrites ParquetConfiguration.of(...) arguments, dropping LogicalTypeSupplier entries. */
  private def updateIOArgs(fnArgs: Seq[Term])(implicit doc: SemanticDocument): Seq[Term] = {
    def removeTypeSupplier(confArgs: Seq[Term]): Option[Term] = {
      val newConfiguration = confArgs.filterNot {
        case q"$l -> classOf[LogicalTypeSupplier]" => true
        case q"$l -> classOf[AvroLogicalTypeSupplier]" => true
        case _ => false
      }.toList

      if (newConfiguration.isEmpty) {
        None // nothing left: drop the conf argument entirely
      } else {
        Some(q"ParquetConfiguration.of(..$newConfiguration)")
      }
    }

    fnArgs.flatMap {
      case Term.Assign(lhs, q"$fn(..$confArgs)") if ParquetConfigurationMatcher.matches(fn.symbol) =>
        removeTypeSupplier(confArgs).map(c => Term.Assign(lhs, c))
      case q"$fn(..$confArgs)" if ParquetConfigurationMatcher.matches(fn.symbol) =>
        removeTypeSupplier(confArgs)
      case a => Some(a)
    }
  }

  override def fix(implicit doc: SemanticDocument): Patch = {
    doc.tree.collect {
      // Rewrite parquetAvroFile/saveAsParquetAvroFile calls, stripping supplier entries from their conf argument
      case method @ q"$fn(..$args)"
          if (ParquetAvroReadMatcher.matches(fn.symbol) || ParquetAvroWriteMatcher.matches(fn.symbol)) && args.nonEmpty =>
        val newArgs = updateIOArgs(args).toList
        Patch.replaceTree(method, q"$fn(..$newArgs)".syntax)
      // Remove Configuration#setClass calls that register a logical-type supplier
      case method @ q"$lhs.$fn(..$args)"
          if SetClassMatcher.matches(fn.symbol) && args.collect {
            case q"classOf[LogicalTypeSupplier]" => true
            case q"classOf[AvroLogicalTypeSupplier]" => true
          }.nonEmpty =>
        Patch.removeTokens(method.tokens)
    }.asPatch
  }
}
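
Not part of this commit's hunks, but for context: rules like this one are typically verified with scalafix-testkit, which applies the rule named in each input fixture's "rule = ..." header comment and diffs the rewrite against the expected output fixture. A minimal sketch of such a test harness, assuming scalafix-testkit and ScalaTest are on the test classpath (the suite name here is illustrative):

package fix

import org.scalatest.funsuite.AnyFunSuiteLike
import scalafix.testkit.AbstractSemanticRuleSuite

// Discovers every input/output fixture pair, runs the rule declared in the
// input file's "rule = ..." comment, and compares the rewrite with the output file.
class RuleSuite extends AbstractSemanticRuleSuite with AnyFunSuiteLike {
  runAllTests()
}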
