Skip to content

Commit

Permalink
Adapter for Scala Grounders (rvacareanu/grounder) (#887)
Browse files Browse the repository at this point in the history
## Summary of Changes

Added an Adapter for the Scala Grounders package.

---------

Co-authored-by: Gus Hahn-Powell <[email protected]>
  • Loading branch information
robertvacareanu and myedibleenso authored Apr 30, 2024
1 parent a6df9ef commit fe8f41f
Show file tree
Hide file tree
Showing 5 changed files with 154 additions and 2 deletions.
1 change: 1 addition & 0 deletions skema/text_reading/scala/build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ libraryDependencies ++= {
"com.lihaoyi" %% "requests" % "0.1.8",
"io.cequence" %% "openai-scala-client" % "1.0.0.RC.1",
"org.scalatest" %% "scalatest" % "3.0.9" % Test,
"org.clulab" %% "scala-grounders" % "0.0.35",
)
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@ import org.clulab.processors.Processor
import org.clulab.processors.fastnlp.FastNLPProcessor

import scala.collection.JavaConverters._
import scala.io.Source
import org.ml4ai.skema.grounding.scala_grounders.ScalaGroundersAdapter


object GrounderFactory {

Expand Down Expand Up @@ -52,7 +55,11 @@ object GrounderFactory {
new PipelineGrounder(Seq(manualGrounder, grounder))
else
grounder
case "manual" => manualGrounder
case "manual" => manualGrounder
case "scala-grounders" =>
// Similar to `miraembeddings`
val ontologyFilePath = domainConfig.getString("ontologyPath")
ScalaGroundersAdapter.fromFile(groundingConceptsPath=ontologyFilePath)
case other =>
throw new RuntimeException(s"$other - is not implemented as a grounding engine")
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ import org.json4s.JsonDSL._
* @param concept instance returned by a grounder implementations
* @param score of the grounding algorithm given to concept
*/
case class GroundingCandidate(concept: GroundingConcept, score: Float) {
case class GroundingCandidate(concept: GroundingConcept, score: Float, details: Option[String] = None) {

def toJValue: JValue = {
("groundingConcept" -> concept.toJValue) ~
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
package org.ml4ai.skema.grounding.scala_grounders

import org.clulab.scala_grounders.grounding.GroundingConfig
import org.ml4ai.skema.text_reading.grounding.Grounder
import org.ml4ai.skema.text_reading.grounding.GroundingCandidate
import org.ml4ai.skema.text_reading.grounding.GroundingConcept
import com.typesafe.config.Config
import org.clulab.scala_grounders.grounding.SequentialGrounder
import org.clulab.scala_grounders.model.DKG
import org.clulab.scala_grounders.model.DKGSynonym
import com.typesafe.config.ConfigFactory
import org.clulab.scala_grounders.using
import org.clulab.scala_grounders.model.DKG
import scala.io.Source


/**
  * Adapter that lets the scala-grounders library be used through SKEMA's `Grounder` interface.
  *
  * The adaptation consists of:
  *  - converting SKEMA's `GroundingConcept` to scala-grounders' `DKG` (available in `fromConceptToDKG`)
  *  - converting scala-grounders' `DKG` back to SKEMA's `GroundingConcept` (available in `fromDKGToConcept`)
  *  - creating the scala-grounders grounder (`grounder = SequentialGrounder().mkFast(concepts)`)
  *  - implementing `groundingCandidates` by delegating to that grounder
  *
  * @param groundingConcepts the concepts used as grounding targets. Every text to be grounded is
  *                          grounded against these concepts. Depending on the underlying grounder,
  *                          none of them may be suitable, in which case nothing is returned for
  *                          that text; the intent is that a concept outside this collection is
  *                          never returned.
  */
class ScalaGroundersAdapter(groundingConcepts: Seq[GroundingConcept]) extends Grounder {

  /** The grounding concepts in scala-grounders' `DKG` form; computed on first access. */
  lazy val concepts: Seq[DKG] = groundingConcepts.map(concept => fromConceptToDKG(concept))

  /** The scala-grounders grounder built from `concepts`; computed on first access. */
  lazy val grounder = SequentialGrounder().mkFast(concepts)

  /**
    * Grounds each text independently against `concepts`.
    *
    * @param texts the texts to ground
    * @param k     forwarded unchanged to the underlying grounder
    *              (presumably the maximum number of candidates per text -- confirm against scala-grounders)
    * @return one sequence of candidates per input text, in the same order as `texts`
    */
  def groundingCandidates(texts: Seq[String], k: Int): Seq[Seq[GroundingCandidate]] = {
    texts.map { text =>
      // TODO Maybe provide additional context (useful for NeuralGrounder)
      val groundingResults = grounder.ground(text, None, concepts, k)
      val candidates = groundingResults.map { groundingResult =>
        val concept = fromDKGToConcept(groundingResult.dkg)
        // Keep the name of the grounder that produced this result as the candidate's details.
        val grounderName = groundingResult.groundingDetails.grounderName
        GroundingCandidate(concept, groundingResult.score, details = Some(grounderName))
      }
      // Evaluate the results fully before handing them back.
      candidates.force.toSeq
    }
  }

  /**
    * Converts a SKEMA `GroundingConcept` into a scala-grounders `DKG`.
    * The two have similar meanings, so the fields map 1:1; a concept without synonyms
    * becomes a `DKG` with an empty synonym sequence.
    *
    * @param concept the SKEMA concept to convert
    * @return the corresponding `DKG`
    */
  def fromConceptToDKG(concept: GroundingConcept): DKG = {
    val dkgSynonyms = concept.synonyms match {
      case Some(values) => values.map(value => DKGSynonym(value, None))
      case None         => Seq.empty
    }
    DKG(concept.id, concept.name, concept.description, dkgSynonyms)
  }

  /**
    * Converts a scala-grounders `DKG` into a SKEMA `GroundingConcept`.
    * The two have similar meanings, so the fields map 1:1; the resulting concept has no embedding.
    *
    * @param dkg the scala-grounders entry to convert
    * @return the corresponding `GroundingConcept`
    */
  def fromDKGToConcept(dkg: DKG): GroundingConcept = {
    val synonymValues = dkg.synonyms.map(synonym => synonym.value)
    GroundingConcept(dkg.id, dkg.name, dkg.description, Option(synonymValues), None)
  }

}
/**
  * Provides alternative ways of creating a `ScalaGroundersAdapter`.
  */
object ScalaGroundersAdapter {

  /**
    * Creates an adapter from SKEMA grounding concepts.
    *
    * @param groundingConcepts the concepts to ground against
    * @return a new adapter over `groundingConcepts`
    */
  def apply(groundingConcepts: Seq[GroundingConcept]): ScalaGroundersAdapter =
    new ScalaGroundersAdapter(groundingConcepts)

  /**
    * Creates an adapter from scala-grounders `DKG` entries.
    * Each entry is converted to a `GroundingConcept` without an embedding.
    *
    * @param dkgs the scala-grounders entries to ground against
    * @return a new adapter over the converted concepts
    */
  def fromDkgs(dkgs: Seq[DKG]): ScalaGroundersAdapter = {
    val groundingConcepts = dkgs.map { dkg =>
      GroundingConcept(dkg.id, dkg.name, dkg.description, Option(dkg.synonyms.map(_.value)), None)
    }
    new ScalaGroundersAdapter(groundingConcepts)
  }

  /**
    * Creates an adapter from a JSON file whose top-level value is an array of entries
    * readable by `DKG.fromJson`.
    *
    * @param groundingConceptsPath path of the JSON file holding the grounding concepts
    * @return a new adapter over the concepts read from the file
    */
  def fromFile(groundingConceptsPath: String): ScalaGroundersAdapter = {
    // Decode explicitly as UTF-8 (the standard JSON encoding) instead of relying on the
    // platform default charset, which would corrupt non-ASCII names/synonyms on some systems.
    val dkgs = using(Source.fromFile(groundingConceptsPath, "UTF-8")) { source =>
      // `arr` exposes ujson's mutable buffer; take an immutable snapshot before handing it on.
      ujson.read(source.mkString).arr.map(json => DKG.fromJson(json)).toList
    }
    fromDkgs(dkgs)
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
package org.ml4ai.skema.grounding.scala_grounders

import org.ml4ai.skema.test.Test

import java.nio.{Buffer, ByteBuffer, ByteOrder}

import org.ml4ai.skema.text_reading.grounding.Grounder
import org.ml4ai.skema.text_reading.grounding.GroundingCandidate
import org.ml4ai.skema.text_reading.grounding.GroundingConcept

/**
  * Checks that `ScalaGroundersAdapter` grounds a text against a small set of concepts.
  *
  * Running command:
  *     sbt "testOnly org.ml4ai.skema.grounding.scala_grounders.TestAdapter"
  */
class TestAdapter extends Test {

  behavior of "ScalaGroundersAdapter"

  // Small fixture ontology. Note that id2 and id4 share the same name and description.
  val gcs = Seq(
    GroundingConcept(
      id = "id1",
      name = "dog",
      description = Some("this is a cute dog"),
      synonyms = None,
      embedding = None
    ),
    GroundingConcept(
      id = "id2",
      name = "cat",
      description = Some("this is a cute cat"),
      synonyms = None,
      embedding = None
    ),
    GroundingConcept(
      id = "id3",
      name = "dog cat",
      description = Some("here we have a dog and a cat"),
      synonyms = None,
      embedding = None
    ),
    GroundingConcept(
      id = "id4",
      name = "cat",
      description = Some("this is a cute cat"),
      synonyms = None,
      embedding = None
    ),
  )

  // Lazy so that building the adapter and grounding happen when the test body first touches them:
  // a failure is then reported against the test instead of aborting construction of the suite.
  lazy val sga = new ScalaGroundersAdapter(gcs)

  lazy val result = sga.groundingCandidates(Seq("dog"), 10).head

  // Check that the first one is a GroundingCandidate with id1
  it should "ground" in {
    // `headOption` turns an empty result into a readable assertion failure
    // rather than a NoSuchElementException thrown by `head`.
    result.headOption.map(_.concept.id) should be (Some("id1"))
  }

}

0 comments on commit fe8f41f

Please sign in to comment.