-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Adapter for Scala Grounders (rvacareanu/grounder) (#887)
## Summary of Changes Added an Adapter for the Scala Grounders package. --------- Co-authored-by: Gus Hahn-Powell <[email protected]>
- Loading branch information
1 parent
a6df9ef
commit fe8f41f
Showing
5 changed files
with
154 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
82 changes: 82 additions & 0 deletions
82
...scala/src/main/scala/org/ml4ai/skema/text_reading/grounding/scala_grounders/Adapter.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,82 @@ | ||
package org.ml4ai.skema.grounding.scala_grounders | ||
|
||
import org.clulab.scala_grounders.grounding.GroundingConfig | ||
import org.ml4ai.skema.text_reading.grounding.Grounder | ||
import org.ml4ai.skema.text_reading.grounding.GroundingCandidate | ||
import org.ml4ai.skema.text_reading.grounding.GroundingConcept | ||
import com.typesafe.config.Config | ||
import org.clulab.scala_grounders.grounding.SequentialGrounder | ||
import org.clulab.scala_grounders.model.DKG | ||
import org.clulab.scala_grounders.model.DKGSynonym | ||
import com.typesafe.config.ConfigFactory | ||
import org.clulab.scala_grounders.using | ||
import org.clulab.scala_grounders.model.DKG | ||
import scala.io.Source | ||
|
||
|
||
/**
 * Bridges SKEMA's grounding data model and the scala-grounders library.
 *
 * Concretely, the adapter is responsible for:
 *  - converting SKEMA's `GroundingConcept` to scala-grounders' `DKG` (available in `fromConceptToDKG`)
 *  - converting scala-grounders' `DKG` to SKEMA's `GroundingConcept` (available in `fromDKGToConcept`)
 *  - creating the scala-grounders grounder (`SequentialGrounder().mkFast(concepts)`)
 *  - implementing `groundingCandidates` by delegating to that grounder
 *
 * @param groundingConcepts the concepts used as grounding targets. Every candidate text
 *                          (i.e. any text that we want to ground) is grounded on these
 *                          concepts. Depending on the underlying grounder it is possible
 *                          that none of them is suitable, in which case nothing is returned
 *                          for that text; a concept outside this collection is never
 *                          intended to be returned.
 */
class ScalaGroundersAdapter(groundingConcepts: Seq[GroundingConcept]) extends Grounder {

  /** The grounding concepts converted to scala-grounders' representation; computed on first use. */
  lazy val concepts = groundingConcepts.map(concept => fromConceptToDKG(concept))

  /** The scala-grounders grounder built over `concepts`; computed on first use. */
  lazy val grounder = SequentialGrounder().mkFast(concepts)

  /**
   * Grounds each input text and returns its candidates.
   *
   * @param texts the texts to ground
   * @param k     forwarded unchanged to the underlying grounder's `ground` call
   *              (presumably the maximum number of candidates per text -- confirm
   *              against scala-grounders)
   * @return one sequence of candidates per input text, in the same order as `texts`
   */
  def groundingCandidates(texts: Seq[String], k: Int): Seq[Seq[GroundingCandidate]] =
    for (text <- texts) yield {
      // TODO Maybe provide additional context (useful for NeuralGrounder)
      val ranked = grounder.ground(text, None, concepts, k)
      val candidates = ranked.map { hit =>
        // The name of the grounder that produced the hit is reported as the candidate's details.
        val grounderName = hit.groundingDetails.grounderName
        GroundingCandidate(fromDKGToConcept(hit.dkg), hit.score, details = Some(grounderName))
      }
      // Fully evaluate the results before handing them back to the caller.
      candidates.force.toSeq
    }

  /**
   * Transforms a SKEMA `GroundingConcept` into a scala-grounders `DKG`.
   * Id, name and description are copied as they are; every synonym is wrapped in a
   * `DKGSynonym` whose second argument is `None`. A concept without synonyms yields
   * an empty synonym list.
   *
   * @param concept the SKEMA concept to convert
   * @return the corresponding `DKG`
   */
  def fromConceptToDKG(concept: GroundingConcept): DKG = {
    val dkgSynonyms = concept.synonyms
      .map(names => names.map(name => DKGSynonym(name, None)))
      .getOrElse(Seq.empty)
    DKG(concept.id, concept.name, concept.description, dkgSynonyms)
  }

  /**
   * Transforms a scala-grounders `DKG` into a SKEMA `GroundingConcept`.
   * Id, name and description are copied as they are, the synonyms are reduced to
   * their `value`, and no embedding is set.
   *
   * @param dkg the scala-grounders entry to convert
   * @return the corresponding `GroundingConcept`
   */
  def fromDKGToConcept(dkg: DKG): GroundingConcept = {
    val synonymValues = dkg.synonyms.map(_.value)
    GroundingConcept(
      id = dkg.id,
      name = dkg.name,
      description = dkg.description,
      synonyms = Option(synonymValues),
      embedding = None
    )
  }

}
/**
 * Provides alternative ways of creating a `ScalaGroundersAdapter`.
 */
object ScalaGroundersAdapter {

  /**
   * Creates an adapter directly from SKEMA grounding concepts.
   *
   * @param groundingConcepts the concepts to ground against
   * @return an adapter over `groundingConcepts`
   */
  def apply(groundingConcepts: Seq[GroundingConcept]): ScalaGroundersAdapter =
    new ScalaGroundersAdapter(groundingConcepts)

  /**
   * Creates an adapter from scala-grounders `DKG` entries by converting each of them
   * to a `GroundingConcept` (synonyms reduced to their `value`, no embedding).
   *
   * @param dkgs the scala-grounders entries to ground against
   * @return an adapter over the converted concepts
   */
  def fromDkgs(dkgs: Seq[DKG]): ScalaGroundersAdapter = {
    val groundingConcepts = dkgs.map { dkg =>
      GroundingConcept(dkg.id, dkg.name, dkg.description, Option(dkg.synonyms.map(_.value)), None)
    }
    new ScalaGroundersAdapter(groundingConcepts)
  }

  /**
   * Creates an adapter from a JSON file whose top-level value is an array; each
   * element of the array is parsed with `DKG.fromJson`.
   *
   * @param groundingConceptsPath path of the JSON file to read
   * @return an adapter over the concepts found in the file
   */
  def fromFile(groundingConceptsPath: String): ScalaGroundersAdapter = {
    // Read the file as UTF-8 explicitly instead of relying on the platform default
    // charset, which would garble non-ASCII characters on non-UTF-8 platforms.
    val dkgs = using(Source.fromFile(groundingConceptsPath, "UTF-8")) { source =>
      // `.toSeq` so that the buffer produced while parsing is handed over as a `Seq`.
      ujson.read(source.mkString).arr.map(json => DKG.fromJson(json)).toSeq
    }
    fromDkgs(dkgs)
  }
}
62 changes: 62 additions & 0 deletions
62
..._reading/scala/src/test/scala/org/ml4ai/skema/grounding/scala_grounders/TestAdapter.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,62 @@ | ||
package org.ml4ai.skema.grounding.scala_grounders | ||
|
||
import org.ml4ai.skema.test.Test | ||
|
||
import java.nio.{Buffer, ByteBuffer, ByteOrder} | ||
|
||
import org.ml4ai.skema.text_reading.grounding.Grounder | ||
import org.ml4ai.skema.text_reading.grounding.GroundingCandidate | ||
import org.ml4ai.skema.text_reading.grounding.GroundingConcept | ||
|
||
/**
 * Smoke test for `ScalaGroundersAdapter`: grounds a single text against a handful of
 * hand-written concepts and checks which concept is ranked first.
 *
 * Running command:
 *   sbt "testOnly org.ml4ai.skema.grounding.scala_grounders.TestAdapter"
 */
class TestAdapter extends Test {

  behavior of "ScalaGroundersAdapter"

  // Concepts to ground against. Note that "id2" and "id4" intentionally share the
  // same name and description.
  val gcs = Seq(
    GroundingConcept(
      id = "id1",
      name = "dog",
      description = Some("this is a cute dog"),
      synonyms = None,
      embedding = None
    ),
    GroundingConcept(
      id = "id2",
      name = "cat",
      description = Some("this is a cute cat"),
      synonyms = None,
      embedding = None
    ),
    GroundingConcept(
      id = "id3",
      name = "dog cat",
      description = Some("here we have a dog and a cat"),
      synonyms = None,
      embedding = None
    ),
    GroundingConcept(
      id = "id4",
      name = "cat",
      description = Some("this is a cute cat"),
      synonyms = None,
      embedding = None
    )
  )

  val sga = new ScalaGroundersAdapter(gcs)

  // Lazy so that the grounding itself runs inside the test case below: a failure is
  // then reported as a failed test rather than aborting the construction of the suite.
  lazy val result = sga.groundingCandidates(Seq("dog"), 10).head

  // Check that the first one is a GroundingCandidate with id1
  it should "ground" in {
    // The candidates are attached to the failure message instead of being printed on every run.
    withClue(s"candidates: ${result.mkString(", ")}") {
      // `headOption` turns an empty result into a regular assertion failure.
      result.headOption.map(_.concept.id) should be (Some("id1"))
    }
  }

}