-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* extract ISBN/ISSN to common * extract 020 and 022 from XML * add Edition to common marc * Implement common MarcElectronicResources * Apply auto-formatting rules * move loggingContext around * sort out the ElectronicResources interface * remove redundant TODO * resources now populate a default linkeText * Update pipeline/transformer/transformer_marc_common/src/test/scala/weco/pipeline/transformer/marc_common/transformers/MarcElectronicResourcesTest.scala Co-authored-by: Robert Kenny <[email protected]> * fix merge errors * warn on empty labels * add context to failure to find label * fix override * implement common current frequency transformer * use common currentFrequency in Sierra and XMl * implement common alternative titles * create common alternative title test suite --------- Co-authored-by: Buildkite on behalf of Wellcome Collection <[email protected]> Co-authored-by: Robert Kenny <[email protected]>
- Loading branch information
1 parent
f945a2c
commit da5b58a
Showing
5 changed files
with
420 additions
and
21 deletions.
There are no files selected for viewing
50 changes: 50 additions & 0 deletions
50
...main/scala/weco/pipeline/transformer/marc_common/transformers/MarcAlternativeTitles.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
package weco.pipeline.transformer.marc_common.transformers | ||
|
||
import weco.pipeline.transformer.marc_common.models.{ | ||
MarcField, | ||
MarcRecord, | ||
MarcSubfield | ||
} | ||
|
||
// Populate work:alternativeTitles | ||
// | ||
// The following fields are used as possible alternative titles. Only $a is | ||
// listed below, but every subfield of a matching field is joined into the title: | ||
// * 240 $a https://www.loc.gov/marc/bibliographic/bd240.html | ||
// * 130 $a http://www.loc.gov/marc/bibliographic/bd130.html | ||
// * 246 $a https://www.loc.gov/marc/bibliographic/bd246.html | ||
|
||
// Extracts the alternative titles of a MARC record from its 130, 240 and 246 | ||
// fields. | ||
// | ||
// Each matching field yields one title: the contents of all of its subfields, | ||
// in the order the field holds them, joined with single spaces. Caption titles | ||
// (246 with ind2 = 6) and $5 UkLW subfields are left out, blank titles are | ||
// dropped, and a title that occurs more than once is returned only once. | ||
object MarcAlternativeTitles extends MarcDataTransformer { | ||
|
||
override type Output = Seq[String] | ||
|
||
// Returns the distinct, non-blank alternative titles found in `record`. | ||
override def apply(record: MarcRecord): Seq[String] = { | ||
record | ||
.fieldsWithTags("240", "130", "246") | ||
.withoutCaptionTitles | ||
.map(field => alternativeTitle(field)) | ||
// A field holding several empty subfields joins to a whitespace-only | ||
// string, so test for blankness rather than plain emptiness. | ||
.filterNot(_.trim.isEmpty) | ||
.distinct | ||
} | ||
|
||
// Builds the title for one field by joining the content of every subfield | ||
// (other than $5 UkLW) with a single space. | ||
private def alternativeTitle(field: MarcField): String = | ||
field.subfields.withoutUKLW.map(_.content).mkString(" ") | ||
|
||
implicit private class FieldsOps(fields: Seq[MarcField]) { | ||
|
||
// 246 with ind2 = 6 indicates a Caption Title | ||
// "printed at the head of the first page of text. Caption title: may be generated with the note for display." | ||
// This is not an alternative title that we want to capture here. | ||
// The indicator is only checked on 246; other tags are never dropped here. | ||
def withoutCaptionTitles: Seq[MarcField] = | ||
fields filterNot { | ||
field => | ||
field.marcTag == "246" && field.indicator2 == "6" | ||
} | ||
} | ||
implicit private class SubfieldsOps(subfields: Seq[MarcSubfield]) { | ||
// Any $5 subfield with contents `UkLW` is Wellcome Library-specific and | ||
// should be omitted. | ||
// Only that exact tag/content pair is removed; any other $5 is kept. | ||
def withoutUKLW: Seq[MarcSubfield] = | ||
subfields.filterNot(_ == MarcSubfield("5", "UkLW")) | ||
|
||
} | ||
} |
337 changes: 337 additions & 0 deletions
337
.../scala/weco/pipeline/transformer/marc_common/transformers/MarcAlternativeTitlesTest.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,337 @@ | ||
package weco.pipeline.transformer.marc_common.transformers | ||
|
||
import org.scalatest.LoneElement | ||
import org.scalatest.funspec.AnyFunSpec | ||
import org.scalatest.matchers.should.Matchers | ||
import org.scalatest.prop.TableDrivenPropertyChecks._ | ||
import weco.pipeline.transformer.marc_common.generators.MarcTestRecord | ||
import weco.pipeline.transformer.marc_common.models.{MarcField, MarcSubfield} | ||
import scala.util.Random | ||
// Behaviour tests for MarcAlternativeTitles, in three groups: records that | ||
// yield no titles, records that yield a single title, and records that yield | ||
// several titles. | ||
class MarcAlternativeTitlesTest | ||
extends AnyFunSpec | ||
with Matchers | ||
with LoneElement { | ||
|
||
describe("extracting alternative titles from 130, 240, and 246 fields") { | ||
info("https://www.loc.gov/marc/bibliographic/bd130.html") | ||
info("https://www.loc.gov/marc/bibliographic/bd240.html") | ||
info("https://www.loc.gov/marc/bibliographic/bd246.html") | ||
describe("returning nothing") { | ||
it( | ||
"does not extract alternative titles if 130, 240, and 246 are absent" | ||
) { | ||
MarcAlternativeTitles( | ||
MarcTestRecord( | ||
fields = Seq( | ||
MarcField( | ||
marcTag = "999", | ||
subfields = Seq(MarcSubfield(tag = "a", content = "mafeesh")) | ||
) | ||
) | ||
) | ||
) shouldBe Nil | ||
} | ||
|
||
it("does not return an empty alternative title given an empty field") { | ||
MarcAlternativeTitles( | ||
MarcTestRecord( | ||
fields = Seq( | ||
MarcField( | ||
marcTag = "130", | ||
subfields = Seq(MarcSubfield(tag = "a", content = "")) | ||
) | ||
) | ||
) | ||
) shouldBe Nil | ||
} | ||
|
||
it( | ||
"does not return an empty alternative title given a field whose content is entirely filtered out" | ||
) { | ||
MarcAlternativeTitles( | ||
MarcTestRecord( | ||
fields = Seq( | ||
MarcField( | ||
marcTag = "246", | ||
subfields = Seq(MarcSubfield(tag = "5", content = "UkLW")) | ||
) | ||
) | ||
) | ||
) shouldBe Nil | ||
} | ||
|
||
it("ignores 'caption title' fields, i.e. 246 fields with ind2=6") { | ||
MarcAlternativeTitles( | ||
MarcTestRecord( | ||
fields = Seq( | ||
MarcField( | ||
marcTag = "246", | ||
subfields = | ||
Seq(MarcSubfield(tag = "a", content = "I am a caption")), | ||
indicator2 = "6" | ||
) | ||
) | ||
) | ||
) shouldBe Nil | ||
} | ||
|
||
} | ||
} | ||
// The same pair of tests is registered once per tag in the table below. | ||
describe("extracting a single alternative title") { | ||
forAll( | ||
Table( | ||
"tag", | ||
"130", | ||
"240", | ||
"246" | ||
) | ||
) { | ||
tag => | ||
describe(s"extracting an alternative title from $tag") { | ||
// NOTE(review): "tile" in the test name below looks like a typo for "title". | ||
it(s"extracts an alternative tile from field $tag") { | ||
MarcAlternativeTitles( | ||
MarcTestRecord( | ||
fields = Seq( | ||
MarcField( | ||
marcTag = tag, | ||
subfields = | ||
Seq(MarcSubfield(tag = "a", content = "mafeesh")) | ||
) | ||
) | ||
) | ||
).loneElement shouldBe "mafeesh" | ||
} | ||
|
||
it( | ||
s"concatenates all subfields of $tag in document order to make the alternative title" | ||
) { | ||
// The subfield codes are shuffled, and the expected title is built from | ||
// the same shuffled order, so the test passes only if the order within | ||
// the field is preserved. Each subfield's content is its upper-cased code. | ||
val shuffled = Random.shuffle(subfieldLists(tag)) | ||
val subfields = shuffled.map( | ||
subtag => MarcSubfield(tag = subtag, content = subtag.toUpperCase) | ||
) | ||
val expectedTitle = | ||
shuffled.map(_.toUpperCase).mkString(" ") | ||
|
||
MarcAlternativeTitles( | ||
MarcTestRecord( | ||
fields = Seq( | ||
MarcField( | ||
marcTag = tag, | ||
subfields = subfields | ||
) | ||
) | ||
) | ||
).loneElement shouldBe expectedTitle | ||
} | ||
if (tag == "246") { | ||
it("ignores subfield 246$5 if its value is UkLW") { | ||
info("$5UkLW is Wellcome Library-specific and should be omitted") | ||
info( | ||
"$5 is non-repeating, so this example should not exist in Real Life" | ||
) | ||
// NOTE(review): "oof" in the message below looks like a typo for "of". | ||
info(" but this test demonstrates that the existence oof $5UkLW") | ||
info( | ||
" does not impact the transformer's ability to extract anything else" | ||
) | ||
MarcAlternativeTitles( | ||
MarcTestRecord( | ||
fields = Seq( | ||
MarcField( | ||
marcTag = tag, | ||
subfields = Seq( | ||
MarcSubfield(tag = "a", content = "Pinakes"), | ||
MarcSubfield(tag = "5", content = "UkLW"), | ||
MarcSubfield(tag = "5", content = "Mouseion") | ||
) | ||
) | ||
) | ||
) | ||
).loneElement shouldBe "Pinakes Mouseion" | ||
} | ||
} | ||
} | ||
} | ||
} | ||
describe("extracting multiple alternative titles") { | ||
it("extracts alternative titles from all relevant fields") { | ||
MarcAlternativeTitles( | ||
MarcTestRecord( | ||
fields = Seq( | ||
MarcField( | ||
marcTag = "130", | ||
subfields = Seq( | ||
MarcSubfield( | ||
tag = "a", | ||
content = "I'm very well acquainted too" | ||
) | ||
) | ||
), | ||
MarcField( | ||
marcTag = "240", | ||
subfields = Seq( | ||
MarcSubfield(tag = "a", content = "with matters mathematical") | ||
) | ||
), | ||
MarcField( | ||
marcTag = "246", | ||
subfields = | ||
Seq(MarcSubfield(tag = "a", content = "I understand equations")) | ||
), | ||
MarcField( | ||
marcTag = "246", | ||
subfields = Seq(MarcSubfield(tag = "a", content = "both simple")) | ||
), | ||
MarcField( | ||
marcTag = "240", | ||
subfields = | ||
Seq(MarcSubfield(tag = "a", content = "and quadratical")) | ||
), | ||
MarcField( | ||
marcTag = "130", | ||
subfields = Seq( | ||
MarcSubfield( | ||
tag = "a", | ||
content = | ||
"About binomial theorem I am teeming with a lot o' news" | ||
) | ||
) | ||
) | ||
) | ||
) | ||
) should contain theSameElementsAs Seq( | ||
"I'm very well acquainted too", | ||
"with matters mathematical", | ||
"I understand equations", | ||
"both simple", | ||
"and quadratical", | ||
"About binomial theorem I am teeming with a lot o' news" | ||
) | ||
} | ||
it("does not return duplicate alternative titles") { | ||
MarcAlternativeTitles( | ||
MarcTestRecord( | ||
fields = Seq( | ||
MarcField( | ||
marcTag = "130", | ||
subfields = Seq( | ||
MarcSubfield( | ||
tag = "a", | ||
content = | ||
"With many cheerful facts about the square of the hypotenuse" | ||
) | ||
) | ||
), | ||
MarcField( | ||
marcTag = "240", | ||
subfields = Seq( | ||
MarcSubfield( | ||
tag = "a", | ||
content = | ||
"With many cheerful facts about the square of the hypotenuse" | ||
) | ||
) | ||
), | ||
MarcField( | ||
marcTag = "246", | ||
subfields = Seq( | ||
MarcSubfield( | ||
tag = "a", | ||
content = | ||
"With many cheerful facts about the square of the hypotenuse" | ||
) | ||
) | ||
), | ||
MarcField( | ||
marcTag = "246", | ||
subfields = Seq( | ||
MarcSubfield( | ||
tag = "a", | ||
content = | ||
"With many cheerful facts about the square of the hypoten-potenuse" | ||
) | ||
) | ||
) | ||
) | ||
) | ||
) should contain theSameElementsAs Seq( | ||
"With many cheerful facts about the square of the hypotenuse", | ||
"With many cheerful facts about the square of the hypoten-potenuse" | ||
) | ||
} | ||
it("only filters on ind2=6 for 246 fields") { | ||
info("246 with indicator2 is a caption title") | ||
info("this is not true of 130 and 240") | ||
// All three fields get ind2 = 6; only the 246 is expected to be dropped. | ||
val fields = Seq( | ||
"130" -> "I am not a caption", | ||
"246" -> "I am a caption", | ||
"240" -> "Nor am I" | ||
) map { | ||
case (tag, content) => | ||
MarcField( | ||
indicator2 = "6", | ||
marcTag = tag, | ||
subfields = Seq( | ||
MarcSubfield( | ||
tag = "a", | ||
content = content | ||
) | ||
) | ||
) | ||
} | ||
|
||
MarcAlternativeTitles( | ||
MarcTestRecord(fields = fields) | ||
) should contain theSameElementsAs Seq( | ||
"I am not a caption", | ||
"Nor am I" | ||
) | ||
} | ||
} | ||
|
||
// Subfield codes, keyed by MARC tag, used to build the fields in the | ||
// subfield-concatenation test. | ||
private val subfieldLists = Map( | ||
"130" -> Seq( | ||
"a", | ||
"d", | ||
"f", | ||
"g", | ||
"h", | ||
"k", | ||
"l", | ||
"m", | ||
"n", | ||
"o", | ||
"p", | ||
"r", | ||
"s", | ||
"t", | ||
"0", | ||
"1", | ||
"2", | ||
"6", | ||
"7", | ||
"8" | ||
), | ||
"240" -> Seq( | ||
"a", | ||
"d", | ||
"f", | ||
"g", | ||
"h", | ||
"k", | ||
"l", | ||
"m", | ||
"n", | ||
"o", | ||
"p", | ||
"r", | ||
"s", | ||
"0", | ||
"1", | ||
"2", | ||
"6", | ||
"7", | ||
"8" | ||
), | ||
"246" -> Seq("a", "b", "f", "g", "h", "i", "n", "p", "5", "6", "7", "8") | ||
) | ||
} |
Oops, something went wrong.