Skip to content

Commit

Permalink
Marc alternative titles (#2598)
Browse files Browse the repository at this point in the history
* extract ISBN/ISSN to common

* extract 020 and 022 from XML

* add Edition to common marc

* Implement common MarcElectronicResources

* Apply auto-formatting rules

* move loggingContext around

* sort out the ElectronicResources interface

* remove redundant TODO

* resources now populate a default linkText

* Update pipeline/transformer/transformer_marc_common/src/test/scala/weco/pipeline/transformer/marc_common/transformers/MarcElectronicResourcesTest.scala

Co-authored-by: Robert Kenny <[email protected]>

* fix merge errors

* warn on empty labels

* add context to failure to find label

* fix override

* implement common current frequency transformer

* use common currentFrequency in Sierra and XML

* implement common alternative titles

* create common alternative title test suite

---------

Co-authored-by: Buildkite on behalf of Wellcome Collection <[email protected]>
Co-authored-by: Robert Kenny <[email protected]>
  • Loading branch information
3 people authored Apr 4, 2024
1 parent f945a2c commit da5b58a
Show file tree
Hide file tree
Showing 5 changed files with 420 additions and 21 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
package weco.pipeline.transformer.marc_common.transformers

import weco.pipeline.transformer.marc_common.models.{
MarcField,
MarcRecord,
MarcSubfield
}

// Populate work:alternativeTitles
//
// The following fields are used as possible alternative titles:
// * 240 $a https://www.loc.gov/marc/bibliographic/bd240.html
// * 130 $a http://www.loc.gov/marc/bibliographic/bd130.html
// * 246 $a https://www.loc.gov/marc/bibliographic/bd246.html

/** Extracts the alternative titles of a work from a MARC record.
  *
  * Every 130, 240 and 246 field contributes one candidate title, built by
  * joining the contents of its subfields (in document order) with a single
  * space. Caption titles (246 with second indicator 6) and the
  * Wellcome-specific `$5 UkLW` subfield are excluded. Empty candidates are
  * dropped and duplicates are collapsed, keeping the first occurrence.
  */
object MarcAlternativeTitles extends MarcDataTransformer {

  override type Output = Seq[String]

  /** Returns the distinct, non-empty alternative titles found in `record`.
    *
    * @param record the MARC record to read 130/240/246 fields from
    * @return one title per qualifying field, without duplicates
    */
  override def apply(record: MarcRecord): Seq[String] =
    record
      .fieldsWithTags("240", "130", "246")
      .withoutCaptionTitles
      .map(alternativeTitle)
      .filter(_.nonEmpty)
      .distinct

  /** Builds the title for a single field from its subfield contents.
    *
    * Blank subfield contents are skipped before joining. Otherwise a field
    * made up of several empty subfields would produce a whitespace-only
    * title (e.g. " ") that slips past the emptiness filter in `apply`, and
    * an empty subfield in the middle of a field would leave a doubled space.
    */
  private def alternativeTitle(field: MarcField): String =
    field.subfields.withoutUKLW
      .map(_.content)
      .filterNot(_.trim.isEmpty)
      .mkString(" ")

  implicit private class FieldsOps(fields: Seq[MarcField]) {

    // 246 with ind2 = 6 indicates a Caption Title
    // "printed at the head of the first page of text. Caption title: may be generated with the note for display."
    // This is not an alternative title that we want to capture here.
    def withoutCaptionTitles: Seq[MarcField] =
      fields.filterNot {
        field =>
          field.marcTag == "246" && field.indicator2 == "6"
      }
  }

  implicit private class SubfieldsOps(subfields: Seq[MarcSubfield]) {

    // Any $5 subfield with contents `UkLW` is Wellcome Library-specific and
    // should be omitted.
    def withoutUKLW: Seq[MarcSubfield] =
      subfields.filterNot(_ == MarcSubfield("5", "UkLW"))
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,337 @@
package weco.pipeline.transformer.marc_common.transformers

import org.scalatest.LoneElement
import org.scalatest.funspec.AnyFunSpec
import org.scalatest.matchers.should.Matchers
import org.scalatest.prop.TableDrivenPropertyChecks._
import weco.pipeline.transformer.marc_common.generators.MarcTestRecord
import weco.pipeline.transformer.marc_common.models.{MarcField, MarcSubfield}
import scala.util.Random
/** Specification for [[MarcAlternativeTitles]].
  *
  * Covers three groups of behaviour: records that yield no alternative
  * titles, single 130/240/246 fields that yield exactly one title, and
  * records with several qualifying fields (including de-duplication and
  * the caption-title exclusion, which applies to 246 only).
  */
class MarcAlternativeTitlesTest
    extends AnyFunSpec
    with Matchers
    with LoneElement {

  describe("extracting alternative titles from 130, 240, and 246 fields") {
    info("https://www.loc.gov/marc/bibliographic/bd130.html")
    info("https://www.loc.gov/marc/bibliographic/bd240.html")
    info("https://www.loc.gov/marc/bibliographic/bd246.html")
    describe("returning nothing") {
      it(
        "does not extract alternative titles if 130, 240, and 246 are absent"
      ) {
        titlesFrom(titleField("999", "mafeesh")) shouldBe Nil
      }

      it("does not return an empty alternative title given an empty field") {
        titlesFrom(titleField("130", "")) shouldBe Nil
      }

      it(
        "does not return an empty alternative title given a field whose content is entirely filtered out"
      ) {
        titlesFrom(
          MarcField(
            marcTag = "246",
            subfields = Seq(MarcSubfield(tag = "5", content = "UkLW"))
          )
        ) shouldBe Nil
      }

      it("ignores 'caption title' fields, i.e. 246 fields with ind2=6") {
        titlesFrom(
          MarcField(
            marcTag = "246",
            subfields =
              Seq(MarcSubfield(tag = "a", content = "I am a caption")),
            indicator2 = "6"
          )
        ) shouldBe Nil
      }
    }
  }

  describe("extracting a single alternative title") {
    forAll(
      Table(
        "tag",
        "130",
        "240",
        "246"
      )
    ) {
      tag =>
        describe(s"extracting an alternative title from $tag") {
          it(s"extracts an alternative title from field $tag") {
            titlesFrom(titleField(tag, "mafeesh")).loneElement shouldBe "mafeesh"
          }

          it(
            s"concatenates all subfields of $tag in document order to make the alternative title"
          ) {
            // The subfield order is randomised on every run, so the
            // expectation is derived from the same shuffled sequence.
            val shuffled = Random.shuffle(subfieldLists(tag))
            val subfields = shuffled.map(
              subtag =>
                MarcSubfield(tag = subtag, content = subtag.toUpperCase)
            )
            val expectedTitle = shuffled.map(_.toUpperCase).mkString(" ")

            titlesFrom(
              MarcField(marcTag = tag, subfields = subfields)
            ).loneElement shouldBe expectedTitle
          }

          if (tag == "246") {
            it("ignores subfield 246$5 if its value is UkLW") {
              info("$5UkLW is Wellcome Library-specific and should be omitted")
              info(
                "$5 is non-repeating, so this example should not exist in Real Life"
              )
              info(" but this test demonstrates that the existence of $5UkLW")
              info(
                " does not impact the transformer's ability to extract anything else"
              )
              titlesFrom(
                MarcField(
                  marcTag = tag,
                  subfields = Seq(
                    MarcSubfield(tag = "a", content = "Pinakes"),
                    MarcSubfield(tag = "5", content = "UkLW"),
                    MarcSubfield(tag = "5", content = "Mouseion")
                  )
                )
              ).loneElement shouldBe "Pinakes Mouseion"
            }
          }
        }
    }
  }

  describe("extracting multiple alternative titles") {
    it("extracts alternative titles from all relevant fields") {
      val taggedTitles = Seq(
        "130" -> "I'm very well acquainted too",
        "240" -> "with matters mathematical",
        "246" -> "I understand equations",
        "246" -> "both simple",
        "240" -> "and quadratical",
        "130" -> "About binomial theorem I am teeming with a lot o' news"
      )
      val fields = taggedTitles.map {
        case (tag, title) => titleField(tag, title)
      }

      titlesFrom(fields: _*) should contain theSameElementsAs
        taggedTitles.map { case (_, title) => title }
    }

    it("does not return duplicate alternative titles") {
      val repeatedTitle =
        "With many cheerful facts about the square of the hypotenuse"
      val uniqueTitle =
        "With many cheerful facts about the square of the hypoten-potenuse"

      titlesFrom(
        titleField("130", repeatedTitle),
        titleField("240", repeatedTitle),
        titleField("246", repeatedTitle),
        titleField("246", uniqueTitle)
      ) should contain theSameElementsAs Seq(repeatedTitle, uniqueTitle)
    }

    it("only filters on ind2=6 for 246 fields") {
      info("246 with indicator2 = 6 is a caption title")
      info("this is not true of 130 and 240")
      val fields = Seq(
        "130" -> "I am not a caption",
        "246" -> "I am a caption",
        "240" -> "Nor am I"
      ) map {
        case (tag, content) =>
          MarcField(
            indicator2 = "6",
            marcTag = tag,
            subfields = Seq(
              MarcSubfield(
                tag = "a",
                content = content
              )
            )
          )
      }

      titlesFrom(fields: _*) should contain theSameElementsAs Seq(
        "I am not a caption",
        "Nor am I"
      )
    }
  }

  // Runs the transformer over a test record holding exactly the given fields.
  private def titlesFrom(fields: MarcField*): Seq[String] =
    MarcAlternativeTitles(MarcTestRecord(fields = fields.toList))

  // A field whose only subfield is $a carrying the given title; the
  // indicators are left at whatever MarcField defaults them to.
  private def titleField(marcTag: String, title: String): MarcField =
    MarcField(
      marcTag = marcTag,
      subfields = Seq(MarcSubfield(tag = "a", content = title))
    )

  // Subfield codes exercised per tag by the "document order" test above.
  private val subfieldLists = Map(
    "130" -> Seq(
      "a",
      "d",
      "f",
      "g",
      "h",
      "k",
      "l",
      "m",
      "n",
      "o",
      "p",
      "r",
      "s",
      "t",
      "0",
      "1",
      "2",
      "6",
      "7",
      "8"
    ),
    "240" -> Seq(
      "a",
      "d",
      "f",
      "g",
      "h",
      "k",
      "l",
      "m",
      "n",
      "o",
      "p",
      "r",
      "s",
      "0",
      "1",
      "2",
      "6",
      "7",
      "8"
    ),
    "246" -> Seq("a", "b", "f", "g", "h", "i", "n", "p", "5", "6", "7", "8")
  )
}
Loading

0 comments on commit da5b58a

Please sign in to comment.