-
Notifications
You must be signed in to change notification settings - Fork 2
/
create-csv-for-entities-missing-cbsc-crosswalk.xq
110 lines (89 loc) · 4.53 KB
/
create-csv-for-entities-missing-cbsc-crosswalk.xq
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
xquery version "3.0";
(:
: Creates a CSV file that lists the Syriaca.org records for persons, places,
: and works that lack both a CBSC idno **and** a CBSC bibl entry. Some of these
: may never require a CBSC keyword, but others may have one that has simply not
: been matched yet.
: Last run on all data on 2021-12-02.
:
: @author William L. Potter
: @version 1.0
: @date 2021-12-02
:)
declare default element namespace "http://www.tei-c.org/ns/1.0";
declare namespace srophe="https://srophe.app";
import module namespace functx="http://www.functx.com";
(: Root of the local srophe-app-data checkout. The path is machine-specific,
 : so it is bound once here; edit this single binding to run the script elsewhere. :)
let $dataRoot := "C:\Users\anoni\Documents\GitHub\srophe\srophe-app-data\data\"
(: One collection of TEI documents per entity type. :)
let $personsColl := collection($dataRoot || "persons\tei\")
let $placesColl := collection($dataRoot || "places\tei\")
let $worksColl := collection($dataRoot || "works\tei\")
(: Options passed to csv:serialize; the header option is switched on. :)
let $csvOptions := map { 'header': true() }
(: Person records that carry no CBSC data at all: no idno containing "csc.org.il"
 : and no target or keyword on an entry citedRange of a bibl pointing to
 : http://syriaca.org/bibl/5. Produces one <record> element per such document. :)
let $persons :=
  for $person in $personsColl
  let $entity := $person//text/body/listPerson/person
  let $recordUri := $entity/idno[1]/text()
  (: Join the title's text nodes before calling the functx helper: it accepts a
   : single string, so a title with several text nodes would otherwise raise a
   : type error. :)
  let $rawTitle := string-join($person//titleStmt/title[@level="a"]/text(), "")
  (: Keep only what precedes the " —" separator when the title contains one. :)
  let $enHeadword := normalize-space(functx:substring-before-if-contains($rawTitle, " —"))
  (: A where clause replaces the former if-without-else, which standard
   : XQuery 3.0 does not allow. :)
  let $cbscIdnoValues :=
    for $idno in $entity/idno
    where contains($idno/text(), "csc.org.il")
    return $idno/text()
  let $cbscIdno := string-join($cbscIdnoValues, "|")
  let $cbscBibls := $entity/bibl[ptr/@target = "http://syriaca.org/bibl/5"]
  let $entryRanges := $cbscBibls/citedRange[@unit="entry"]
  let $cbscBiblTarget := string-join($entryRanges/@target ! string(.), "|")
  let $cbscBiblKeyword := string-join($entryRanges/text(), "|")
  (: True only when all three joined strings are empty. :)
  let $noCbscData := ($cbscIdno || $cbscBiblTarget || $cbscBiblKeyword) = ""
  where $noCbscData
  return
    <record>
      <syriacaUri>{$recordUri}</syriacaUri>
      <syriacaEnglishHeadword>{$enHeadword}</syriacaEnglishHeadword>
      <category>person</category>
    </record>
(: Place records that carry no CBSC data at all: no idno containing "csc.org.il"
 : and no target or keyword on an entry citedRange of a bibl pointing to
 : http://syriaca.org/bibl/5. Produces one <record> element per such document. :)
let $places :=
  for $place in $placesColl
  let $entity := $place//text/body/listPlace/place
  let $recordUri := $entity/idno[1]/text()
  (: Join the title's text nodes before calling the functx helper: it accepts a
   : single string, so a title with several text nodes would otherwise raise a
   : type error. :)
  let $rawTitle := string-join($place//titleStmt/title[@level="a"]/text(), "")
  (: Keep only what precedes the " —" separator when the title contains one. :)
  let $enHeadword := normalize-space(functx:substring-before-if-contains($rawTitle, " —"))
  (: A where clause replaces the former if-without-else, which standard
   : XQuery 3.0 does not allow. :)
  let $cbscIdnoValues :=
    for $idno in $entity/idno
    where contains($idno/text(), "csc.org.il")
    return $idno/text()
  let $cbscIdno := string-join($cbscIdnoValues, "|")
  let $cbscBibls := $entity/bibl[ptr/@target = "http://syriaca.org/bibl/5"]
  let $entryRanges := $cbscBibls/citedRange[@unit="entry"]
  let $cbscBiblTarget := string-join($entryRanges/@target ! string(.), "|")
  let $cbscBiblKeyword := string-join($entryRanges/text(), "|")
  (: True only when all three joined strings are empty. :)
  let $noCbscData := ($cbscIdno || $cbscBiblTarget || $cbscBiblKeyword) = ""
  where $noCbscData
  return
    <record>
      <syriacaUri>{$recordUri}</syriacaUri>
      <syriacaEnglishHeadword>{$enHeadword}</syriacaEnglishHeadword>
      <category>place</category>
    </record>
(: Work records that carry no CBSC data at all: no idno containing "csc.org.il"
 : and no target or keyword on an entry citedRange of a nested bibl pointing to
 : http://syriaca.org/bibl/5. Produces one <record> element per such document. :)
let $works :=
  for $work in $worksColl
  let $entity := $work//text/body/bibl
  let $recordUri := $entity/idno[1]/text()
  (: Join the title's text nodes before calling the functx helper: it accepts a
   : single string, so a title with several text nodes would otherwise raise a
   : type error. :)
  let $rawTitle := string-join($work//titleStmt/title[@level="a"]/text(), "")
  (: Keep only what precedes the " —" separator when the title contains one. :)
  let $enHeadword := normalize-space(functx:substring-before-if-contains($rawTitle, " —"))
  (: A where clause replaces the former if-without-else, which standard
   : XQuery 3.0 does not allow. :)
  let $cbscIdnoValues :=
    for $idno in $entity/idno
    where contains($idno/text(), "csc.org.il")
    return $idno/text()
  let $cbscIdno := string-join($cbscIdnoValues, "|")
  let $cbscBibls := $entity/bibl[ptr/@target = "http://syriaca.org/bibl/5"]
  let $entryRanges := $cbscBibls/citedRange[@unit="entry"]
  let $cbscBiblTarget := string-join($entryRanges/@target ! string(.), "|")
  let $cbscBiblKeyword := string-join($entryRanges/text(), "|")
  (: True only when all three joined strings are empty. :)
  let $noCbscData := ($cbscIdno || $cbscBiblTarget || $cbscBiblKeyword) = ""
  where $noCbscData
  return
    <record>
      <syriacaUri>{$recordUri}</syriacaUri>
      <syriacaEnglishHeadword>{$enHeadword}</syriacaEnglishHeadword>
      <category>work</category>
    </record>
(: Gather the person, place and work records under a single <csv> root and
 : serialize that element with the CSV options bound earlier. :)
return csv:serialize(
  <csv>{ ($persons, $places, $works) }</csv>,
  $csvOptions
)