-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrdfxml.lisp
1909 lines (1766 loc) · 82.3 KB
/
rdfxml.lisp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
;;; Copyright (c) 2008, Joshua Taylor. All rights reserved.
;;; Redistribution and use in source and binary forms, with or without
;;; modification, are permitted provided that the following conditions
;;; are met:
;;; * Redistributions of source code must retain the above copyright
;;; notice, this list of conditions and the following disclaimer.
;;; * Redistributions in binary form must reproduce the above
;;; copyright notice, this list of conditions and the following
;;; disclaimer in the documentation and/or other materials
;;; provided with the distribution.
;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED
;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
;;; ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
(in-package #:cl-rdfxml)
;;; RDF URI classes
(declaim (inline core-syntax-term-p syntax-term-p old-term-p
node-element-uri-p property-element-uri-p
property-attribute-uri-p))
(defun core-syntax-term-p (object)
"core-syntax-term-p object => boolean
core-syntax-term-p returns true if object is in +core-syntax-terms+."
(or (eq object +rdf-rdf+)
(eq object +rdf-id+)
(eq object +rdf-about+)
(eq object +rdf-parse-type+)
(eq object +rdf-resource+)
(eq object +rdf-node-id+)
(eq object +rdf-datatype+)))
(defun syntax-term-p (object)
"syntax-term-p object => boolean
syntax-term-p returns true if object is a syntax term (a core syntax
term, rdf:Description, or rdf:li)."
(or (core-syntax-term-p object)
(eq object +rdf-description+)
(eq object +rdf-li+)))
(defun old-term-p (object)
"old-term-p object => boolean
old-term-p returns true if object is in +old-terms+."
(or (eq object +rdf-about-each+)
(eq object +rdf-about-each-prefix+)
(eq object +rdf-bag-id+)))
(defun node-element-uri-p (xuri &aux (uri (intern-uri xuri)))
"node-element-uri-p xuri => boolean
xuri---a URI designator
Node-element-uri-p determines whether the designated URI is a valid
node element URI. Node element URIs are all URIs except core syntax
terms, rdf:li, and old RDF terms."
(and (not (core-syntax-term-p uri))
(not (eq uri +rdf-li+))
(not (old-term-p uri))))
(defun property-element-uri-p (xuri &aux (uri (intern-uri xuri)))
"property-element-uri-p xuri => boolean
xuri---a URI designator
Property-element-uri-p determines whether the designated URI is a
valid property element URI. Property element URIs are all URIs except
core syntax terms, rdf:Description, and old RDF terms."
(and (not (core-syntax-term-p uri))
(not (eq uri +rdf-description+))
(not (old-term-p uri))))
(defun property-attribute-uri-p (xuri &aux (uri (intern-uri xuri)))
"property-attribute-uri-p xuri => boolean
xuri---a URI designator
Property-attribute-uri-p determines whether the designated URI is a
valid property attribute URI. Property attribute URIs are all URIs
except core syntax terms, rdf:Description, rdf:li, and old RDF terms."
(and (not (core-syntax-term-p uri))
(not (eq uri +rdf-description+))
(not (eq uri +rdf-li+))
(not (old-term-p uri))))
(defun rdf-namespace-prefixed-p (xuri)
"rdf-namespace-prefixed xuri => boolean
xuri---a uri designator
Rdf-namespace-prefixed-p returns true if xuri is prefixed with
+rdf-namespace+."
(let ((uri (puri:intern-uri xuri)))
(prefixp +rdf-namespace+ (puri::uri-string uri))))
(defun membership-property-element-p (xuri)
"membership-property-element-p xuri => boolean
xuri---a uri designator
Membership-property-element-p returns true if the designated URI is
either rdf:li or of the form rdf:_n where n standard for a sequence of
base-10 digits."
(let ((uri (puri:intern-uri xuri)))
(or (eq uri +rdf-li+)
(and (rdf-namespace-prefixed-p uri)
(membership-fragment-p (puri:uri-fragment uri))))))
(defun membership-fragment-p (fragment)
"membership-fragment fragment => boolean
fragment---a string or NIL
Membership-fragment-p returns true if fragment is of the form \"_n
where n is a decimal integer greater than zero with no leading
zeros.\" (RDF/XML Syntax Specification, Section 5.1)"
(if (null fragment) nil
(let ((fragment fragment))
(declare (string fragment))
(and (>= (length fragment) 2)
(char= (char fragment 0) #\_)
(char/= (char fragment 1) #\0)
(every 'digit-char-p (subseq fragment 1))))))
(defun rdf-name-p (xuri)
"rdf-name-p xuri => boolean
xuri---a URI designator
Rdf-name-p returns true if the designated URI is an RDF name,
including the infinite set of rdf:_n names."
(let ((uri (puri:intern-uri xuri)))
;; This list comes from 5.1 "The RDF Namespace and Vocabulary in
;; the RDF/XML Syntax Specification"
(flet ((syntax-name-p (uri)
(or (eq uri +rdf-rdf+)
(eq uri +rdf-description+)
(eq uri +rdf-id+)
(eq uri +rdf-about+)
(eq uri +rdf-parse-type+)
(eq uri +rdf-resource+)
;; rdf:li
(eq uri +rdf-node-id+)
(eq uri +rdf-datatype+)))
(class-name-p (uri)
(or (eq uri +rdf-seq+)
(eq uri +rdf-bag+)
(eq uri +rdf-alt+)
(eq uri +rdf-statement+)
(eq uri +rdf-property+)
(eq uri +rdf-xml-literal+)
(eq uri +rdf-list+)))
(property-name-p (uri)
(or (eq uri +rdf-subject+)
(eq uri +rdf-predicate+)
(eq uri +rdf-object+)
(eq uri +rdf-type+)
(eq uri +rdf-value+)
(eq uri +rdf-first+)
(eq uri +rdf-rest+)
;; rdf:_n
))
(resource-name-p (uri)
(eq uri +rdf-nil+)))
(or (syntax-name-p uri)
(class-name-p uri)
(property-name-p uri)
(resource-name-p uri)
;; rdf:li and rdf:_n
(membership-property-element-p uri)))))
;;;; PURI Utilities
;; PURI utilities appear in a strange place, because we want to be
;; able to signal warnings when URIs are encountered that begin with
;; the RDF namespace, but are not actually RDF names. To do this, we
;; need the RDF name utilities defined above.
(defun intern-uri (xuri)
"intern-uri xuri => uri
xuri---a uri designator
uri---a puri:uri
Intern URI is a wrapper around puri:intern-uri that provides a
use-value restart, so that a new uri designator can be provided."
(let ((uri (restart-case (puri:intern-uri xuri)
(parse-uri-non-strictly ()
:report "Try a non-strict parse."
:test (lambda (condition)
(and puri:*strict-parse*
(typep condition 'puri:uri-parse-error)))
(let ((puri:*strict-parse* nil))
(intern-uri xuri)))
(use-value (value okp)
:report "Use a different URI."
:interactive prompt-for-uri
(if okp
(intern-uri value)
(intern-uri xuri))))))
(prog1 uri
(when (and (rdf-namespace-prefixed-p uri)
(not (rdf-name-p uri)))
(warn 'rdf-prefixed-non-rdf-name :name uri)))))
(defun merge-uris (uri base)
"merge-uris uri base => interned-uri
uri, base---uri designators
interned-uri---a uri
merge-uris is a convenience function that merges uri and base and
interns the result."
(intern-uri
(restart-case (puri:merge-uris uri base)
(parse-uri-non-strictly ()
:report "Try a non-strict parse."
:test (lambda (condition)
(and puri:*strict-parse*
(typep condition 'puri:uri-parse-error)))
(let ((puri:*strict-parse* nil))
(merge-uris uri base)))
(use-value (value okp)
:report "Supply a different uri to merge with base."
:interactive prompt-for-uri
(if okp
(merge-uris value base)
(merge-uris uri base)))
(use-value (value okp)
:report "Use a different URI."
:interactive prompt-for-uri
(if okp
(intern-uri value)
(merge-uris uri base))))))
(defun add-fragment (id base)
"add-fragment id base => interned-uri
id---a uri fragment id
base---a uri designator
Add-fragment merges \"#<id>\" with base, and returns the result of
interning the merged URI."
(merge-uris (strcat #\# id) base))
;;;; RDF Concepts
;;; Blank Nodes
(defclass blank-node ()
((id
:initarg :id
:type string
:reader blank-node-id
:documentation "The graph-local ID of the blank-node. This slot is
unbound if the blank node has no id. The id of a blank node is used to
identify the blank node within the scope of a particular graph. That
two blank-nodes happen to have the same ID is not an indication that
they represent the same blank node."))
(:documentation "The blank-node class represents blank nodes in a
graph. Blanks nodes are local to a graph, and can be identified within
a graph by their ID. The blank-node id is used for refering to the
same blank node in an RDF/XML document, and so in general, blank-nodes
ought to compared using object equality, i.e., eq."))
(defgeneric blank-node-id (blank-node)
(:documentation "blank-node-id blank-node => id
blank-node---a blank-node
id---a string
Blank-node-id returns the ID of the blank-node. Blank-node-ids are
intended to be used for readability purposes. Blank-nodes should be
compared directly using object equality. That two blank-nodes have ids
that are string= does not mean that they represent the same RDF
blank-node."))
(defmethod print-object ((bnode blank-node) stream)
"print-object bnode stream => bnode
bnode---a blank-node
stream---an output stream
Print-objects prints a representation of the blank node to stream. The
representation is not readable."
(prog1 bnode
(if (not (slot-boundp bnode 'id))
(print-unreadable-object (bnode stream :identity t :type t))
(print-unreadable-object (bnode stream :type t)
(write-string (blank-node-id bnode) stream)))))
(defvar *blank-nodes* nil
"An equal hash table that is used for `interning' blank node IDs
within the scope of a single graph. Initially nil, but should be bound
for each graph being processed.")
(defun blank-node (&optional (id nil idp) (namespace *blank-nodes*))
"blank-node [id [namespace]] => blank-node
id---a string
namespace---an equal hash-table
Blank-node returns a blank node. If id is specified, then if there is
already a blank node in namespace whose id is equal to id, then that
blank node is returned. Otherwise, a new blank node is created,
inserted into namespace, and returned. If id is not specified, then a
new blank node is returned, and namespace is not modified."
(if (not idp) (make-instance 'blank-node)
(multiple-value-bind (bnode present?) (gethash id namespace)
(if present? bnode
(setf (gethash id namespace)
(make-instance 'blank-node :id id))))))
;;; Literals
(defclass literal ()
((string
:initarg :string
:reader literal-string
:type string
:documentation "The lexical form of the literal."))
(:documentation "The literal class is the superclass of both the
plain-literal and the typed literal. Every literal has some lexical
form, and the slot storing this form is defined in the literal class,
and may be read with literal-string."))
(defgeneric literal-string (literal)
(:documentation "literal-string literal => string
literal---a literal
string---a string
Literal-string returns the lexical form of the literal."))
;; plain-literals
(defclass plain-literal (literal)
((language
:initarg :language
:reader literal-language
:type (or null string)
:documentation "The language tag associated with a plain literal.
If language is nil, the the plain-literal has no language type. If
language is non-nil, it should be a string that conforms to RDF 3066."))
(:documentation "The plain-literal class is the class comprising all
plain-literals. These literals have a lexical form, inherited from the
superclass literal, and an optional language tag. The language tag,
when provided, should be of the form specified by RFC 3066, and is
normalized to lowercase."))
(defgeneric literal-language (plain-literal)
(:documentation "literal-language plain-literal => result
plain-literal---a plain-literal
result---a string or nil
Literal-language return the language tag of the plain-literal, if
there is one, and nil if no language tag is associated with the
literal."))
(defmethod print-object ((literal plain-literal) stream)
"print-object plain-literal stream => plain-literal
plain-literal---a plain-literal
stream---an output stream
Print-object prints the plain-literal in a form similar to the W3C RDF
Validation output. The literal string appears with double quotation
marks, and the language-tag, if provided is output with an asperand."
(prog1 literal
(print-unreadable-object (literal stream)
(write (literal-string literal) :stream stream)
(unless (null (literal-language literal))
(write-char #\@ stream)
(write-string (literal-language literal) stream)))))
(defvar *plain-literals* (make-hash-table :test 'equal)
"An equal hash-table used for interning plain literals, that is,
literals with a string component and an optional language tag.")
(defun intern-plain-literal (string &optional (language nil))
"intern-plain-literal string [language] => plain-literal
string, language---strings
plain-literal---a plain literal
Intern-plain-literal returns a literal with the specified string and
language. Calls to intern-plain-literal with strings that are equal
and languages that are equal return the same literal object."
(let* ((language (etypecase language
(null language)
(string (string-downcase language))))
(key (cons string language)))
(multiple-value-bind (literal present?)
(gethash key *plain-literals*)
(if present? literal
(setf (gethash key *plain-literals*)
(make-instance 'plain-literal
:string string
:language language))))))
;; typed-literals
(defclass typed-literal (literal)
((datatype
:initarg :datatype
:reader literal-datatype
:type puri:uri
:documentation "The datatype of a typed-literal, which is not
optional, is a URI designating the datatype."))
(:documentation "The typed-literal class is the class comprising all
typed-literals. These literals have a lexical form, inherited from the
superclass literal, and a required datatype. The datatype is a
puri:uri."))
(defgeneric literal-datatype (typed-literal)
(:documentation "literal-datatype typed-literal => datatype
typed-literal---a typed-literal
datatype---an interned PURI uri
Literal-datatype returns the datatype of a typed-literal. The datatype
URI is interned, and may be compared with eq."))
(defmethod print-object ((literal typed-literal) stream)
"print-object typed-literal stream => typed-literal
typed-literal---a typed-literal
stream---an output stream
Print-object prints the typed-literal in a form similar to the W3C RDF
Validation output. The literal string appears with double quotation
marks, and the datatype URI is printed with a ^^ prefix."
(prog1 literal
(print-unreadable-object (literal stream)
(write (literal-string literal) :stream stream)
(write-string "^^" stream)
(puri:render-uri (literal-datatype literal) stream))))
(defvar *typed-literals* (make-hash-table :test 'equal)
"An equal hash-table used for interning typed literals, that is,
literals with a string component and a datatype.")
(defun intern-typed-literal (string datatype)
"intern-plain-literal string datatype => typed-literal
string---a string
datatype---a URI designator
typed-literal---a typed literal
intern-typed-literal returns a literal with the specified string and
datatype. Calls to intern-plain-literal with strings that are equal
and designators for the same URI return the same literal object."
(let* ((datatype (intern-uri datatype))
(key (cons string datatype)))
(multiple-value-bind (literal present?)
(gethash key *typed-literals*)
(if present? literal
(setf (gethash key *typed-literals*)
(make-instance 'typed-literal
:string string
:datatype datatype))))))
;;;; XML Utilities
(defvar +xml-namespace+
"http://www.w3.org/XML/1998/namespace"
"The string form of the URI XML namespace.")
(defun id-name-p (string)
"id-name-p string => boolean
string---a string
Id-name-p returns true if string is a valid XML NCName, which are the
only valid names attribute values for rdf:ID and rdf:nodeID."
(declare (string string))
(and (cxml::valid-name-p string)
;; cxml::nc-name-p assumes string is a valid name.
(cxml::nc-name-p string)))
;;; Whitespace
(declaim (inline xml-whitespace-p))
(defun xml-whitespace-p (string)
"xml-whitespace-p string => boolean
string---a string-designator
xml-whitespace-p returns true if every element of the string is an XML
whitespace character (i.e., is #x20, #x9, #xD, or #xA), or if string is
the empty string."
(loop for char across (string string)
for code = (char-code char)
always (or (eql code #x20)
(eql code #x9)
(eql code #xD)
(eql code #xA))))
(defun peek-skipping-comments (source)
"peek-skipping-comments source => result*
souce---a cxml:source
Peek-skipping-comments returns the same values that klacks:peek
returns, with the exception that if the next event from source is
:comments, the event is consumed. The values of the first non-comment
event are returned."
;; thanks to Red Daly, [email protected]
(loop while (eq :comment (klacks:peek source))
do (klacks:consume source)
finally (return (klacks:peek source))))
(defun consume-whitespace (source)
"consume-whitespace source => |
source---a cxml source
Consume-whitespace peeks and consumes events from source while the
events are of type :characters and the associated string satisfies
xml-whitespace-p, or if the event is :comment."
(declare (optimize (speed 3) (safety 0)))
(loop for event = (klacks:peek source)
while (or (and (eq :characters event)
(xml-whitespace-p (klacks:peek-value source)))
(eq :dtd event)
(eq :start-document event)
(eq :comment event))
do (klacks:consume source)))
;;; Element and Attribute Utilities
(defun element-uri (source)
"element-uri source => uri
source---a cxml:source
uri---a URI
Element-uri returns the result of interning the concatenation of the
current element's uri and lname. Element-uri calls klacks:current-uri
and klacks:current-lname, which signal an error if the current event is
not :start-element or :end-element."
(intern-uri
(strcat (klacks:current-uri source)
(klacks:current-lname source))))
(defun prefixp (prefix sequence &key (test 'eql))
"prefixp prefix sequence => boolean, properp
prefix, string---sequences
properp---a boolean
prefixp returns true if prefix is a prefix of sequence. Elements of
prefix and sequence are compared using test. A second value, properp,
is true when the length of the prefix is less than the length of the
sequence, regardless of whether prefix is actually a prefix of
sequence."
(let ((lp (length prefix))
(ls (length sequence)))
(values (and (every test prefix sequence)
(<= lp ls))
(< lp ls))))
(defun xml-lang-attribute-p (attribute)
"xml-lang-attribute-p attribute => boolean
attribute---a sax:attribute
xml-lang-attribute-p returns true if attribute has a local-name
\"name\" and a namespace-uri +xml-namespace+."
(and (string= "name" (sax:attribute-local-name attribute))
(string= +xml-namespace+ (sax:attribute-namespace-uri attribute))))
(defun xml-attribute-p (attribute)
"xml-attribute-p attribute => boolean
attribute---a sax:attribute
Xml-attribute-p returns true if attribute has a prefix that begins
with \"xml\", or if the attribute has no prefix and the attribute's
local name begins with \"xml\". or if attribute satisfies
xml-lang-attribute-p."
(or (xml-lang-attribute-p attribute)
(prefixp "xml" (sax:attribute-qname attribute) :test 'char-equal)))
(defun attribute-uri (attribute source)
"attribute-uri attribute source => uri
attribute---a sax:attribute
source---a cxml:source
uri---a PURI uri
Attribute-uri returns the URi associated with the attribute, as
described by Section 6.1.4 Attribute Event in the RDF/XML Syntax
specification. In general this is the concatenation of the namespace
URI with the local name. If no namespace is provided, and the local
name is ID, about, resource, parseType, or type, then the
corresponding RDF term URI is returned. Otherwise, an error is
signalled. "
(let ((local-name (sax:attribute-local-name attribute))
(namespace (sax:attribute-namespace-uri attribute)))
(cond
((not (null namespace))
(intern-uri (strcat namespace local-name)))
((find local-name #("ID" "about" "resource" "parseType" "type")
:test 'string=)
(warn 'non-namespaced-name :name local-name)
(intern-uri (strcat +rdf-namespace+ local-name)))
(t (error 'non-namespaced-attribute
:attribute local-name
:source source)))))
(defvar *element-ids* nil
"During parsing, an 'equal hash table that functions as a set of the
IDs that have been processed. Duplicate IDs in an RDF/XML document are
prohibited, i.e., it is an error for two elements to have the same
value on an rdf:ID attribute.")
(defun unique-attributes (attributes source)
"unique-attributes attributes source => uniques
attributes, uniques---associaation lists
source---a cxml:source
Unique-attributes returns an association list that no entries with the
same key. The entries of the new association list are those of the
original association list, but in the event that a duplicate is
detected, an error of type duplicate-attribute is signalled, and
various options are provided."
(let ((ignored-attributes '())
(new-attributes '()))
;; For each entry in the attributes, if the attribute is not being
;; ignored, check whether a value for the attribute has already
;; been provided. If it has not, then save the value. Otherwise,
;; signal an error of type duplicate-attribute. Restarts are:
;; ignore /all/ occurances of this attribute in attributes
;; (including the ones seen earlier), selecting the old value, and
;; selecting the new value.
(doalist ((uri value pair) attributes new-attributes)
(unless (member uri ignored-attributes)
(let ((old-value (assoc uri new-attributes)))
(if (null old-value) (push pair new-attributes)
(restart-case (error 'duplicate-attribute
:attribute uri :source source)
(ignore-attribute ()
:report "Ignore this attribute."
(push uri ignored-attributes)
(setf new-attributes (delete old-value new-attributes)))
(use-old-value ()
:report
(lambda (o)
(format o "Use old attribute value ~S." (cdr old-value))))
(use-new-value ()
:report
(lambda (o)
(format o "Use new attribute value ~S." value))
(setf (cdr old-value) value)))))))))
(defun ensure-non-repeated-id (attributes source)
"ensure-non-repeated-id attributes => new-attributes
attributes, new-attributes---association lists
Ensure-non-repeated-id ensures that if the association list attributes
contains a value for the key rdf:ID, that *element-ids* does not
currently map value to t, that is, that the element has not appeared
on another element. If there is an rdf:ID value and it has already
been specified on another element, restarts include using this value
anyway, ignoring the rdf:ID attribute, and using a different value. In
any of the cases that an ID is eventually specified, *element-ids* is
updated to include the new ID."
(let ((pair (assoc +rdf-id+ attributes)))
(if (null pair) attributes
(do () ((not (gethash (cdr pair) *element-ids*))
(setf (gethash (cdr pair) *element-ids*) t)
attributes)
(restart-case (error 'repeated-id
:attribute +rdf-id+
:value (cdr pair)
:source source)
(nil ()
:report "Use it anyway."
(return attributes))
(ignore-attribute ()
:report "Ignore it on this element."
(return (delete pair attributes)))
(use-value (new-value okp)
:report "Use a different ID."
:interactive (lambda ()
(multiple-value-list
(prompt-for-line "Enter an ID:")))
(when okp (setf (cdr pair) new-value))))))))
(defun element-attributes (source)
"element-attributes source => attributes
source---a cxml:source
attributes---an alist
Element-attributes returns an association list whose keys are the
attribute URIs of the attributes of the current element and whose
values are the corresponding values. The attributes used in RDF/XML
are the atributes of the element, except that xml attributes are
removed (those satisfying xml-attribute-p) according to Section 6.1.2
\"Element Event\" in the RDF/XML Syntax Specification."
(let* ((attributes (klacks:list-attributes source))
;; remove XML attributes
(attributes (remove-if 'xml-attribute-p attributes))
;; build an association list
(attributes (loop
for attribute in attributes
collecting (cons (attribute-uri attribute source)
(sax:attribute-value attribute))))
;; check for duplicate attributes
(attributes (unique-attributes attributes source))
;; check for repeated IDs
(attributes (ensure-non-repeated-id attributes source)))
;; return the attributes
attributes))
;;;; Language processing
(defun language-tag-p (string)
"language-tag-p string => boolean
string---a string
language-tag-p return true if the designated string is in the form
specified by RFC-3066. the general form of such a language tag is
<ALPHA>{1,8} ['-' [<ALPHA> | <DIGIT>]{1,8}]*, that is, a string of at
least 1 and at most 8 alphabetic characters followed by any number of
subtags that are a hypen followed by at least 1 and at most 8
alphabetic or digit characters. RFC-3066 also specifies what
particular strings may appear based on country codes, etc., but these
constraints are not enforced here."
(labels ((alphap (x)
(or (char<= #\a x #\z)
(char<= #\A x #\Z)))
(digitp (x)
(char<= #\0 x #\9))
(primary-subtag (stream)
(loop for char = (peek-char nil stream nil nil)
until (or (null char)
(not (alphap char)))
counting (read-char stream) into count
finally (return (<= 1 count 8))))
(subtag (stream)
(loop for char = (peek-char nil stream nil nil)
until (or (null char)
(not (or (alphap char)
(digitp char))))
counting (read-char stream) into count
finally (return (<= 1 count 8)))))
(with-input-from-string (stream string)
(when (primary-subtag stream)
(loop for char = (peek-char nil stream nil nil)
until (null char)
do (read-char stream)
always (and (char= char #\-)
(subtag stream)))))))
(defvar *current-xml-lang* nil
"The most recent xml:lang attribute that was specified in the
RDF/XML text. The initial value is nil, and *current-xml-lang* is
always rebound to nil when document parsing begins.")
(defun immediate-xml-lang (source)
"immediate-xml-lang source => result
source---a cxml:source
result---a string or nil
Immediate-xml-lang returns the value of the xml:lang attribute on the
source. Source's current event should be :start-element. If the
attribute is specified, its value, a string, is returned. If the
attribute is not specified, nil is returned."
(let ((lang (klacks:get-attribute source "lang" +xml-namespace+)))
(do () ((or (null lang)
(string= "" lang)
(language-tag-p lang))
lang)
(restart-case (error 'invalid-language-tag :tag lang :source source)
(use-value (value)
:report "Use a different language tag."
:interactive (lambda ()
(list (prompt-for-line "Enter a new language tag:")))
(setf lang value))
(ignore-language ()
:report "Ignore the bad language tag. (Treat as if ~
xml:lang attribute had not been provided.)"
(setf lang nil))))))
(defmacro with-xml-lang (lang &body body)
"with-xml-lang lang form*
With-xml-lang evalutes lang to generate a new language, and evalutes
body with a new binding of *current-xml-lang*. If the result of
evalating lang is null, then *current-xml-lang* is rebound to its
current value, if it is \"\", then *current-xml-lang* is bound to nil,
otherwise, *current-xml-lang* is bound to the new language."
(let ((%lang (make-symbol "LANGUAGE")))
`(let* ((,%lang ,lang)
(*current-xml-lang*
(cond ((null ,%lang) *current-xml-lang*)
((string= "" ,%lang) nil)
(t ,%lang))))
,@body)))
;;;; li-counters
(defvar *li-counter* '()
"A list of li-counters. With each expecting-element', a new counter
is pushed onto *li-counter*, and so incrementing the counter of an
element's parent is done by (incf (cadr *li-counter*)).")
(defun expanded-li-uri ()
"expanded-li-uri => uri
uri---a puri:uri
Expanded-li-uri returns the uri generated by incrementing the current
element's parent's li counter and adding the fragment _n (where n is
the incremented counter) to the RDF namespace."
(let ((n (incf (cadr *li-counter*))))
(add-fragment (strcat #\_ (write-to-string n)) +rdf-namespace+)))
;;;; Expecting Element
(defun check-for-illegal-namespace-names (attributes)
"check-for-illegal-namespace-names attributes => |
attributes---a list of attributes
check-for-illegal-namespace-names enforces the restriction of Section
5.1 of the RDF/XML Syntax Specification that states that \"within
RDF/XML documents it is not permitted to use XML namespaces whose
namespace name is the RDF namespace URI reference concatenated with
additional characters.\" If such a namespace binding is encountered,
an error of type illegal-namespace-name is signalled."
(dolist (namespace-declaration
(cxml::find-namespace-declarations attributes))
(destructuring-bind (prefix . uri) namespace-declaration
(multiple-value-bind (prefixp properp) (prefixp +rdf-namespace+ uri)
(when (and prefixp properp)
(error 'illegal-namespace-name :prefix prefix :uri uri))))))
(defmacro expecting-element
((source &optional (lname nil lnamep) (uri nil urip)) &body body)
"expecting-element (source [lname [uri]]) form*
expecting-element is a wrapper around klacks:expecting element that
ensures proper binding of the *current-xml-lang* variable, so that
plain literals can properly inherit the value of xml:lang attributes.
Within this RDF/XML parser, expecting-element should always be used
rather than klacks:expecting-element."
(let ((%source (make-symbol "SOURCE")))
`(let ((,%source ,source))
(klacks:expecting-element (,%source
,@(when lnamep `(,lname))
,@(when urip `(,uri)))
;; TODO: The attributes are already being extracted here, so
;; it might be worthwhile to process them all here and
;; somehow cache them (and partition them based on how
;; they're used) so that we don't extract more times than
;; necessary.
(check-for-illegal-namespace-names
(klacks:list-attributes ,%source))
(with-xml-lang (immediate-xml-lang ,%source)
(let ((*li-counter* (cons 0 *li-counter*)))
,@body))))))
;;;; Conditions
(defun current-position (source)
"current-position source => position
source---a cxml:source
position---a string
Current-position returns a string of the form <line>.<column>
indicating the approximate position of source."
(format nil "~s.~s"
(klacks:current-line-number source)
(klacks:current-column-number source)))
(define-condition rdfxml-warning (warning) ()
(:documentation
"The class of warnings signalled by the RDF/XML parser."))
(define-condition rdf-prefixed-non-rdf-name (rdfxml-warning)
((name :initarg :name :reader name))
(:documentation "According to \"Section 5.1 The RDF Namespace and
Vocabulary\" of the RDF/XML Syntax Specification, certain names are
defined as RDF names, and these begin with the RDF namespace name, but
\"any other names [beginning with the RDF namespace name] are not
defined and SHOULD generate a warning when encountered, but should
otherwise behave normally.\" rdf-prefixed-non-rdf-name is the class of
warnings that are signalled in such situations.")
(:report
(lambda (condition stream)
(format stream "The name ~A was encountered, which begins with ~
the RDF namespace name, but is not a defined ~
RDF name. (See Section 5.1 of the RDF/XML ~
Syntax Specification for more information."
(name condition)))))
(defvar *warn-on-rdf-prefixed-non-rdf-names* t
"According to to Section 5.1, The RDF Namespace and Vocabulary of
the RDF/XML Syntax Specification, warnings SHOULD be generated when a
name is encountered that begins with the RDF namespace name, but is
not an RDF name. If *warn-on-rdf-prefixed-non-rdf-names* is true (the
default), then such warnings are generated, but are muffled otherwise.")
(define-condition non-namespaced-name (rdfxml-warning)
((name :initarg :name :reader name))
(:documentation "According to 6.1.4 of the RDF/XML Syntax
Specification, the attributes ID, about, resource, parseType, and type
may appear without a namespace prefix, and are interpreted as the
corresponding RDF names. Also, \"new documents SHOULD NOT use these
unqualified attributes, and applications MAY choose to warn when the
unqualified form is seen in a document.\" non-namespaced-name is the
class of warnings that are signalled in such situations.")
(:report
(lambda (condition stream)
(format stream "Unqualified attribute ~A is being placed into ~
the RDF namespace according to Section 6.1.4 of ~
the RDF/XML Syntax Specification, but new ~
documents should not use unqualified attribues."
(name condition)))))
(defvar *warn-on-non-namespaced-names* t
"A boolean (whose default value is true) that controls whether a
warning is signalled when a permitted non-namespaced attribute is
encountered. The only attributes which may appear without namespaces
are ID, about, resource, parseType, and type. New documents should not
use unqualified forms, though they may appear in legacy documents. See
Section 6.1.4 of the RDF/XML Syntax Specification.")
(define-condition other-parse-type (rdfxml-warning)
((parse-type :initarg :parse-type :reader parse-type))
(:documentation "The rdf:parseType attribute has three explicitly
meaning values, \"Resource\", \"Literal\", and \"Collection\". If
rdf:parseType is encountered with a different value, the element is
processed as though the value had been \"Literal\". The specification
does not indicate that a warning should be signalled, and so such
warnings are not generated in the default case, but if the user
requests warnings on such attribute values, a warning of type
other-parse-type is signalled.")
(:report
(lambda (condition stream)
(format stream "The rdf:parseType attribute appeared with value ~
~S, which is not one of \"Literal\", ~
\"Resource\", or \"Collection\". The element ~
will be processed as though the value was ~
\"Literal\"." (parse-type condition)))))
(defvar *warn-on-parse-type-other* nil
"A boolean (whose default value is false) that controls whether a
warning is signalled when an element is encountered that specifies the
rdf:parseType attribute with a value other than \"Literal\",
\"Resource\", or \"Collection\". Such an element is treated as though
the value were \"Literal\", and this situation is not an error.
Nonetheless, it seems likely that one might be interested in knowing
when it occurs.")
;;; Errors
(define-condition rdfxml-error (error)
((source :initarg :source :reader source))
(:documentation "The class of errors signalled by the RDF/XML parser."))
;;; Illegal namespace name
(define-condition illegal-namespace-name (rdfxml-error)
((prefix :initarg :prefix :reader prefix)
(uri :initarg :uri :reader uri))
(:documentation "According to Section 5.1 of the RDF/XML Syntax
Specification, Within RDF/XML documents it is not permitted to use XML
namespaces whose namespace name is the RDF namespace URI reference
concatenated with additional characters.")
(:report
(lambda (condition stream)
(format stream "Within RDF/XML documents it is not permitted to ~
use XML namespaces whose namespace name is the ~
RDF namespace URI reference concatenated with ~
additional characters, but the prefix ~S would ~
have been bound to the name ~S."
(prefix condition)
(uri condition)))))
;;; Bad attribute value
(define-condition invalid-attribute-value (rdfxml-error)
((value :initarg :value :reader value)
(attribute :initarg :attribute :reader attribute))
(:documentation "Conditions of type invalid-attribute-value are
signalled when an attribute value is not appropriate for the
particular attribute. This kind of situation may happen, for instance,
if a xml:lang value is not RFC 3066 compliant, or if an rdf:ID or
rdf:nodeID value is not an XML NCName. Note that these situations are
distinct from those in which an attribute appears where it should not."))
(define-condition repeated-id (invalid-attribute-value) ()
(:documentation "Errors of type repeated-id are signalled when the
value of an rdf:ID on an element is the same as the value of an rdf:ID
attribute on another element. rdf:IDs should be unique within the a
document.")
(:report
(lambda (condition stream)
(format stream "Near ~A, rdf:ID appeared with value ~S, but ~
an element has already appeared with this same ID."
(current-position (source condition))
(value condition)))))
(define-condition non-nc-name-id (invalid-attribute-value) ()
(:documentation "Errors of type non-nc-name-id are raised when
attributes rdf:ID or rdf:nodeID appear with values that are not valid
NCNames.")
(:report
(lambda (condition stream)
(format stream "Near ~A attribute ~S appeared with value ~S ~
which should be, but is not, an XML NCName."
(current-position (source condition))
(attribute condition)
(value condition)))))
(define-condition invalid-language-tag (invalid-attribute-value)
((tag :initarg :tag :reader tag))
(:documentation "Language tags in RDF/XML (and more generally, XML)
must be in accordance with RFC 3066. When a language tag is specified
that is not of the proper form, an error of type invalid-language-tag
is signalled.")
(:report
(lambda (condition stream)
(format stream "~S is not an RFC 3066 compliant language tag, ~
but was encountered near ~a."
(tag condition)
(current-position (source condition))))))
;;; Unexpected characters
(define-condition unexpected-characters (rdfxml-error)
((characters :initarg :characters :reader characters))
(:documentation "Excess whitespace is always permitted between
elements, but arbitrary character data is not. When non-whitespace
character data is encountered where whitespace is expected, an error
of type unexpected characters is signalled.")
(:report
(lambda (condition stream)
(format stream "The text ~s appeared near ~a, but only whitespace
is permitted."
(characters condition)
(current-position (source condition))))))
;;; Duplicate attributes
(define-condition duplicate-attribute (rdfxml-error)
((attribute :initarg :attribute :reader attribute))
(:documentation "Errors of type duplicate-attribute are signalled
when attributes are specified more than once and the XML parser did
not flag the error. This happens when, according to the RDF/XML
specification, certain non-namespaced attributes are interpreted as
being in the RDF namespace. A duplicate attribute can appear, for
instance, when rdf:ID is specified in conjunction with ID, which is