Skip to content

Commit

Permalink
Strip HTML tags from DublinCore Creator tags
Browse files Browse the repository at this point in the history
  • Loading branch information
fguillot committed Sep 9, 2023
1 parent 344a237 commit 36f0136
Show file tree
Hide file tree
Showing 5 changed files with 53 additions and 20 deletions.
Original file line number Diff line number Diff line change
@@ -1,16 +1,30 @@
// SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

package rdf // import "miniflux.app/v2/internal/reader/rdf"
package dublincore // import "miniflux.app/v2/internal/reader/dublincore"

import (
"strings"

"miniflux.app/v2/internal/reader/sanitizer"
)

// DublinCoreFeedElement represents Dublin Core feed XML elements.
//
// It is meant to be embedded in a feed-level struct decoded with
// encoding/xml, so that the channel creator is picked up alongside the
// feed's own fields.
type DublinCoreFeedElement struct {
// DublinCoreCreator holds the raw text of the <creator> element in the
// http://purl.org/dc/elements/1.1/ namespace nested under <channel>.
DublinCoreCreator string `xml:"http://purl.org/dc/elements/1.1/ channel>creator"`
}

// DublinCoreEntryElement represents Dublin Core entry XML elements.
type DublinCoreEntryElement struct {
// GetSanitizedCreator returns the feed-level creator after passing it
// through sanitizer.StripTags and trimming leading and trailing whitespace.
func (feed *DublinCoreFeedElement) GetSanitizedCreator() string {
	// Strip first, then trim: whitespace that was adjacent to a removed tag
	// may end up at the edges of the string.
	creator := sanitizer.StripTags(feed.DublinCoreCreator)
	creator = strings.TrimSpace(creator)
	return creator
}

// DublinCoreItemElement represents the Dublin Core XML elements of a single
// feed item (entry).
//
// It is meant to be embedded in an item-level struct decoded with
// encoding/xml.
type DublinCoreItemElement struct {
// DublinCoreDate holds the raw text of the item's <date> element in the
// http://purl.org/dc/elements/1.1/ namespace.
DublinCoreDate string `xml:"http://purl.org/dc/elements/1.1/ date"`
// DublinCoreCreator holds the raw text of the item's <creator> element in
// the http://purl.org/dc/elements/1.1/ namespace.
DublinCoreCreator string `xml:"http://purl.org/dc/elements/1.1/ creator"`
// DublinCoreContent holds the raw text of the item's <encoded> element.
// Note that its namespace is http://purl.org/rss/1.0/modules/content/,
// not the Dublin Core one, despite the field name.
DublinCoreContent string `xml:"http://purl.org/rss/1.0/modules/content/ encoded"`
}

// GetSanitizedCreator returns the item-level creator after passing it
// through sanitizer.StripTags and trimming leading and trailing whitespace.
func (item *DublinCoreItemElement) GetSanitizedCreator() string {
	// Strip first, then trim: whitespace that was adjacent to a removed tag
	// may end up at the edges of the string.
	creator := sanitizer.StripTags(item.DublinCoreCreator)
	creator = strings.TrimSpace(creator)
	return creator
}
28 changes: 28 additions & 0 deletions internal/reader/rdf/parser_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -349,6 +349,34 @@ func TestParseItemWithDublicCoreDate(t *testing.T) {
}
}

// TestParseItemWithEncodedHTMLInDCCreatorField checks that entity-encoded
// HTML anchors inside an item's <dc:creator> element are removed from the
// parsed entry author, leaving only the visible text.
func TestParseItemWithEncodedHTMLInDCCreatorField(t *testing.T) {
	// The creator value carries two escaped <a> links, each followed by an
	// affiliation in parentheses.
	payload := `<?xml version="1.0" encoding="utf-8"?>
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns="http://purl.org/rss/1.0/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:slash="http://purl.org/rss/1.0/modules/slash/">
<channel>
<title>Example</title>
<link>http://example.org</link>
</channel>
<item>
<title>Title</title>
<description>Test</description>
<link>http://example.org/test.html</link>
<dc:creator>&lt;a href=&quot;http://example.org/author1&quot;>Author 1&lt;/a&gt; (University 1), &lt;a href=&quot;http://example.org/author2&quot;>Author 2&lt;/a&gt; (University 2)</dc:creator>
<dc:date>2018-04-10T05:00:00+00:00</dc:date>
</item>
</rdf:RDF>`

	parsed, err := Parse("http://example.org", bytes.NewBufferString(payload))
	if err != nil {
		t.Fatal(err)
	}

	// Only the link text and the affiliations should remain.
	const want = "Author 1 (University 1), Author 2 (University 2)"
	if got := parsed.Entries[0].Author; got != want {
		t.Errorf("Incorrect entry author, got: %s, want: %s", got, want)
	}
}

func TestParseItemWithoutDate(t *testing.T) {
data := `<?xml version="1.0" encoding="utf-8"?>
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns="http://purl.org/rss/1.0/">
Expand Down
9 changes: 5 additions & 4 deletions internal/reader/rdf/rdf.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import (
"miniflux.app/v2/internal/logger"
"miniflux.app/v2/internal/model"
"miniflux.app/v2/internal/reader/date"
"miniflux.app/v2/internal/reader/dublincore"
"miniflux.app/v2/internal/reader/sanitizer"
"miniflux.app/v2/internal/urllib"
)
Expand All @@ -22,7 +23,7 @@ type rdfFeed struct {
Title string `xml:"channel>title"`
Link string `xml:"channel>link"`
Items []rdfItem `xml:"item"`
DublinCoreFeedElement
dublincore.DublinCoreFeedElement
}

func (r *rdfFeed) Transform(baseURL string) *model.Feed {
Expand All @@ -38,7 +39,7 @@ func (r *rdfFeed) Transform(baseURL string) *model.Feed {
for _, item := range r.Items {
entry := item.Transform()
if entry.Author == "" && r.DublinCoreCreator != "" {
entry.Author = strings.TrimSpace(r.DublinCoreCreator)
entry.Author = r.GetSanitizedCreator()
}

if entry.URL == "" {
Expand All @@ -60,7 +61,7 @@ type rdfItem struct {
Title string `xml:"title"`
Link string `xml:"link"`
Description string `xml:"description"`
DublinCoreEntryElement
dublincore.DublinCoreItemElement
}

func (r *rdfItem) Transform() *model.Entry {
Expand Down Expand Up @@ -88,7 +89,7 @@ func (r *rdfItem) entryContent() string {
}

func (r *rdfItem) entryAuthor() string {
return strings.TrimSpace(r.DublinCoreCreator)
return r.GetSanitizedCreator()
}

func (r *rdfItem) entryURL() string {
Expand Down
11 changes: 0 additions & 11 deletions internal/reader/rss/dublincore.go

This file was deleted.

5 changes: 3 additions & 2 deletions internal/reader/rss/rss.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ import (
"miniflux.app/v2/internal/logger"
"miniflux.app/v2/internal/model"
"miniflux.app/v2/internal/reader/date"
"miniflux.app/v2/internal/reader/dublincore"
"miniflux.app/v2/internal/reader/media"
"miniflux.app/v2/internal/reader/sanitizer"
"miniflux.app/v2/internal/urllib"
Expand Down Expand Up @@ -182,7 +183,7 @@ type rssItem struct {
CommentLinks []rssCommentLink `xml:"comments"`
EnclosureLinks []rssEnclosure `xml:"enclosure"`
Categories []rssCategory `xml:"category"`
DublinCoreElement
dublincore.DublinCoreItemElement
FeedBurnerElement
PodcastEntryElement
media.Element
Expand Down Expand Up @@ -250,7 +251,7 @@ func (r *rssItem) entryAuthor() string {
}

if author == "" {
author = r.DublinCoreCreator
author = r.GetSanitizedCreator()
}

return sanitizer.StripTags(strings.TrimSpace(author))
Expand Down

0 comments on commit 36f0136

Please sign in to comment.