Skip to content

Commit

Permalink
Merge pull request #314 from wsexport/metadata-entities
Browse files Browse the repository at this point in the history
Don't encode HTML entities before use
  • Loading branch information
dayllanmaza authored Jan 15, 2021
2 parents f5f0628 + ddc095c commit bacc93b
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 30 deletions.
27 changes: 17 additions & 10 deletions src/BookProvider.php
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,14 @@ public function getMulti( array $titles, $isMetadata = false ) {
return $pages;
}

public function getMetadata( $title, $isMetadata, DOMDocument $doc ) {
/**
* Get metadata etc. from an XHTML document.
* @param string $title The book's titlepage's page name.
* @param bool $isMetadata Whether the book's content, chapters, credits, and pictures should also be extracted from the document.
* @param DOMDocument $doc The document to read metadata from.
* @return Book
*/
public function getMetadata( string $title, bool $isMetadata, DOMDocument $doc ): Book {
$pageList = [ $title ];
$parser = new PageParser( $doc );
$book = new Book();
Expand All @@ -76,18 +83,18 @@ public function getMetadata( $title, $isMetadata, DOMDocument $doc ) {
}

$book->type = $metadataParser->getMetadata( 'ws-type' );
$book->name = htmlspecialchars( $metadataParser->getMetadata( 'ws-title' ) );
$book->name = $metadataParser->getMetadata( 'ws-title' );
if ( $book->name == '' ) {
$book->name = $this->removeNamespacesFromTitle( str_replace( '_', ' ', $metadataSrc ) );
}
$book->periodical = htmlspecialchars( $metadataParser->getMetadata( 'ws-periodical' ) );
$book->author = htmlspecialchars( $metadataParser->getMetadata( 'ws-author' ) );
$book->translator = htmlspecialchars( $metadataParser->getMetadata( 'ws-translator' ) );
$book->illustrator = htmlspecialchars( $metadataParser->getMetadata( 'ws-illustrator' ) );
$book->school = htmlspecialchars( $metadataParser->getMetadata( 'ws-school' ) );
$book->publisher = htmlspecialchars( $metadataParser->getMetadata( 'ws-publisher' ) );
$book->year = htmlspecialchars( $metadataParser->getMetadata( 'ws-year' ) );
$book->place = htmlspecialchars( $metadataParser->getMetadata( 'ws-place' ) );
$book->periodical = $metadataParser->getMetadata( 'ws-periodical' );
$book->author = $metadataParser->getMetadata( 'ws-author' );
$book->translator = $metadataParser->getMetadata( 'ws-translator' );
$book->illustrator = $metadataParser->getMetadata( 'ws-illustrator' );
$book->school = $metadataParser->getMetadata( 'ws-school' );
$book->publisher = $metadataParser->getMetadata( 'ws-publisher' );
$book->year = $metadataParser->getMetadata( 'ws-year' );
$book->place = $metadataParser->getMetadata( 'ws-place' );
$book->key = $metadataParser->getMetadata( 'ws-key' );
$book->progress = $metadataParser->getMetadata( 'ws-progress' );
$book->volume = $metadataParser->getMetadata( 'ws-volume' );
Expand Down
25 changes: 5 additions & 20 deletions src/Page.php
Original file line number Diff line number Diff line change
Expand Up @@ -5,34 +5,19 @@
use DOMDocument;

/**
* @author Thomas Pellissier Tanon
* @copyright 2011 Thomas Pellissier Tanon
* @license GPL-2.0-or-later
*/

/**
 * Container for a page of Wikisource.
 *
 * Plain data holder: it declares four public properties with empty defaults
 * and defines no methods.
 */
class Page {

	/** @var string Wiki page name (title) of the page on Wikisource. */
	public $title = '';

	/** @var string Name to display: the page's actual title, e.g. a page with title 'Foo/Bar' might have a name of 'Foo, Bar'. */
	public $name = '';

	/** @var DOMDocument|null Content of the page; defaults to null until a document is assigned. */
	public $content = null;

	/** @var Page[] List of the subpages, each as a Page object; empty by default. */
	public $chapters = [];
}
8 changes: 8 additions & 0 deletions tests/Book/BookProviderTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -82,4 +82,12 @@ private function parseDocument( $filename ) {
$this->assertTrue( $doc->loadHTMLFile( $filename ), 'parsing of "' . $filename . '"" failed' );
return $doc;
}

/**
 * A 'ws-title' element whose markup contains the entity '&amp;' must yield
 * a book name with a literal '&', i.e. the name is not HTML-encoded again.
 * The title passed to getMetadata() is expected back unchanged as the
 * book's title.
 */
public function testTitleEntities() {
	// Minimal fixture: just the one metadata element this test is about.
	$markup = '<body><p class="ws-title">Title &amp; name</p></body>';

	$document = new DOMDocument();
	$document->loadHTML( $markup );

	$result = $this->bookProvider->getMetadata( 'Title name', false, $document );

	$this->assertSame( 'Title name', $result->title );
	$this->assertSame( 'Title & name', $result->name );
}
}

0 comments on commit bacc93b

Please sign in to comment.