Skip to content

Commit

Permalink
pkp/pkp-lib#10365 update parsing for onix xml format
Browse files Browse the repository at this point in the history
  • Loading branch information
kaitlinnewson committed Oct 18, 2024
1 parent 8de64df commit 7b3ac0e
Show file tree
Hide file tree
Showing 26 changed files with 219 additions and 171 deletions.
6 changes: 3 additions & 3 deletions classes/codelist/CodelistItem.php
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ public function getText(): string
*/
public function setText(string $text): void
{
    // Persist the value under the 'text' data key; nothing is handed back.
    $this->setData('text', $text);
}

/**
Expand All @@ -56,9 +56,9 @@ public function getCode(): string
/**
 * Store the code of this codelist entry.
 */
public function setCode(string $code): void
{
    // Kept under the 'code' data key; the setter itself returns nothing.
    $this->setData('code', $code);
}

/**
Expand Down
8 changes: 4 additions & 4 deletions classes/codelist/ONIXCodelistItem.php
Original file line number Diff line number Diff line change
Expand Up @@ -40,9 +40,9 @@ public function getText()
*
* @param string $text
*/
public function setText($text): void
{
    // Persist the value under the 'text' data key; nothing is handed back.
    $this->setData('text', $text);
}

/**
Expand All @@ -60,9 +60,9 @@ public function getCode()
*
* @param string $code
*/
public function setCode($code): void
{
    // Persist the value under the 'code' data key; nothing is handed back.
    $this->setData('code', $code);
}
}

Expand Down
19 changes: 13 additions & 6 deletions classes/codelist/ONIXCodelistItemDAO.php
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

use Illuminate\Support\Facades\Cache;
use PKP\core\Registry;
use PKP\db\DAO;
use PKP\db\XMLDAO;
use PKP\facades\Locale;
use PKP\file\FileManager;
Expand All @@ -27,7 +28,7 @@
use PKP\plugins\Hook;
use PKP\xslt\XSLTransformer;

class ONIXCodelistItemDAO extends \PKP\db\DAO
class ONIXCodelistItemDAO extends DAO
{
/** @var string The name of the codelist we are interested in */
public string $_list;
Expand All @@ -45,8 +46,7 @@ public function _getCache(?string $locale = null): array

// Reload locale registry file
$xmlDao = new XMLDAO();
$listName = $this->getListName(); // i.e., 'List30'
$handler = new ONIXParserDOMHandler($listName);
$listName = $this->getListName(); // i.e., '30'

$temporaryFileManager = new TemporaryFileManager();
$fileManager = new FileManager();
Expand All @@ -63,7 +63,13 @@ public function _getCache(?string $locale = null): array
$xslTransformer->setRegisterPHPFunctions(true);

$xslFile = 'lib/pkp/xml/onixFilter.xsl';
$filteredXml = $xslTransformer->transform($filename, XSLTransformer::XSL_TRANSFORMER_DOCTYPE_FILE, $xslFile, XSLTransformer::XSL_TRANSFORMER_DOCTYPE_FILE, XSLTransformer::XSL_TRANSFORMER_DOCTYPE_STRING);
$filteredXml = $xslTransformer->transform(
$filename,
XSLTransformer::XSL_TRANSFORMER_DOCTYPE_FILE,
$xslFile,
XSLTransformer::XSL_TRANSFORMER_DOCTYPE_FILE,
XSLTransformer::XSL_TRANSFORMER_DOCTYPE_STRING
);
if (!$filteredXml) {
throw new \Exception('Unable to generate filtered XML!');
}
Expand All @@ -74,6 +80,7 @@ public function _getCache(?string $locale = null): array
$fp = fopen($tmpName, 'wb');
fwrite($fp, $filteredXml);
fclose($fp);
$handler = new ONIXParserDOMHandler($listName);
$data = $xmlDao->parseWithHandler($tmpName, $handler);
$fileManager->deleteByPath($tmpName);
} else {
Expand Down Expand Up @@ -101,13 +108,13 @@ public function _getCache(?string $locale = null): array
public function getFilename(string $locale): string
{
    // Candidate: the code list file stored under the requested locale.
    $candidate = "locale/{$locale}/ONIX_BookProduct_CodeLists.xml";

    // The file system is only consulted once the locale itself is valid.
    $usable = Locale::isLocaleValid($locale) && file_exists($candidate);
    if (!$usable) {
        // Fall back on the version for the master locale.
        $masterLocale = LocaleInterface::DEFAULT_LOCALE;
        return "locale/{$masterLocale}/ONIX_BookProduct_CodeLists.xml";
    }

    return $candidate;
}

/**
Expand Down
106 changes: 68 additions & 38 deletions classes/codelist/ONIXParserDOMHandler.php
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,12 @@
*
* @see XMLParser
*
* @brief This parser extracts a specific xs:simpleType based on a name attribute
* representing a code list within it. It returns the xs:enumeration values
* within it along with the xs:documentation elements which serve as textual
* descriptions of the Codelist values.
* @brief This parser extracts a specific CodeList based on a CodeListNumber
* representing a code list within it. It returns the Code values
* within it along with the CodeDescription elements which serve as textual
* descriptions of the Code values.
*
* Example: <xs:simpleType name="List30">...</xs:simpleType>
* Example: <CodeListNumber>28</CodeListNumber>
*/

namespace APP\codelist;
Expand All @@ -28,27 +28,36 @@

class ONIXParserDOMHandler extends XMLParserDOMHandler
{
/** @var The list being searched for */
public string $_listName;
/** @var string The list being searched for */
public string $listName;

public bool $_foundRequestedList = false;
public bool $foundRequestedList = false;

/** @var List of items the parser eventually returns */
public ?array $_listItems = [];
/** @var ?array List of items the parser eventually returns */
public ?array $listItems = [];

/** @var string to store the current character data */
public ?string $_currentValue = null;
/** @var ?string to store the current character data */
public ?string $currentValue = null;

/** @var bool currently inside an xs:documentation element */
public bool $_insideDocumentation = false;
/** @var bool currently inside a CodeListNumber element */
public bool $inCodeListNumber = false;

/** @var bool currently inside a CodeValue element */
public bool $inCodeValue = false;

/** @var bool currently inside a CodeDescription element */
public bool $inDescription = false;

/** @var bool currently inside a DeprecatedNumber element */
public bool $inDeprecated = false;

/**
 * Build a handler bound to one code list.
 *
 * @param string $listName The list being searched for
 */
public function __construct(string $listName)
{
    // Let the parent handler initialise itself before any state is set here.
    parent::__construct();

    $this->listName = $listName;
}

/**
Expand All @@ -59,21 +68,19 @@ public function startElement(PKPXMLParser|XMLParser $parser, string $tag, array
$this->currentData = null;

switch ($tag) {
case 'xs:simpleType':
if ($attributes['name'] == $this->_listName) {
$this->_foundRequestedList = true;
}
case 'CodeListNumber':
$this->inCodeListNumber = true;
break;
case 'xs:enumeration':
if ($this->_foundRequestedList) {
$this->_currentValue = $attributes['value'];
$this->_listItems[$this->_currentValue] = []; // initialize the array cell
case 'CodeValue':
if ($this->foundRequestedList) {
$this->inCodeValue = true;
}
break;
case 'xs:documentation':
if ($this->_foundRequestedList) {
$this->_insideDocumentation = true;
}
case 'CodeDescription':
$this->inDescription = true;
break;
case 'DeprecatedNumber':
$this->inDeprecated = true;
break;
}

Expand All @@ -92,38 +99,61 @@ public function startElement(PKPXMLParser|XMLParser $parser, string $tag, array
/**
 * Callback function to act as the character data handler.
 *
 * Depending on which tracked element is currently open, the text is used to
 * recognise the requested list, to start a new code entry, to extend the
 * description of the current entry, or to flag the current entry as
 * deprecated.
 *
 * @param PKPXMLParser|XMLParser $parser Parser issuing the callback (not used here)
 * @param string $data Text reported for the element currently open
 */
public function characterData(PKPXMLParser|XMLParser $parser, string $data): void
{
    if ($this->inCodeListNumber) {
        // Compare, do not assign: the former `$this->listName = $data` made
        // every non-empty list number count as a match and overwrote the
        // name that getResult() uses as its key.
        $listNumber = trim($data);
        if ($listNumber !== '') {
            $this->foundRequestedList = $listNumber === $this->listName;
        }
    }

    if ($this->inCodeValue) {
        $this->currentValue = $data;
        $this->listItems[$data] = []; // initialize the array cell
    }

    // Descriptions and deprecation markers need an entry of the requested
    // list to attach to; without one, indexing listItems would fail.
    if (
        !$this->foundRequestedList
        || $this->currentValue === null
        || !isset($this->listItems[$this->currentValue])
    ) {
        return;
    }

    if ($this->inDescription) {
        // Append, so that a description reported in several pieces is kept whole.
        $soFar = $this->listItems[$this->currentValue][0] ?? '';
        $this->listItems[$this->currentValue][0] = $soFar . $data;
    }

    if ($this->inDeprecated) {
        $this->listItems[$this->currentValue]['deprecated'] = 1;
    }
}

/**
 * Callback function to act as the end element handler.
 *
 * Clears the "currently inside" flag of the element being closed, then hands
 * the collected data to the current node and steps back up to its parent.
 */
public function endElement(PKPXMLParser|XMLParser $parser, string $tag): void
{
    // Tracked element name => name of the state flag it controls.
    $stateFlags = [
        'CodeListNumber' => 'inCodeListNumber',
        'CodeValue' => 'inCodeValue',
        'CodeDescription' => 'inDescription',
        'DeprecatedNumber' => 'inDeprecated',
    ];

    if (isset($stateFlags[$tag])) {
        $this->{$stateFlags[$tag]} = false;
    }

    $this->currentNode->setValue($this->currentData);
    $this->currentNode = & $this->currentNode->getParent();
    $this->currentData = null;
}

/**
 * Hand back the collected list items, keyed by the name of the list.
 */
public function getResult(): mixed
{
    $result = [];
    $result[$this->listName] = $this->listItems;

    return $result;
}
}

Expand Down
2 changes: 1 addition & 1 deletion classes/components/forms/context/MastheadForm.php
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ public function __construct($action, $locales, $context, $imageUploadUrl)

/** @var ONIXCodelistItemDAO */
$onixCodelistItemDao = DAORegistry::getDAO('ONIXCodelistItemDAO');
$codeTypes = $onixCodelistItemDao->getCodes('List44');
$codeTypes = $onixCodelistItemDao->getCodes('44');
$codeTypeOptions = array_map(function ($code, $name) {
return ['value' => $code, 'label' => $name];
}, array_keys($codeTypes), $codeTypes);
Expand Down
6 changes: 3 additions & 3 deletions classes/components/forms/submission/AudienceForm.php
Original file line number Diff line number Diff line change
Expand Up @@ -43,9 +43,9 @@ public function __construct($action, $submission)

/** @var ONIXCodelistItemDAO */
$onixCodelistItemDao = DAORegistry::getDAO('ONIXCodelistItemDAO');
$audienceCodes = $this->getOptions($onixCodelistItemDao->getCodes('List28'));
$audienceRangeQualifiers = $this->getOptions($onixCodelistItemDao->getCodes('List30'));
$audienceRanges = $this->getOptions($onixCodelistItemDao->getCodes('List77'));
$audienceCodes = $this->getOptions($onixCodelistItemDao->getCodes('28'));
$audienceRangeQualifiers = $this->getOptions($onixCodelistItemDao->getCodes('30'));
$audienceRanges = $this->getOptions($onixCodelistItemDao->getCodes('77'));

$this->addField(new FieldSelect('audience', [
'label' => __('monograph.audience'),
Expand Down
14 changes: 7 additions & 7 deletions classes/monograph/Representative.php
Original file line number Diff line number Diff line change
Expand Up @@ -38,15 +38,15 @@ public function getMonographId()
*
* @param int $monographId
*/
public function setMonographId($monographId): void
{
    // Persist the value under the 'monographId' data key; nothing is handed back.
    $this->setData('monographId', $monographId);
}

/**
 * Assign the ONIX role code of this representative
 * (list 93 for suppliers, list 69 for agents).
 */
public function setRole($role): void
{
    // Kept under the 'role' data key.
    $this->setData('role', $role);
}
Expand All @@ -62,19 +62,19 @@ public function getRole()
}

/**
 * Look up the human-readable name of this representative's ONIX role code.
 *
 * @return string
 */
public function getNameForONIXCode()
{
    /** @var ONIXCodelistItemDAO $onixCodelistItemDao */
    $onixCodelistItemDao = DAORegistry::getDAO('ONIXCodelistItemDAO');

    // List 93 -> Supplier role; List 69 -> Agent role
    $listName = $this->getIsSupplier() ? '93' : '69';

    $roleNames = $onixCodelistItemDao->getCodes($listName);

    return $roleNames[$this->getRole()];
}

Expand Down
6 changes: 3 additions & 3 deletions classes/publicationFormat/IdentificationCode.php
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ public function getPublicationFormatId()
*/
public function setPublicationFormatId($publicationFormatId): void
{
    // The setter no longer returns anything, so declare that explicitly,
    // as the other ONIX-related setters do.
    $this->setData('publicationFormatId', $publicationFormatId);
}

/**
Expand All @@ -63,14 +63,14 @@ public function getCode()
}

/**
 * Look up the human-readable name of this identification code's ONIX type.
 *
 * @return string
 */
public function getNameForONIXCode()
{
    /** @var ONIXCodelistItemDAO $onixCodelistItemDao */
    $onixCodelistItemDao = DAORegistry::getDAO('ONIXCodelistItemDAO');

    // List 5 is for product identifier type
    $identifierTypes = $onixCodelistItemDao->getCodes('5');
    $ownCode = $this->getCode();

    return $identifierTypes[$ownCode];
}

Expand Down
Loading

0 comments on commit 7b3ac0e

Please sign in to comment.