diff --git a/assets/patches/hocr.patch b/assets/patches/hocr.patch new file mode 100644 index 0000000..dad25e6 --- /dev/null +++ b/assets/patches/hocr.patch @@ -0,0 +1,176 @@ +diff --git a/modules/islandora_iiif/src/Plugin/views/style/IIIFManifest.php b/modules/islandora_iiif/src/Plugin/views/style/IIIFManifest.php +index 804153ab..fc31c36a 100644 +--- a/modules/islandora_iiif/src/Plugin/views/style/IIIFManifest.php ++++ b/modules/islandora_iiif/src/Plugin/views/style/IIIFManifest.php +@@ -11,6 +11,7 @@ use Drupal\Core\Field\FieldItemInterface; + use Drupal\Core\Form\FormStateInterface; + use Drupal\Core\Messenger\MessengerInterface; + use Drupal\Core\Url; ++use Drupal\islandora\IslandoraUtils; + use Drupal\views\Plugin\views\style\StylePluginBase; + use Drupal\views\ResultRow; + use GuzzleHttp\Client; +@@ -35,6 +36,13 @@ use Symfony\Component\HttpFoundation\Request; + */ + class IIIFManifest extends StylePluginBase { + ++ /** ++ * Islandora utility functions. ++ * ++ * @var \Drupal\islandora\IslandoraUtils ++ */ ++ protected $utils; ++ + /** + * {@inheritdoc} + */ +@@ -108,10 +116,17 @@ class IIIFManifest extends StylePluginBase { + */ + protected $moduleHandler; + ++ /* ++ * The media use term for structured OCR text. ++ * ++ * @var \Drupal\taxonomy\TermInterface|null ++ */ ++ protected $structuredTextTerm; ++ + /** + * {@inheritdoc} + */ +- public function __construct(array $configuration, $plugin_id, $plugin_definition, SerializerInterface $serializer, Request $request, ImmutableConfig $iiif_config, EntityTypeManagerInterface $entity_type_manager, FileSystemInterface $file_system, Client $http_client, MessengerInterface $messenger, ModuleHandlerInterface $moduleHandler) { ++ public function __construct(array $configuration, $plugin_id, $plugin_definition, SerializerInterface $serializer, Request $request, ImmutableConfig $iiif_config, EntityTypeManagerInterface $entity_type_manager, FileSystemInterface $file_system, Client $http_client, MessengerInterface $messenger, ModuleHandlerInterface $moduleHandler, IslandoraUtils $utils) { + parent::__construct($configuration, $plugin_id, $plugin_definition); + + $this->serializer = $serializer; +@@ -122,6 +137,10 @@ class IIIFManifest extends StylePluginBase { + $this->httpClient = $http_client; + $this->messenger = $messenger; + $this->moduleHandler = $moduleHandler; ++ $this->utils = $utils; ++ $this->structured_text_term = isset($this->options['structured_text_term_uri']) ++ ? $this->utils->getTermForUri($this->options['structured_text_term_uri']) ++ : FALSE; + } + + /** +@@ -139,7 +158,8 @@ class IIIFManifest extends StylePluginBase { + $container->get('file_system'), + $container->get('http_client'), + $container->get('messenger'), +- $container->get('module_handler') ++ $container->get('module_handler'), ++ $container->get('islandora.utils') + ); + } + +@@ -157,6 +177,7 @@ class IIIFManifest extends StylePluginBase { + * {@inheritdoc} + */ + public function render() { ++ $this->structuredTextTerm = $this->utils->getTermForUri($this->options['structured_text_term_uri']); + $json = []; + $iiif_address = $this->iiifConfig->get('iiif_server'); + if (!is_null($iiif_address) && !empty($iiif_address)) { +@@ -282,7 +303,7 @@ class IIIFManifest extends StylePluginBase { + ], + ]; + +- if ($ocr_url = $this->getOcrUrl($entity, $row, $i)) { ++ if ($ocr_url = $this->getOcrUrl($entity)) { + $tmp_canvas['seeAlso'] = [ + '@id' => $ocr_url, + 'format' => 'text/vnd.hocr+html', +@@ -362,28 +383,34 @@ class IIIFManifest extends StylePluginBase { + * + * @param \Drupal\Core\Entity\EntityInterface $entity + * The entity at the current row. +- * @param \Drupal\views\ResultRow $row +- * Result row. +- * @param int $delta +- * The delta in case there are multiple canvases on one media. + * + * @return string|false + * The absolute URL of the current row's structured text, + * or FALSE if none. + */ +- protected function getOcrUrl(EntityInterface $entity, ResultRow $row, $delta) { ++ protected function getOcrUrl(EntityInterface $entity) { + $ocr_url = FALSE; + $iiif_ocr_file_field = !empty($this->options['iiif_ocr_file_field']) ? array_filter(array_values($this->options['iiif_ocr_file_field'])) : []; + $ocrField = count($iiif_ocr_file_field) > 0 ? $this->view->field[$iiif_ocr_file_field[0]] : NULL; + if ($ocrField) { +- $ocr_entity = $ocrField->getEntity($row); ++ $ocr_entity = $entity; + $ocr_field_name = $ocrField->definition['field_name']; +- if (!is_null($ocr_field_name)) { ++ if (!is_null($ocrField_name)) { + $ocrs = $ocr_entity->{$ocr_field_name}; +- $ocr = isset($ocrs[$delta]) ? $ocrs[$delta] : FALSE; +- if ($ocr) { +- $ocr_url = $ocr->entity->createFileUrl(FALSE); +- } ++ $ocr = isset($ocrs[$i]) ? $ocrs[$i] : FALSE; ++ $ocr_url = $ocr->entity->createFileUrl(FALSE); ++ } ++ } ++ elseif ($this->structuredTextTerm) { ++ $parent_node = $this->utils->getParentNode($entity); ++ $ocr_entity_array = $this->utils->getMediaReferencingNodeAndTerm($parent_node, $this->structuredTextTerm); ++ $ocr_entity_id = is_array($ocr_entity_array) ? array_shift($ocr_entity_array) : NULL; ++ $ocr_entity = $ocr_entity_id ? $this->entityTypeManager->getStorage('media')->load($ocr_entity_id) : NULL; ++ if ($ocr_entity) { ++ $ocr_file_source = $ocr_entity->getSource(); ++ $ocr_fid = $ocr_file_source->getSourceFieldValue($ocr_entity); ++ $ocr_file = $this->entityTypeManager->getStorage('file')->load($ocr_fid); ++ $ocr_url = $ocr_file->createFileUrl(FALSE); + } + } + +@@ -486,10 +513,19 @@ class IIIFManifest extends StylePluginBase { + '#title' => $this->t('Structured OCR data file field'), + '#type' => 'checkboxes', + '#default_value' => $this->options['iiif_ocr_file_field'], +- '#description' => $this->t('The source of structured OCR text for each entity.'), ++ '#description' => $this->t('The source of structured OCR text for each entity. If the term setting below is left blank, it will be the same entity as the source image'), + '#options' => $field_options, + '#required' => FALSE, + ]; ++ $form['structured_text_term'] = [ ++ '#type' => 'entity_autocomplete', ++ '#target_type' => 'taxonomy_term', ++ '#title' => $this->t('Structured OCR text term'), ++ '#default_value' => $this->utils->getTermForUri($this->options['structured_text_term_uri']), ++ '#required' => FALSE, ++ '#description' => $this->t('Term indicating the media that holds structured text, such as hOCR, for the given object. Use this if the text is on a separate media from the tile source.'), ++ ]; ++ + } + + /** +@@ -502,4 +538,25 @@ class IIIFManifest extends StylePluginBase { + return ['json' => 'json']; + } + ++ /** ++ * Submit handler for options form. ++ * ++ * Used to store the structured text media term by URL instead of Ttid. ++ * ++ * @param array $form ++ * The form. ++ * @param \Drupal\Core\Form\FormStateInterface $form_state ++ * The form state object. ++ */ ++ // @codingStandardsIgnoreStart ++ public function submitOptionsForm(&$form, FormStateInterface $form_state) { ++ // @codingStandardsIgnoreEnd ++ $style_options = $form_state->getValue('style_options'); ++ $tid = $style_options['structured_text_term']; ++ $term = $this->entityTypeManager->getStorage('taxonomy_term')->load($tid); ++ $style_options['structured_text_term_uri'] = $this->utils->getUriForTerm($term); ++ $form_state->setValue('style_options', $style_options); ++ parent::submitOptionsForm($form, $form_state); ++ } ++ + } diff --git a/composer.json b/composer.json index 7d512fa..46e398d 100755 --- a/composer.json +++ b/composer.json @@ -131,6 +131,9 @@ "patches": { "drupal/matomo": { "https://www.drupal.org/project/matomo/issues/3363521": "https://www.drupal.org/files/issues/2023-06-29/matomo-3363521-7.patch" + }, + "drupal/islandora": { + "OCR from media: https://github.com/Islandora/islandora/pull/953": "assets/patches/hocr.patch" } } },