diff --git a/docs/changelog.rst b/docs/changelog.rst index f0eb8cec..1860e6ad 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -1,5 +1,48 @@ Changelog --------- +2.2.11 +^^^^^^ +- A space should not be placed before a mid row code if it follows a PAC command or a Tab Offset +- The backspace command should be treated like other commands and duplicates should be skipped if PAC commands are duplicated +- Prevent webvtt writer from creating a new cue in case of line break +- In case of style setting PAC which also breaks the line, we add the break first, then the style tag + +2.2.10 +^^^^^^ +- Yanked. + +2.2.9 +^^^^^ +- Yanked. + +2.2.8 +^^^^^ +- Honor backspaces on captions in scc files +- Mid-row codes which are preceded by a PAC command don't add spaces +- Mid row codes which don't follow after a PAC and don't have a style reset command before will add a space to the end of the previous text node +- Mid row codes which don't follow after a PAC and have a style reset command before will add a space to the beginning of the next text node +- Background color codes delete the space in front of them + +2.2.7 +^^^^^ +- The cursor moves automatically one column to the right after each character or Mid-Row Code received. + +2.2.6 +^^^^^ +- Pass the caption cue time with all error messages. + +2.2.5 +^^^^^ +- Yanked. + +2.2.4 +^^^^^ +- Skip duplicated extended characters. + +2.2.3 +^^^^^ +- Add new substitute character to ignore before extended character in SCC input files + 2.2.2 ^^^^^ - Remove support for Python 3.6 & 3.7 diff --git a/docs/conf.py b/docs/conf.py index c8befb7b..9b455abf 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -53,9 +53,9 @@ # built documents. # # The short X.Y version. -version = '2.2.0' +version = '2.2.11' # The full version, including alpha/beta/rc tags. -release = '2.2.0' +release = '2.2.11' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. 
diff --git a/pycaption/__init__.py b/pycaption/__init__.py index b404c84d..adc9b501 100644 --- a/pycaption/__init__.py +++ b/pycaption/__init__.py @@ -10,7 +10,7 @@ from .transcript import TranscriptWriter from .webvtt import WebVTTReader, WebVTTWriter from .exceptions import ( - CaptionReadError, CaptionReadNoCaptions, CaptionReadSyntaxError, + CaptionReadError, CaptionReadNoCaptions, CaptionReadSyntaxError, CaptionLineLengthError ) diff --git a/pycaption/base.py b/pycaption/base.py index 563d7f89..8e3da975 100644 --- a/pycaption/base.py +++ b/pycaption/base.py @@ -114,13 +114,16 @@ class CaptionNode: STYLE = 2 BREAK = 3 - def __init__(self, type_, layout_info=None, content=None, start=None): + def __init__( + self, type_, layout_info=None, content=None, start=None, position=None + ): """ :type type_: int :type layout_info: Layout """ self.type_ = type_ self.content = content + self.position = position # Boolean. Marks the beginning/ end of a Style node. self.start = start @@ -139,19 +142,24 @@ def __repr__(self): raise RuntimeError(f'Unknown node type: {t}') @staticmethod - def create_text(text, layout_info=None): + def create_text(text, layout_info=None, position=None): return CaptionNode( - CaptionNode.TEXT, layout_info=layout_info, content=text) + type_=CaptionNode.TEXT, layout_info=layout_info, + position=position, content=text + ) @staticmethod def create_style(start, content, layout_info=None): return CaptionNode( - CaptionNode.STYLE, layout_info=layout_info, content=content, + type_=CaptionNode.STYLE, layout_info=layout_info, content=content, start=start) @staticmethod - def create_break(layout_info=None): - return CaptionNode(CaptionNode.BREAK, layout_info=layout_info) + def create_break(layout_info=None, content=None): + return CaptionNode( + type_=CaptionNode.BREAK, layout_info=layout_info, + content=content + ) class Caption: diff --git a/pycaption/scc/__init__.py b/pycaption/scc/__init__.py index c5d864ad..ef74b406 100644 --- 
a/pycaption/scc/__init__.py +++ b/pycaption/scc/__init__.py @@ -81,11 +81,11 @@ import math import re import textwrap -from collections import deque +from collections import deque, defaultdict from copy import deepcopy from pycaption.base import ( - BaseReader, BaseWriter, CaptionSet, CaptionNode, + BaseReader, BaseWriter, CaptionSet ) from pycaption.exceptions import CaptionReadNoCaptions, InvalidInputError, \ CaptionReadTimingError, CaptionLineLengthError @@ -94,7 +94,7 @@ MICROSECONDS_PER_CODEWORD, CHARACTER_TO_CODE, SPECIAL_OR_EXTENDED_CHAR_TO_CODE, PAC_BYTES_TO_POSITIONING_MAP, PAC_HIGH_BYTE_BY_ROW, PAC_LOW_BYTE_BY_ROW_RESTRICTED, - PAC_TAB_OFFSET_COMMANDS, + PAC_TAB_OFFSET_COMMANDS, CUE_STARTING_COMMAND ) from .specialized_collections import ( # noqa: F401 TimingCorrectingCaptionList, NotifyingDict, CaptionCreator, @@ -164,6 +164,7 @@ def __init__(self, *args, **kw): ) self.last_command = '' + self.double_starter = False self.buffer_dict = NotifyingDict() @@ -223,6 +224,7 @@ def read(self, content, lang='en-US', simulate_roll_up=False, offset=0): # split lines lines = content.splitlines() + # loop through each line except the first for line in lines[1:]: self._translate_line(line) @@ -232,16 +234,24 @@ def read(self, content, lang='en-US', simulate_roll_up=False, offset=0): captions = CaptionSet({lang: self.caption_stash.get_all()}) # check captions for incorrect lengths - lines = [] + lines_too_long = defaultdict(list) for caption in self.caption_stash._collection: + caption_start = caption.to_real_caption().format_start() caption_text = "".join(caption.to_real_caption().get_text_nodes()) - lines.extend(caption_text.split("\n")) - lines_too_long = [line for line in lines if len(line) > 32] - - if bool(lines_too_long): - msg = "" - for line in lines_too_long: - msg += line + f" - Length { len(line)}" + "\n" + text_too_long = [line for line in caption_text.split("\n") if len(line) > 32] + if caption_start in lines_too_long: + lines_too_long[caption_start] = 
text_too_long + else: + lines_too_long[caption_start].extend(text_too_long) + + msg = "" + if bool(lines_too_long.keys()): + for key in lines_too_long: + if lines_too_long[key]: + msg += f"around {key} - " + for line in lines_too_long[key]: + msg += line + f" - Length { len(line)}" + "\n" + if len(msg): raise CaptionLineLengthError( f"32 character limit for caption cue in scc file.\n" f"Lines longer than 32:\n" @@ -299,15 +309,20 @@ def _translate_line(self, line): parts = r.findall(line.lower()) self.time_translator.start_at(parts[0][0]) + word_list = parts[0][2].split(' ') - # loop through each word - for word in parts[0][2].split(' '): - # ignore empty results or invalid commands + for idx, word in enumerate(word_list): word = word.strip() + previous_is_pac_or_tab = len(word_list) > 1 and ( + _is_pac_command(word_list[idx - 1]) or word_list[idx - 1] in PAC_TAB_OFFSET_COMMANDS + ) if len(word) == 4: - self._translate_word(word) + self._translate_word( + word=word, + previous_is_pac_or_tab=previous_is_pac_or_tab, + ) - def _translate_word(self, word): + def _translate_word(self, word, previous_is_pac_or_tab): if self._handle_double_command(word): # count frames for timing self.time_translator.increment_frames() @@ -316,7 +331,7 @@ def _translate_word(self, word): # TODO - check that all the positioning commands are here, or use # some other strategy to determine if the word is a command. if word in COMMANDS or _is_pac_command(word): - self._translate_command(word) + self._translate_command(word=word, previous_is_pac_or_tab=previous_is_pac_or_tab) # second, check if word is a special character elif word in SPECIAL_CHARS: @@ -337,23 +352,33 @@ def _handle_double_command(self, word): # up for redundancy in case the signal is garbled in transmission. # The decoder is programmed to ignore a second command when it is the # same as the first. 
- # Also like codes, Special Characters are always doubled up, + # If we have doubled commands we're skipping also + # doubled special characters and doubled extended characters # with only one member of each pair being displayed. - if word in COMMANDS or _is_pac_command(word) or word in SPECIAL_CHARS: - if word == self.last_command: - self.last_command = '' - return True + + doubled_types = word != "94a1" and word in COMMANDS or _is_pac_command(word) + if self.double_starter: + doubled_types = doubled_types or word in EXTENDED_CHARS or word == "94a1" or word in SPECIAL_CHARS + + if word in CUE_STARTING_COMMAND and word != self.last_command: + self.double_starter = False + + if doubled_types and word == self.last_command: + if word in CUE_STARTING_COMMAND: + self.double_starter = True + self.last_command = '' + return True # Fix for the # repetition - elif _is_pac_command(word) and word in self.last_command: - self.last_command = '' + elif _is_pac_command(word) and word in self.last_command: + self.last_command = '' + return True + elif word in PAC_TAB_OFFSET_COMMANDS: + if _is_pac_command(self.last_command): + self.last_command += f" {word}" + return False + else: return True - elif word in PAC_TAB_OFFSET_COMMANDS: - if _is_pac_command(self.last_command): - self.last_command += f" {word}" - return False - else: - return True self.last_command = word return False @@ -362,12 +387,18 @@ def _translate_special_char(self, word): self.buffer.add_chars(SPECIAL_CHARS[word]) def _translate_extended_char(self, word): - self.buffer.remove_ascii_duplicate(EXTENDED_CHARS[word]) - + """ + Each of the 64 Extended Characters incorporates an automatic BS. + When an Extended Character is received, the cursor moves to the + left one column position (unless the Extended Character is the first + character on a row), erasing any character which may be in that location, + then displays the Extended Character. 
+ """ + self.buffer.handle_backspace(word) # add to buffer self.buffer.add_chars(EXTENDED_CHARS[word]) - def _translate_command(self, word): + def _translate_command(self, word, previous_is_pac_or_tab): # if command is pop_up if word == '9420': self.buffer_dict.set_active('pop') @@ -436,7 +467,10 @@ def _translate_command(self, word): # If command is not one of the aforementioned, add it to buffer else: - self.buffer.interpret_command(word) + self.buffer.interpret_command( + command=word, + previous_is_pac_or_tab=previous_is_pac_or_tab + ) def _translate_characters(self, word): # split word into the 2 bytes diff --git a/pycaption/scc/constants.py b/pycaption/scc/constants.py index d54d0b96..bc2fcd50 100644 --- a/pycaption/scc/constants.py +++ b/pycaption/scc/constants.py @@ -1,5 +1,4 @@ from itertools import product -from collections import defaultdict COMMANDS = { '9420': '', @@ -990,60 +989,74 @@ def _restructure_bytes_to_position_map(byte_to_pos_map): # taken from # http://www.theneitherworld.com/mcpoodle/SCC_TOOLS/DOCS/CC_CHARS.HTML INCONVERTIBLE_TO_ASCII_EXTENDED_CHARS_ASSOCIATION = { - '¡': "!", # inverted exclamation mark - '¤': "C", # currency - '¥': "Y", # yen - '¦': "-", # broken bar - '©': "c", # copyright sign - '«': '"', # left pointing double angle quotation mark - '»': '"', # right pointing double angle quotation mark - 'À': "A", - 'Á': "A", - 'Â': "A", - 'Ã': "A", - 'Ä': "A", - 'Å': "A", - 'Ç': "C", - 'È': "E", - 'É': "E", - 'Ê': "E", - 'Ë': "E", - 'Ì': "I", - 'Í': "I", - 'Î': "I", - 'Ï': "I", - 'Ò': "O", - 'Ó': "O", - 'Ô': ")", - 'Õ': "O", - 'Ö': "O", - 'Ø': "O", - 'Ù': "U", - 'Ú': "U", - 'Û': "U", - 'Ü': "U", - 'ß': "s", - 'ã': "a", - 'ä': "a", - 'å': "a", - 'ë': "e", - 'ì': "i", - 'ï': "i", - 'ò': "o", - 'õ': "o", - 'ö': "o", - 'ø': "o", - 'ù': "u", - 'ü': "u", - '—': "-", # em dash - '‘': "'", - '’': "'", - '“': '"', - '”': '"', - '•': ".", - '℠': "s", - '┌': "+", - '┐': "+", - '└': "+", - '┘': "+" + '¡': ["!", "i"], # inverted exclamation mark 
+ '¤': ["C"], # currency + '¥': ["Y"], # yen + '¦': ["-"], # broken bar + '©': ["c"], # copyright sign + '«': ['"'], # left pointing double angle quotation mark + '»': ['"'], # right pointing double angle quotation mark + 'À': ["A"], + 'Á': ["A"], + 'Â': ["A"], + 'Ã': ["A"], + 'Ä': ["A"], + 'Å': ["A"], + 'Ç': ["C"], + 'È': ["E"], + 'É': ["E"], + 'Ê': ["E"], + 'Ë': ["E"], + 'Ì': ["I"], + 'Í': ["I"], + 'Î': ["I"], + 'Ï': ["I"], + 'Ò': ["O"], + 'Ó': ["O"], + 'Ô': [")"], + 'Õ': ["O"], + 'Ö': ["O"], + 'Ø': ["O"], + 'Ù': ["U"], + 'Ú': ["U"], + 'Û': ["U"], + 'Ü': ["U"], + 'ß': ["s"], + 'ã': ["a"], + 'ä': ["a"], + 'å': ["a"], + 'ë': ["e"], + 'ì': ["i"], + 'ï': ["i"], + 'ò': ["o"], + 'õ': ["o"], + 'ö': ["o"], + 'ø': ["o"], + 'ù': ["u"], + 'ü': ["u"], + '—': ["-"], # em dash + '‘': ["'"], + '’': ["'"], + '“': ['"'], + '”': ['"'], + '•': ["."], + '℠': ["s"], + '┌': ["+"], + '┐': ["+"], + '└': ["+"], + '┘': ["+"] } + +MID_ROW_CODES = [ + "9120", "91a1", "91a2", "9123", "91a4", "9125", "9126", + "91a7", "91a8", "9129", "912a", "91ab", "912c", "91ad", + "97ae", "972f", "91ae", "912f", "94a8" +] + +BACKGROUND_COLOR_CODES = [ + "1020", "10a1", "10a2", "1023", "10a4", "1025", "1026", + "10a7", "10a8", "1029", "102a", "10ab", "102c", "10ad", + "10ae", "102f", "97ad" +] + +CUE_STARTING_COMMAND = ['9425', '9426', '94a7', '9429', '9420'] diff --git a/pycaption/scc/specialized_collections.py b/pycaption/scc/specialized_collections.py index d7bca856..4b8800ed 100644 --- a/pycaption/scc/specialized_collections.py +++ b/pycaption/scc/specialized_collections.py @@ -1,5 +1,4 @@ import collections -import unicodedata from ..base import CaptionList, Caption, CaptionNode from ..geometry import ( @@ -8,7 +7,8 @@ ) from .constants import ( PAC_BYTES_TO_POSITIONING_MAP, COMMANDS, PAC_TAB_OFFSET_COMMANDS, - MICROSECONDS_PER_CODEWORD, INCONVERTIBLE_TO_ASCII_EXTENDED_CHARS_ASSOCIATION + MICROSECONDS_PER_CODEWORD, BACKGROUND_COLOR_CODES, + MID_ROW_CODES, EXTENDED_CHARS ) PopOnCue = 
collections.namedtuple("PopOnCue", "buffer, start, end") @@ -254,7 +254,10 @@ def create_and_store(self, node_buffer, start, end=0): layout_info = _get_layout_from_tuple(instruction.position) caption.nodes.append( CaptionNode.create_text( - instruction.get_text(), layout_info=layout_info), + text=instruction.text, + layout_info=layout_info, + position=instruction.position + ) ) caption.layout_info = layout_info @@ -287,6 +290,8 @@ def __init__(self, collection=None, position_tracker=None): else: self._collection = collection + self.last_style = None + self._position_tracer = position_tracker def is_empty(self): @@ -334,24 +339,45 @@ def add_chars(self, *chars): node.add_chars(*chars) - def interpret_command(self, command): + def interpret_command(self, command, previous_is_pac_or_tab=False): """Given a command determines whether to turn italics on or off, or to set the positioning This is mostly used to convert from the legacy-style commands :type command: str + :type previous_is_pac_or_tab: previous command code is for a PAC command + or a PAC_TAB_OFFSET_COMMANDS """ self._update_positioning(command) text = COMMANDS.get(command, '') + if command == "94a1": + self.handle_backspace("94a1") + + if command in BACKGROUND_COLOR_CODES: + # Since these codes are optional, they must be preceded + # with the space character (20h), + # which will be deleted when the code is applied. 
+ # ex: 2080 97ad 94a1 + if ( + self._collection[-1].is_text_node() and + self._collection[-1].text[-1].isspace() + ): + self._collection[-1].text = self._collection[-1].text[:-1] + if 'italic' in text: + if self._position_tracer.is_linebreak_required(): + self._collection.append(_InstructionNode.create_break( + position=self._position_tracer.get_current_position())) + self._position_tracer.acknowledge_linebreak_consumed() if 'end' not in text: self._collection.append( _InstructionNode.create_italics_style( self._position_tracer.get_current_position()) ) + self.last_style = "italics on" else: self._collection.append( _InstructionNode.create_italics_style( @@ -359,6 +385,18 @@ def interpret_command(self, command): turn_on=False ) ) + self.last_style = "italics off" + + # mid row code that is not first code on the line + # (previous node is not a break node) + if command in MID_ROW_CODES and not previous_is_pac_or_tab: + if self.last_style == "italics off": + self.add_chars(' ') + else: + for node in self._collection[::-1]: + if node.is_text_node() and node.text: + node.text += ' ' + break def _update_positioning(self, command): """Sets the positioning information to use for the next nodes @@ -412,34 +450,31 @@ def from_list(cls, stash_list, position_tracker): return instance - def remove_ascii_duplicate(self, accented_character): + def handle_backspace(self, word): """ - Characters from the Extended Characters list are usually preceded by - their ASCII substitute, in case the decoder is not able to display - the special character. - - This is used to remove the substitute character in order to avoid - displaying both. 
- - :type accented_character: str + Move cursor back one position and delete that character """ - is_text_node = ( - self._collection and - self._collection[-1].is_text_node() and - self._collection[-1].text - ) - if is_text_node: - try: - ascii_char = unicodedata.normalize('NFD', accented_character) \ - .encode('ascii', 'strict').decode("utf-8") - except (UnicodeEncodeError, UnicodeDecodeError): - ascii_char = INCONVERTIBLE_TO_ASCII_EXTENDED_CHARS_ASSOCIATION[ - accented_character - ] - - if ascii_char and self._collection[-1].text[-1] == ascii_char: - self._collection[-1].text = self._collection[-1].text[:-1] - + node = self.get_previous_text_node() + # in case of no previous text nodes or + # if the backspace is required while no character + # do nothing + if node is None: + return + last_char = node.text[-1] + delete_previous_condition = ( + (word in EXTENDED_CHARS and last_char not in EXTENDED_CHARS.values()) or + word == "94a1" + ) + # in case extended char, perform backspace + # only if the previous character in not also extended + if delete_previous_condition: + node.text = node.text[:-1] + + def get_previous_text_node(self): + for node in self._collection[::-1]: + if node.is_text_node() and node.text: + return node + return None def _get_layout_from_tuple(position_tuple): diff --git a/pycaption/webvtt.py b/pycaption/webvtt.py index 925ac8d3..d40f02c3 100644 --- a/pycaption/webvtt.py +++ b/pycaption/webvtt.py @@ -394,6 +394,7 @@ def _group_cues_by_layout(self, nodes, caption_set): return [] current_layout = None + current_node = None # A list with layout groups. 
Since WebVTT only support positioning # for different cues, each layout group has to be represented in a @@ -402,17 +403,24 @@ def _group_cues_by_layout(self, nodes, caption_set): # A properly encoded WebVTT string (plain unicode must be properly # escaped before being appended to this string) s = '' + row, column, prev_row, prev_column = 0, 0, 0, 0 for i, node in enumerate(nodes): if node.type_ == CaptionNode.TEXT: if s and current_layout and node.layout_info != current_layout: # If the positioning changes from one text node to # another, a new WebVTT cue has to be created. - layout_groups.append((s, current_layout)) - s = '' + row, column = node.position if node.position else (0, 0) + prev_row, prev_column = current_node.position if current_node.position else (0, 0) + if row == prev_row + 1: + s += '\n' + else: + layout_groups.append((s, current_layout)) + s = '' # ATTENTION: This is where the plain unicode node content is # finally encoded as WebVTT. s += self._encode_illegal_characters(node.content) or ' ' current_layout = node.layout_info + current_node = node elif node.type_ == CaptionNode.STYLE: resulting_style = self._calculate_resulting_style( node.content, caption_set diff --git a/run_tests.sh b/run_tests.sh index 9b67c234..94d71883 100755 --- a/run_tests.sh +++ b/run_tests.sh @@ -1,5 +1,4 @@ -#!/bin/bash -eux - +#!/bin/bash DOCKER_CMD="docker-compose -p pycaption" SERVICE="test_py312" @@ -20,4 +19,11 @@ function cleanup { $DOCKER_CMD run --rm "$SERVICE" -cleanup +if [ $? 
!= 0 ]; then + cleanup + exit 1 +else + cleanup +fi + + diff --git a/setup.py b/setup.py index afaf39ff..217443fa 100644 --- a/setup.py +++ b/setup.py @@ -24,7 +24,7 @@ setup( name='pycaption', - version='2.2.2.dev2', + version='2.2.11', description='Closed caption converter', long_description=open(README_PATH).read(), author='Joe Norton', diff --git a/test_requirements.txt b/test_requirements.txt index 8ef1ace3..c9532a43 100644 --- a/test_requirements.txt +++ b/test_requirements.txt @@ -1,6 +1,5 @@ pytest pytest-cov -pytest-lazy-fixture beautifulsoup4>=4.12.1 lxml>=4.9.1 cssutils>=2.0.0 \ No newline at end of file diff --git a/tests/conftest.py b/tests/conftest.py index 2e361fb8..74530ae8 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -54,14 +54,14 @@ scc_that_generates_webvtt_with_proper_newlines, sample_scc_produces_captions_with_start_and_end_time_the_same, sample_scc_pop_on, sample_scc_multiple_positioning, sample_scc_with_italics, - sample_scc_empty, sample_scc_roll_up_ru2, sample_no_positioning_at_all_scc, + sample_scc_empty, sample_scc_roll_up_ru2, sample_scc_roll_up_ru3, + sample_no_positioning_at_all_scc, sample_scc_with_line_too_long, sample_scc_no_explicit_end_to_last_caption, sample_scc_flashing_cue, sample_scc_eoc_first_command, sample_scc_with_extended_characters, sample_scc_with_ampersand_character, sample_scc_multiple_formats, sample_scc_duplicate_tab_offset, sample_scc_duplicate_special_characters, sample_scc_tab_offset, sample_scc_with_unknown_commands, sample_scc_special_and_extended_characters, - sample_scc_with_line_too_long ) from tests.fixtures.srt import ( # noqa: F401 sample_srt, sample_srt_ascii, sample_srt_numeric, sample_srt_empty, diff --git a/tests/fixtures/scc.py b/tests/fixtures/scc.py index 23661d36..a1b66892 100644 --- a/tests/fixtures/scc.py +++ b/tests/fixtures/scc.py @@ -140,6 +140,46 @@ def sample_scc_roll_up_ru2(): 00:00:12;07 9425 9425 94ad 94ad 9470 9470 91b0 9131 9132 9132 +00:00:12;30 9425 94ad 94ad 9470 9470 
91b0 9131 9132 9132 + +00:00:13;07 9425 9425 94ad 94ad 9470 9470 c1c2 c3c4 c580 91bf + +00:00:14;07 9425 9425 94ad 94ad 9470 9470 9220 9220 92a1 92a2 92a7 + +00:00:17;01 9426 9426 94ad 94ad 9470 9470 57c8 4552 4520 d94f d5a7 5245 20d3 54c1 cec4 49ce c720 ce4f 572c + +00:00:18;19 9426 9426 94ad 94ad 9470 9470 4c4f 4fcb 49ce c720 4fd5 5420 54c8 4552 452c 2054 c8c1 54a7 d320 c14c 4c + +00:00:20;06 9426 9426 94ad 94ad 9470 9470 54c8 4520 4352 4f57 c4ae + +00:00:21;24 9426 9426 94ad 94ad 9470 9470 3e3e 2049 5420 57c1 d320 c74f 4fc4 2054 4f20 c245 2049 ce20 54c8 45 + +00:00:34;27 94a7 94ad 9470 c16e 6420 f2e5 73f4 eff2 e520 49ef f761 a773 20ec 616e 642c 20f7 61f4 e5f2 + +00:00:36;12 94a7 94ad 9470 c16e 6420 f7e9 ec64 ece9 e6e5 ae80 + +00:00:44;08 94a7 94ad 9470 3e3e 20c2 e96b e520 49ef f761 2c20 79ef 75f2 2073 ef75 f2e3 e520 e6ef f280 +""" + + +@pytest.fixture(scope="session") +def sample_scc_roll_up_ru3(): + return """\ +Scenarist_SCC V1.0 +00:00:00;22 9425 9425 94ad 94ad 9470 9470 3e3e 3e20 c849 ae80 + +00:00:02;23 9425 9425 94ad 94ad 9470 9470 49a7 cd20 cb45 d649 ce20 43d5 cece 49ce c720 c1ce c420 c154 + +00:00:04;17 9425 9425 94ad 94ad 9470 9470 49ce d645 d354 4f52 a7d3 20c2 c1ce cb20 5745 20c2 454c 4945 d645 2049 ce80 + +00:00:06;04 9425 9425 94ad 94ad 9470 9470 c845 4cd0 49ce c720 54c8 4520 4c4f 43c1 4c20 ce45 49c7 c8c2 4f52 c84f 4fc4 d380 + +00:00:09;21 9425 9425 94ad 94ad 9470 9470 c1ce c420 49cd d052 4fd6 49ce c720 54c8 4520 4c49 d645 d320 4f46 20c1 4c4c + +00:00:11;07 9425 9425 94ad 94ad 9470 9470 5745 20d3 4552 d645 ae80 + +00:00:12;07 9425 9425 94ad 94ad 9470 9470 91b0 9131 9132 9132 + 00:00:13;07 9425 9425 94ad 94ad 9470 9470 c1c2 c3c4 c580 91bf 00:00:14;07 9425 9425 94ad 94ad 9470 9470 9220 9220 92a1 92a2 92a7 @@ -367,9 +407,11 @@ def sample_scc_duplicate_special_characters(): return """\ Scenarist_SCC V1.0 -00:23:28;01 9420 91b0 91b0 9131 9131 9132 9132 91b3 91b3 9134 9134 91b5 91b5 91b6 91b6 9137 9137 9138 9138 91b9 91b9 91ba 91ba 913b 913b 91bc 91bc 913d 
913d 913e 913e 91bf 91bf 942f +00:23:28;01 9420 9420 91b0 91b0 9131 9131 9132 9132 91b3 91b3 9134 9134 91b5 91b5 91b6 91b6 9137 9137 9138 9138 91b9 91b9 91ba 91ba 913b 913b 91bc 91bc 913d 913d 913e 913e 91bf 91bf 942f + +00:33:28;01 9420 91b0 9131 9132 91b3 9134 91b5 91b6 9137 9138 91b9 91ba 913b 91bc 913d 913e 91bf 942f -00:53:28;01 9420 91b0 9131 9132 91b3 9134 91b5 91b6 9137 9138 91b9 91ba 913b 91bc 913d 913e 91bf 942f +00:53:28;01 9420 91b0 9131 c1c1 9132 91b3 9134 91b5 91b6 9137 9138 91b9 91ba 913b 91bc c1c1 913d 913e 91bf 942f """ diff --git a/tests/test_dfxp.py b/tests/test_dfxp.py index 8294a3f3..c3e1136a 100644 --- a/tests/test_dfxp.py +++ b/tests/test_dfxp.py @@ -2,14 +2,12 @@ from pycaption import DFXPReader, CaptionReadNoCaptions from pycaption.exceptions import ( - CaptionReadSyntaxError, InvalidInputError, CaptionReadError, - CaptionReadTimingError, + CaptionReadSyntaxError, CaptionReadError, CaptionReadTimingError, ) from pycaption.geometry import ( UnitEnum, HorizontalAlignmentEnum, VerticalAlignmentEnum, ) from tests.mixins import ReaderTestingMixIn -from pytest_lazyfixture import lazy_fixture class TestDFXPReader(ReaderTestingMixIn): @@ -19,15 +17,20 @@ def setup_class(self): def test_positive_answer_for_detection(self, sample_dfxp): super().assert_positive_answer_for_detection(sample_dfxp) - @pytest.mark.parametrize('different_sample', [ - pytest.lazy_fixture('sample_microdvd'), - pytest.lazy_fixture('sample_sami'), - pytest.lazy_fixture('sample_scc_pop_on'), - pytest.lazy_fixture('sample_srt'), - pytest.lazy_fixture('sample_webvtt') - ]) - def test_negative_answer_for_detection(self, different_sample): - super().assert_negative_answer_for_detection(different_sample) + def test_negative_answer_for_microdvd(self, sample_microdvd): + super().assert_negative_answer_for_detection(sample_microdvd) + + def test_negative_answer_for_sami(self, sample_sami): + super().assert_negative_answer_for_detection(sample_sami) + + def 
test_negative_answer_for_scc_on_pop_on(self, sample_scc_pop_on): + super().assert_negative_answer_for_detection(sample_scc_pop_on) + + def test_negative_answer_for_srt(self, sample_srt): + super().assert_negative_answer_for_detection(sample_srt) + + def test_negative_answer_for_webvtt(self, sample_webvtt): + super().assert_negative_answer_for_detection(sample_webvtt) def test_caption_length(self, sample_dfxp): captions = DFXPReader().read(sample_dfxp) @@ -87,9 +90,6 @@ def test_invalid_timestamp(self, timestamp): with pytest.raises(CaptionReadTimingError) as exc_info: DFXPReader()._convert_timestamp_to_microseconds(timestamp) - assert exc_info.value.args[0].startswith( - f'Invalid timestamp: {timestamp}.') - def test_empty_file(self, sample_dfxp_empty): with pytest.raises(CaptionReadNoCaptions): DFXPReader().read(sample_dfxp_empty) diff --git a/tests/test_geometry.py b/tests/test_geometry.py index 597bbd78..447a85c0 100644 --- a/tests/test_geometry.py +++ b/tests/test_geometry.py @@ -140,4 +140,4 @@ def test_invalid_size_from_string(self, string): with pytest.raises(CaptionReadSyntaxError) as exc_info: Size.from_string(string) - assert exc_info.value.args[0].startswith(f"Invalid size: {string}.") + assert exc_info.value.args[0].startswith(f"Invalid size: {string}.") \ No newline at end of file diff --git a/tests/test_microdvd.py b/tests/test_microdvd.py index 7921b3c5..657b03f3 100644 --- a/tests/test_microdvd.py +++ b/tests/test_microdvd.py @@ -13,15 +13,20 @@ def setup_class(self): def test_positive_answer_for_detection(self, sample_microdvd): super().assert_positive_answer_for_detection(sample_microdvd) - @pytest.mark.parametrize('different_sample', [ - pytest.lazy_fixture('sample_dfxp'), - pytest.lazy_fixture('sample_sami'), - pytest.lazy_fixture('sample_scc_pop_on'), - pytest.lazy_fixture('sample_srt'), - pytest.lazy_fixture('sample_webvtt') - ]) - def test_negative_answer_for_detection(self, different_sample): - 
super().assert_negative_answer_for_detection(different_sample) + def test_negative_answer_for_detection_dfxp(self, sample_dfxp): + super().assert_negative_answer_for_detection(sample_dfxp) + + def test_negative_answer_for_detection_sami(self, sample_sami): + super().assert_negative_answer_for_detection(sample_sami) + + def test_negative_answer_for_detection_scc_pop_on(self, sample_scc_pop_on): + super().assert_negative_answer_for_detection(sample_scc_pop_on) + + def test_negative_answer_for_detection_srt(self, sample_srt): + super().assert_negative_answer_for_detection(sample_srt) + + def test_negative_answer_for_detection_webvtt(self, sample_webvtt): + super().assert_negative_answer_for_detection(sample_webvtt) def test_caption_length(self, sample_microdvd): captions = MicroDVDReader().read(sample_microdvd) diff --git a/tests/test_sami.py b/tests/test_sami.py index 9b07d059..66748475 100644 --- a/tests/test_sami.py +++ b/tests/test_sami.py @@ -15,15 +15,20 @@ def setup_method(self): def test_positive_answer_for_detection(self, sample_sami): super().assert_positive_answer_for_detection(sample_sami) - @pytest.mark.parametrize('different_sample', [ - pytest.lazy_fixture('sample_dfxp'), - pytest.lazy_fixture('sample_microdvd'), - pytest.lazy_fixture('sample_scc_pop_on'), - pytest.lazy_fixture('sample_srt'), - pytest.lazy_fixture('sample_webvtt') - ]) - def test_negative_answer_for_detection(self, different_sample): - super().assert_negative_answer_for_detection(different_sample) + def test_negative_answer_for_detection_dfxp(self, sample_dfxp): + super().assert_negative_answer_for_detection(sample_dfxp) + + def test_negative_answer_for_detection_microdvd(self, sample_microdvd): + super().assert_negative_answer_for_detection(sample_microdvd) + + def test_negative_answer_for_detection_scc_pop_on(self, sample_scc_pop_on): + super().assert_negative_answer_for_detection(sample_scc_pop_on) + + def test_negative_answer_for_detection_srt(self, sample_srt): + 
super().assert_negative_answer_for_detection(sample_srt) + + def test_negative_answer_for_detection_webvtt(self, sample_webvtt): + super().assert_negative_answer_for_detection(sample_webvtt) def test_caption_length(self, sample_sami): caption_set = self.reader.read(sample_sami) diff --git a/tests/test_scc.py b/tests/test_scc.py index dc5a0ed0..3b78e138 100644 --- a/tests/test_scc.py +++ b/tests/test_scc.py @@ -22,15 +22,20 @@ def setup_method(self): def test_positive_answer_for_detection(self, sample_scc_pop_on): super().assert_positive_answer_for_detection(sample_scc_pop_on) - @pytest.mark.parametrize('different_sample', [ - pytest.lazy_fixture('sample_dfxp'), - pytest.lazy_fixture('sample_microdvd'), - pytest.lazy_fixture('sample_sami'), - pytest.lazy_fixture('sample_srt'), - pytest.lazy_fixture('sample_webvtt') - ]) - def test_negative_answer_for_detection(self, different_sample): - super().assert_negative_answer_for_detection(different_sample) + def test_negative_answer_for_detection_dfxp(self, sample_dfxp): + super().assert_negative_answer_for_detection(sample_dfxp) + + def test_negative_answer_for_detection_microdvd(self, sample_microdvd): + super().assert_negative_answer_for_detection(sample_microdvd) + + def test_negative_answer_for_detection_sami(self, sample_sami): + super().assert_negative_answer_for_detection(sample_sami) + + def test_negative_answer_for_detection_srt(self, sample_srt): + super().assert_negative_answer_for_detection(sample_srt) + + def test_negative_answer_for_detection_webvtt(self, sample_webvtt): + super().assert_negative_answer_for_detection(sample_webvtt) def test_caption_length(self, sample_scc_pop_on): captions = SCCReader().read(sample_scc_pop_on) @@ -77,12 +82,10 @@ def test_positioning(self, sample_scc_multiple_positioning): ((40.0, UnitEnum.PERCENT), (41.0, UnitEnum.PERCENT)), ((20.0, UnitEnum.PERCENT), (71.0, UnitEnum.PERCENT)) ] - actual_positioning = [ caption_.layout_info.origin.serialized() for caption_ in 
captions.get_captions('en-US') ] - assert expected_positioning == actual_positioning def test_tab_offset(self, sample_scc_tab_offset): @@ -205,7 +208,7 @@ def test_skip_duplicate_tab_offset(self, sample_scc_duplicate_tab_offset): 'The I-10 Santa Monica Freeway', 'westbound is jammed,', 'due to a three-car accident', - 'blocking lanes 1 and 2', + 'blocking lanes 1 and 2' ] caption_set = SCCReader().read(sample_scc_duplicate_tab_offset) @@ -215,13 +218,15 @@ def test_skip_duplicate_tab_offset(self, sample_scc_duplicate_tab_offset): for node in cap_.nodes if node.type_ == CaptionNode.TEXT ] - assert expected_lines == actual_lines def test_skip_duplicate_special_characters( self, sample_scc_duplicate_special_characters): - expected_lines = ['®°½¿™¢£♪à èâêîôû', '®°½¿™¢£♪à èâêîôû'] - + expected_lines = [ + '®°½¿™¢£♪à èâêîôû', + '®°½¿™¢£♪à èâêîôû', + '®°AA½¿™¢£♪à èâêAAîôû' + ] caption_set = SCCReader().read(sample_scc_duplicate_special_characters) actual_lines = [ node.content @@ -229,7 +234,6 @@ def test_skip_duplicate_special_characters( for node in cap_.nodes if node.type_ == CaptionNode.TEXT ] - assert expected_lines == actual_lines def test_flashing_cue(self, sample_scc_flashing_cue): @@ -245,8 +249,9 @@ def test_line_too_long(self, sample_scc_with_line_too_long): assert exc_info.value.args[0].startswith( "32 character limit for caption cue in scc file.") - assert ("was Cal l l l l l l l l l l l l l l l l l l l l l l l l l l l l Denison, a friend - Length 81" - in exc_info.value.args[0].split("\n")) + str_to_check = ("around 00:00:05.900 - was Cal l l l l l l l l l l l l l l l l l l l l l l l l l l l l " + "Denison, a friend - Length 81") + assert str_to_check in exc_info.value.args[0].split("\n") class TestCoverageOnly: @@ -273,15 +278,16 @@ def test_freeze_rollup_captions_contents(self, sample_scc_roll_up_ru2): 'AND IMPROVING THE LIVES OF ALL', 'WE SERVE.', '®°½', + '®°½½', 'ABû', - 'ÁÁÉÓ¡', + 'ÁÉÓ¡', "WHERE YOU'RE STANDING NOW,", "LOOKING OUT THERE, THAT'S AL", 
'THE CROWD.', '>> IT WAS GOOD TO BE IN TH', "And restore Iowa's land, water", 'And wildlife.', - '>> Bike Iowa, your source for', + '>> Bike Iowa, your source for' ] assert expected_texts == actual_texts @@ -317,8 +323,8 @@ def test_multiple_formats(self, sample_scc_multiple_formats): assert expected_text_lines == text_lines - def test_freeze_semicolon_spec_time(self, sample_scc_roll_up_ru2): - scc1 = SCCReader().read(sample_scc_roll_up_ru2) + def test_freeze_semicolon_spec_time(self, sample_scc_roll_up_ru3): + scc1 = SCCReader().read(sample_scc_roll_up_ru3) captions = scc1.get_captions('en-US') expected_timings = [ (733333.3333333333, 2766666.6666666665), @@ -340,7 +346,6 @@ def test_freeze_semicolon_spec_time(self, sample_scc_roll_up_ru2): ] actual_timings = [(c_.start, c_.end) for c_ in captions] - assert expected_timings == actual_timings def test_freeze_colon_spec_time(self, sample_scc_pop_on): @@ -377,6 +382,9 @@ def test_italics_commands_are_formatted_properly(self): # 4. to get new opening italic nodes after changing position, if 3 # happened # 5. 
to get a final italic closing node, if one is needed + # 9120 and 91ae are mid row codes and will add a space + # 9120 at the start of the following text node + # 91ae to the end of the previous text node node_creator.interpret_command('9470') # row 15, col 0 node_creator.interpret_command('9120') # italics off node_creator.interpret_command('9120') # italics off @@ -395,7 +403,7 @@ def test_italics_commands_are_formatted_properly(self): node_creator.add_chars('b') node_creator.interpret_command('91ae') # italics ON again node_creator.add_chars('b') - node_creator.interpret_command('9120') # italics OFF + node_creator.interpret_command('9120') # italics OFF adds space node_creator.interpret_command('9120') # italics OFF node_creator.interpret_command('1570') # row 6 col 0 @@ -414,32 +422,35 @@ def test_italics_commands_are_formatted_properly(self): result = list(node_creator) assert result[0].is_text_node() - assert result[1].requires_repositioning() - assert result[2].is_italics_node() - assert result[2].sets_italics_on() + assert result[1].is_text_node() + assert result[2].requires_repositioning() - assert result[3].is_text_node() + assert result[3].is_italics_node() + assert result[3].sets_italics_on() assert result[4].is_text_node() - assert result[5].is_text_node() - - assert result[6].is_italics_node() - assert result[6].sets_italics_off() + assert result[5].sets_italics_off() + assert result[6].is_text_node() - assert result[7].requires_repositioning() - assert result[8].is_text_node() + assert result[7].is_text_node() + assert result[8].sets_italics_on() - assert result[9].requires_repositioning() - assert result[10].is_italics_node() - assert result[10].sets_italics_on() + assert result[9].is_text_node() + assert result[10].is_text_node() - assert result[11].is_text_node() - assert result[12].is_explicit_break() + assert result[11].sets_italics_off() + assert result[12].is_text_node() assert result[13].is_text_node() - assert result[14].is_explicit_break() 
+ assert result[14].requires_repositioning() assert result[15].is_text_node() - assert result[16].is_italics_node() - assert result[16].sets_italics_off() + assert result[16].requires_repositioning() + assert result[17].sets_italics_on() + assert result[18].is_text_node() + assert result[19].is_explicit_break() + assert result[20].is_text_node() + assert result[21].is_explicit_break() + assert result[22].is_text_node() + assert result[23].sets_italics_off() class CaptionDummy: diff --git a/tests/test_srt.py b/tests/test_srt.py index edeeab0c..3aedbede 100644 --- a/tests/test_srt.py +++ b/tests/test_srt.py @@ -11,15 +11,20 @@ def setup_class(self): def test_positive_answer_for_detection(self, sample_srt): super().assert_positive_answer_for_detection(sample_srt) - @pytest.mark.parametrize('different_sample', [ - pytest.lazy_fixture('sample_dfxp'), - pytest.lazy_fixture('sample_microdvd'), - pytest.lazy_fixture('sample_sami'), - pytest.lazy_fixture('sample_scc_pop_on'), - pytest.lazy_fixture('sample_webvtt') - ]) - def test_negative_answer_for_detection(self, different_sample): - super().assert_negative_answer_for_detection(different_sample) + def test_negative_answer_for_detection_dfxp(self, sample_dfxp): + super().assert_negative_answer_for_detection(sample_dfxp) + + def test_negative_answer_for_detection_microdvd(self, sample_microdvd): + super().assert_negative_answer_for_detection(sample_microdvd) + + def test_negative_answer_for_detection_sami(self, sample_sami): + super().assert_negative_answer_for_detection(sample_sami) + + def test_negative_answer_for_detection_scc_pop_on(self, sample_scc_pop_on): + super().assert_negative_answer_for_detection(sample_scc_pop_on) + + def test_negative_answer_for_detection_webvtt(self, sample_webvtt): + super().assert_negative_answer_for_detection(sample_webvtt) def test_caption_length(self, sample_srt): captions = self.reader.read(sample_srt) diff --git a/tests/test_webvtt.py b/tests/test_webvtt.py index 14e149e6..87739cb3 
100644 --- a/tests/test_webvtt.py +++ b/tests/test_webvtt.py @@ -14,15 +14,20 @@ def setup_method(self): def test_positive_answer_for_detection(self, sample_webvtt): super().assert_positive_answer_for_detection(sample_webvtt) - @pytest.mark.parametrize('different_sample', [ - pytest.lazy_fixture('sample_dfxp'), - pytest.lazy_fixture('sample_microdvd'), - pytest.lazy_fixture('sample_sami'), - pytest.lazy_fixture('sample_scc_pop_on'), - pytest.lazy_fixture('sample_srt') - ]) - def test_negative_answer_for_detection(self, different_sample): - super().assert_negative_answer_for_detection(different_sample) + def test_negative_answer_for_detection_dfxp(self, sample_dfxp): + super().assert_negative_answer_for_detection(sample_dfxp) + + def test_negative_answer_for_detection_microdvd(self, sample_microdvd): + super().assert_negative_answer_for_detection(sample_microdvd) + + def test_negative_answer_for_detection_sami(self, sample_sami): + super().assert_negative_answer_for_detection(sample_sami) + + def test_negative_answer_for_detection_scc_pop_on(self, sample_scc_pop_on): + super().assert_negative_answer_for_detection(sample_scc_pop_on) + + def test_negative_answer_for_detection_srt(self, sample_srt): + super().assert_negative_answer_for_detection(sample_srt) def test_caption_length(self, sample_webvtt_2): captions = self.reader.read(sample_webvtt_2)