From 9734bf360c3b9e775dc3d5d1e2a7d4f85a9b9122 Mon Sep 17 00:00:00 2001
From: "Cimon Lucas (LCM)" <lucas_cimon@connect-tech.sncf>
Date: Sun, 20 Aug 2023 15:22:02 +0200
Subject: [PATCH] Fixing test_iss1601

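---
Note: besides the test_iss1601 fix, this patch makes pypdf/_merger.py import
`exec_time` from a `perfs` module (not among the files changed here) and wraps
the page loop of `write` in it. That looks like profiling instrumentation
rather than part of the fix; confirm whether it should be dropped before
merging. It also raises the test_watermarking_speed timeout from 2 to 4.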
 pypdf/_merger.py                  | 14 ++++++++------
 pypdf/generic/_data_structures.py |  3 ++-
 tests/__init__.py                 | 17 +++++++++++++++++
 tests/test_utils.py               | 20 ++++++++++++++++++++
 tests/test_writer.py              | 17 +++++++----------
 5 files changed, 54 insertions(+), 17 deletions(-)

diff --git a/pypdf/_merger.py b/pypdf/_merger.py
index 924f18495..7db7262c2 100644
--- a/pypdf/_merger.py
+++ b/pypdf/_merger.py
@@ -24,6 +24,7 @@
 # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 # POSSIBILITY OF SUCH DAMAGE.
+from perfs import exec_time
 
 import warnings
 from io import BytesIO, FileIO, IOBase
@@ -321,12 +322,13 @@ def write(self, fileobj: Union[Path, StrByteType]) -> None:
         # Add pages to the PdfWriter
         # The commented out line below was replaced with the two lines below it
         # to allow PdfMerger to work with PyPdf 1.13
-        for page in self.pages:
-            self.output.add_page(page.pagedata)
-            pages_obj = cast(Dict[str, Any], self.output._pages.get_object())
-            page.out_pagedata = self.output.get_reference(
-                pages_obj[PA.KIDS][-1].get_object()
-            )
+        with exec_time("for page in self.pages:"):
+            for page in self.pages:
+                self.output.add_page(page.pagedata)
+                pages_obj = cast(Dict[str, Any], self.output._pages.get_object())
+                page.out_pagedata = self.output.get_reference(
+                    pages_obj[PA.KIDS][-1].get_object()
+                )
 
         # Once all pages are added, create outline items to point at those pages
         self._write_dests()
diff --git a/pypdf/generic/_data_structures.py b/pypdf/generic/_data_structures.py
index e2b63078e..4212e28ae 100644
--- a/pypdf/generic/_data_structures.py
+++ b/pypdf/generic/_data_structures.py
@@ -988,6 +988,7 @@ class ContentStream(DecodedStreamObject):
     * when .set_data() is called, ._operations is set to None
     * when .operations is set, ._data is set to None
     """
+
     def __init__(
         self,
         stream: Any,
@@ -1232,7 +1233,7 @@ def write_to_stream(
         self, stream: StreamType, encryption_key: Union[None, str, bytes] = None
     ) -> None:
         if not self._data and self._operations:
-            self.get_data()  # this ensures ._data is rebuilt for ContentStream
+            self.get_data()  # this ensures ._data is rebuilt
         super().write_to_stream(stream, encryption_key)
 
 
diff --git a/tests/__init__.py b/tests/__init__.py
index 6b4305bec..0e86c1a8f 100644
--- a/tests/__init__.py
+++ b/tests/__init__.py
@@ -87,3 +87,20 @@ def get_object(self) -> "DummyObj":
 
     def get_reference(self, obj):
         return IndirectObject(idnum=1, generation=1, pdf=self)
+
+
+def is_sublist(child_list, parent_list):
+    """
+    Check if child_list is a sublist of parent_list, with respect to
+    * elements order
+    * elements repetition
+
+    Elements are compared using `==` and need not be adjacent in parent_list.
+    The scan is iterative, so long lists cannot exhaust the recursion limit.
+    """
+    # One shared iterator: every match consumes parent_list up to and
+    # including the matched element, so later children can only match
+    # later parents. This enforces both order and repetition.
+    remaining = iter(parent_list)
+    # `parent == child` keeps the parent element as the left operand.
+    return all(any(parent == child for parent in remaining) for child in child_list)
diff --git a/tests/test_utils.py b/tests/test_utils.py
index b87001773..47c04214a 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -25,6 +25,8 @@
 )
 from pypdf.errors import DeprecationError, PdfReadError, PdfStreamError
 
+from . import is_sublist
+
 TESTS_ROOT = Path(__file__).parent.resolve()
 PROJECT_ROOT = TESTS_ROOT.parent
 RESOURCE_ROOT = PROJECT_ROOT / "resources"
@@ -351,3 +353,21 @@ def test_parse_datetime_err():
         parse_iso8824_date("D:20210408T054711Z")
     assert ex.value.args[0] == "Can not convert date: D:20210408T054711Z"
     assert parse_iso8824_date("D:20210408054711").tzinfo is None
+
+
+def test_is_sublist():
+    """Exercise is_sublist on ordering, repetition and empty inputs."""
+    parent = [0, 1, 2]
+    # Basic checks, then order, then duplicates against a shorter parent:
+    positives = [[0, 1], [0, 2], [1, 2]]
+    negatives = [[0, 3], [1, 0], [0, 1, 1]]
+    for child in positives:
+        assert is_sublist(child, parent) is True
+    for child in negatives:
+        assert is_sublist(child, parent) is False
+    # Duplicates present in the parent are matched one-for-one:
+    assert is_sublist([0, 1, 1], [0, 1, 1, 2]) is True
+    # Edge cases: empty child, empty parent, and a list against its copy:
+    assert is_sublist([], parent) is True
+    assert is_sublist([0, 1], []) is False
+    assert is_sublist(list(parent), parent) is True
diff --git a/tests/test_writer.py b/tests/test_writer.py
index 5be8ac497..741b51a28 100644
--- a/tests/test_writer.py
+++ b/tests/test_writer.py
@@ -28,7 +28,7 @@
     TextStringObject,
 )
 
-from . import get_data_from_url
+from . import get_data_from_url, is_sublist
 
 TESTS_ROOT = Path(__file__).parent.resolve()
 PROJECT_ROOT = TESTS_ROOT.parent
@@ -1238,23 +1238,20 @@ def test_iss1601():
     url = "https://github.com/py-pdf/pypdf/files/10579503/badges-38.pdf"
     name = "badge-38.pdf"
     reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
+    original_cs_operations = ContentStream(reader.pages[0].get_contents(), reader).operations
     writer = PdfWriter()
     page_1 = writer.add_blank_page(
         reader.pages[0].mediabox[2], reader.pages[0].mediabox[3]
     )
     page_1.merge_transformed_page(reader.pages[0], Transformation())
-    assert (
-        ContentStream(reader.pages[0].get_contents(), reader).get_data()
-        in page_1.get_contents().get_data()
-    )
+    page_1_cs_operations = page_1.get_contents().operations
+    assert is_sublist(original_cs_operations, page_1_cs_operations)
     page_1 = writer.add_blank_page(
         reader.pages[0].mediabox[2], reader.pages[0].mediabox[3]
     )
     page_1.merge_page(reader.pages[0])
-    assert (
-        ContentStream(reader.pages[0].get_contents(), reader).get_data()
-        in page_1.get_contents().get_data()
-    )
+    page_1_cs_operations = page_1.get_contents().operations
+    assert is_sublist(original_cs_operations, page_1_cs_operations)
 
 
 def test_attachments():
@@ -1539,7 +1536,7 @@ def test_watermark():
 
 
 @pytest.mark.enable_socket()
-@pytest.mark.timeout(2)  # this was a lot slower before PR #2086
+@pytest.mark.timeout(4)  # this was a lot slower before PR #2086
 def test_watermarking_speed():
     url = "https://github.com/py-pdf/pypdf/files/11985889/bg.pdf"
     name = "bgwatermark.pdf"