diff --git a/.gitignore b/.gitignore
index 28d40da..18c25db 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,7 +1,6 @@
 # files
 *.pyc
 *.jp*g
-*.docx
 layout.json
 .vscode/
 
@@ -20,4 +19,4 @@ diff.png
 build/
 dist/
 *egg-info/
-pdf2docx*.rst
\ No newline at end of file
+pdf2docx*.rst
diff --git a/pdf2docx/common/Block.py b/pdf2docx/common/Block.py
index 3b46f0e..bdb48c6 100644
--- a/pdf2docx/common/Block.py
+++ b/pdf2docx/common/Block.py
@@ -141,4 +141,4 @@ def make_docx(self, *args, **kwargs):
         Raises:
             NotImplementedError
         """
-        raise NotImplementedError
\ No newline at end of file
+        raise NotImplementedError
diff --git a/pdf2docx/common/share.py b/pdf2docx/common/share.py
index e67b6e6..44b7d40 100644
--- a/pdf2docx/common/share.py
+++ b/pdf2docx/common/share.py
@@ -255,3 +255,56 @@ def inner(*args, **kwargs):
             return objects
         return inner
     return wrapper
+
+def is_list_item(text, bullets=True, numbers=True):
+    '''Returns `text` if `bullets` is true and `text` is a bullet character, or
+    `numbers` is true and `text` is not empty and consists entirely of digits
+    0-9. Otherwise returns None.
+
+    If `bullets` is True we use an internal list of bullet characters;
+    otherwise it should be a list of integer Unicode values.
+    '''
+    return False
+    if bullets is True:
+        bullets2 = (
+                # From https://en.wikipedia.org/wiki/Bullet_(typography).
+                0x2022, # BULLET (&bull;, &bullet;)
+                0x2023, # TRIANGULAR BULLET
+                0x2043, # HYPHEN BULLET (&hybull;)
+                0x204c, # BLACK LEFTWARDS BULLET
+                0x204d, # BLACK RIGHTWARDS BULLET
+                0x2219, # BULLET OPERATOR for use in mathematical notation primarily as a dot product instead of interpunct.
+                0x25c9, # FISHEYE used in Japan as a bullet, and called tainome.
+                0x25cb, # WHITE CIRCLE (&cir;)
+                0x25cf, # BLACK CIRCLE
+                0x25cf, # Bullet, black small circle.
+                0x25d8, # INVERSE BULLET
+                0x25e6, # WHITE BULLET
+                0x2619, # REVERSED ROTATED FLORAL HEART BULLET; see Fleuron (typography)
+                0x2765, # ROTATED HEAVY BLACK HEART BULLET
+                0x2767, # ROTATED FLORAL HEART BULLET; see Fleuron (typography)
+                0x29be, # CIRCLED WHITE BULLET (&olcir;)
+                0x29bf, # CIRCLED BULLET (&ofcir;)
+
+                # Additional.
+                0x25aa, # Black small square, square bullet.
+                0xf0b7, # "Private Use Character" but seems to be used by libreoffice for bullets.
+                )
+    else:
+        bullets2 = bullets
+    if bullets:
+        if len(text)==1:
+            c = text[0]
+            cc = ord(c)
+            if cc in bullets2:
+                if bullets is True and cc == 0xf0b7:
+                    return chr(0x2022)
+                return text
+    if numbers:
+        for c in text:
+            if isinstance(c, list):
+                c = c[0]
+            if c not in '0123456789':
+                break
+        else:
+            return text
diff --git a/pdf2docx/converter.py b/pdf2docx/converter.py
index 8da7e8a..337d9b3 100644
--- a/pdf2docx/converter.py
+++ b/pdf2docx/converter.py
@@ -106,7 +106,9 @@ def default_settings(self):
             'extract_stream_table'           : False,  # don't consider stream table when extracting tables
             'parse_lattice_table'            : True,   # whether parse lattice table or not; may destroy the layout if set False
             'parse_stream_table'             : True,   # whether parse stream table or not; may destroy the layout if set False
-            'delete_end_line_hyphen'         : False   # delete hyphen at the end of a line
+            'delete_end_line_hyphen'         : False,  # delete hyphen at the end of a line
+            'raw_exceptions'                 : False,  # Don't swallow exceptions
+            'list_not_table'                 : True,   # Avoid treating bullet list as table.
         }
 
     # -----------------------------------------------------------------------
@@ -182,6 +184,8 @@ def parse_pages(self, **kwargs):
             try:
                 page.parse(**kwargs)
             except Exception as e:
+                if kwargs['raw_exceptions']:
+                    raise
                 if not kwargs['debug'] and kwargs['ignore_page_error']:
                     logging.error('Ignore page %d due to parsing page error: %s', pid, e)
                 else:
@@ -224,6 +228,8 @@ def make_docx(self, filename_or_stream=None, **kwargs):
             try:
                 page.make_docx(docx_file)
             except Exception as e:
+                if kwargs['raw_exceptions']:
+                    raise
                 if not kwargs['debug'] and kwargs['ignore_page_error']:
                     logging.error('Ignore page %d due to making page error: %s', pid, e)
                 else:
diff --git a/pdf2docx/layout/Blocks.py b/pdf2docx/layout/Blocks.py
index a3ddff5..7f5e391 100644
--- a/pdf2docx/layout/Blocks.py
+++ b/pdf2docx/layout/Blocks.py
@@ -8,7 +8,7 @@
 from docx.shared import Pt
 from ..common import constants
 from ..common.Collection import ElementCollection
-from ..common.share import (BlockType, lower_round, rgb_value)
+from ..common.share import (BlockType, lower_round, rgb_value, is_list_item)
 from ..common.Block import Block
 from ..common.docx import (reset_paragraph_format, delete_paragraph)
 from ..text.TextBlock import TextBlock
@@ -176,7 +176,7 @@ def assign_to_tables(self, tables:list):
         self.reset(blocks)
 
 
-    def collect_stream_lines(self, potential_shadings:list, line_separate_threshold:float):
+    def collect_stream_lines(self, potential_shadings:list, line_separate_threshold:float, **kwargs):
         '''Collect elements in Line level (line or table bbox), which may contained in a stream table region.
         
         Table may exist on the following conditions:
@@ -230,11 +230,14 @@ def close_table():
             bbox = row.bbox
 
             # flow layout or not?
-            if not row.is_flow_layout(line_separate_threshold, cell_layout=cell_layout): 
-                table_lines.extend([sub_line(block) for block in row])
-
-            else:
+            if row.is_flow_layout(line_separate_threshold, cell_layout=cell_layout):
                 close_table()
+            elif kwargs.get('list_not_table') and is_list_item(row[0].text):
+                 # Don't interpret list-style bullet characters/numbers as
+                 # indicating a table.
+                 close_table()
+            else:
+                table_lines.extend([sub_line(block) for block in row])
 
             # contained in shading or not?
             for block in row:
diff --git a/pdf2docx/main.py b/pdf2docx/main.py
index e4b187b..717f6cf 100644
--- a/pdf2docx/main.py
+++ b/pdf2docx/main.py
@@ -41,6 +41,8 @@ def convert(pdf_file:str,
             cv.convert(docx_file, start, end, pages, **kwargs)
         except Exception as e:
             logging.error(e)
+            if kwargs['raw_exceptions']:
+                raise
         finally:
             cv.close()
 
diff --git a/pdf2docx/page/RawPageFitz.py b/pdf2docx/page/RawPageFitz.py
index 52dcaa9..4158e9a 100644
--- a/pdf2docx/page/RawPageFitz.py
+++ b/pdf2docx/page/RawPageFitz.py
@@ -4,6 +4,7 @@
 A wrapper of PyMuPDF Page as page engine.
 '''
 
+import fitz
 import logging
 from .RawPage import RawPage
 from ..image.ImagesExtractor import ImagesExtractor
@@ -22,6 +23,7 @@ def extract_raw_dict(self, **settings):
         if not self.page_engine: return raw_dict
 
         # actual page size
+        # `self.page_engine` is the `fitz.Page`.
         *_, w, h = self.page_engine.rect # always reflecting page rotation
         raw_dict.update({ 'width' : w, 'height': h })
         self.width, self.height = w, h
@@ -59,7 +61,15 @@ def _preprocess_text(self, **settings):
         if ocr==1: raise SystemExit("OCR feature is planned but not implemented yet.")
 
         # all text blocks no matter hidden or not
-        raw = self.page_engine.get_text('rawdict', flags=64)
+        sort = settings.get('sort')
+        raw = self.page_engine.get_text(
+                'rawdict',
+                flags=0
+                    | fitz.TEXT_MEDIABOX_CLIP
+                    | fitz.TEXT_CID_FOR_UNKNOWN_UNICODE
+                    ,
+                sort=sort,
+                )
         text_blocks = raw.get('blocks', [])
 
         # potential UnicodeDecodeError issue when trying to filter hidden text:
diff --git a/pdf2docx/table/Cell.py b/pdf2docx/table/Cell.py
index e2d3aff..cff0124 100644
--- a/pdf2docx/table/Cell.py
+++ b/pdf2docx/table/Cell.py
@@ -25,8 +25,11 @@ def text(self):
         '''Text contained in this cell.'''
         if not self: return None
         # NOTE: sub-table may exists in
-        return '\n'.join([block.text if block.is_text_block else '<NEST TABLE>'
-                                 for block in self.blocks])
+        # fixme: prev code did `if block.is_text_block`, but sometimes
+        # there is no `is_text_block` member; would be good to ensure
+        # this member is always present and avoid use of `hasattr()`.
+        return '\n'.join([block.text if hasattr(block, 'text') else '<NEST TABLE>'
+                                for block in self.blocks])
 
 
     @property
@@ -75,7 +78,12 @@ def make_docx(self, table, indexes):
         docx_cell = table.cell(i, j)
         if n_row*n_col!=1:
             _cell = table.cell(i+n_row-1, j+n_col-1)
-            docx_cell.merge(_cell)
+            try:
+                docx_cell.merge(_cell)
+            except Exception as e:
+                def show(c):
+                    return f'[_tc.top={c._tc.top} _tc.bottom={c._tc.bottom}]'
+                raise Exception(f'Failed to merge docx_cell={show(docx_cell)} _cell={show(_cell)}. {i=} {j=} {n_row=} {n_col=}') from e
 
         # ---------------------
         # cell width (cell height is set by row height)
diff --git a/pdf2docx/table/TablesConstructor.py b/pdf2docx/table/TablesConstructor.py
index 546b7e5..3faffdd 100644
--- a/pdf2docx/table/TablesConstructor.py
+++ b/pdf2docx/table/TablesConstructor.py
@@ -379,4 +379,4 @@ def _inner_borders(lines:Lines, outer_borders:tuple):
                 borders_ = TablesConstructor._inner_borders(rows_lines[j], (top, bottom, left, right))
                 borders.extend(borders_)
 
-        return borders
\ No newline at end of file
+        return borders
diff --git a/pdf2docx/text/Lines.py b/pdf2docx/text/Lines.py
index c9223e6..20b5ae9 100644
--- a/pdf2docx/text/Lines.py
+++ b/pdf2docx/text/Lines.py
@@ -11,6 +11,7 @@
 from ..common.Collection import ElementCollection
 from ..common.share import TextAlignment
 from ..common import constants
+from ..common.share import is_list_item
 
 
 class Lines(ElementCollection):
@@ -33,6 +34,11 @@ def restore(self, raws:list):
         return self
 
 
+    def text(self):
+        '''For debugging.'''
+        return '\n'.join([line.text for line in self])
+
+
     @property
     def image_spans(self):
         '''Get all ImageSpan instances.'''
@@ -72,8 +78,12 @@ def split_vertically_by_text(self, line_break_free_space_ratio:float, new_paragr
             end_of_sen = row[-1].text.strip().endswith(punc)
             w =  row[-1].bbox[2]-row[0].bbox[0]
 
+            if 0 and is_list_item(row[0].text[0]):
+                # Treat bullet list items as separate paragraphs.
+                start_of_para = True
+            
             # end of a sentense and free space at the end -> end of paragraph
-            if end_of_sen and w/W <= 1.0-line_break_free_space_ratio:
+            elif end_of_sen and w/W <= 1.0-line_break_free_space_ratio:
                 end_of_para = True
 
             # start of sentence and free space at the start -> start of paragraph
diff --git a/pdf2docx/text/TextBlock.py b/pdf2docx/text/TextBlock.py
index 46cd531..af111e1 100644
--- a/pdf2docx/text/TextBlock.py
+++ b/pdf2docx/text/TextBlock.py
@@ -468,4 +468,4 @@ def external_alignment():
         if alignment==TextAlignment.LEFT or alignment==TextAlignment.JUSTIFY:
             self.first_line_space = rows[0][0].bbox[idx0] - rows[1][0].bbox[idx0]
         
-        return alignment
\ No newline at end of file
+        return alignment
diff --git a/setup.py b/setup.py
index c4c05f9..5de3bce 100644
--- a/setup.py
+++ b/setup.py
@@ -28,19 +28,11 @@ def load_long_description(fname):
 
 def load_requirements(fname):
     '''Load requirements.'''
-    try:
-        # pip >= 10.0
-        from pip._internal.req import parse_requirements
-    except ImportError:
-        # pip < 10.0
-        from pip.req import parse_requirements
-
-    reqs = parse_requirements(fname, session=False)
-    try:
-        requirements = [str(ir.requirement) for ir in reqs]
-    except AttributeError:
-        requirements = [str(ir.req) for ir in reqs]
-    return requirements
+    ret = list()
+    with open(fname) as f:
+        for line in f:
+            ret.append(line)
+    return ret
 
 
 setup(
diff --git a/test/samples/demo-whisper_2_3.pdf b/test/samples/demo-whisper_2_3.pdf
new file mode 100644
index 0000000..0e6d1df
Binary files /dev/null and b/test/samples/demo-whisper_2_3.pdf differ
diff --git a/test/samples/pdf2docx-lists-bullets3.docx b/test/samples/pdf2docx-lists-bullets3.docx
new file mode 100644
index 0000000..76466f0
Binary files /dev/null and b/test/samples/pdf2docx-lists-bullets3.docx differ
diff --git a/test/test.py b/test/test.py
index 2a174d3..bc7a7ff 100644
--- a/test/test.py
+++ b/test/test.py
@@ -1,36 +1,48 @@
 '''
 The test framework: pytest, pytest-cov.
 
-To test the pdf conversion and converting quality, the idea is to convert generated docx to pdf,
-then check the image similarity between source pdf page and converted pdf page. Considering the 
-converting quality from docx to pdf, a Windows-based command line tool `OfficeToPDF` is used, in
-addition, an installation of Microsoft Word is required.
-
-To leverage the benefit of Github Action, the testing process is divided into three parts:
-  1. Convert sample pdf to docx with `pdf2docx`.
-  2. Convert generated docx to pdf for comparing.
-  3. Convert page to image and compare similarity with `python-opencv`.
-
-Test scripts on Part One and Three are applied with two test class respectively in this module,
-so they could be run separately with pytest command, e.g.
-
-- pytest -vs --no-header test.py::TestConversion for Part One
-- pytest -vs --no-header test.py::TestQuality for Part Three
-
-Links on MS Word to PDF conversion:
-  - https://github.com/cognidox/OfficeToPDF/releases
-  - https://github.com/AndyCyberSec/pylovepdf
-  - https://www.e-iceblue.com/Tutorials/Java/Spire.Doc-for-Java/Program-Guide/Conversion/Convert-Word-to-PDF-in-Java.html
+We have a set of PDF files as test inputs.
+
+For a test file foo.pdf, we convert it into a file foo.pdf.docx using pdf2docx.
+
+To check whether this has worked as expected, we use Python package docx2pdf
+(which uses Word) on Windows, or Libreoffice command line on other platforms,
+to convert foo.pdf.docx into foo.pdf.docx.pdf.
+
+We then compare foo.pdf.docx.pdf with the original foo.pdf file using opencv,
+generating a similarity value.
+
+So on Windows we require Word is installed, and on other platforms we require
+that Libreoffice is installed.
+
+If docx2pdf fails with `Object reference not set to an instance of an
+object. Did not convert`, it might be necessary to follow the instructions at:
+
+    https://stackoverflow.com/questions/24860351/object-reference-not-set-to-an-instance-of-an-object-did-not-convert
+
+    In a Cmd window run:
+        DCOMCNFG
+    Then:
+        Console Root > Component Services > Computers > My Computer > DCOM Config > Microsoft Word 97 - 2003 Document
+    Then: Right click then properties then Identity tab and set a username and
+    password.
 '''
 
+import glob
 import os
 import io
 import numpy as np
 import cv2 as cv
 import fitz
 from pdf2docx import Converter, parse
+import subprocess
+import time
+import shutil
+import platform
+import pytest
 
 
+root_path = os.path.abspath(f'{__file__}/../..')
 script_path = os.path.abspath(__file__) # current script path
 test_dir = os.path.dirname(script_path)
 sample_path = os.path.join(test_dir, 'samples')
@@ -99,6 +111,89 @@ def get_mssism(i1, i2, kernel=(15,15)):
     return np.mean(mssim[0:3])
 
 
+def run(command):
+   print(f'Running: {command}')
+   subprocess.run(command, shell=1, check=1)
+
+
+def document_to(in_, out):
+    if platform.system() == 'Windows':
+        return word_to(in_, out)
+    else:
+        return libreoffice_to(in_, out)
+
+
+_g_word_to_docx2pdf = False
+
+def word_to(in_, out):
+    global _g_word_to_docx2pdf
+    if not _g_word_to_docx2pdf:
+        run('pip install docx2pdf')
+        import docx2pdf
+        _g_word_to_docx2pdf = True
+    assert os.path.isfile(in_), f'Not a file: {in_=}'
+    run(f'docx2pdf {in_} {out}')
+    return
+    import docx2pdf
+    try:
+        docx2pdf.convert(in_, out)
+    except Exception as e:
+        print(f'docx2pdf.convert() raised exception: {e}')
+        raise
+    
+
+
+def libreoffice_to(in_, out):
+    '''Converts file to pdf using libreoffice. Returns generated path
+    f'{in_}.pdf'.'''
+    # Libreoffice does not allow direct specification of the output path and
+    # goes wrong wtih paths with multiple '.' characters, so we work on a
+    # temporary. Also it does not return non-zero if it fails so we check
+    # mtime.
+    #print(f'{in_=} {out=}')
+    assert os.path.isfile(in_)
+    in_root, in_ext = os.path.splitext(in_)
+    _, out_ext = os.path.splitext(out)
+    out_dir = os.path.dirname(out)
+    temp = f'{out_dir}/_temp_libreoffice_to'
+    in2 = f'{temp}{in_ext}'
+    out2 = f'{temp}{out_ext}'
+    shutil.copy2(in_, in2)
+    try:
+        t = time.time()
+        #print(f'{in_=} {in2=} {in_ext=}')
+        run(f'libreoffice --convert-to {out_ext[1:]} --outdir {out_dir} {in2}')
+        os.rename(out2, out)
+        t_out = os.path.getmtime(out)
+        assert t_out >= t, f'libreoffice failed to update/create {out=}'
+    finally:
+        os.remove(in2)
+        if os.path.isfile(out2):
+            os.remove(out2)
+
+
+def compare_pdf(pdf1, pdf2, num_pages=None):
+    #print(f'Comparing {pdf1=} {pdf2=}')
+    with fitz.Document(pdf1) as doc1, fitz.Document(pdf2) as doc2:
+        if num_pages:
+            n1 = num_pages
+        else:
+            n1 = len(doc1)
+            n2 = len(doc2)
+            if n1 != n2:
+                print(f'Differing numbers of pages: {n1=} {n2=}.')
+                return -1
+        sidx = 0
+        # Find average similarity.
+        for n in range(n1):
+            diff_png = f'{pdf2}.diff.{n}.png'
+            sidx_n = get_page_similarity(doc1[n], doc2[n], diff_png)
+            #print(f'Page {n}: {diff_png} {sidx_n=}.')
+            sidx += sidx_n
+        sidx /= n1
+        #print(f'{sidx=}')
+        return sidx
+
 
 class TestConversion:
     '''Test the converting process.'''
@@ -129,135 +224,6 @@ def convert_by_io_stream(self, filename):
         docx_file = os.path.join(output_path, f'{filename}.docx')
         with open(docx_file, 'wb') as f: f.write(out_stream.getvalue())
 
-    # ------------------------------------------
-    # stream
-    # ------------------------------------------
-    def test_io_stream(self):
-        '''test input/output file stream.'''
-        self.convert_by_io_stream('demo-text')
-
-    # ------------------------------------------
-    # layout: section
-    # ------------------------------------------
-    def test_section(self):
-        '''test page layout: section and column.'''
-        self.convert('demo-section')
-
-    def test_section_spacing(self):
-        '''test page layout: section vertical position.'''
-        self.convert('demo-section-spacing')
-
-    # ------------------------------------------
-    # text styles
-    # ------------------------------------------
-    def test_blank_file(self):
-        '''test blank file without any texts or images.'''
-        self.convert('demo-blank')
-
-    def test_text_format(self):
-        '''test text format, e.g. highlight, underline, strike-through.'''
-        self.convert('demo-text')
-
-    def test_text_alignment(self):
-        '''test text alignment.'''
-        self.convert('demo-text-alignment')    
-
-    def test_unnamed_fonts(self):
-        '''test unnamed fonts which destroys span bbox, and accordingly line/block layout.'''
-        self.convert('demo-text-unnamed-fonts')
-
-    def test_text_scaling(self):
-        '''test font size. In this case, the font size is set precisely with character scaling.'''
-        self.convert('demo-text-scaling')
-
-    def test_text_hidden(self):
-        '''test hidden text, which is ignore by default.'''
-        self.convert('demo-text-hidden')
-
-    # ------------------------------------------
-    # image styles
-    # ------------------------------------------
-    def test_image(self):
-        '''test inline-image.'''
-        self.convert('demo-image')
-
-    def test_vector_graphic(self):
-        '''test vector graphic.'''
-        self.convert('demo-image-vector-graphic')
-
-    def test_image_color_space(self):
-        '''test image color space.'''
-        self.convert('demo-image-colorspace')
-
-    def test_image_floating(self):
-        '''test floating images.'''
-        self.convert('demo-image-floating')
-
-    def test_image_rotation(self):
-        '''test rotating image due to pdf page rotation.'''
-        self.convert('demo-image-rotation')
-
-    def test_image_overlap(self):
-        '''test images with both intersection and page rotation.'''
-        self.convert('demo-image-overlap')
-
-
-    # ------------------------------------------
-    # table styles
-    # ------------------------------------------
-    def test_table_bottom(self):
-        '''page break due to table at the end of page.'''
-        self.convert('demo-table-bottom')
-
-    def test_table_format(self):
-        '''test table format, e.g.
-            - border and shading style
-            - vertical cell
-            - merged cell
-            - text format in cell
-        '''
-        self.convert('demo-table')
-
-    def test_stream_table(self):
-        '''test stream structure and shading.'''
-        self.convert('demo-table-stream')
-
-    def test_table_shading(self):
-        '''test simulating shape with shading cell.'''
-        self.convert('demo-table-shading')
-
-    def test_table_shading_highlight(self):
-        '''test distinguishing table shading and highlight.'''
-        self.convert('demo-table-shading-highlight')
-
-    def test_lattice_table(self):
-        '''test lattice table with very close text underlines to table borders.'''
-        self.convert('demo-table-close-underline')
-
-    def test_lattice_table_invoice(self):
-        '''test invoice sample file with lattice table, vector graphic.'''
-        self.convert('demo-table-lattice')
-
-    def test_lattice_cell(self):
-        '''test generating stream borders for lattice table cell.'''
-        self.convert('demo-table-lattice-one-cell')
-
-    def test_table_border_style(self):
-        '''test border style, e.g. width, color.'''
-        self.convert('demo-table-border-style')
-
-    def test_table_align_borders(self):
-        '''aligning stream table borders to simplify table structure.'''
-        self.convert('demo-table-align-borders')
-
-    def test_nested_table(self):
-        '''test nested tables.'''
-        self.convert('demo-table-nested')
-
-    def test_path_transformation(self):
-        '''test path transformation. In this case, the (0,0) origin is out of the page.'''
-        self.convert('demo-path-transformation')
-
 
     # ------------------------------------------
     # table contents
@@ -297,66 +263,103 @@ def test_multi_pages(self):
         assert os.path.isfile(docx_file)
 
 
+# We make a separate pytest test for each sample file.
+
+def _find_paths():
+    ret = list()
+    for path in glob.glob(f'{sample_path}/*.docx') + glob.glob(f'{sample_path}/*.pdf'):
+        path_leaf = os.path.basename(path)
+        if path_leaf.count('.') > 1:
+            continue
+        ret.append(os.path.relpath(path, root_path))
+    return ret
+
+g_paths = _find_paths()
+
+# We create a separate pytest for each sample file, paramaterised using the
+# path of the sample file relative to the pdf2docx directory.
+#
+# So one can run a specific test with:
+#
+# pytest pdf2docx/test/test.py::test_one[test/samples/demo-whisper_2_3.pdf]
 
-class TestQuality:
+@pytest.mark.parametrize('path', g_paths)
+def test_one(path):
     '''Check the quality of converted docx.
-    Note the docx files must be converted to PDF files in advance.
     '''
-
-    INDEX_MAP = {
+    
+    # Where there are two values, they are (sidx_required_word,
+    # sidx_required_libreoffice).
+    #
+    docx_to_sidx_required = {
         'demo-blank.pdf': 1.0,
         'demo-image-cmyk.pdf': 0.90,
         'demo-image-transparent.pdf': 0.90,
-        'demo-image-vector-graphic.pdf': 0.89,
+        'demo-image-vector-graphic.pdf': (0.89, 0.68),
         'demo-image.pdf': 0.90,
-        'demo-image-rotation.pdf': 0.90,
-        'demo-image-overlap.pdf': 0.90,
-        'demo-path-transformation.pdf': 0.90,
-        'demo-section-spacing.pdf': 0.90,
-        'demo-section.pdf': 0.70,
+        'demo-image-rotation.pdf': (0.90, 0.82),
+        'demo-image-overlap.pdf': (0.90, 0.70),
+        'demo-path-transformation.pdf': (0.89, 0.60),
+        'demo-section-spacing.pdf': (0.90, 0.86),
+        'demo-section.pdf': (0.70, 0.45),
         'demo-table-align-borders.pdf': 0.49,
-        'demo-table-border-style.pdf': 0.90,
+        'demo-table-border-style.pdf': (0.90, 0.89),
         'demo-table-bottom.pdf': 0.90,
-        'demo-table-close-underline.pdf': 0.58,
-        'demo-table-lattice-one-cell.pdf': 0.79,
-        'demo-table-lattice.pdf': 0.75,
+        'demo-table-close-underline.pdf': (0.57, 0.49),
+        'demo-table-lattice-one-cell.pdf': (0.79, 0.75),
+        'demo-table-lattice.pdf': (0.75, 0.59),
         'demo-table-nested.pdf': 0.84,
-        'demo-table-shading-highlight.pdf': 0.55,
-        'demo-table-shading.pdf': 0.80,
+        'demo-table-shading-highlight.pdf': (0.55, 0.45),
+        'demo-table-shading.pdf': (0.80, 0.60),
         'demo-table-stream.pdf': 0.55,
-        'demo-table.pdf': 0.90,
-        'demo-text-alignment.pdf': 0.90,
-        'demo-text-scaling.pdf': 0.80,
-        'demo-text-unnamed-fonts.pdf': 0.80,
+        'demo-table.pdf': (0.90, 0.75),
+        'demo-text-alignment.pdf': (0.90, 0.86),
+        'demo-text-scaling.pdf': (0.80, 0.65),
+        'demo-text-unnamed-fonts.pdf': (0.80, 0.77),
         'demo-text-hidden.pdf': 0.90,
-        'demo-text.pdf': 0.80
+        'demo-text.pdf': 0.80,
+        'pdf2docx-lists-bullets3.docx': (0.98, 0.99),
     }
 
-    def setup(self):
-        '''create output path if not exist.'''
-        if not os.path.exists(output_path): os.mkdir(output_path)
-
-
-    def test_quality(self):
-        '''Convert page to image and compare similarity.'''
-        for filename in os.listdir(output_path):
-            if not filename.endswith('pdf'): continue
-
-            source_pdf_file = os.path.join(sample_path, filename)
-            target_pdf_file = os.path.join(output_path, filename)
-
-            # open pdf    
-            source_pdf = fitz.open(source_pdf_file)
-            target_pdf = fitz.open(target_pdf_file)
-
-            # compare page count
-            if len(source_pdf)>1: continue # one page sample only
-            assert len(target_pdf)==1, f"\nThe page count of {filename} is incorrect."
-
-            # compare the first page
-            diff_png = os.path.join(output_path, f'{filename[:-4]}.png')
-            sidx = get_page_similarity(target_pdf[0], source_pdf[0], diff_png)
-            threshold = TestQuality.INDEX_MAP.get(filename, 0.10)
-            print(f'Checking {filename}: {sidx} v.s. {threshold}')
-            assert sidx>=threshold, 'Significant difference might exist since similarity index is lower than threshold.'
-
+    print(f'# Looking at: {path}')
+    if os.path.basename(path) == 'demo-whisper_2_3.pdf':
+        print(f'Ignoring {path=} because known to fail.')
+        return
+    path = f'{root_path}/{path}'
+    path_leaf = os.path.basename(path)
+    _, ext = os.path.splitext(path)
+    if ext == '.docx':
+        pdf = f'{path}.pdf'
+        document_to(path, pdf)
+    else:
+        pdf = path
+    docx2 = f'{pdf}.docx'
+    pages = None
+    if os.path.basename(path) == 'demo-whisper_2_3.pdf':
+        pages = [25, 26, 27]
+    else:
+        with fitz.Document(pdf) as doc:
+            if len(doc) > 1:
+                print(f'Not testing because more than one page: {path}')
+                return
+    #print(f'Calling parse() {pdf=} {docx2=}')
+    parse(pdf, docx2, pages=pages, raw_exceptions=True)
+    assert os.path.isfile(docx2)
+    pdf2 = f'{docx2}.pdf'
+    document_to(docx2, pdf2)
+    sidx = compare_pdf(pdf, pdf2, num_pages=1)
+
+    sidx_required = docx_to_sidx_required.get(path_leaf)
+    if sidx_required:
+        if isinstance(sidx_required, tuple):
+            sr_word, sr_libreoffice = sidx_required
+            sidx_required = sr_word if platform.system() == 'Windows' else sr_libreoffice
+
+        #print(f'{path=}: {sidx_required=} {sidx=}.')
+        if sidx < sidx_required:
+            print(f'{sidx=} too low - should be >= {sidx_required=}')
+            print(f'    {pdf}')
+            print(f'    {pdf2}')
+            assert 0
+    else:
+        print(f'# No sidx_required available for {path_leaf=}.')