From 6c48e554bb3b0dfb38872178a25d2e02465480b2 Mon Sep 17 00:00:00 2001 From: odhran-o-d <39832722+odhran-o-d@users.noreply.github.com> Date: Mon, 24 Jul 2023 20:06:36 +0100 Subject: [PATCH] made sure metadata name was updated with citation (#168) * made sure metadata name was updated with citation * added location awareness test --- paperqa/docs.py | 4 ++-- paperqa/version.py | 2 +- tests/test_paperqa.py | 16 ++++++++++++++++ 3 files changed, 19 insertions(+), 3 deletions(-) diff --git a/paperqa/docs.py b/paperqa/docs.py index b2cf76b3..f7ec3fed 100644 --- a/paperqa/docs.py +++ b/paperqa/docs.py @@ -453,7 +453,7 @@ async def process(match): try: citation = match.metadata["doc"]["citation"] if detailed_citations: - match.metadata["name"] + ": " + citation + citation = match.metadata["name"] + ": " + citation context = await summary_chain.arun( question=answer.question, # Add name so chunk is stated @@ -493,7 +493,7 @@ async def process(match): context_str = "\n\n".join( [ f"{c.text.name}: {c.context}" - + (f". Based on {c.text.doc.citation}" if detailed_citations else "") + + (f" Based on {c.text.doc.citation}" if detailed_citations else "") for c in answer.contexts ] ) diff --git a/paperqa/version.py b/paperqa/version.py index 903a158a..a5cfdf59 100644 --- a/paperqa/version.py +++ b/paperqa/version.py @@ -1 +1 @@ -__version__ = "3.4.0" +__version__ = "3.4.1" diff --git a/tests/test_paperqa.py b/tests/test_paperqa.py index 3155e3aa..44071c84 100644 --- a/tests/test_paperqa.py +++ b/tests/test_paperqa.py @@ -23,6 +23,22 @@ async def on_llm_new_token(self, token: str, **kwargs: Any) -> None: print(token) +def test_location_awareness(): + tests_dir = os.path.dirname(os.path.abspath(__file__)) + doc_path = os.path.join(tests_dir, "paper.pdf") + with open(doc_path, "rb") as f: + docs = Docs() + docs.add_file(f, "Wellawatte et al, XAI Review, 2023") + answer = docs.get_evidence( + Answer( + question="Which page is the statement 'Deep learning (DL) is advancing the boundaries of computational" + + "chemistry because it can accurately model non-linear structure-function relationships.' on?" + ), + detailed_citations=True, + ) + assert "2" in answer.context or "1" in answer.context + + def test_maybe_is_text(): assert maybe_is_text("This is a test. The sample conc. was 1.0 mM (at 245 ^F)") assert not maybe_is_text("\\C0\\C0\\B1\x00")