Skip to content

Commit

Permalink
Printing the input text that led to bad JSON parsing
Browse files Browse the repository at this point in the history
  • Loading branch information
jamesbraza committed Oct 23, 2024
1 parent 96dca14 commit f96859e
Showing 1 changed file with 9 additions and 13 deletions.
22 changes: 9 additions & 13 deletions paperqa/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,29 +13,25 @@
def llm_parse_json(text: str) -> dict:
    """Read LLM output and extract JSON data from it.

    The JSON object may be wrapped in a markdown ```json fence and/or be
    surrounded by free-form prose; both are stripped before parsing.

    Args:
        text: Raw LLM output expected to contain one JSON object.

    Returns:
        The parsed JSON object.

    Raises:
        ValueError: If the extracted text cannot be decoded as JSON. The
            message embeds the repr of the original ``text`` so the
            offending model output can be inspected.
    """
    # fetch from markdown ```json if present
    ptext = text.strip().split("```json")[-1].split("```")[0]
    # drop anything before the first { and after the last }
    ptext = ("{" + ptext.split("{", 1)[-1]).rsplit("}", 1)[0] + "}"

    def escape_newlines(match: re.Match) -> str:
        # Raw newlines are not allowed inside JSON string literals,
        # so turn them into the two-character escape sequence.
        return match.group(0).replace("\n", "\\n")

    # Match anything between double quotes
    # including escaped quotes and other escaped characters.
    # https://regex101.com/r/VFcDmB/1
    pattern = r'"(?:[^"\\]|\\.)*"'
    ptext = re.sub(pattern, escape_newlines, ptext)
    try:
        return json.loads(ptext)
    except json.JSONDecodeError as e:
        # Report the untouched input (not the preprocessed ptext) so the
        # caller sees exactly what the model produced.
        raise ValueError(
            f"Failed to parse JSON from text {text!r}. Your model may not be capable of"
            " supporting JSON output or our parsing technique could use some work. Try"
            " a different model or specify `Settings(prompts={'use_json': False})`"
        ) from e


Expand Down

0 comments on commit f96859e

Please sign in to comment.