Skip to content

Commit

Permalink
feat(appellate): Handle Attachment Pages in download_pdf
Browse files (browse the repository at this point in the history)
This commit improves the download_pdf method to correctly handle cases where attachment pages are returned instead of the expected PDF documents.
  • Loading branch information
ERosendo committed Jan 24, 2025
1 parent 0c7c2d5 commit 0585115
Showing 1 changed file with 17 additions and 7 deletions.
24 changes: 17 additions & 7 deletions juriscraper/pacer/appellate_docket.py
Original file line number Diff line number Diff line change
@@ -329,14 +329,24 @@ def download_pdf(
)
r = self.session.get(self.url, params=query_params)
r.raise_for_status()
if is_pdf(r):
logger.info(
"Got PDF binary data for document #%s in court %s",
pacer_doc_id,
self.court_id,

if b"Documents are attached to this filing" in r.content:
error_message = (
"This PACER document is part of an attachment page. "
"Our system currently lacks the metadata for this attachment. "
"Please purchase the attachment page and try again."
)
return r, ""
return None, "Unable to download PDF."
return None, error_message

if not is_pdf(r):
return None, "Unable to download PDF."

logger.info(
"Got PDF binary data for document #%s in court %s",
pacer_doc_id,
self.court_id,
)
return r, ""

@property
def metadata(self):
Expand Down

0 comments on commit 0585115

Please sign in to comment.