Skip to content

Commit

Permalink
Stringify all children (close #158)
Browse files Browse the repository at this point in the history
  • Loading branch information
Michael-E-Rose committed Dec 27, 2024
1 parent 374a336 commit eaa3280
Show file tree
Hide file tree
Showing 4 changed files with 15 additions and 17 deletions.
8 changes: 4 additions & 4 deletions pubmed_parser/medline_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -579,10 +579,10 @@ def parse_article_info(
medline = pubmed_article.find("MedlineCitation")
article = medline.find("Article")

if article.find("ArticleTitle") is not None:
title = stringify_children(article.find("ArticleTitle")).strip() or ""
else:
title = ""
try:
title = stringify_children(article.find("ArticleTitle")) or None
except AttributeError:
title = None

if article.find("Journal/JournalIssue/Volume") is not None:
volume = article.find("Journal/JournalIssue/Volume").text or ""
Expand Down
2 changes: 1 addition & 1 deletion pubmed_parser/pubmed_oa_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -464,7 +464,7 @@ def parse_pubmed_caption(path):

fig_captions = fig.find("caption")
if fig_captions is not None:
fig_captions = fig_captions.getchildren()
fig_captions = fig_captions.getchildren()[:1]
caption = " ".join([stringify_children(c) for c in fig_captions])

graphic = fig.find("graphic")
Expand Down
12 changes: 2 additions & 10 deletions pubmed_parser/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,16 +41,8 @@ def read_xml(path, nxml=False):


def stringify_children(node):
    """Return all text contained in ``node`` as a single string.

    The text of ``node`` and of every descendant element is collected in
    document order via ``node.itertext()``; the tail of ``node`` itself is
    not part of that iteration and is therefore excluded.

    Only the leading and trailing whitespace of the combined string is
    removed.  Stripping each fragment individually would delete the spaces
    that separate words from adjacent inline elements, e.g.
    ``Student's <i>t</i>-test`` would collapse to ``Student'st-test``.

    Parameters
    ----------
    node
        An element exposing ``itertext()`` (lxml or ElementTree element).

    Returns
    -------
    str
        The concatenated text, or ``""`` when the element holds no text.
    """
    # Join first, strip afterwards: whitespace between fragments is content.
    return "".join(text for text in node.itertext() if text).strip()


def stringify_affiliation(node):
Expand Down
10 changes: 8 additions & 2 deletions tests/test_pubmed_oa_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,12 +110,18 @@ def test_parse_pubmed_caption():

def test_parse_pubmed_caption_content():
"""This is a test for the caption content."""
fig_caption = 'Aerosol delivery of sACE22.v2.4‐IgG1 alleviates lung injury and improves survival of SARS‐CoV‐2 gamma variant infected K18‐hACE2 transgenic mice \n\n'
fig_caption = 'Aerosol delivery of sACE22.v2.4‐IgG1 alleviates lung injury and improves survival of SARS‐CoV‐2 gamma variant infected K18‐hACE2 transgenic mice'
assert captions_9539395[0]['fig_caption'] == fig_caption
assert captions_9539395[0]['fig_id'] == 'emmm202216109-fig-0001'
assert captions_9539395[0]['fig_label'] == 'Figure 1'
assert captions_9539395[8]['fig_label'] is None
fig_list_items = [('A', 'K18‐hACE2 transgenic mice were inoculated with SARS‐CoV‐2 isolate /Japan/TY7‐503/2021 (gamma variant) at 1\u2009×\u2009104 PFU. sACE22.v2.4‐IgG1 (7.5\u2009ml at 8.3\u2009mg/ml in PBS) was delivered to the mice by a nebulizer in 25\u2009min at 12\u2009h, 48\u2009h, and 84\u2009h postinoculation. PBS was aerosol delivered as control.'), ('B, C', 'Survival (B) and weight loss (C). N\u2009=\u200910 mice for each group. The P‐value of the survival curve by the Gehan–Breslow–Wilcoxon test is shown. Error bars for mouse weight are centered on the mean and show SEM.'), ('D', "Viral load in the lung was measured by RT–qPCR on Day 7. The mRNA expression levels of SARS‐CoV‐2 Spike, Nsp, and Rdrp are normalized to the housekeeping gene peptidylprolyl isomerase A (Ppia). Data are presented as mean\u2009±\u2009SEM, N\u2009=\u20094 mice per group. *P\u2009<\u20090.05 by the unpaired Student's t‐test with two‐sided."), ('E', "Cytokine expression levels of Tnfa, Ifng, Il1a, and Il1b were measured by RT–qPCR normalized by Ppia. Data are presented as mean\u2009±\u2009SEM, N\u2009=\u20094 mice per group. *P\u2009<\u20090.05 by the unpaired Student's t‐test with two‐sided."), ('F, G', 'Representative H&E staining of lung sections on Day 7 postinoculation for control PBS group (F) and inhalation of the sACE22.v2.4‐IgG1 group (G). Images at left are low magnifications. Boxed regions (black) are shown at higher magnification on the right. Lungs from 4 independent mice were sectioned, stained, and imaged.')]
fig_list_items = [
('A', 'K18‐hACE2 transgenic mice were inoculated with SARS‐CoV‐2 isolate /Japan/TY7‐503/2021 (gamma variant) at 1\u2009×\u2009104PFU. sACE22.v2.4‐IgG1 (7.5\u2009ml at 8.3\u2009mg/ml in PBS) was delivered to the mice by a nebulizer in 25\u2009min at 12\u2009h, 48\u2009h, and 84\u2009h postinoculation. PBS was aerosol delivered as control.'),
('B, C', 'Survival (B) and weight loss (C).N=\u200910 mice for each group. The P‐value of the survival curve by the Gehan–Breslow–Wilcoxon test is shown. Error bars for mouse weight are centered on the mean and show SEM.'),
('D', "Viral load in the lung was measured by RT–qPCR on Day 7. The mRNA expression levels of SARS‐CoV‐2 Spike, Nsp, and Rdrp are normalized to the housekeeping gene peptidylprolyl isomerase A (Ppia). Data are presented as mean\u2009±\u2009SEM,N=\u20094 mice per group. *P<\u20090.05 by the unpaired Student'st‐test with two‐sided."),
('E', "Cytokine expression levels of Tnfa, Ifng, Il1a, and Il1b were measured by RT–qPCR normalized by Ppia. Data are presented as mean\u2009±\u2009SEM,N=\u20094 mice per group. *P<\u20090.05 by the unpaired Student'st‐test with two‐sided."),
('F, G', 'Representative H&E staining of lung sections on Day 7 postinoculation for control PBS group (F) and inhalation of the sACE22.v2.4‐IgG1 group (G). Images at left are low magnifications. Boxed regions (black) are shown at higher magnification on the right. Lungs from 4 independent mice were sectioned, stained, and imaged.')
]
assert captions_9539395[0]['fig_list-items'] == fig_list_items
assert captions_9539395[0]['graphic_ref'] == 'EMMM-14-e16109-g008'
assert captions_9539395[8]['graphic_ref'] is None
Expand Down

0 comments on commit eaa3280

Please sign in to comment.