Skip to content

Commit

Permalink
Merge pull request #2 from pirovc/dev
Browse files Browse the repository at this point in the history
MultiTax version 1.1.0
  • Loading branch information
pirovc authored May 2, 2021
2 parents f0e962d + 5ee4dfd commit 07bc278
Show file tree
Hide file tree
Showing 17 changed files with 884 additions and 311 deletions.
5 changes: 4 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,10 @@ tax.build_lineages()
tax.leaves("p__Hadarchaeota")
# ['s__DG-33 sp004375695', 's__DG-33 sp001515185', 's__Hadarchaeum yellowstonense', 's__B75-G9 sp003661465', 's__WYZ-LMO6 sp004347925', 's__B88-G9 sp003660555']

# Search names and filter by rank
tax.search_name("Escherichia", exact=False, rank="genus")
# ['g__Escherichia', 'g__Escherichia_C']

# Show stats of loaded tax
tax.stats()
#{'leaves': 31910,
Expand Down Expand Up @@ -212,7 +216,6 @@ Not yet implemented. The goal here is to map different taxonomies if the linkage

## Further ideas

- Advanced name search
- Add/remove/update nodes
- Conversion between taxonomies (write on specific files/format)

Expand Down
6 changes: 3 additions & 3 deletions docs/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<meta name="generator" content="pdoc 6.4.1" />
<title>module list &ndash; pdoc 6.4.1</title>
<meta name="generator" content="pdoc 6.4.2" />
<title>module list &ndash; pdoc 6.4.2</title>
<link rel="icon" type="image/svg+xml" href="data:image/svg+xml,%3Csvg%20xmlns%3D%22http%3A//www.w3.org/2000/svg%22%20width%3D%2264%22%20height%3D%2264%22%20viewBox%3D%2244.5%202.5%2015%2015%22%3E%3Cpath%20d%3D%22M49.351%2021.041c-.233-.721-.546-2.408-.772-4.076-.042-.09-.067-.187-.046-.288-.166-1.347-.277-2.625-.241-3.351-1.378-1.008-2.271-2.586-2.271-4.362%200-.976.272-1.935.788-2.774.057-.094.122-.18.184-.268-.033-.167-.052-.339-.052-.516%200-1.477%201.202-2.679%202.679-2.679.791%200%201.496.352%201.987.9a6.3%206.3%200%200%201%201.001.029c.492-.564%201.207-.929%202.012-.929%201.477%200%202.679%201.202%202.679%202.679a2.65%202.65%200%200%201-.269%201.148c.383.747.595%201.572.595%202.41%200%202.311-1.507%204.29-3.635%205.107.037.699.147%202.27.423%203.294l.137.461c.156%202.136-4.612%205.166-5.199%203.215zm.127-4.919a4.78%204.78%200%200%200%20.775-.584c-.172-.115-.505-.254-.88-.378zm.331%202.302l.828-.502c-.202-.143-.576-.328-.984-.49zm.45%202.157l.701-.403c-.214-.115-.536-.249-.891-.376l.19.779zM49.13%204.141c0%20.152.123.276.276.276s.275-.124.275-.276-.123-.276-.276-.276-.275.124-.275.276zm.735-.389a1.15%201.15%200%200%201%20.314.783%201.16%201.16%200%200%201-1.162%201.162c-.457%200-.842-.27-1.032-.653-.026.117-.042.238-.042.362a1.68%201.68%200%200%200%201.679%201.679%201.68%201.68%200%200%200%201.679-1.679c0-.843-.626-1.535-1.436-1.654zm3.076%201.654a1.68%201.68%200%200%200%201.679%201.679%201.68%201.68%200%200%200%201.679-1.679c0-.037-.009-.072-.011-.109-.21.3-.541.508-.935.508a1.16%201.16%200%200%201-1.162-1.162%201.14%201.14%200%200%201%20.474-.912c-.015%200-.03-.005-.045-.005-.926.001-1.679.754-1.679%201.68zm1.861-1.265c0%20.152.123.276.276.276s.275-.124.275-.276-.123-.276-.276-.276-.275.124-.275.276zm1.823%204.823c0-.52-.103-1.035-.288-1.52-.466.394-1.06.64-1.717.64-1.144%200-2.116-.725-2.499-1.738-.383%201.012-1.355%201.738-2.499%201.738-.867%200-1.631-.421-2.121-1.062-.307.605-.478%201.267-.478%201.942%200%202.486%202.153%204.51%204.801%204.51s4.801-2.023%2
04.801-4.51zm-3.032%209.156l-.146-.492c-.276-1.02-.395-2.457-.444-3.268a6.11%206.11%200%200%201-1.18.115%206.01%206.01%200%200%201-2.536-.562l.006.175c.802.215%201.848.612%202.021%201.25.079.295-.021.601-.274.837l-.598.501c.667.304%201.243.698%201.311%201.179.02.144.022.507-.393.787l-.564.365c1.285.521%201.361.96%201.381%201.126.018.142.011.496-.427.746l-.854.489c.064-1.19%201.985-2.585%202.697-3.248zM49.34%209.925c0-.667%201-.667%201%200%200%20.653.818%201.205%201.787%201.205s1.787-.552%201.787-1.205c0-.667%201-.667%201%200%200%201.216-1.25%202.205-2.787%202.205s-2.787-.989-2.787-2.205zm-.887-7.633c-.093.077-.205.114-.317.114a.5.5%200%200%201-.318-.886L49.183.397a.5.5%200%200%201%20.703.068.5.5%200%200%201-.069.703zm7.661-.065c-.086%200-.173-.022-.253-.068l-1.523-.893c-.575-.337-.069-1.2.506-.863l1.523.892a.5.5%200%200%201%20.179.685c-.094.158-.261.247-.432.247z%22%20fill%3D%22%233bb300%22/%3E%3C/svg%3E"/>

<style type="text/css">/*!
Expand Down Expand Up @@ -94,7 +94,7 @@

</head>
<body><div class="pdoc pdoc-index">
<a id="pdoc-logo" title="pdoc 6.4.1" href="https://pdoc.dev"></a>
<a id="pdoc-logo" title="pdoc 6.4.2" href="https://pdoc.dev"></a>
<input id="pdoc-search" type="text" placeholder="search" aria-label="search for a module"/>
<h5>Available Modules</h5>
<div id="pdoc-modules">
Expand Down
473 changes: 368 additions & 105 deletions docs/multitax.html

Large diffs are not rendered by default.

285 changes: 160 additions & 125 deletions docs/multitax/multitax.html

Large diffs are not rendered by default.

28 changes: 27 additions & 1 deletion docs/multitax/utils.html
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<meta name="generator" content="pdoc 6.4.1" />
<meta name="generator" content="pdoc 6.4.2" />
<title>multitax.utils API documentation</title>
<link rel="icon" type="image/svg+xml" href="data:image/svg+xml,%3Csvg%20xmlns%3D%22http%3A//www.w3.org/2000/svg%22%20width%3D%2264%22%20height%3D%2264%22%20viewBox%3D%2244.5%202.5%2015%2015%22%3E%3Cpath%20d%3D%22M49.351%2021.041c-.233-.721-.546-2.408-.772-4.076-.042-.09-.067-.187-.046-.288-.166-1.347-.277-2.625-.241-3.351-1.378-1.008-2.271-2.586-2.271-4.362%200-.976.272-1.935.788-2.774.057-.094.122-.18.184-.268-.033-.167-.052-.339-.052-.516%200-1.477%201.202-2.679%202.679-2.679.791%200%201.496.352%201.987.9a6.3%206.3%200%200%201%201.001.029c.492-.564%201.207-.929%202.012-.929%201.477%200%202.679%201.202%202.679%202.679a2.65%202.65%200%200%201-.269%201.148c.383.747.595%201.572.595%202.41%200%202.311-1.507%204.29-3.635%205.107.037.699.147%202.27.423%203.294l.137.461c.156%202.136-4.612%205.166-5.199%203.215zm.127-4.919a4.78%204.78%200%200%200%20.775-.584c-.172-.115-.505-.254-.88-.378zm.331%202.302l.828-.502c-.202-.143-.576-.328-.984-.49zm.45%202.157l.701-.403c-.214-.115-.536-.249-.891-.376l.19.779zM49.13%204.141c0%20.152.123.276.276.276s.275-.124.275-.276-.123-.276-.276-.276-.275.124-.275.276zm.735-.389a1.15%201.15%200%200%201%20.314.783%201.16%201.16%200%200%201-1.162%201.162c-.457%200-.842-.27-1.032-.653-.026.117-.042.238-.042.362a1.68%201.68%200%200%200%201.679%201.679%201.68%201.68%200%200%200%201.679-1.679c0-.843-.626-1.535-1.436-1.654zm3.076%201.654a1.68%201.68%200%200%200%201.679%201.679%201.68%201.68%200%200%200%201.679-1.679c0-.037-.009-.072-.011-.109-.21.3-.541.508-.935.508a1.16%201.16%200%200%201-1.162-1.162%201.14%201.14%200%200%201%20.474-.912c-.015%200-.03-.005-.045-.005-.926.001-1.679.754-1.679%201.68zm1.861-1.265c0%20.152.123.276.276.276s.275-.124.275-.276-.123-.276-.276-.276-.275.124-.275.276zm1.823%204.823c0-.52-.103-1.035-.288-1.52-.466.394-1.06.64-1.717.64-1.144%200-2.116-.725-2.499-1.738-.383%201.012-1.355%201.738-2.499%201.738-.867%200-1.631-.421-2.121-1.062-.307.605-.478%201.267-.478%201.942%200%202.486%202.153%204.51%204.801%204.51s4.801-2.023%2
04.801-4.51zm-3.032%209.156l-.146-.492c-.276-1.02-.395-2.457-.444-3.268a6.11%206.11%200%200%201-1.18.115%206.01%206.01%200%200%201-2.536-.562l.006.175c.802.215%201.848.612%202.021%201.25.079.295-.021.601-.274.837l-.598.501c.667.304%201.243.698%201.311%201.179.02.144.022.507-.393.787l-.564.365c1.285.521%201.361.96%201.381%201.126.018.142.011.496-.427.746l-.854.489c.064-1.19%201.985-2.585%202.697-3.248zM49.34%209.925c0-.667%201-.667%201%200%200%20.653.818%201.205%201.787%201.205s1.787-.552%201.787-1.205c0-.667%201-.667%201%200%200%201.216-1.25%202.205-2.787%202.205s-2.787-.989-2.787-2.205zm-.887-7.633c-.093.077-.205.114-.317.114a.5.5%200%200%201-.318-.886L49.183.397a.5.5%200%200%201%20.703.068.5.5%200%200%201-.069.703zm7.661-.065c-.086%200-.173-.022-.253-.068l-1.523-.893c-.575-.337-.069-1.2.506-.863l1.523.892a.5.5%200%200%201%20.179.685c-.094.158-.261.247-.432.247z%22%20fill%3D%22%233bb300%22/%3E%3C/svg%3E"/>

Expand Down Expand Up @@ -57,6 +57,9 @@ <h2>API Documentation</h2>
<li>
<a class="function" href="#join_check">join_check</a>
</li>
<li>
<a class="function" href="#filter_function">filter_function</a>
</li>
</ul>


Expand Down Expand Up @@ -222,6 +225,10 @@ <h1 class="modulename">
<span class="k">return</span> <span class="n">sep</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="nb">map</span><span class="p">(</span><span class="nb">str</span><span class="p">,</span> <span class="n">elements</span><span class="p">))</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">return</span> <span class="s2">&quot;&quot;</span>


<span class="k">def</span> <span class="nf">filter_function</span><span class="p">(</span><span class="n">elements</span><span class="p">,</span> <span class="n">function</span><span class="p">,</span> <span class="n">value</span><span class="p">):</span>
<span class="k">return</span> <span class="p">[</span><span class="n">elements</span><span class="p">[</span><span class="n">i</span><span class="p">]</span> <span class="k">for</span> <span class="n">i</span><span class="p">,</span> <span class="n">v</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="nb">map</span><span class="p">(</span><span class="n">function</span><span class="p">,</span> <span class="n">elements</span><span class="p">))</span> <span class="k">if</span> <span class="n">v</span> <span class="o">==</span> <span class="n">value</span><span class="p">]</span>
</pre></div>

</details>
Expand Down Expand Up @@ -576,6 +583,25 @@ <h1 class="modulename">



</section>
<section id="filter_function">
<div class="attr function"><a class="headerlink" href="#filter_function">#&nbsp;&nbsp</a>


<span class="def">def</span>
<span class="name">filter_function</span><span class="signature">(elements, function, value)</span>:
</div>

<details>
<summary>View Source</summary>
<div class="codehilite"><pre><span></span><span class="k">def</span> <span class="nf">filter_function</span><span class="p">(</span><span class="n">elements</span><span class="p">,</span> <span class="n">function</span><span class="p">,</span> <span class="n">value</span><span class="p">):</span>
<span class="k">return</span> <span class="p">[</span><span class="n">elements</span><span class="p">[</span><span class="n">i</span><span class="p">]</span> <span class="k">for</span> <span class="n">i</span><span class="p">,</span> <span class="n">v</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="nb">map</span><span class="p">(</span><span class="n">function</span><span class="p">,</span> <span class="n">elements</span><span class="p">))</span> <span class="k">if</span> <span class="n">v</span> <span class="o">==</span> <span class="n">value</span><span class="p">]</span>
</pre></div>

</details>



</section>
</main>
</body>
Expand Down
6 changes: 3 additions & 3 deletions multitax/customtx.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@ def __init__(self, cols: list=["node", "parent", "rank", "name"], sep: str="\t",
Example:
tax_custom1 = CustomTx(files="my_custom_tax.tsv", cols=["node","parent","rank"])
tax_custom2 = CustomTx(files="my_custom_tax.tsv", cols={"node": 0, "parent": 1, "name": 5, "rank": 3})
tax_custom1 = CustomTx(files="my_custom_tax.tsv", cols=["node","parent","rank"])
tax_custom2 = CustomTx(files="my_custom_tax.tsv", cols={"node": 0, "parent": 1, "name": 5, "rank": 3})
"""

self._cols = self._parse_cols(cols)
Expand All @@ -29,7 +29,7 @@ def __repr__(self):
args = ['{}={}'.format(k, repr(v)) for (k, v) in vars(self).items()]
return 'CustomTx({})'.format(', '.join(args))

def _parse(self, fhs):
def _parse(self, fhs, **kwargs):
nodes = {}
ranks = {}
names = {}
Expand Down
2 changes: 1 addition & 1 deletion multitax/greengenestx.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ def __repr__(self):
args = ['{}={}'.format(k, repr(v)) for (k, v) in vars(self).items()]
return 'GreengenesTx({})'.format(', '.join(args))

def _parse(self, fhs):
def _parse(self, fhs, **kwargs):
nodes = {}
ranks = {}
names = {}
Expand Down
2 changes: 1 addition & 1 deletion multitax/gtdbtx.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ def __repr__(self):
args = ['{}={}'.format(k, repr(v)) for (k, v) in vars(self).items()]
return 'GtdbTx({})'.format(', '.join(args))

def _parse(self, fhs):
def _parse(self, fhs, **kwargs):
nodes = {}
ranks = {}
names = {}
Expand Down
77 changes: 49 additions & 28 deletions multitax/multitax.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

class MultiTax(object):

version = "1.0.0"
version = "1.1.0"

_default_urls = []
_default_root_node = "1"
Expand All @@ -22,25 +22,27 @@ def __init__(self,
undefined_rank: str=None,
build_name_nodes: bool=False,
build_node_children: bool=False,
build_rank_nodes: bool=False):
build_rank_nodes: bool=False,
extended_names: bool=False):
"""
Main constructor of MultiTax and sub-classes
Parameters:
* **files** *[str, list]*: One or more local files to parse
* **urls** *[str, list]*: One or more urls to download and parse
* **output_prefix** *[str]*: Directory to write downloaded files
* **root_node** *[str]*: Define an alternative root node
* **root_parent** *[str]*: Define the root parent node identifier
* **files** *[str, list]*: One or more local files to parse.
* **urls** *[str, list]*: One or more urls to download and parse.
* **output_prefix** *[str]*: Directory to write downloaded files.
* **root_node** *[str]*: Define an alternative root node.
* **root_parent** *[str]*: Define the root parent node identifier.
* **root_name** *[str]*: Define an alternative root name. Set to None to use original name.
* **root_rank** *[str]*: Define an alternative root rank. Set to None to use original rank.
* **undefined_node** *[str]*: Define a default return value for undefined nodes
* **undefined_name** *[str]*: Define a default return value for undefined names
* **undefined_rank** *[str]*: Define a default return value for undefined ranks
* **build_node_children** *[bool]*: Build node,children dict (otherwise it will be created on first use)
* **build_name_nodes** *[bool]*: Build name,nodes dict (otherwise it will be created on first use)
* **build_rank_nodes** *[bool]*: Build rank,nodes dict (otherwise it will be created on first use)
* **undefined_node** *[str]*: Define a default return value for undefined nodes.
* **undefined_name** *[str]*: Define a default return value for undefined names.
* **undefined_rank** *[str]*: Define a default return value for undefined ranks.
* **build_node_children** *[bool]*: Build node,children dict (otherwise it will be created on first use).
* **build_name_nodes** *[bool]*: Build name,nodes dict (otherwise it will be created on first use).
* **build_rank_nodes** *[bool]*: Build rank,nodes dict (otherwise it will be created on first use).
* **extended_names** *[bool]*: Parse extended names if available.
Example:
tax_ncbi = NcbiTx()
Expand Down Expand Up @@ -78,7 +80,7 @@ def __init__(self,

if fhs:
# Parse taxonomy
self._nodes, self._ranks, self._names = self._parse(fhs)
self._nodes, self._ranks, self._names = self._parse(fhs, extended_names=extended_names)
close_files(fhs)
# Save sources for stats (files or urls)
self.sources = list(fhs.keys())
Expand Down Expand Up @@ -171,29 +173,48 @@ def children(self, node: str):
else:
return []

def search_name(self, text: str):
def search_name(self, text: str, rank: str=None, exact: bool=True):
"""
Searches names containing a certain text (case sensitive) and return their respective nodes.
Search nodes by exact or partial name.
Parameters:
* **text** *[str]*: Text to search.
* **rank** *[str]*: Filter results by rank.
* **exact** *[bool]*: Exact or partial name search (both case sensitive).
Returns: list of matching nodes
"""
# Setup on first use
if not self._name_nodes:
self._name_nodes = reverse_dict(self._names)

matching_nodes = []
for name in self._name_nodes:
if exact:
ret = self._exact_name(text, self._name_nodes)
else:
ret = self._partial_name(text, self._name_nodes)

# Only return nodes of chosen rank
if rank:
return filter_function(ret, self.rank, rank)
else:
return ret

def _partial_name(self, text: str, names: dict):
"""
Searches names containing a certain text (case sensitive) and returns their respective nodes.
"""
matching_nodes = set()
for name in names:
if text in name:
matching_nodes.extend(self._name_nodes[name])
return matching_nodes
matching_nodes.update(names[name])
return list(matching_nodes)

def nodes_name(self, name: str):
def _exact_name(self, text: str, names: dict):
"""
Returns list of nodes of a given exact name.
Returns list of nodes of a given exact name (case sensitive).
"""
# Setup on first use
if not self._name_nodes:
self._name_nodes = reverse_dict(self._names)
if name in self._name_nodes:
return self._name_nodes[name]
if text in names:
return names[text]
else:
return []

Expand Down
Loading

0 comments on commit 07bc278

Please sign in to comment.