Skip to content

Commit

Permalink
Merge pull request #117 from rki-mf1/feat/autoformat-black
Browse files Browse the repository at this point in the history
Reformat code using black + zimports. Fix file endings. Remove execute bit.
  • Loading branch information
matthuska authored Oct 24, 2023
2 parents 5d8f344 + cc50414 commit 133af0e
Show file tree
Hide file tree
Showing 19 changed files with 6,029 additions and 4,749 deletions.
12 changes: 12 additions & 0 deletions .git-blame-ignore-revs
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# A set of commits to ignore when running "git blame"
# To use this file by default for this repo, run the command (in the project
# dir):
#
# $ git config blame.ignoreRevsFile .git-blame-ignore-revs
#
# To disable this feature, run the command:
#
# $ git config blame.ignoreRevsFile ""

# Reformat code using black
921c47b2cd31a52d74e7dd42a4cdbf2e756e2209
Empty file modified LICENSE
100755 → 100644
Empty file.
82 changes: 40 additions & 42 deletions README.md
100755 → 100644

Large diffs are not rendered by default.

Empty file modified lib/EDNAFULL
100755 → 100644
Empty file.
113 changes: 59 additions & 54 deletions lib/Lineages_UPDATER.py
100755 → 100644
Original file line number Diff line number Diff line change
@@ -1,15 +1,14 @@
#!/usr/bin/python
# Maintainer: KongkitimanonK
# The method originally came from
# The method originally came from
# https://github.com/cov-lineages/pango-designation.
# We just adapt and change some parts to be used in covsonar, vocal etc.
import os
import pandas as pd
from tempfile import mkstemp, mkdtemp
import json
import os

import pandas as pd
import requests

import warnings
# warnings.simplefilter(action='ignore', category=FutureWarning)

# Due to the https://github.com/cov-lineages/pango-designation/issues/853
Expand All @@ -18,7 +17,7 @@
# from pango_aliasor.aliasor import Aliasor
# except ModuleNotFoundError as e:
# print("Dependency `pango_aliasor` missing, please install using `pip install pango_aliasor`")
# raise e
# raise e

# In covSonar 1 we will copy the class from https://github.com/corneliusroemer/pango_aliasor/blob/main/src/pango_aliasor/aliasor.py
# but for V.2 we will switch using the package to "pip install pango_aliasor"
Expand All @@ -29,7 +28,9 @@ def __init__(self, alias_file=None):
if alias_file is None:
import importlib.resources

with importlib.resources.open_text("pango_designation", "alias_key.json") as file:
with importlib.resources.open_text(
"pango_designation", "alias_key.json"
) as file:
file = json.load(file)

else:
Expand All @@ -45,21 +46,21 @@ def __init__(self, alias_file=None):

self.realias_dict = {v: k for k, v in self.alias_dict.items()}

def compress(self,name):
name_split = name.split('.')
def compress(self, name):
name_split = name.split(".")
levels = len(name_split) - 1
num_indirections = (levels -1) // 3
num_indirections = (levels - 1) // 3
if num_indirections <= 0:
return name
alias = ".".join(name_split[0:(3*num_indirections + 1)])
ending = ".".join(name_split[(3*num_indirections + 1):])
return self.realias_dict[alias] + '.' + ending
alias = ".".join(name_split[0 : (3 * num_indirections + 1)])
ending = ".".join(name_split[(3 * num_indirections + 1) :])
return self.realias_dict[alias] + "." + ending

def uncompress(self,name):
def uncompress(self, name):
# A function that prints the output to the screen.
if(pd.isna(name)):
if pd.isna(name):
return ""
name_split = name.split('.')
name_split = name.split(".")
letter = name_split[0]
try:
unaliased = self.alias_dict[letter]
Expand All @@ -68,49 +69,52 @@ def uncompress(self,name):
if len(name_split) == 1:
return name
if len(name_split) == 2:
return unaliased + '.' + name_split[1]
return unaliased + "." + name_split[1]
else:
return unaliased + '.' + ".".join(name_split[1:])
return unaliased + "." + ".".join(name_split[1:])


def lts(lineage):
items = []
for item in lineage.split("."):
item_string = str(item)
items.append((5-len(item))*"0" + item_string)
items.append((5 - len(item)) * "0" + item_string)
return "".join(items)


def download_source(tmp_dir):
lineages_url='https://raw.githubusercontent.com/cov-lineages/pango-designation/master/lineages.csv'
alias_key_url='https://raw.githubusercontent.com/cov-lineages/pango-designation/master/pango_designation/alias_key.json'
lineag=os.path.join(tmp_dir,'lineags.csv')
alias_key=os.path.join(tmp_dir,'alias_key.json')
print('Download lineages')
lineages_url = "https://raw.githubusercontent.com/cov-lineages/pango-designation/master/lineages.csv"
alias_key_url = "https://raw.githubusercontent.com/cov-lineages/pango-designation/master/pango_designation/alias_key.json"
lineag = os.path.join(tmp_dir, "lineags.csv")
alias_key = os.path.join(tmp_dir, "alias_key.json")
print("Download lineages")
url_content = requests.get(lineages_url).content
csv_file = open(lineag, 'wb')
csv_file = open(lineag, "wb")
csv_file.write(url_content)
csv_file.close()
print('Download alias_key')
print("Download alias_key")
items = requests.get(alias_key_url)
data = items.json()
with open( alias_key , 'w') as f:
with open(alias_key, "w") as f:
json.dump(data, f)
return alias_key,lineag
return alias_key, lineag


def process_lineage(alias_key_path, lineages_path, output):
"""
# handle duplicate values
with open(alias_key_path) as f:
# load json objects to dictionaries
# load json objects to dictionaries
data_dict = json.load(f)
for k, v in data_dict.items():
if type(v) is list:
data_dict[k] = list(set(v))
# rewrite the json
# rewrite the json
with open(alias_key_path ,'w') as nf:
json.dump(data_dict, nf)
"""
print('Create all lineages')
print("Create all lineages")
aliasor = Aliasor(alias_key_path)
df_lineages = pd.read_csv(lineages_path)
lineages = df_lineages.lineage.unique()
Expand All @@ -122,36 +126,37 @@ def process_lineage(alias_key_path, lineages_path, output):
uncompressed_lineages.sort(key=lts)

sorted_lineages = list(map(aliasor.compress, uncompressed_lineages))
print('Calculate parent-child relationship')
print("Calculate parent-child relationship")
# -- Fill the sub child --
# the current method is working, but it is not good in term of performance
# the algoritm use 3 loops
_final_list=[]
for _id in sorted_lineages:
_final_list = []
for _id in sorted_lineages:
alias_lineage_char = aliasor.uncompress(_id)
sub_lineage_list = []
row_dict = {}
#print(_id, '=',alias_lineage_char) # check alias name
for name_ in uncompressed_lineages: # fetch all lineage again
for index, letter in enumerate(name_.split('.')):
if index!=0:
letter=root+'.'+letter
root=letter
# print(_id, '=',alias_lineage_char) # check alias name
for name_ in uncompressed_lineages: # fetch all lineage again
for index, letter in enumerate(name_.split(".")):
if index != 0:
letter = root + "." + letter
root = letter
else:
root=letter
root = letter
if letter == alias_lineage_char:
sub_lineage_list.append(aliasor.compress(name_))

sub_lineage_list.remove(aliasor.compress(alias_lineage_char)) # remove root lineage
if(len(sub_lineage_list)>0):
row_dict['lineage']= _id
row_dict['sublineage']= ",".join(sub_lineage_list)
else:
row_dict['lineage']= _id
row_dict['sublineage']= "none"
_final_list.append(row_dict)
print('Write output:',output)
df = pd.DataFrame.from_dict(_final_list, orient='columns')
df = df[df.lineage != '']
df.sort_values(by=['lineage']).to_csv(output, sep="\t", index=False)

sub_lineage_list.remove(
aliasor.compress(alias_lineage_char)
) # remove root lineage
if len(sub_lineage_list) > 0:
row_dict["lineage"] = _id
row_dict["sublineage"] = ",".join(sub_lineage_list)
else:
row_dict["lineage"] = _id
row_dict["sublineage"] = "none"
_final_list.append(row_dict)
print("Write output:", output)
df = pd.DataFrame.from_dict(_final_list, orient="columns")
df = df[df.lineage != ""]
df.sort_values(by=["lineage"]).to_csv(output, sep="\t", index=False)
Empty file modified lib/__init__.py
100755 → 100644
Empty file.
Empty file modified lib/db.sqlite
100755 → 100644
Empty file.
Empty file modified lib/doctest_b117.fna
100755 → 100644
Empty file.
Empty file modified lib/doctest_b117.pickle
100755 → 100644
Empty file.
2 changes: 1 addition & 1 deletion lib/migrate/4.sql
100755 → 100644
Original file line number Diff line number Diff line change
Expand Up @@ -46,4 +46,4 @@ LEFT JOIN sequence USING (seqhash)
LEFT JOIN sequence2prot USING (seqhash)
LEFT JOIN prot USING (varid);

COMMIT;
COMMIT;
Empty file modified lib/ref.fna
100755 → 100644
Empty file.
Empty file modified lib/ref.gff3
100755 → 100644
Empty file.
Loading

0 comments on commit 133af0e

Please sign in to comment.