Skip to content

Commit

Permalink
Merge tag 'itrb-deployment-20231026' into production
Browse files Browse the repository at this point in the history
  • Loading branch information
edeutsch committed Oct 30, 2023
2 parents 486ca01 + fa575ad commit ac67d26
Show file tree
Hide file tree
Showing 27 changed files with 938 additions and 910 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,12 @@

# Stephen Ramsey, Oregon State University

# When you run this shell script, make sure your CWD is `/home/ubuntu`.
# In a bash terminal session, run the script like this
# (in this example, port 8080 is specified on the CLI):
#
# cd ~ && source <(curl -s https://raw.githubusercontent.com/RTXteam/RTX/master/DockerBuild/test-instance-scripts/build-test-arax-from-fresh-instance.sh) 8080

set -o nounset -o pipefail -o errexit

arax_base=/mnt/data/orangeboard
Expand All @@ -28,7 +34,7 @@ sudo mkdir -p ${arax_base}/databases
sudo chown ubuntu.ubuntu ${arax_base}/databases

# do a test login to arax-databases.rtx.ai, to make sure rsync won't hang up later
ssh -q -oStrictHostKeyChecking=no rtxconfig@arax.ncats.io exit
ssh -q -oStrictHostKeyChecking=no rtxconfig@arax-databases.rtx.ai exit

# do a test login to araxconfig.rtx.ai, to make sure the scp won't hang up later
ssh -q -oStrictHostKeyChecking=no araxconfig@araxconfig.rtx.ai exit
Expand Down
46 changes: 7 additions & 39 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
- [How does ARAX work?](#how-does-arax-work)
- [The Reasoners Standard Application Programming Interface](#the-reasoners-standard-application-programming-interface)
- [What knowledge providers does ARAX use?](#what-knowledge-providers-does-arax-use)
- [RTX-KG1](#rtx-kg1)
- [RTX-KG2](#rtx-kg2)
- [Columbia Open Health Data (COHD)](#columbia-open-health-data-cohd)
- [PubMed](#pubmed)
Expand Down Expand Up @@ -210,49 +209,18 @@ development process for the Reasoners Standard API.
Currently, ARAX/RTX directly accesses four main knowledge providers in order to
handle queries, along with several additional APIs for identifier mapping.

## RTX-KG1

RTX-KG1 is a knowledge graph comprising 130k nodes and 3.5M relationships that
is built by integrating concepts and concept-predicate-concept triples obtained
from 17 different knowledge providers by way of their web APIs:

1. Pathway Commons 2
2. Disease Ontology
3. Monarch Project Biolink API
4. Drug-Gene Interactions Database
5. KEGG
6. UniProtKB
7. DisGeNet
8. OMIM
9. ChEMBL
10. SIDER
11. Pharos
12. MyChem.info
13. miRGate
14. Gene Ontology
15. Monarch SciGraph API
16. Reactome
17. PubChem

RTX-KG1 complies with the Biolink model-based Translator Knowledge Graph object
model standard. RTX-KG1 is hosted in a Neo4j graph database server and can be
accessed at [kg1endpoint.rtx.ai:7474](http://kg1endpoint.rtx.ai:7474) (username
is `neo4j`; contact Team Expander Agent for the password). Alternatively, a
Neo4j dump file (in gzipped tar archive format) of KG1 can be downloaded without
password from the [kg1endpoint server](http://kg1endpoint.rtx.ai).

## RTX-KG2

RTX-KG2 is a knowledge graph comprising 7.5M nodes and 34.3M relationships
RTX-KG2 (GitHub project area is [RTXteam/RTX-KG2](https://github.com/RTXteam/RTX-KG2))
is a knowledge graph comprising 7.5M nodes and 34.3M relationships
that is built by integrating concepts and concept-predicate-concept triples
obtained from:

1. *All of the KG1 knowledge providers*
2. Unified Medical Language System (UMLS; including SNOMED CT)
3. NCBI Genes
4. Ensembl Genes
5. UniChem
6. Semantic Medline Database (SemMedDB)
1. Unified Medical Language System (UMLS; including SNOMED CT)
2. NCBI Genes
3. Ensembl Genes
4. UniChem
5. Semantic Medline Database (SemMedDB)

RTX-KG2 complies with the Biomedical Data Translator Knowledge Graph object
model standard, which is based on the Biolink model. RTX-KG2 is hosted in a
Expand Down
112 changes: 60 additions & 52 deletions code/ARAX/ARAXQuery/ARAX_background_tasker.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,45 +19,49 @@ def eprint(*args, **kwargs): print(*args, file=sys.stderr, **kwargs)
FREQ_KP_INFO_CACHER_SEC = 3600
FREQ_CHECK_ONGOING_SEC = 60

class ARAXBackgroundTasker:

class ARAXBackgroundTasker:

def __init__(self):
def __init__(self, run_kp_info_cacher=True):
self.run_kp_info_cacher = run_kp_info_cacher
timestamp = str(datetime.datetime.now().isoformat())
eprint(f"{timestamp}: INFO: ARAXBackgroundTasker created")


def run_tasks(self, config):
def run_tasks(self):

timestamp = str(datetime.datetime.now().isoformat())
eprint(f"{timestamp}: INFO: ARAXBackgroundTasker starting")

#### Set up the query tracker
# Set up the query tracker
query_tracker = ARAXQueryTracker()
kp_info_cacher = KPInfoCacher()
kp_info_cacher_counter = 0

#### Clear the table of existing queries
eprint(f"{timestamp}: INFO: ARAXBackgroundTasker: Clearing any potential stale queries in ongoing query table")
if self.run_kp_info_cacher:
kp_info_cacher = KPInfoCacher()
kp_info_cacher_counter = 0

# Clear the table of existing queries
eprint(f"{timestamp}: INFO: ARAXBackgroundTasker: Clearing any "
"potential stale queries in ongoing query table")
query_tracker.clear_ongoing_queries()

#### Print out our packages for debugging
if True:
# Print out our packages for debugging
if False: # set to true to print out the packages
eprint("Installed packages:")
for location, modname, flag in pkgutil.iter_modules():
location = f"{location}"
if 'RTX' not in location:
try:
version_str = version(modname)
eprint(f" {modname} {version_str}")
except:
except Exception:
eprint(f" {modname} ???")
else:
pass


#### Check in on the NodeSynonymizer database, which sometimes gets corrupted
node_synonymizer_path = os.path.dirname(os.path.abspath(__file__)) + "/../NodeSynonymizer"
# Check in on the NodeSynonymizer database, which sometimes gets
# corrupted
node_synonymizer_path = os.path.dirname(os.path.abspath(__file__)) + \
"/../NodeSynonymizer"
files = os.listdir(node_synonymizer_path)
already_printed_header = False
link_counter = 0
Expand All @@ -81,21 +85,19 @@ def run_tasks(self, config):
try:
os.unlink(filepath)
except Exception as error:
eprint(f"ERROR: Unable to delete file with error {error}")
eprint("ERROR: Unable to delete file with error "
f"{error}")

if file_counter != 1 or link_counter != 1:
eprint("ERROR: NodeSynonymizer state is weird. "
f"file_counter: {file_counter} "
f"link_counter: {link_counter} "
"Recommend running the database_manager and restarting")
# try:
# subprocess.check_call( [ 'python3', node_synonymizer_path + "/../ARAXQuery/ARAX_database_manager.py" ] )
# except Exception as error:
# eprint(f"ERROR: Attempt to run database manager failed with {error}")

"Recommend restarting, which will rerun the database "
"manager")

#### Check in on the databases directory
node_synonymizer_path = os.path.dirname(os.path.abspath(__file__)) + "/../NodeSynonymizer"
# Check in on the databases directory
node_synonymizer_path = os.path.dirname(os.path.abspath(__file__)) + \
"/../NodeSynonymizer"
files = os.listdir(node_synonymizer_path)
eprint("INFO: Current contents of the databases area:")

Expand All @@ -106,53 +108,59 @@ def run_tasks(self, config):
if os.path.islink(filepath):
resolved_path = os.path.dirname(os.readlink(filepath))
eprint(f" {resolved_path}")
result = subprocess.run(['ls', '-l', resolved_path], stdout=subprocess.PIPE)
result = subprocess.run(['ls', '-l', resolved_path],
stdout=subprocess.PIPE)
eprint(result.stdout.decode('utf-8'))
eprint("INFO: End listing databases area contents")



#### Loop forever doing various things
# Loop forever doing various things
my_pid = os.getpid()
while True:

#### Run the KP Info Cacher less frequently
timestamp = str(datetime.datetime.now().isoformat())
if kp_info_cacher_counter == 0:
eprint(f"{timestamp}: INFO: ARAXBackgroundTasker: Running refresh_kp_info_caches()")
try:
kp_info_cacher.refresh_kp_info_caches()
eprint(f"{timestamp}: INFO: ARAXBackgroundTasker: Completed refresh_kp_info_caches()")
except Exception as error:
exception_type, exception_value, exception_traceback = sys.exc_info()
eprint(f"{timestamp}: INFO: ARAXBackgroundTasker: refresh_kp_info_caches() failed: {error}: {repr(traceback.format_exception(exception_type, exception_value, exception_traceback))}")
kp_info_cacher_counter += 1
if kp_info_cacher_counter * FREQ_CHECK_ONGOING_SEC > \
FREQ_KP_INFO_CACHER_SEC:
kp_info_cacher_counter = 0

ongoing_queries_by_remote_address = query_tracker.check_ongoing_queries()
# Run the KP Info Cacher less frequently
if self.run_kp_info_cacher:
if kp_info_cacher_counter == 0:
timestamp = str(datetime.datetime.now().isoformat())
eprint(f"{timestamp}: INFO: ARAXBackgroundTasker: Running "
"refresh_kp_info_caches()")
try:
kp_info_cacher.refresh_kp_info_caches()
eprint(f"{timestamp}: INFO: ARAXBackgroundTasker: "
"Completed refresh_kp_info_caches()")
except Exception as error:
e_type, e_value, e_traceback =\
sys.exc_info()
err_str = repr(traceback.format_exception(e_type,
e_value,
e_traceback))
eprint(f"{timestamp}: INFO: ARAXBackgroundTasker: "
"refresh_kp_info_caches() failed: "
f"{error}: {err_str}")
kp_info_cacher_counter += 1
if kp_info_cacher_counter * FREQ_CHECK_ONGOING_SEC > \
FREQ_KP_INFO_CACHER_SEC:
kp_info_cacher_counter = 0

ongoing_queries_by_addr = query_tracker.check_ongoing_queries()
n_ongoing_queries = 0
n_clients = 0
for client, n_queries in ongoing_queries_by_remote_address.items():
for client, n_queries in ongoing_queries_by_addr.items():
n_clients += 1
n_ongoing_queries += n_queries

load_tuple = psutil.getloadavg()

timestamp = str(datetime.datetime.now().isoformat())
eprint(f"{timestamp}: INFO: ARAXBackgroundTasker (PID {my_pid}) status: waiting. Current load is {load_tuple}, n_clients={n_clients}, n_ongoing_queries={n_ongoing_queries}")
eprint(f"{timestamp}: INFO: ARAXBackgroundTasker "
f"(PID {my_pid}) status: waiting. Current "
f"load is {load_tuple}, n_clients={n_clients}, "
f"n_ongoing_queries={n_ongoing_queries}")
time.sleep(FREQ_CHECK_ONGOING_SEC)



##################################################################################################
def main():

background_tasker = ARAXBackgroundTasker()

config = {}
background_tasker.run_tasks( config )
background_tasker.run_tasks()


if __name__ == "__main__":
Expand Down
Loading

0 comments on commit ac67d26

Please sign in to comment.