Skip to content

Commit

Permalink
Merge pull request #83 from mwang87/splash
Browse files Browse the repository at this point in the history
SPLASH integration
  • Loading branch information
mwang87 authored Sep 15, 2020
2 parents 40ee41a + 7d9db0a commit 8d2f776
Show file tree
Hide file tree
Showing 11 changed files with 192 additions and 75 deletions.
5 changes: 5 additions & 0 deletions .github/workflows/loadtest.yml
Original file line number Diff line number Diff line change
Expand Up @@ -31,10 +31,15 @@ jobs:
flask flex-swagger gunicorn locust lxml matplotlib numba numpy \
pillow pytest pyzbar qrcode rdkit requests requests-cache scipy \
spectrum_utils werkzeug zbar
apt-get update -y && apt-get install -y git-core
source activate usi
pip install "git+git://github.com/berlinguyinca/spectra-hash.git#egg=splash&subdirectory=python"
echo "source activate usi" > ~/.bashrc
- name: Load testing with locust
run: |
export PATH="$HOME/miniconda/bin:$PATH"
source ~/.bashrc
locust -f test/locustfile.py --headless -u 100 -r 10 \
-H https://metabolomics-usi.ucsd.edu/ -t 120s
5 changes: 4 additions & 1 deletion .github/workflows/unittest.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,14 +23,17 @@ jobs:
- name: Install dependencies
run: |
wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh;
bash miniconda.sh -b -p $HOME/miniconda
bash miniconda.sh -b -p $HOME/miniconda || true
export PATH="$HOME/miniconda/bin:$PATH"
conda config --set always_yes yes --set changeps1 no
conda update -q conda
conda create -n usi -c conda-forge -c bioconda -c defaults flake8 \
flask flex-swagger gunicorn locust lxml matplotlib numba numpy \
pillow pytest pyzbar qrcode rdkit requests requests-cache scipy \
spectrum_utils werkzeug zbar
apt-get update -y && apt-get install -y git-core
source activate usi
pip install "git+git://github.com/berlinguyinca/spectra-hash.git#egg=splash&subdirectory=python"
echo "source activate usi" > ~/.bashrc
- name: Lint with flake8
run: |
Expand Down
22 changes: 7 additions & 15 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -2,22 +2,14 @@ FROM continuumio/miniconda3:4.8.2
MAINTAINER Mingxun Wang "[email protected]"

WORKDIR /app
RUN apt-get update -y
RUN conda create -n usi -c rdkit rdkit=2019.09.3.0
RUN /bin/bash -c "source activate usi"
RUN apt-get update -y && \
apt-get install -y libxrender-dev && \
apt-get install -y git-core
RUN conda create -y -n usi -c conda-forge -c bioconda -c defaults flask \
gunicorn matplotlib numba numpy openssl qrcode rdkit requests \
requests-cache scipy spectrum_utils werkzeug
RUN /bin/bash -c 'source activate usi && pip install "git+git://github.com/berlinguyinca/spectra-hash.git#egg=splash&subdirectory=python"'
RUN echo "source activate usi" > ~/.bashrc
RUN conda install -n usi -c anaconda flask
RUN conda install -n usi -c anaconda gunicorn
RUN conda install -n usi -c anaconda requests
RUN conda install -n usi -c bioconda spectrum_utils
RUN conda install -n usi -c conda-forge xmltodict
RUN conda install -n usi -c conda-forge qrcode
RUN conda install -n usi -c conda-forge requests-cache
RUN conda install -n usi -c anaconda scipy

RUN apt-get install -y libxrender-dev

RUN conda update -n usi -c anaconda openssl

COPY . /app
WORKDIR /app
4 changes: 2 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ attach:

# Github Testing with Act
test-actions-unit:
act -j unit-test -P ubuntu-latest=nektos/act-environments-ubuntu:18.04 -b
act -j unit-test -P ubuntu-latest=nektos/act-environments-ubuntu:18.04

test-actions-locust:
act -j load-test -P ubuntu-latest=nektos/act-environments-ubuntu:18.04 -b
act -j load-test -P ubuntu-latest=nektos/act-environments-ubuntu:18.04
83 changes: 54 additions & 29 deletions parsing.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

import requests
import spectrum_utils.spectrum as sus
import splash

import parsing_legacy
from error import UsiError
Expand Down Expand Up @@ -58,6 +59,8 @@
flags=re.IGNORECASE)
ms2lda_task_pattern = re.compile('^TASK-(\d+)$', flags=re.IGNORECASE)

splash_builder = splash.Splash()


def _match_usi(usi: str):
# First try matching as an official USI, then as a metabolomics draft USI.
Expand All @@ -70,35 +73,53 @@ def _match_usi(usi: str):


@functools.lru_cache(100)
def parse_usi(usi: str) -> Tuple[sus.MsmsSpectrum, str]:
def parse_usi(usi: str) -> Tuple[sus.MsmsSpectrum, str, str]:
"""
Retrieve the spectrum associated with the given USI.
Parameters
----------
usi : str
The USI of the spectrum to be retrieved from its resource.
Returns
-------
Tuple[sus.MsmsSpectrum, str, str]
A tuple of the `MsmsSpectrum`, its source link, and its SPLASH.
"""
try:
match = _match_usi(usi)
except UsiError as e:
# FIXME: Legacy parsing is attempted when the USI is invalid.
try:
return parsing_legacy.parse_usi_legacy(usi)
spectrum, source_link = parsing_legacy.parse_usi_legacy(usi)
splash_key = splash_builder.splash(splash.Spectrum(
list(zip(spectrum.mz, spectrum.intensity)),
splash.SpectrumType.MS))
return spectrum, source_link, splash_key
except ValueError:
raise e
try:
collection = match.group(1).lower()
# Send all proteomics USIs to MassIVE.
if (
collection.startswith('msv') or
collection.startswith('pxd') or
collection.startswith('pxl') or
collection.startswith('rpxd') or
collection == 'massivekb'
):
return _parse_msv_pxd(usi)
if (collection.startswith('msv') or collection.startswith('pxd') or
collection.startswith('pxl') or collection.startswith('rpxd')
or collection == 'massivekb'):
spectrum, source_link = _parse_msv_pxd(usi)
elif collection == 'gnps':
return _parse_gnps(usi)
spectrum, source_link = _parse_gnps(usi)
elif collection == 'massbank':
return _parse_massbank(usi)
spectrum, source_link = _parse_massbank(usi)
elif collection == 'ms2lda':
return _parse_ms2lda(usi)
spectrum, source_link = _parse_ms2lda(usi)
elif collection == 'motifdb':
return _parse_motifdb(usi)
spectrum, source_link = _parse_motifdb(usi)
else:
raise UsiError(f'Unknown USI collection: {match.group(1)}', 400)
splash_key = splash_builder.splash(splash.Spectrum(
list(zip(spectrum.mz, spectrum.intensity)),
splash.SpectrumType.MS))
return spectrum, source_link, splash_key
except requests.exceptions.Timeout:
raise UsiError('Timeout while retrieving the USI from an external '
'resource', 504)
Expand Down Expand Up @@ -143,8 +164,9 @@ def _parse_gnps_task(usi: str) -> Tuple[sus.MsmsSpectrum, str]:
charge = int(spectrum_dict['precursor'].get('charge', 0))
else:
precursor_mz, charge = 0, 0
return (sus.MsmsSpectrum(usi, precursor_mz, charge, mz, intensity),
source_link)

spectrum = sus.MsmsSpectrum(usi, precursor_mz, charge, mz, intensity)
return spectrum, source_link
except (requests.exceptions.HTTPError, json.decoder.JSONDecodeError):
raise UsiError('Unknown GNPS task USI', 404)

Expand All @@ -169,13 +191,10 @@ def _parse_gnps_library(usi: str) -> Tuple[sus.MsmsSpectrum, str]:
spectrum_dict['spectruminfo']['peaks_json']))
source_link = (f'https://gnps.ucsd.edu/ProteoSAFe/'
f'gnpslibraryspectrum.jsp?SpectrumID={index}')

spectrum = sus.MsmsSpectrum(
usi,
float(spectrum_dict['annotations'][0]['Precursor_MZ']),
int(spectrum_dict['annotations'][0]['Charge']),
mz,
intensity,
)
usi, float(spectrum_dict['annotations'][0]['Precursor_MZ']),
int(spectrum_dict['annotations'][0]['Charge']), mz, intensity)
return spectrum, source_link
except requests.exceptions.HTTPError:
raise UsiError('Unknown GNPS library USI', 404)
Expand Down Expand Up @@ -206,8 +225,9 @@ def _parse_massbank(usi: str) -> Tuple[sus.MsmsSpectrum, str]:
break
source_link = (f'https://massbank.eu/MassBank/'
f'RecordDisplay.jsp?id={index}')
return (sus.MsmsSpectrum(usi, precursor_mz, 0, mz, intensity),
source_link)

spectrum = sus.MsmsSpectrum(usi, precursor_mz, 0, mz, intensity)
return spectrum, source_link
except requests.exceptions.HTTPError:
raise UsiError('Unknown MassBank USI', 404)

Expand All @@ -234,8 +254,10 @@ def _parse_ms2lda(usi: str) -> Tuple[sus.MsmsSpectrum, str]:
raise UsiError(f'MS2LDA error: {spectrum_dict["error"]}', 404)
mz, intensity = zip(*spectrum_dict['peaks'])
source_link = f'http://ms2lda.org/basicviz/show_doc/{index}/'
return sus.MsmsSpectrum(usi, float(spectrum_dict['precursor_mz']), 0,
mz, intensity), source_link

spectrum = sus.MsmsSpectrum(
usi, float(spectrum_dict['precursor_mz']), 0, mz, intensity)
return spectrum, source_link
except requests.exceptions.HTTPError:
raise UsiError('Unknown MS2LDA USI', 404)

Expand Down Expand Up @@ -286,8 +308,9 @@ def _parse_msv_pxd(usi: str) -> Tuple[sus.MsmsSpectrum, str]:
source_link = (f'https://massive.ucsd.edu/ProteoSAFe/'
f'QueryMSV?id={dataset_identifier}')

return sus.MsmsSpectrum(usi, precursor_mz, charge, mz,
intensity), source_link
spectrum = sus.MsmsSpectrum(
usi, precursor_mz, charge, mz, intensity)
return spectrum, source_link
except requests.exceptions.HTTPError:
pass
raise UsiError('Unsupported/unknown USI', 404)
Expand All @@ -307,6 +330,8 @@ def _parse_motifdb(usi: str) -> Tuple[sus.MsmsSpectrum, str]:
lookup_request.raise_for_status()
mz, intensity = zip(*json.loads(lookup_request.text))
source_link = f'http://ms2lda.org/motifdb/motif/{index}/'
return sus.MsmsSpectrum(usi, 0, 0, mz, intensity), source_link

spectrum = sus.MsmsSpectrum(usi, 0, 0, mz, intensity)
return spectrum, source_link
except requests.exceptions.HTTPError:
raise UsiError('Unknown MOTIFDB USI', 404)
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,4 @@ requests_cache
scipy
spectrum_utils
werkzeug
git+git://github.com/berlinguyinca/spectra-hash.git#egg=splash&subdirectory=python
16 changes: 16 additions & 0 deletions templates/mirror.html
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,14 @@
</a>
(% endif %)
</div>
(% if splash_key1 is not none %)
<div class="col-4 font-weight-bold text-right">
SPLASH Identifier 1
</div>
<div class="col-8">
(( splash_key1 ))
</div>
(% endif %)
</div>
<div class="row">
<div class="col-4 font-weight-bold text-right">
Expand All @@ -50,6 +58,14 @@
</a>
(% endif %)
</div>
(% if splash_key2 is not none %)
<div class="col-4 font-weight-bold text-right">
SPLASH Identifier 2
</div>
<div class="col-8">
(( splash_key2 ))
</div>
(% endif %)
</div>
<div class="row mt-2">
<div class="col-2 offset-4">
Expand Down
8 changes: 8 additions & 0 deletions templates/spectrum.html
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,14 @@
</a>
(% endif %)
</div>
(% if splash_key is not none %)
<div class="col-4 font-weight-bold text-right">
SPLASH Identifier
</div>
<div class="col-8">
(( splash_key ))
</div>
(% endif %)
</div>
<div class="row mt-2">
<div class="col-2 offset-2">
Expand Down
26 changes: 26 additions & 0 deletions test/test_integration.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import csv
import functools
import imghdr
import io
import itertools
Expand All @@ -11,6 +12,8 @@
import flex
import PIL
import pytest
import requests
import spectrum_utils.spectrum as sus
import urllib.parse
from lxml import etree
from pyzbar import pyzbar
Expand All @@ -22,6 +25,19 @@
from usi_test_data import usis_to_test


@functools.lru_cache(None)
def _get_splash_remote(spectrum):
    """
    Request the SPLASH of a spectrum from the remote SPLASH web service.

    The peaks of the spectrum are posted as a JSON document of type 'MS' to
    the SPLASH REST endpoint and the body of the response is returned. The
    tests compare this value against the locally computed SPLASH key.

    Results are memoized with `functools.lru_cache`, keyed on the `spectrum`
    argument, so `spectrum` must be hashable.
    NOTE(review): whether two separately constructed but equal spectra share
    a cache entry depends on how the spectrum class implements hashing --
    confirm before relying on the cache to avoid repeated requests.

    Parameters
    ----------
    spectrum
        Object exposing `mz` and `intensity` iterables of equal length; each
        value is converted to `float` before being sent.

    Returns
    -------
    str
        The text of the response of the SPLASH service.

    Raises
    ------
    requests.exceptions.HTTPError
        If the SPLASH service answers with an HTTP error status.
    requests.exceptions.Timeout
        If the SPLASH service does not answer within the timeout.
    """
    payload = {
        'ions': [{'mass': float(mz), 'intensity': float(intensity)}
                 for mz, intensity in zip(spectrum.mz, spectrum.intensity)],
        'type': 'MS',
    }
    headers = {'Content-type': 'application/json; charset=UTF-8'}
    # Without an explicit timeout `requests` waits forever, which would hang
    # the whole test run if the external service is unresponsive.
    splash_response = requests.post(
        'https://splash.fiehnlab.ucdavis.edu/splash/it',
        data=json.dumps(payload), headers=headers, timeout=30)
    # Fail loudly on an HTTP error instead of returning the error page as if
    # it were a SPLASH key and producing a confusing assertion mismatch.
    splash_response.raise_for_status()
    return splash_response.text


def _get_custom_plotting_args_str():
width, height = 20.0, 10.0
mz_min, mz_max = 50.0, 500.0
Expand Down Expand Up @@ -374,9 +390,13 @@ def test_peak_json(client):
assert 'peaks' in response_dict
assert 'n_peaks' in response_dict
assert 'precursor_mz' in response_dict
assert 'splash' in response_dict
assert response_dict['n_peaks'] == len(response_dict['peaks'])
for peak in response_dict['peaks']:
assert len(peak) == 2
mz, intensity = zip(*response_dict['peaks'])
assert response_dict['splash'] == _get_splash_remote(
sus.MsmsSpectrum(usi, 0, 0, mz, intensity))


def test_peak_json_invalid(client):
Expand Down Expand Up @@ -412,6 +432,12 @@ def test_peak_proxi_json(client):
assert attribute['name'] == 'selected ion m/z'
elif attribute['accession'] == 'MS:1000041':
assert attribute['name'] == 'charge state'
elif attribute['accession'] == 'MS:1002599':
assert attribute['name'] == 'splash key'
assert attribute['value'] == _get_splash_remote(
sus.MsmsSpectrum(usi, 0, 0, response_dict['mzs'],
response_dict['intensities']))

# Validate that the response matches the PROXI Swagger API definition.
flex.core.validate_api_response(schema, raw_request=flask.request,
raw_response=response)
Expand Down
Loading

0 comments on commit 8d2f776

Please sign in to comment.