Skip to content

Commit

Permalink
Merge pull request #43 from 3D-e-Chem/json-error-42
Browse files Browse the repository at this point in the history
Json error 42
  • Loading branch information
sverhoeven authored Feb 23, 2017
2 parents c3ec658 + 9e71a1a commit 73b3fc9
Show file tree
Hide file tree
Showing 19 changed files with 1,022 additions and 577 deletions.
9 changes: 8 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,16 @@ Formatted as described on http://keepachangelog.com/.

## Unreleased

### Fixes
## [2.2.0] - 2017-02-23

### Changed

- Canned methods can now raise an exception which holds the ids that could not be found and the data for the ids that could be found

### Fixed

- Fetch fragment with no molblock throws error (#41)
- Not found response of web service should be JSON (#42)

## [2.1.0] - 2017-01-17

Expand Down
3 changes: 2 additions & 1 deletion appveyor.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,8 @@ install:
build_script:
- cmd: pip install -r requirements.txt
test_script:
- cmd: pytest --ignore .\tests\test_frozen.py --ignore .\tests\test_script_dive.py --junitxml=junit-results.xml
# TODO: don't exclude files
- cmd: pytest --ignore .\tests\test_frozen.py --ignore .\tests\script\test_dive.py --junitxml=junit-results.xml
on_finish:
- ps: >-
$url = "https://ci.appveyor.com/api/testresults/junit/$($env:APPVEYOR_JOB_ID)"
Expand Down
102 changes: 87 additions & 15 deletions kripodb/canned.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,27 @@

from __future__ import absolute_import

import numpy as np
import pandas as pd
from requests import HTTPError

from .db import FragmentsDb
from .pairs import similar, open_similarity_matrix
from .webservice.client import WebserviceClient
from .webservice.client import WebserviceClient, IncompleteFragments


class IncompleteHits(Exception):
    """Raised when some query fragment identifiers could not be found.

    The exception carries both the hits that could be retrieved and the
    identifiers for which nothing was found, so a caller can still use the
    partial result.

    Attributes:
        absent_identifiers (List[str]): List of identifiers for which no information could be found
        hits (pandas.DataFrame): Data frame with query_frag_id, hit_frag_id and score columns
    """

    def __init__(self, absent_identifiers, hits):
        """List of hits and list of identifiers for which no information could be found

        Args:
            absent_identifiers (List[str]): List of identifiers for which no information could be found
            hits (pandas.DataFrame): Data frame with query_frag_id, hit_frag_id and score columns
        """
        message = 'Some query fragment identifiers could not be found'
        super(IncompleteHits, self).__init__(message)
        self.absent_identifiers = absent_identifiers
        self.hits = hits


def similarities(queries, similarity_matrix_filename_or_url, cutoff, limit=1000):
Expand Down Expand Up @@ -54,26 +70,48 @@ def similarities(queries, similarity_matrix_filename_or_url, cutoff, limit=1000)
Returns:
pandas.DataFrame: Data frame with query_fragment_id, hit_frag_id and score columns
Raises:
IncompleteHits: When one or more of the identifiers could not be found.
"""
hits = []
absent_identifiers = []
if similarity_matrix_filename_or_url.startswith('http'):
client = WebserviceClient(similarity_matrix_filename_or_url)
for query in queries:
qhits = client.similar_fragments(query, cutoff, limit)
hits.extend(qhits)
try:
qhits = client.similar_fragments(query, cutoff, limit)
hits.extend(qhits)
except HTTPError as e:
if e.response.status_code == 404:
absent_identifiers.append(query)
else:
similarity_matrix = open_similarity_matrix(similarity_matrix_filename_or_url)
for query in queries:
for query_id, hit_id, score in similar(query, similarity_matrix, cutoff, limit):
hit = {'query_frag_id': query_id,
'hit_frag_id': hit_id,
'score': score,
}
hits.append(hit)
try:
for query_id, hit_id, score in similar(query, similarity_matrix, cutoff, limit):
hit = {'query_frag_id': query_id,
'hit_frag_id': hit_id,
'score': score,
}
hits.append(hit)
except KeyError:
absent_identifiers.append(query)

similarity_matrix.close()

return pd.DataFrame(hits)
if absent_identifiers:
if len(hits) > 0:
df = pd.DataFrame(hits, columns=['query_frag_id', 'hit_frag_id', 'score'])
else:
# empty hits array will give dataframe without columns
df = pd.DataFrame({'query_frag_id': pd.Series(dtype=str),
'hit_frag_id': pd.Series(dtype=str),
'score': pd.Series(dtype=np.double)
}, columns=['query_frag_id', 'hit_frag_id', 'score'])
raise IncompleteHits(absent_identifiers, df)

return pd.DataFrame(hits, columns=['query_frag_id', 'hit_frag_id', 'score'])


def fragments_by_pdb_codes(pdb_codes, fragments_db_filename_or_url, prefix=''):
Expand Down Expand Up @@ -104,16 +142,32 @@ def fragments_by_pdb_codes(pdb_codes, fragments_db_filename_or_url, prefix=''):
Returns:
pandas.DataFrame: Data frame with fragment information
Raises:
IncompleteFragments: When one or more of the identifiers could not be found.
"""
if fragments_db_filename_or_url.startswith('http'):
client = WebserviceClient(fragments_db_filename_or_url)
fragments = client.fragments_by_pdb_codes(pdb_codes)
try:
fragments = client.fragments_by_pdb_codes(pdb_codes)
except IncompleteFragments as e:
df = pd.DataFrame(e.fragments)
df.rename(columns=lambda x: prefix + x, inplace=True)
raise IncompleteFragments(e.absent_identifiers, df)
else:
fragmentsdb = FragmentsDb(fragments_db_filename_or_url)
fragments = []
absent_identifiers = []
for pdb_code in pdb_codes:
for fragment in fragmentsdb.by_pdb_code(pdb_code):
fragments.append(fragment)
try:
for fragment in fragmentsdb.by_pdb_code(pdb_code):
fragments.append(fragment)
except LookupError as e:
absent_identifiers.append(pdb_code)
if absent_identifiers:
df = pd.DataFrame(fragments)
df.rename(columns=lambda x: prefix + x, inplace=True)
raise IncompleteFragments(absent_identifiers, df)

df = pd.DataFrame(fragments)
df.rename(columns=lambda x: prefix + x, inplace=True)
Expand Down Expand Up @@ -146,13 +200,31 @@ def fragments_by_id(fragment_ids, fragments_db_filename_or_url, prefix=''):
Returns:
pandas.DataFrame: Data frame with fragment information
Raises:
IncompleteFragments: When one or more of the identifiers could not be found.
"""
if fragments_db_filename_or_url.startswith('http'):
client = WebserviceClient(fragments_db_filename_or_url)
fragments = client.fragments_by_id(fragment_ids)
try:
fragments = client.fragments_by_id(fragment_ids)
except IncompleteFragments as e:
df = pd.DataFrame(e.fragments)
df.rename(columns=lambda x: prefix + x, inplace=True)
raise IncompleteFragments(e.absent_identifiers, df)
else:
fragmentsdb = FragmentsDb(fragments_db_filename_or_url)
fragments = [fragmentsdb[frag_id] for frag_id in fragment_ids]
fragments = []
absent_identifiers = []
for frag_id in fragment_ids:
try:
fragments.append(fragmentsdb[frag_id])
except KeyError:
absent_identifiers.append(frag_id)
if absent_identifiers:
df = pd.DataFrame(fragments)
df.rename(columns=lambda x: prefix + x, inplace=True)
raise IncompleteFragments(absent_identifiers, df)

df = pd.DataFrame(fragments)
df.rename(columns=lambda x: prefix + x, inplace=True)
Expand Down
2 changes: 1 addition & 1 deletion kripodb/version.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,5 +12,5 @@
# See the License for the specific language governing permissions and
# limitations under the License.

__version_info__ = ('2', '1', '0')
__version_info__ = ('2', '2', '0')
__version__ = '.'.join(__version_info__)
43 changes: 37 additions & 6 deletions kripodb/webservice/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,22 @@
from __future__ import absolute_import
import requests
from rdkit.Chem.AllChem import MolFromMolBlock
from requests import HTTPError


class IncompleteFragments(Exception):
    """Raised when some fragment identifiers or PDB codes could not be found.

    The exception carries both the fragment information that could be
    retrieved and the identifiers for which nothing was found, so a caller
    can still use the partial result.

    Attributes:
        absent_identifiers (List[str]): List of identifiers for which no information could be found
        fragments: Fragment information that could be retrieved; stored exactly as passed in
    """

    def __init__(self, absent_identifiers, fragments):
        """List of fragments and list of identifiers for which no information could be found

        Args:
            absent_identifiers (List[str]): List of identifiers for which no information could be found
            fragments (List[dict] or pandas.DataFrame): Fragment information that could be retrieved
        """
        message = 'Some identifiers could not be found'
        super(IncompleteFragments, self).__init__(message)
        self.absent_identifiers = absent_identifiers
        self.fragments = fragments


class WebserviceClient(object):
Expand All @@ -41,6 +57,9 @@ def similar_fragments(self, fragment_id, cutoff, limit=1000):
Returns:
list[dict]: Query fragment identifier, hit fragment identifier and similarity score
Raises:
requests.HTTPError: When fragment_id could not be found
"""
url = self.base_url + '/fragments/{fragment_id}/similar'.format(fragment_id=fragment_id)
params = {'cutoff': cutoff, 'limit': limit}
Expand Down Expand Up @@ -74,24 +93,36 @@ def fragments_by_id(self, fragment_ids, chunk_size=100):
list[dict]: List of fragment information
Raises:
requests.HTTPError: When one of the identifiers could not be found.
IncompleteFragments: When one or more of the identifiers could not be found.
"""
return self._fetch_chunked_fragments('fragment_ids', fragment_ids, chunk_size)

def _fetch_chunked_fragments(self, idtype, ids, chunk_size):
fragments = []
absent_identifiers = []
for start in range(0, len(ids), chunk_size):
stop = chunk_size + start
fragments += self._fetch_fragments(idtype, ids[start:stop])
(chunk_fragments, chunk_absent_identifiers) = self._fetch_fragments(idtype, ids[start:stop])
fragments += chunk_fragments
absent_identifiers += chunk_absent_identifiers
if chunk_absent_identifiers:
raise IncompleteFragments(absent_identifiers, fragments)
return fragments

def _fetch_fragments(self, idtype, ids):
    """Fetch fragments for one chunk of identifiers from the web service.

    Args:
        idtype (str): Name of the query parameter the identifiers are passed as
        ids (List[str]): Identifiers to fetch, joined with commas in the url

    Returns:
        tuple: ``(fragments, absent_identifiers)``. ``fragments`` is the list of
        fragment information with each non-empty ``mol`` value converted by
        ``MolFromMolBlock``. ``absent_identifiers`` is the list taken from a
        404 response body and is empty when the request succeeded.

    Raises:
        requests.HTTPError: When the web service responds with an error status other than 404.
    """
    url = self.base_url + '/fragments?{idtype}={ids}'.format(idtype=idtype, ids=','.join(ids))
    absent_identifiers = []
    try:
        response = requests.get(url)
        response.raise_for_status()
        fragments = response.json()
    except HTTPError as e:
        if e.response.status_code != 404:
            # Only the 404 body is read for a partial result below; re-raise
            # anything else instead of continuing with ``fragments`` unbound.
            raise
        body = e.response.json()
        fragments = body['fragments']
        absent_identifiers = body['absent_identifiers']
    # Convert molblock string to RDKit Mol object
    for fragment in fragments:
        if fragment['mol'] is not None:
            fragment['mol'] = MolFromMolBlock(fragment['mol'])
    return fragments, absent_identifiers
43 changes: 33 additions & 10 deletions kripodb/webservice/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
"""Kripo datafiles wrapped in a webservice"""
from __future__ import absolute_import

from kripodb.db import FragmentsDb
from pkg_resources import resource_filename
import logging

Expand All @@ -24,11 +23,12 @@
from six.moves.urllib_parse import urlparse

import connexion
from flask import current_app, abort
from flask import current_app
from flask.json import JSONEncoder

from ..version import __version__
from ..db import FragmentsDb
from ..pairs import open_similarity_matrix
from ..version import __version__

LOGGER = logging.getLogger(__name__)

Expand Down Expand Up @@ -73,10 +73,17 @@ def get_similar_fragments(fragment_id, cutoff, limit):
for hit_id, score in raw_hits:
hits.append({'query_frag_id': query_id, 'hit_frag_id': hit_id, 'score': score})
except LookupError:
abort(404, 'Fragment with identifier \'{0}\' not found'.format(fragment_id))
return fragment_not_found(fragment_id)
return hits


def fragment_not_found(fragment_id):
    """Build the 404 problem response for a fragment identifier that could not be found.

    Args:
        fragment_id (str): Fragment identifier that could not be found

    Returns:
        The value returned by ``connexion.problem`` for status 404, with the
        identifier passed along in the ``identifier`` extension field.
    """
    return connexion.problem(
        404,
        'Not Found',
        'Fragment with identifier \'{0}\' not found'.format(fragment_id),
        ext={'identifier': fragment_id}
    )


def get_fragments(fragment_ids=None, pdb_codes=None):
"""Retrieve fragments based on their identifier or PDB code.
Expand All @@ -93,20 +100,34 @@ def get_fragments(fragment_ids=None, pdb_codes=None):
fragments_db_filename = current_app.config['db_fn']
with FragmentsDb(fragments_db_filename) as fragmentsdb:
fragments = []
missing_ids = []
if fragment_ids:
for frag_id in fragment_ids:
try:
fragments.append(fragmentsdb[frag_id])
except LookupError:
abort(404, 'Fragment with identifier \'{0}\' not found'.format(frag_id))
missing_ids.append(frag_id)

if pdb_codes:
for pdb_code in pdb_codes:
try:
for fragment in fragmentsdb.by_pdb_code(pdb_code):
fragments.append(fragment)
except LookupError:
abort(404, 'Fragments with PDB code \'{0}\' not found'.format(pdb_code))
missing_ids.append(pdb_code)
# TODO if fragment_ids and pdb_codes are both None then return paged list of all fragments
if missing_ids:
title = 'Not found'
label = 'identifiers'
if pdb_codes:
label = 'PDB codes'
description = 'Fragments with {1} \'{0}\' not found'.format(','.join(missing_ids), label)
# connexion.problem is using json.dumps instead of flask custom json encoder, so performing convert myself
# TODO remove mol2string conversion when https://github.com/zalando/connexion/issues/266 is fixed
for fragment in fragments:
fragment['mol'] = MolToMolBlock(fragment['mol'])
ext = {'absent_identifiers': missing_ids, 'fragments': fragments}
return connexion.problem(404, title, description, ext=ext)
return fragments


Expand All @@ -133,11 +154,10 @@ def get_fragment_svg(fragment_id, width, height):
with FragmentsDb(fragments_db_filename) as fragmentsdb:
try:
fragment = fragmentsdb[fragment_id]
LOGGER.warning([fragment_id, width, height])
mol = fragment['mol']
return mol2svg(mol, width, height)
except LookupError:
abort(404, 'Fragment with identifier \'{0}\' not found'.format(fragment_id))
return fragment_not_found(fragment_id)


def get_version():
Expand All @@ -162,11 +182,14 @@ def wsgi_app(sim_matrix, frags_db_fn, external_url='http://localhost:8084/kripo'
"""
app = connexion.App(__name__)
url = urlparse(external_url)
swagger_file = resource_filename(__name__, 'swagger.json')
app.add_api(swagger_file, base_path=url.path, arguments={'hostport': url.netloc, 'scheme': url.scheme})
swagger_file = resource_filename(__name__, 'swagger.yaml')
app.app.json_encoder = KripodbJSONEncoder
app.app.config['matrix'] = sim_matrix
app.app.config['db_fn'] = frags_db_fn
arguments = {'hostport': url.netloc, 'scheme': url.scheme, 'version': __version__}
# Keep validate_responses turned off, because of conflict with connexion.problem
# see https://github.com/zalando/connexion/issues/266
app.add_api(swagger_file, base_path=url.path, arguments=arguments)
return app


Expand Down
Loading

0 comments on commit 73b3fc9

Please sign in to comment.