Skip to content

Commit

Permalink
Merge pull request #2 from choderalab/initial_api_clients
Browse files Browse the repository at this point in the history
Added hgnc and requests_wrapper. Merging without pre-commit CI since repo not public.
  • Loading branch information
jessicaw9910 authored Feb 20, 2024
2 parents 91432d6 + d47a1ac commit da95d66
Show file tree
Hide file tree
Showing 2 changed files with 205 additions and 0 deletions.
166 changes: 166 additions & 0 deletions missense_kinase_toolkit/src/hgnc.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,166 @@
from __future__ import annotations

import requests

import requests_wrapper


def maybe_get_symbol_from_hgnc_search(
    input_symbol_or_id: str,
    input_is_hgnc_symbol: bool = True,
) -> list[str] | None:
    """Search the HGNC REST API and return the symbols of the returned documents

    Parameters
    ----------
    input_symbol_or_id : str
        Gene symbol or Ensembl gene ID to search for
    input_is_hgnc_symbol : bool
        If True, the ``symbol`` field is searched; otherwise the
        ``ensembl_gene_id`` field is searched

    Returns
    -------
    list[str] | None
        Value of the ``symbol`` field of every returned document; empty list
        if nothing matched and None if the response was not OK (the status
        code is printed in that case)
    """
    url = (
        f"https://rest.genenames.org/search/symbol:{input_symbol_or_id}"
        if input_is_hgnc_symbol
        else f"https://rest.genenames.org/search/ensembl_gene_id:{input_symbol_or_id}"
    )

    session = requests_wrapper.get_cached_session()
    res = session.get(url, headers={"Accept": "application/json"})

    # Failure path first: report the status code and signal failure with None
    if not res.ok:
        print_status_code_if_res_not_ok(res)
        return None

    return extract_list_from_hgnc_response_docs(res, "symbol")


def maybe_get_info_from_hgnc_fetch(
    hgnc_gene_symbol: str,
    list_to_extract: list[str] | None = None,
) -> dict | None:
    """Fetch selected fields of the HGNC gene symbol report via the HGNC REST API

    Parameters
    ----------
    hgnc_gene_symbol : str
        HGNC gene symbol
    list_to_extract : list[str] | None
        List of fields to extract from the response; if None, defaults to ["locus_type"]

    Returns
    -------
    dict | None
        Dictionary with one key per entry of list_to_extract. Each value is
        the list of that field's values taken from the returned documents, or
        None if the field is absent from every document, if no document was
        returned for the symbol, or if the response was not OK (the status
        code is printed in that case)
    """
    if list_to_extract is None:
        list_to_extract = ["locus_type"]

    url = f"https://rest.genenames.org/fetch/symbol/{hgnc_gene_symbol}"
    res = requests_wrapper.get_cached_session().get(
        url, headers={"Accept": "application/json"}
    )

    if not res.ok:
        print_status_code_if_res_not_ok(res)
        return {entry: None for entry in list_to_extract}

    # An unknown symbol yields an OK response with no documents. Building the
    # key set from zero documents used to raise TypeError, so treat that case
    # as "no fields available" and report every requested field as None.
    if res.json()["response"]["docs"]:
        set_keys = generate_key_set_hgnc_response_docs(res)
    else:
        set_keys = set()

    return {
        entry: (
            extract_list_from_hgnc_response_docs(res, entry)
            if entry in set_keys
            else None
        )
        for entry in list_to_extract
    }


def extract_list_from_hgnc_response_docs(
    res_input: requests.models.Response,
    str_to_extract: str,
) -> list[str]:
    """Extract a list of values from the response documents of an HGNC REST API request

    Parameters
    ----------
    res_input : requests.models.Response
        Response object from an HGNC REST API request
    str_to_extract : str
        Key to extract from the response documents

    Returns
    -------
    list[str]
        Value stored under str_to_extract in each response document, in
        document order; empty list if the response reports no hits

    Raises
    ------
    KeyError
        If a response document does not contain str_to_extract
    """
    # Decode the body a single time; every call to .json() parses it again.
    response = res_input.json()["response"]

    if response["numFound"] < 1:
        return []

    return [doc[str_to_extract] for doc in response["docs"]]


def generate_key_set_hgnc_response_docs(
    res_input: requests.models.Response,
) -> set[str]:
    """Generate a set of keys present in the response documents of an HGNC REST API request

    Parameters
    ----------
    res_input : requests.models.Response
        Response object from an HGNC REST API request

    Returns
    -------
    set[str]
        Union of the keys of all response documents; empty set if the
        response contains no documents
    """
    docs = res_input.json()["response"]["docs"]
    # Iterating a dict yields its keys, so each document can be passed to
    # union() directly. Starting from an empty set instance keeps the call
    # valid when docs is empty, where the unbound set.union(*[]) raises
    # TypeError.
    return set().union(*docs)


def print_status_code_if_res_not_ok(
    res_input: requests.models.Response,
    dict_status_code: dict[int, str] | None = None,
) -> None:
    """Print the status code of a response, with a description when one is known

    Parameters
    ----------
    res_input : requests.models.Response
        Response object whose status_code is reported
    dict_status_code : dict[int, str] | None
        Mapping from status code to description; if None, a built-in mapping
        for 400, 404, 415, 500 and 503 is used

    Returns
    -------
    None
    """
    if dict_status_code is None:
        dict_status_code = {
            400: "Bad request",
            404: "Not found",
            415: "Unsupported media type",
            500: "Server error",
            503: "Service unavailable",
        }

    # Build the message first and print once; a code that is missing from the
    # mapping falls back to the bare status code.
    try:
        message = f"Error code: {res_input.status_code} ({dict_status_code[res_input.status_code]})"
    except KeyError:
        message = f"Error code: {res_input.status_code}"
    print(message)
39 changes: 39 additions & 0 deletions missense_kinase_toolkit/src/requests_wrapper.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
from requests_cache import CachedSession
import os
from functools import cache
from requests.adapters import HTTPAdapter, Retry

# this script was written by Jeff Quinn (MSKCC, Tansey lab)

ETL_REQUEST_CACHE_VAR = "ETL_REQUEST_CACHE"

def add_retry_to_session(
    session,
    retries=5,
    backoff_factor=0.3,
    status_forcelist=(429, 500, 501, 502, 503, 504),
):
    """Mount a retrying HTTP adapter on a session and return that session

    Parameters
    ----------
    session
        Session object exposing ``mount``; it is modified in place
    retries
        Passed to ``Retry`` as ``total``
    backoff_factor
        Passed to ``Retry`` as ``backoff_factor``
    status_forcelist
        Passed to ``Retry`` as ``status_forcelist``

    Returns
    -------
    The same session object that was passed in
    """
    retrying_adapter = HTTPAdapter(
        max_retries=Retry(
            total=retries,
            backoff_factor=backoff_factor,
            status_forcelist=status_forcelist,
            # Per the urllib3 Retry documentation, a false value here means
            # retries are not restricted to particular HTTP methods.
            allowed_methods=False,
        )
    )

    # The one adapter instance serves both URL schemes.
    for prefix in ("http://", "https://"):
        session.mount(prefix, retrying_adapter)

    return session


@cache
def get_cached_session():
    """Return a retry-enabled ``CachedSession``

    If the ``ETL_REQUEST_CACHE`` environment variable is set, its value is
    passed to ``CachedSession`` as the cache location with the sqlite backend,
    and responses with status 200, 404 and 400 are allowed into the cache.
    Otherwise a session with the memory backend is created.

    Because of ``functools.cache``, the body runs on the first call only and
    every later call returns the same session object.
    """
    cache_location = os.environ.get("ETL_REQUEST_CACHE")

    if cache_location is None:
        session = CachedSession(backend="memory")
    else:
        session = CachedSession(
            cache_location, allowable_codes=(200, 404, 400), backend="sqlite"
        )

    return add_retry_to_session(session)

0 comments on commit da95d66

Please sign in to comment.