-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #2 from choderalab/initial_api_clients
Added hgnc and requests_wrapper. Merging without pre-commit CI since repo not public.
- Loading branch information
Showing
2 changed files
with
205 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,166 @@ | ||
from __future__ import annotations | ||
|
||
import requests | ||
|
||
import requests_wrapper | ||
|
||
|
||
def maybe_get_symbol_from_hgnc_search(
    input_symbol_or_id: str,
    input_is_hgnc_symbol: bool = True,
) -> list[str] | None:
    """Search the HGNC REST API for gene symbols matching a symbol or Ensembl gene ID.

    Parameters
    ----------
    input_symbol_or_id : str
        Gene symbol or Ensembl gene ID to search for.
    input_is_hgnc_symbol : bool
        If True, ``input_symbol_or_id`` is searched as a gene symbol;
        otherwise it is searched as an Ensembl gene ID.

    Returns
    -------
    list[str] | None
        The ``symbol`` value of every document in the response (empty list
        when nothing matched), or None when the response is not OK.
    """
    # The search endpoint takes a "<field>:<value>" query; only the field differs.
    url = (
        f"https://rest.genenames.org/search/symbol:{input_symbol_or_id}"
        if input_is_hgnc_symbol
        else f"https://rest.genenames.org/search/ensembl_gene_id:{input_symbol_or_id}"
    )

    session = requests_wrapper.get_cached_session()
    response = session.get(url, headers={"Accept": "application/json"})

    if not response.ok:
        # Report the failing status code and signal the failure with None.
        print_status_code_if_res_not_ok(response)
        return None

    return extract_list_from_hgnc_response_docs(response, "symbol")
|
||
|
||
def maybe_get_info_from_hgnc_fetch(
    hgnc_gene_symbol: str,
    list_to_extract: list[str] | None = None,
) -> dict | None:
    """Fetch selected fields of an HGNC gene symbol report via the HGNC REST API.

    Parameters
    ----------
    hgnc_gene_symbol : str
        HGNC gene symbol.
    list_to_extract : list[str] | None
        Fields to extract from the response documents; if None, defaults to
        ``["locus_type"]``.

    Returns
    -------
    dict | None
        Dictionary mapping each requested field to its result:

        * a list with the field's value from each response document,
        * an empty list when the response contains no documents (no match),
        * None when the request fails or the field is absent from every
          document.
    """
    url = f"https://rest.genenames.org/fetch/symbol/{hgnc_gene_symbol}"
    res = requests_wrapper.get_cached_session().get(
        url, headers={"Accept": "application/json"}
    )

    if list_to_extract is None:
        list_to_extract = ["locus_type"]

    if not res.ok:
        print_status_code_if_res_not_ok(res)
        return dict.fromkeys(list_to_extract)

    # A successful response without documents means the symbol is unknown.
    # Handle it before building the key set, which cannot be built from an
    # empty document list.
    if not res.json()["response"]["docs"]:
        return {entry: [] for entry in list_to_extract}

    set_keys = generate_key_set_hgnc_response_docs(res)
    return {
        entry: (
            extract_list_from_hgnc_response_docs(res, entry)
            if entry in set_keys
            else None
        )
        for entry in list_to_extract
    }
|
||
|
||
def extract_list_from_hgnc_response_docs(
    res_input: requests.models.Response,
    str_to_extract: str,
) -> list[str]:
    """Extract one field from every document of an HGNC REST API response.

    Parameters
    ----------
    res_input : requests.models.Response
        Response object from an HGNC REST API request.
    str_to_extract : str
        Key to extract from the response documents.

    Returns
    -------
    list[str]
        The value stored under ``str_to_extract`` in each response document,
        or an empty list when ``numFound`` is below 1.

    Raises
    ------
    KeyError
        If a document does not contain ``str_to_extract``.
    """
    # Decode the JSON body once and reuse it for both the count and the docs.
    response_section = res_input.json()["response"]
    if response_section["numFound"] >= 1:
        return [doc[str_to_extract] for doc in response_section["docs"]]
    return []
|
||
|
||
def generate_key_set_hgnc_response_docs(
    res_input: requests.models.Response,
) -> set[str]:
    """Collect every key that appears in the documents of an HGNC REST API response.

    Parameters
    ----------
    res_input : requests.models.Response
        Response object from an HGNC REST API request.

    Returns
    -------
    set[str]
        Union of the keys of all response documents; an empty set when the
        response contains no documents.
    """
    docs = res_input.json()["response"]["docs"]
    # A comprehension (rather than set.union(*...)) also works for zero docs.
    return {key for doc in docs for key in doc}
|
||
|
||
def print_status_code_if_res_not_ok(
    res_input: requests.models.Response,
    dict_status_code: dict[int, str] | None = None,
) -> None:
    """Print the status code and status message if the response is not OK.

    Nothing is printed when ``res_input.ok`` is true.

    Parameters
    ----------
    res_input : requests.models.Response
        Response object from an HGNC REST API request.
    dict_status_code : dict[int, str] | None
        Dictionary of status codes and status messages; if None, defaults to
        messages for 400, 404, 415, 500 and 503.

    Returns
    -------
    None
    """
    # Honour the contract in the name: an OK response is not an error.
    if res_input.ok:
        return

    if dict_status_code is None:
        dict_status_code = {
            400: "Bad request",
            404: "Not found",
            415: "Unsupported media type",
            500: "Server error",
            503: "Service unavailable",
        }

    status_code = res_input.status_code
    if status_code in dict_status_code:
        print(f"Error code: {status_code} ({dict_status_code[status_code]})")
    else:
        # Unknown code: report the number without a description.
        print(f"Error code: {status_code}")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
from requests_cache import CachedSession | ||
import os | ||
from functools import cache | ||
from requests.adapters import HTTPAdapter, Retry | ||
|
||
# this script was written by Jeff Quinn (MSKCC, Tansey lab) | ||
|
||
ETL_REQUEST_CACHE_VAR = "ETL_REQUEST_CACHE" | ||
|
||
def add_retry_to_session(
    session,
    retries=5,
    backoff_factor=0.3,
    status_forcelist=(429, 500, 501, 502, 503, 504),
):
    """Mount a retrying HTTP adapter on ``session`` and return the same session.

    Parameters
    ----------
    session
        Session object exposing ``mount(prefix, adapter)``.
    retries
        Passed to ``Retry`` as ``total``.
    backoff_factor
        Passed to ``Retry`` as ``backoff_factor``.
    status_forcelist
        Passed to ``Retry`` as ``status_forcelist``.

    Returns
    -------
    The ``session`` argument, with the adapter mounted for both the
    ``http://`` and ``https://`` prefixes.
    """
    retrying_adapter = HTTPAdapter(
        max_retries=Retry(
            total=retries,
            backoff_factor=backoff_factor,
            status_forcelist=status_forcelist,
            # NOTE(review): per urllib3 docs a false value means "retry on
            # any HTTP verb" -- confirm this is intended for non-GET calls.
            allowed_methods=False,
        )
    )
    for url_prefix in ("http://", "https://"):
        session.mount(url_prefix, retrying_adapter)
    return session
|
||
|
||
@cache
def get_cached_session():
    """Build the caching HTTP session, with retries, once per process.

    Because of ``@cache`` the session is created on the first call and the
    same object is returned afterwards, so the environment is only consulted
    on that first call.

    If the environment variable named by ``ETL_REQUEST_CACHE_VAR`` is set,
    its value is used as the location of a SQLite-backed cache that stores
    responses with status 200, 404 and 400; otherwise an in-memory cache is
    used.

    Returns
    -------
    The ``CachedSession`` after ``add_retry_to_session`` has been applied.
    """
    # Use the module-level constant instead of repeating the variable name.
    cache_location = os.environ.get(ETL_REQUEST_CACHE_VAR)

    if cache_location is not None:
        session = CachedSession(
            cache_location, allowable_codes=(200, 404, 400), backend="sqlite"
        )
    else:
        session = CachedSession(backend="memory")

    return add_retry_to_session(session)