# Module: missense_kinase_toolkit/src/hgnc.py
#
# Small client for the HGNC REST API (https://rest.genenames.org). This is the
# state of the module after both commits of the patch series ("Added code to
# hgnc.py" and "Created requests_wrapper.py"), re-flowed into regular source.
from __future__ import annotations

from typing import TYPE_CHECKING

if TYPE_CHECKING:
    # Only referenced in annotations, which are never evaluated at runtime
    # because of the __future__ import above.
    import requests

_HGNC_REST_URL = "https://rest.genenames.org"

# Messages printed by print_status_code_if_res_not_ok when the caller does not
# supply its own mapping.
_DEFAULT_STATUS_MESSAGES = {
    400: "Bad request",
    404: "Not found",
    415: "Unsupported media type",
    500: "Server error",
    503: "Service unavailable",
}


def _get_from_hgnc(url: str) -> requests.models.Response:
    """Send a GET request for ``url`` via the shared cached session, asking for JSON."""
    # Imported here rather than at module level so the response-parsing helpers
    # below stay importable (e.g. in unit tests) without the HTTP/caching
    # packages that requests_wrapper itself imports.
    import requests_wrapper

    return requests_wrapper.get_cached_session().get(
        url, headers={"Accept": "application/json"}
    )


def _docs_from_response(res_input: requests.models.Response) -> list[dict]:
    """Return the ``response.docs`` list from the JSON body of an HGNC response."""
    return res_input.json()["response"]["docs"]


def _values_from_docs(docs: list[dict], key: str) -> list:
    """Collect ``doc[key]`` from every document that actually has ``key``."""
    return [doc[key] for doc in docs if key in doc]


def _keys_from_docs(docs: list[dict]) -> set[str]:
    """Return the union of the keys of all documents (empty set for no documents)."""
    # set().union(...) rather than set.union(...): the unbound form raises
    # TypeError when there are no documents to unpack.
    return set().union(*(doc.keys() for doc in docs))


def maybe_get_symbol_from_hgnc_search(
    input_symbol_or_id: str,
    input_is_hgnc_symbol: bool = True,
) -> list[str] | None:
    """Get gene name from HGNC REST API using either a gene symbol or an Ensembl gene ID

    Parameters
    ----------
    input_symbol_or_id : str
        Gene symbol or Ensembl gene ID
    input_is_hgnc_symbol : bool
        If True, input_symbol_or_id is a gene symbol, otherwise it is an Ensembl gene ID

    Returns
    -------
    list[str] | None
        List of gene names that match input_symbol_or_id; empty list if no match and None if request fails
        (the status code is printed in that case)
    """
    search_field = "symbol" if input_is_hgnc_symbol else "ensembl_gene_id"
    res = _get_from_hgnc(f"{_HGNC_REST_URL}/search/{search_field}:{input_symbol_or_id}")

    if not res.ok:
        print_status_code_if_res_not_ok(res)
        return None

    return extract_list_from_hgnc_response_docs(res, "symbol")


def maybe_get_info_from_hgnc_fetch(
    hgnc_gene_symbol: str,
    list_to_extract: list[str] | None = None,
) -> dict[str, list | None]:
    """Get gene information for a given HGNC gene name from gene symbol report using HGNC REST API

    Parameters
    ----------
    hgnc_gene_symbol : str
        HGNC gene symbol
    list_to_extract : list[str] | None
        List of fields to extract from the response; if None, defaults to ["locus_type"]

    Returns
    -------
    dict[str, list | None]
        Dictionary keyed by the requested fields. Each value is the list of values found for that
        field; an empty list if no record matches hgnc_gene_symbol; None if the request fails
        (the status code is printed) or the field is not present in any returned record
    """
    if list_to_extract is None:
        list_to_extract = ["locus_type"]

    res = _get_from_hgnc(f"{_HGNC_REST_URL}/fetch/symbol/{hgnc_gene_symbol}")

    if not res.ok:
        print_status_code_if_res_not_ok(res)
        return dict.fromkeys(list_to_extract)

    # Parse the body once and work on the documents directly.
    docs = _docs_from_response(res)
    if not docs:
        # No record for this symbol: report "no match" instead of crashing.
        return {field: [] for field in list_to_extract}

    available_keys = _keys_from_docs(docs)
    return {
        field: _values_from_docs(docs, field) if field in available_keys else None
        for field in list_to_extract
    }


def extract_list_from_hgnc_response_docs(
    res_input: requests.models.Response,
    str_to_extract: str,
) -> list[str]:
    """Extract a list of values from the response documents of an HGNC REST API request

    Parameters
    ----------
    res_input : requests.models.Response
        Response object from an HGNC REST API request
    str_to_extract : str
        Key to extract from the response documents

    Returns
    -------
    list[str]
        List of values extracted from the response documents; documents that do not contain
        str_to_extract are skipped, so the list is empty if there are no documents
    """
    return _values_from_docs(_docs_from_response(res_input), str_to_extract)


def generate_key_set_hgnc_response_docs(
    res_input: requests.models.Response,
) -> set[str]:
    """Generate a set of keys present in the response documents of an HGNC REST API request

    Parameters
    ----------
    res_input : requests.models.Response
        Response object from an HGNC REST API request

    Returns
    -------
    set[str]
        Set of keys present in the response documents; empty set if there are no documents
    """
    return _keys_from_docs(_docs_from_response(res_input))


def print_status_code_if_res_not_ok(
    res_input: requests.models.Response,
    dict_status_code: dict[int, str] | None = None,
) -> None:
    """Print the status code and, when known, its status message

    The response is not inspected for success here; callers are expected to
    invoke this only after finding that the response is not OK.

    Parameters
    ----------
    res_input : requests.models.Response
        Response object from an HGNC REST API request
    dict_status_code : dict[int, str] | None
        Dictionary of status codes and status messages; if None, defaults to a standard set of status codes

    Returns
    -------
    None
    """
    if dict_status_code is None:
        dict_status_code = _DEFAULT_STATUS_MESSAGES

    status_code = res_input.status_code
    if status_code in dict_status_code:
        print(f"Error code: {status_code} ({dict_status_code[status_code]})")
    else:
        print(f"Error code: {status_code}")
# Module: missense_kinase_toolkit/src/requests_wrapper.py
#
# Builds the shared HTTP session: responses are cached by requests_cache and
# failed requests are retried. Re-flowed into regular source from the patch
# that creates this file.
import os
from functools import cache

from requests.adapters import HTTPAdapter, Retry
from requests_cache import CachedSession

# this script was written by Jeff Quinn (MSKCC, Tansey lab)

# Name of the environment variable that, when set, holds the location passed
# to the sqlite-backed cache.
ETL_REQUEST_CACHE_VAR = "ETL_REQUEST_CACHE"


def add_retry_to_session(
    session,
    retries=5,
    backoff_factor=0.3,
    status_forcelist=(429, 500, 501, 502, 503, 504),
):
    """Mount a retrying HTTP adapter on ``session`` for http:// and https:// URLs

    Parameters
    ----------
    session
        Session object exposing ``mount`` (e.g. a requests or requests_cache session)
    retries : int
        Value passed to ``Retry`` as ``total``
    backoff_factor : float
        Value passed to ``Retry`` as ``backoff_factor``
    status_forcelist : tuple[int, ...]
        Status codes passed to ``Retry`` as ``status_forcelist``

    Returns
    -------
    The same ``session`` object, with the adapter mounted
    """
    retry = Retry(
        total=retries,
        backoff_factor=backoff_factor,
        status_forcelist=status_forcelist,
        # A false value lifts the restriction on which HTTP methods may be
        # retried (per the urllib3 Retry documentation).
        allowed_methods=False,
    )
    adapter = HTTPAdapter(max_retries=retry)
    for scheme in ("http://", "https://"):
        session.mount(scheme, adapter)
    return session


@cache
def get_cached_session():
    """Return the process-wide cached session, creating it on first call

    The result is memoized with ``functools.cache``, so every caller receives
    the same session object and the environment is only consulted once.

    Returns
    -------
    CachedSession
        A sqlite-backed session if the environment variable named by
        ETL_REQUEST_CACHE_VAR is set, otherwise an in-memory one; in both
        cases with the retry adapter from add_retry_to_session mounted
    """
    cache_location = os.environ.get(ETL_REQUEST_CACHE_VAR)

    if cache_location is None:
        # NOTE(review): unlike the sqlite branch, no allowable_codes are passed
        # here, so the library default applies - confirm this is intended.
        session = CachedSession(backend="memory")
    else:
        session = CachedSession(
            cache_location, allowable_codes=(200, 404, 400), backend="sqlite"
        )

    return add_retry_to_session(session)