Merge pull request #13 from colin-combe/sql_timing
Sql timing
sureshhewabi authored Aug 13, 2024
2 parents cdea279 + 58e8821 commit 6c9eaf5
Showing 1 changed file with 67 additions and 5 deletions.
app/routes/xiview.py (67 additions, 5 deletions)
@@ -126,7 +126,7 @@ async def get_data_object(ids, pxid):
         data["matches"] = await get_matches(cur, ids)
         # data["peptides"] = await get_peptides(cur, data["matches"], ids)
         # data["proteins"] = await get_proteins(cur, data["peptides"])
-        data["peptides"] = await get_all_peptides(cur, ids)
+        data["peptides"] = await get_peptides2(cur, ids)
         data["proteins"] = await get_all_proteins(cur, ids)
         cur.close()
     except (Exception, psycopg2.DatabaseError) as e:
@@ -224,7 +224,7 @@ async def get_results_metadata(cur, ids):
 async def get_matches(cur, ids):
     # todo - check what's going on with this rank = 1 and pass_threshold = True in mascot data, the rank = 1 condition seems to speed things up (but should be redundant)
     # todo - rename 'si' to 'm'
-    query = """WITH submodpep AS (SELECT * FROM modifiedpeptide WHERE upload_id = ANY(%s))
+    query = """WITH submodpep AS (SELECT * FROM modifiedpeptide WHERE upload_id = ANY(%s) AND link_site1 > -1)
                SELECT si.id AS id, si.pep1_id AS pi1, si.pep2_id AS pi2,
                       si.scores AS sc,
                       cast (si.upload_id as text) AS si,
@@ -241,9 +241,8 @@ async def get_matches(cur, ids):
                INNER JOIN submodpep mp2 ON si.pep2_id = mp2.id AND si.upload_id = mp2.upload_id
                WHERE si.upload_id = ANY(%s)
                AND si.pass_threshold = TRUE
-               AND si.rank = 1
-               AND mp1.link_site1 > 0
-               AND mp2.link_site1 > 0;"""
+               AND mp1.link_site1 > -1
+               AND mp2.link_site1 > -1;"""
     cur.execute(query, [ids, ids])
     return cur.fetchall()

@@ -435,6 +434,69 @@ async def get_all_peptides(cur, ids):
     cur.execute(query, [ids, ids])
     return cur.fetchall()
 
+@log_execution_time_async
+@xiview_data_router.get('/get_xiview_peptides2', tags=["xiVIEW"])
+async def get_xiview_peptides2(project, file=None):
+    """
+    Get all the peptides.
+    URLs have the following structure:
+    https://www.ebi.ac.uk/pride/archive/xiview/get_xiview_peptides2?project=PXD020453&file=Cullin_SDA_1pcFDR.mzid
+    Users may provide only a project, in which case an aggregated view across all of its files is returned:
+    https://www.ebi.ac.uk/pride/archive/xiview/get_xiview_peptides2?project=PXD020453
+    :return: json of the peptides
+    """
+    logger.info(f"get_xiview_peptides2 for {project}, file: {file}")
+    most_recent_upload_ids = await get_most_recent_upload_ids(project, file)
+
+    conn = None
+    data = {}
+    error = None
+
+    try:
+        conn = await get_db_connection()
+        cur = conn.cursor(cursor_factory=RealDictCursor)
+        data = await get_peptides2(cur, most_recent_upload_ids)
+        cur.close()
+    except (Exception, psycopg2.DatabaseError) as e:
+        logger.error(e)
+        return {"error": "Database error"}, 500
+    finally:
+        if conn is not None:
+            conn.close()
+
+    start_time = time.time()
+    json_bytes = orjson.dumps(data)
+    logger.info(f'peptides json dump time: {time.time() - start_time}')
+    log_json_size(json_bytes, "peptides")  # slows things down a little
+    return Response(json_bytes, media_type='application/json')
+
+
+@log_execution_time_async
+async def get_peptides2(cur, ids):
+    query = """with submatch as (select pep1_id, pep2_id, upload_id from match where upload_id = ANY(%s) and pass_threshold = true),
+               pep_ids as (select pep1_id, upload_id from submatch union select pep2_id, upload_id from submatch),
+               subpp AS (select * from peptideevidence WHERE upload_id = ANY(%s))
+               select mp.id,
+                      cast(mp.upload_id as text) AS u_id,
+                      mp.base_sequence AS seq,
+                      array_agg(pp.dbsequence_id) AS prt,
+                      array_agg(pp.pep_start) AS pos,
+                      array_agg(pp.is_decoy) AS dec,
+                      mp.link_site1 AS ls1,
+                      mp.link_site2 AS ls2,
+                      mp.mod_accessions as m_as,
+                      mp.mod_positions as m_ps,
+                      mp.mod_monoiso_mass_deltas as m_ms,
+                      mp.crosslinker_modmass as cl_m from pep_ids pi
+               inner join modifiedpeptide mp on mp.upload_id = pi.upload_id and pi.pep1_id = mp.id
+               JOIN subpp AS pp
+                 ON mp.id = pp.peptide_id AND mp.upload_id = pp.upload_id
+               GROUP BY mp.id, mp.upload_id, mp.base_sequence;"""
+
+    cur.execute(query, [ids, ids])
+    return cur.fetchall()
+
+
 @log_execution_time_async
 @xiview_data_router.get('/get_xiview_proteins', tags=["xiVIEW"])
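
The docstring on the new get_xiview_peptides2 route above describes the URL structure for calling the endpoint. Below is a minimal client-side sketch of such a call, assuming the route is deployed under the same base path as the existing get_xiview_peptides endpoint and that the third-party requests package is available; neither assumption comes from this diff.

# Minimal sketch: fetch peptides for a project from the new endpoint.
# Assumed (not part of the diff): the deployment base URL below and the 'requests' dependency.
import requests

BASE_URL = "https://www.ebi.ac.uk/pride/archive/xiview"  # assumed deployment base

params = {
    "project": "PXD020453",            # PRIDE project accession from the docstring example
    "file": "Cullin_SDA_1pcFDR.mzid",  # optional; omit it to aggregate across the whole project
}

response = requests.get(f"{BASE_URL}/get_xiview_peptides2", params=params, timeout=120)
response.raise_for_status()

# The server builds the body with RealDictCursor and orjson, so the response is a
# JSON array of objects keyed by the aliased columns: id, u_id, seq, prt, pos,
# dec, ls1, ls2, m_as, m_ps, m_ms, cl_m.
peptides = response.json()
print(f"{len(peptides)} peptides returned")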
