-
Notifications
You must be signed in to change notification settings - Fork 0
/
app.py
66 lines (48 loc) · 1.87 KB
/
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
import functools
import pickle

import nltk.data
import numpy as np
import pandas as pd
from flask import Flask, render_template, request
from rank_bm25 import BM25L
from sentence_transformers import SentenceTransformer
@functools.lru_cache(maxsize=1)
def _load_rank_resources():
    """Load and cache everything ``rank`` needs that does not depend on the query.

    The CSV, the sentence-embedding model, the pickled corpus embeddings and
    the BM25 index are built once on first use and reused afterwards instead
    of being rebuilt on every search request.

    Returns:
        A tuple ``(embedder, nct_ids, corpus_embeddings, bm25)``.
    """
    articles = pd.read_csv("data/PM classified data/Final_PM.csv", encoding='latin1')
    embedder = SentenceTransformer('bert-base-nli-mean-tokens')
    sentence_splitter = nltk.data.load('tokenizers/punkt/english.pickle')

    # Lower-case every abstract and split it into sentences; each sentence
    # becomes one document of the BM25 corpus.
    corpus = []
    for abstract in articles['Abstract']:
        corpus.extend(sentence_splitter.tokenize(abstract.lower()))

    # SECURITY: pickle.load can execute arbitrary code contained in the file.
    # This path must only ever point at a trusted, locally generated artifact.
    with open("data/models/PM_Articles.emb", mode='rb') as emb_f:
        corpus_embeddings = pickle.load(emb_f)

    # rank_bm25 expects each document as a list of tokens. Handing it raw
    # strings makes it index single characters, so no word can ever match.
    bm25 = BM25L([sentence.split(" ") for sentence in corpus])

    return embedder, articles['NCT ID'], corpus_embeddings, bm25


def rank(query, topk=10, gene="BRAF (V600E)"):
    """Return clinicaltrials.gov links and scores for the best matches to *query*.

    Candidates are selected by embedding similarity to ``query``; the reported
    score of each candidate is that similarity plus twice its BM25 score for
    the ``gene`` term.

    Args:
        query: Free-text search string to embed.
        topk: Maximum number of results to return.
        gene: Space-separated term scored with BM25 against the corpus.

    Returns:
        A tuple ``(results, score_list)`` of equal-length lists: result URLs
        and their combined scores, ordered by descending embedding similarity.
    """
    embedder, nct_ids, corpus_embeddings, bm25 = _load_rank_resources()

    # The corpus is lower-cased, so the gene term must be lower-cased as well
    # or its tokens can never match.
    bm25_scores = bm25.get_scores(gene.lower().split(" ")) * 2

    query_embedding = embedder.encode(query)
    # Dot product divided by the corpus-vector norm only. The query norm is a
    # common factor, so the ordering equals that of true cosine similarity.
    score_corpus = (
        np.sum(query_embedding * corpus_embeddings, axis=1)
        / np.linalg.norm(corpus_embeddings, axis=1)
    )

    topk_idx = np.argsort(score_corpus)[::-1][:topk]

    # NOTE(review): NCT IDs are per CSV row while BM25 documents are per
    # sentence. Whether rows of corpus_embeddings line up with both cannot be
    # verified from this file - confirm how PM_Articles.emb was generated.
    results = [
        'https://clinicaltrials.gov/ct2/show/' + nct_ids[idx]
        + '?term=' + nct_ids[idx] + '&draw=2&rank=1 '
        for idx in topk_idx
    ]
    score_list = [score_corpus[idx] + bm25_scores[idx] for idx in topk_idx]
    return results, score_list
# Flask application instance that the @app.route decorators in this file
# register their view functions on.
app = Flask(__name__)
@app.route('/')
def home():
    """Serve the landing page by rendering the ``search.html`` template."""
    landing_page = render_template('search.html')
    return landing_page
@app.route('/search/results', methods=['GET', 'POST'])
def search_request():
    """Run a search for the submitted term and render the results page.

    The term is read from the ``input`` field of either the query string or
    the posted form, so both HTTP methods accepted by this route work. When
    the field is missing or blank the search page is rendered again instead
    of failing with a 400 error.

    Returns:
        The rendered ``results.html`` page with ``res`` (result links) and
        ``score`` (their scores), or the rendered ``search.html`` page when
        no search term was supplied.
    """
    # request.values merges URL query parameters and form data; request.form
    # alone is empty on GET and raises on the missing key.
    search_term = request.values.get("input", "")
    if not search_term.strip():
        return render_template('search.html')

    articlelink, score = rank(search_term)
    return render_template('results.html', res=articlelink, score=score)
# Start Flask's built-in server only when this file is executed directly.
# NOTE(review): debug=True enables Flask's debug mode - confirm this entry
# point is never used for a production deployment.
if __name__ == "__main__":
    app.run(debug=True)