-
Notifications
You must be signed in to change notification settings - Fork 0
/
sparql_queries_NTDs_RDF_examples.txt
212 lines (178 loc) · 6.65 KB
/
sparql_queries_NTDs_RDF_examples.txt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
# Which genes are associated with both Leishmaniasis (mesh:D007896) and
# Chagas disease (mesh:D014355)? Each gene is returned with its label and
# every cross-reference whose string form mentions "ensembl".
PREFIX rdfs:    <http://www.w3.org/2000/01/rdf-schema#>
PREFIX biolink: <https://w3id.org/biolink/vocab/>
PREFIX mesh:    <https://bioregistry.io/mesh:>

SELECT DISTINCT ?gene ?genename ?ensemblid
WHERE {
    ?gene a biolink:Gene .
    # Both condition triples must match the same gene.
    ?gene biolink:gene_associated_with_condition mesh:D007896 .
    ?gene biolink:gene_associated_with_condition mesh:D014355 .
    ?gene rdfs:label ?genename .
    ?gene biolink:xref ?ensemblid .
    # Keep only cross-references that mention Ensembl (case-insensitive match).
    FILTER (regex(STR(?ensemblid), "ensembl", "i"))
}
# List 20 SNVs of genes associated with African Trypanosomiasis, including
# their phenotypes, alleles and chromosome locations.
#
# mesh:D014353 is the identifier the other queries in this file use for
# African Trypanosomiasis; mesh:D014355 (used here previously) is the one they
# use for Chagas disease, so it did not answer the question stated above.
#
# There is no ORDER BY, so which 20 rows come back is left to the endpoint.
PREFIX biolink: <https://w3id.org/biolink/vocab/>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX mesh: <https://bioregistry.io/mesh:>
SELECT DISTINCT ?gene ?genename ?SNV ?SNVphenotype ?SNVallele ?chr
WHERE {
    # Genes linked to the disease, with their labels.
    ?gene a biolink:Gene ;
          biolink:gene_associated_with_condition mesh:D014353 ;
          rdfs:label ?genename .
    # SNVs that are sequence variants of those genes; an SNV must have a
    # phenotype, an attribute and a sequence location to be returned.
    ?SNV a biolink:Snv ;
         biolink:is_sequence_variant_of ?gene ;
         biolink:has_phenotype ?SNVphenotype ;
         biolink:has_attribute ?SNVallele ;
         biolink:has_sequence_location ?chr .
}
LIMIT 20
# List 10 drug-SNV associations, including the SNV alleles and phenotypes.
#
# The projected variable was previously misspelled "?SNVpehnotype"; it is now
# "?SNVphenotype", so the result column carries the corrected name.
# There is no ORDER BY, so which 10 rows come back is left to the endpoint.
PREFIX biolink: <https://w3id.org/biolink/vocab/>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
SELECT DISTINCT ?drug ?drugname ?SNV ?SNVallele ?SNVphenotype
WHERE {
    ?drug a biolink:Drug ;
          rdfs:label ?drugname .
    # Only SNVs that have a drug association, an attribute and a phenotype
    # are returned.
    ?SNV a biolink:Snv ;
         biolink:associated_with ?drug ;
         biolink:has_attribute ?SNVallele ;
         biolink:has_phenotype ?SNVphenotype .
}
LIMIT 10
# Which drugs treat both Chagas disease (mesh:D014355) and African
# Trypanosomiasis (mesh:D014353)? Each drug is returned with its label and
# every cross-reference whose string form mentions "pubchem".
PREFIX rdfs:    <http://www.w3.org/2000/01/rdf-schema#>
PREFIX biolink: <https://w3id.org/biolink/vocab/>
PREFIX mesh:    <https://bioregistry.io/mesh:>

SELECT DISTINCT ?drug ?drugname ?pubchem_id
WHERE {
    ?drug a biolink:Drug .
    # Both treatment triples must match the same drug.
    ?drug biolink:treats mesh:D014353 .
    ?drug biolink:treats mesh:D014355 .
    ?drug rdfs:label ?drugname .
    ?drug biolink:xref ?pubchem_id .
    # Keep only cross-references that mention PubChem (case-insensitive match).
    FILTER (regex(STR(?pubchem_id), "pubchem", "i"))
}
# Which drugs treat at least two of the three NTDs (include ChEMBL
# cross-references)?
#
# The type, label, xref and ChEMBL filter are shared by every alternative, so
# they are stated once; the UNION only enumerates the three possible disease
# pairs. SELECT DISTINCT collapses drugs that match more than one pair.
PREFIX rdfs:    <http://www.w3.org/2000/01/rdf-schema#>
PREFIX biolink: <https://w3id.org/biolink/vocab/>
PREFIX mesh:    <https://bioregistry.io/mesh:>

SELECT DISTINCT ?drug ?drugname ?chembl_id
WHERE {
    ?drug a biolink:Drug ;
          rdfs:label ?drugname ;
          biolink:xref ?chembl_id .
    # Keep only cross-references that mention ChEMBL (case-insensitive match).
    FILTER (regex(STR(?chembl_id), "chembl", "i"))

    {
        # Pair 1: mesh:D014355 and mesh:D014353
        ?drug biolink:treats mesh:D014355 , mesh:D014353 .
    }
    UNION
    {
        # Pair 2: mesh:D014353 and mesh:D007896
        ?drug biolink:treats mesh:D014353 , mesh:D007896 .
    }
    UNION
    {
        # Pair 3: mesh:D014355 and mesh:D007896
        ?drug biolink:treats mesh:D014355 , mesh:D007896 .
    }
}
# Which drug against Chagas disease (mesh:D014355) has the highest number of
# protein targets?
#
# ?drugname is projected next to an aggregate, so it has to be a GROUP BY key:
# SPARQL 1.1 only allows grouped or aggregated variables in the SELECT clause,
# and strict engines reject the query otherwise.
# LIMIT 1 returns a single row; if several drugs tie for the top count, which
# one is returned is left to the endpoint.
PREFIX biolink: <https://w3id.org/biolink/vocab/>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX mesh: <https://bioregistry.io/mesh:>
SELECT ?drug ?drugname (COUNT(DISTINCT ?protein) AS ?n_proteintargets)
WHERE {
    ?drug a biolink:Drug ;
          biolink:treats mesh:D014355 ;
          # Each distinct object of this predicate counts as one target.
          biolink:physically_interacts_with ?protein ;
          rdfs:label ?drugname .
}
GROUP BY ?drug ?drugname
ORDER BY DESC(?n_proteintargets)
LIMIT 1
# What are the top 20 biological processes with the highest number of genes
# associated with Leishmaniasis (mesh:D007896)?
#
# ?bio_processname is projected next to an aggregate, so it has to be a
# GROUP BY key: SPARQL 1.1 only allows grouped or aggregated variables in the
# SELECT clause, and strict engines reject the query otherwise.
PREFIX biolink: <https://w3id.org/biolink/vocab/>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX mesh: <https://bioregistry.io/mesh:>
SELECT ?bio_process ?bio_processname (COUNT(DISTINCT ?gene) AS ?count)
WHERE {
    # Disease-associated genes and the processes they are linked to.
    ?gene a biolink:Gene ;
          biolink:gene_associated_with_condition mesh:D007896 ;
          biolink:acts_upstream_of ?bio_process .
    # Restrict the linked nodes to biological processes and fetch their labels.
    ?bio_process a biolink:BiologicalProcess ;
                 rdfs:label ?bio_processname .
}
GROUP BY ?bio_process ?bio_processname
ORDER BY DESC(?count)
LIMIT 20
# What are the top 20 molecular functions with the highest number of genes
# associated with Chagas disease (mesh:D014355)?
#
# ?mol_functname is projected next to an aggregate, so it has to be a
# GROUP BY key: SPARQL 1.1 only allows grouped or aggregated variables in the
# SELECT clause, and strict engines reject the query otherwise.
PREFIX biolink: <https://w3id.org/biolink/vocab/>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX mesh: <https://bioregistry.io/mesh:>
SELECT ?mol_funct ?mol_functname (COUNT(DISTINCT ?gene) AS ?count)
WHERE {
    # Disease-associated genes and whatever they participate in.
    ?gene a biolink:Gene ;
          biolink:gene_associated_with_condition mesh:D014355 ;
          biolink:participates_in ?mol_funct .
    # Restrict the linked nodes to molecular activities and fetch their labels.
    ?mol_funct a biolink:MolecularActivity ;
               rdfs:label ?mol_functname .
}
GROUP BY ?mol_funct ?mol_functname
ORDER BY DESC(?count)
LIMIT 20
# What are the top 10 cellular components with the highest number of genes
# associated with African Trypanosomiasis (mesh:D014353)?
#
# ?cell_compname is projected next to an aggregate, so it has to be a
# GROUP BY key: SPARQL 1.1 only allows grouped or aggregated variables in the
# SELECT clause, and strict engines reject the query otherwise.
PREFIX biolink: <https://w3id.org/biolink/vocab/>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX mesh: <https://bioregistry.io/mesh:>
SELECT ?cell_comp ?cell_compname (COUNT(DISTINCT ?gene) AS ?count)
WHERE {
    # Disease-associated genes and the components they are active in.
    ?gene a biolink:Gene ;
          biolink:gene_associated_with_condition mesh:D014353 ;
          biolink:active_in ?cell_comp .
    # Restrict the linked nodes to cellular components and fetch their labels.
    ?cell_comp a biolink:CellularComponent ;
               rdfs:label ?cell_compname .
}
GROUP BY ?cell_comp ?cell_compname
ORDER BY DESC(?count)
LIMIT 10
# What are the top 30 pathways associated with the highest number of genes
# involved in Leishmaniasis (mesh:D007896)? Include each pathway's data source.
#
# ?path_name and ?datasource are projected next to an aggregate, so they have
# to be GROUP BY keys: SPARQL 1.1 only allows grouped or aggregated variables
# in the SELECT clause, and strict engines reject the query otherwise.
# A pathway with more than one label or data source yields one row per
# combination.
PREFIX biolink: <https://w3id.org/biolink/vocab/>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX mesh: <https://bioregistry.io/mesh:>
SELECT ?path ?path_name ?datasource (COUNT(DISTINCT ?gene) AS ?count)
WHERE {
    # Disease-associated genes and whatever they participate in.
    ?gene a biolink:Gene ;
          biolink:gene_associated_with_condition mesh:D007896 ;
          biolink:participates_in ?path .
    # Restrict the linked nodes to pathways; fetch label and provenance.
    ?path a biolink:Pathway ;
          rdfs:label ?path_name ;
          biolink:provided_by ?datasource .
}
GROUP BY ?path ?path_name ?datasource
ORDER BY DESC(?count)
LIMIT 30
# What are the top 20 pathways associated with the highest number of genes
# involved in all three NTDs (mesh:D014353, mesh:D014355 and mesh:D007896)?
# Include each pathway's data source.
#
# ?path_name and ?datasource are projected next to an aggregate, so they have
# to be GROUP BY keys: SPARQL 1.1 only allows grouped or aggregated variables
# in the SELECT clause, and strict engines reject the query otherwise.
# A pathway with more than one label or data source yields one row per
# combination.
PREFIX biolink: <https://w3id.org/biolink/vocab/>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX mesh: <https://bioregistry.io/mesh:>
SELECT ?path ?path_name ?datasource (COUNT(DISTINCT ?gene) AS ?count)
WHERE {
    # A gene is counted only if it is associated with every one of the three
    # conditions listed here.
    ?gene a biolink:Gene ;
          biolink:gene_associated_with_condition mesh:D014353 ,
                                                 mesh:D014355 ,
                                                 mesh:D007896 ;
          biolink:participates_in ?path .
    # Restrict the linked nodes to pathways; fetch label and provenance.
    ?path a biolink:Pathway ;
          rdfs:label ?path_name ;
          biolink:provided_by ?datasource .
}
GROUP BY ?path ?path_name ?datasource
ORDER BY DESC(?count)
LIMIT 20