@article{wishart_2007,
title = {{HMDB}: the human metabolome database.},
author = {Wishart, David S and Tzur, Dan and Knox, Craig and Eisner, Roman and Guo, An Chi and Young, Nelson and Cheng, Dean and Jewell, Kevin and Arndt, David and Sawhney, Summit and Fung, Chris and Nikolai, Lisa and Lewis, Mike and Coutouly, Marie-Aude and Forsythe, Ian and Tang, Peter and Shrivastava, Savita and Jeroncic, Kevin and Stothard, Paul and Amegbey, Godwin and Block, David and Hau, David D and Wagner, James and Miniaci, Jessica and Clements, Melisa and Gebremedhin, Mulu and Guo, Natalie and Zhang, Ying and Duggan, Gavin E and Macinnis, Glen D and Weljie, Alim M and Dowlatabadi, Reza and Bamforth, Fiona and Clive, Derrick and Greiner, Russ and Li, Liang and Marrie, Tom and Sykes, Brian D and Vogel, Hans J and Querengesser, Lori},
pages = {D521-6},
url = {http://dx.doi.org/10.1093/nar/gkl923},
year = {2007},
month = {jan},
urldate = {2019-07-23},
journal = {Nucleic Acids Research},
volume = {35},
number = {Database issue},
issn = {1362-4962},
doi = {10.1093/nar/gkl923},
pmid = {17202168},
pmcid = {PMC1899095},
f1000-projects = {shared citations},
abstract = {The Human Metabolome Database ({HMDB}) is currently the most complete and comprehensive curated collection of human metabolite and human metabolism data in the world. It contains records for more than 2180 endogenous metabolites with information gathered from thousands of books, journal articles and electronic databases. In addition to its comprehensive literature-derived data, the {HMDB} also contains an extensive collection of experimental metabolite concentration data compiled from hundreds of mass spectra ({MS}) and Nuclear Magnetic resonance ({NMR}) metabolomic analyses performed on urine, blood and cerebrospinal fluid samples. This is further supplemented with thousands of {NMR} and {MS} spectra collected on purified, reference metabolites. Each metabolite entry in the {HMDB} contains an average of 90 separate data fields including a comprehensive compound description, names and synonyms, structural information, physico-chemical data, reference {NMR} and {MS} spectra, biofluid concentrations, disease associations, pathway information, enzyme data, gene sequence data, {SNP} and mutation data as well as extensive links to images, references and other public databases. Extensive searching, relational querying and data browsing tools are also provided. The {HMDB} is designed to address the broad needs of biochemists, clinical chemists, physicians, medical geneticists, nutritionists and members of the metabolomics community. The {HMDB} is available at: www.hmdb.ca.}
}
@article{goecks_2010,
title = {Galaxy: a comprehensive approach for supporting accessible, reproducible, and transparent computational research in the life sciences.},
author = {Goecks, Jeremy and Nekrutenko, Anton and Taylor, James and Galaxy Team},
pages = {R86},
url = {http://dx.doi.org/10.1186/gb-2010-11-8-r86},
year = {2010},
month = {aug},
day = {25},
urldate = {2018-01-13},
journal = {Genome Biology},
volume = {11},
number = {8},
doi = {10.1186/gb-2010-11-8-r86},
pmid = {20738864},
pmcid = {PMC2945788},
f1000-projects = {shared citations},
abstract = {Increased reliance on computational approaches in the life sciences has revealed grave concerns about how accessible and reproducible computation-reliant results truly are. Galaxy http://usegalaxy.org, an open web-based platform for genomic research, addresses these problems. Galaxy automatically tracks and manages data provenance and provides support for capturing the context and intent of computational methods. Galaxy Pages are interactive, web-based documents that provide users with a medium to communicate a complete computational analysis.}
}
@article{gentleman_2004,
title = {Bioconductor: open software development for computational biology and bioinformatics.},
author = {Gentleman, Robert C and Carey, Vincent J and Bates, Douglas M and Bolstad, Ben and Dettling, Marcel and Dudoit, Sandrine and Ellis, Byron and Gautier, Laurent and Ge, Yongchao and Gentry, Jeff and Hornik, Kurt and Hothorn, Torsten and Huber, Wolfgang and Iacus, Stefano and Irizarry, Rafael and Leisch, Friedrich and Li, Cheng and Maechler, Martin and Rossini, Anthony J and Sawitzki, Gunther and Smith, Colin and Smyth, Gordon and Tierney, Luke and Yang, Jean Y H and Zhang, Jianhua},
pages = {R80},
url = {http://dx.doi.org/10.1186/gb-2004-5-10-r80},
year = {2004},
month = {sep},
day = {15},
urldate = {2017-02-20},
journal = {Genome Biology},
volume = {5},
number = {10},
doi = {10.1186/gb-2004-5-10-r80},
pmid = {15461798},
pmcid = {PMC545600},
f1000-projects = {shared citations},
abstract = {The Bioconductor project is an initiative for the collaborative creation of extensible software for computational biology and bioinformatics. The goals of the project include: fostering collaborative development and widespread use of innovative software, reducing barriers to entry into interdisciplinary scientific research, and promoting the achievement of remote reproducibility of research results. We describe details of our aims and methods, identify current challenges, compare Bioconductor to other open bioinformatics projects, and provide working examples.}
}
@article{sansone_2012,
title = {Toward interoperable bioscience data.},
author = {Sansone, Susanna-Assunta and Rocca-Serra, Philippe and Field, Dawn and Maguire, Eamonn and Taylor, Chris and Hofmann, Oliver and Fang, Hong and Neumann, Steffen and Tong, Weida and Amaral-Zettler, Linda and Begley, Kimberly and Booth, Tim and Bougueleret, Lydie and Burns, Gully and Chapman, Brad and Clark, Tim and Coleman, Lee-Ann and Copeland, Jay and Das, Sudeshna and de Daruvar, Antoine and de Matos, Paula and Dix, Ian and Edmunds, Scott and Evelo, Chris T and Forster, Mark J and Gaudet, Pascale and Gilbert, Jack and Goble, Carole and Griffin, Julian L and Jacob, Daniel and Kleinjans, Jos and Harland, Lee and Haug, Kenneth and Hermjakob, Henning and Ho Sui, Shannan J and Laederach, Alain and Liang, Shaoguang and Marshall, Stephen and {McGrath}, Annette and Merrill, Emily and Reilly, Dorothy and Roux, Magali and Shamu, Caroline E and Shang, Catherine A and Steinbeck, Christoph and Trefethen, Anne and Williams-Jones, Bryn and Wolstencroft, Katherine and Xenarios, Ioannis and Hide, Winston},
pages = {121-126},
url = {http://dx.doi.org/10.1038/ng.1054},
year = {2012},
month = {feb},
day = {1},
urldate = {2018-03-14},
journal = {Nature Genetics},
volume = {44},
number = {2},
doi = {10.1038/ng.1054},
pmid = {22281772},
pmcid = {PMC3428019},
f1000-projects = {shared citations},
abstract = {To make full use of research data, the bioscience community needs to adopt technologies and reward mechanisms that support interoperability and promote the growth of an open 'data commoning' culture. Here we describe the prerequisites for data commoning and present an established and growing ecosystem of solutions using the shared 'Investigation-Study-Assay' framework to support that vision.}
}
@article{oboyle_2011,
title = {Open Babel: An open chemical toolbox.},
author = {O'Boyle, Noel M and Banck, Michael and James, Craig A and Morley, Chris and Vandermeersch, Tim and Hutchison, Geoffrey R},
pages = {33},
url = {http://dx.doi.org/10.1186/1758-2946-3-33},
year = {2011},
month = {oct},
day = {7},
urldate = {2018-01-29},
journal = {Journal of Cheminformatics},
volume = {3},
doi = {10.1186/1758-2946-3-33},
pmid = {21982300},
pmcid = {PMC3198950},
f1000-projects = {shared citations},
abstract = {{BACKGROUND}: A frequent problem in computational modeling is the interconversion of chemical structures between different formats. While standard interchange formats exist (for example, Chemical Markup Language) and de facto standards have arisen (for example, {SMILES} format), the need to interconvert formats is a continuing problem due to the multitude of different application areas for chemistry data, differences in the data stored by different formats ({0D} versus {3D}, for example), and competition between software along with a lack of vendor-neutral formats. {RESULTS}: We discuss, for the first time, Open Babel, an open-source chemical toolbox that speaks the many languages of chemical data. Open Babel version 2.3 interconverts over 110 formats. The need to represent such a wide variety of chemical and molecular data requires a library that implements a wide range of cheminformatics algorithms, from partial charge assignment and aromaticity detection, to bond order perception and canonicalization. We detail the implementation of Open Babel, describe key advances in the 2.3 release, and outline a variety of uses both in terms of software products and scientific research, including applications far beyond simple format interconversion. {CONCLUSIONS}: Open Babel presents a solution to the proliferation of multiple chemical file formats. In addition, it provides a variety of useful utilities from conformer searching and {2D} depiction, to filtering, batch conversion, and substructure and similarity searching. For developers, it can be used as a programming library to handle chemical data in areas such as organic chemistry, drug design, materials science, and computational chemistry. It is freely available under an open-source license from http://openbabel.org.}
}
@article{huber_2015,
title = {Orchestrating high-throughput genomic analysis with Bioconductor.},
author = {Huber, Wolfgang and Carey, Vincent J and Gentleman, Robert and Anders, Simon and Carlson, Marc and Carvalho, Benilton S and Bravo, Hector Corrada and Davis, Sean and Gatto, Laurent and Girke, Thomas and Gottardo, Raphael and Hahne, Florian and Hansen, Kasper D and Irizarry, Rafael A and Lawrence, Michael and Love, Michael I and {MacDonald}, James and Obenchain, Valerie and Oleś, Andrzej K and Pagès, Hervé and Reyes, Alejandro and Shannon, Paul and Smyth, Gordon K and Tenenbaum, Dan and Waldron, Levi and Morgan, Martin},
pages = {115-121},
url = {http://dx.doi.org/10.1038/nmeth.3252},
year = {2015},
month = {feb},
urldate = {2017-02-20},
journal = {Nature Methods},
volume = {12},
number = {2},
doi = {10.1038/nmeth.3252},
pmid = {25633503},
pmcid = {PMC4509590},
f1000-projects = {shared citations},
abstract = {Bioconductor is an open-source, open-development software project for the analysis and comprehension of high-throughput data in genomics and molecular biology. The project aims to enable interdisciplinary research, collaboration and rapid development of scientific software. Based on the statistical programming language R, Bioconductor comprises 934 interoperable packages contributed by a large, diverse community of scientists. Packages cover a range of bioinformatic and statistical applications. They undergo formal initial review and continuous automated testing. We present an overview for prospective users and contributors.}
}
@article{fukushima_2009,
title = {Integrated omics approaches in plant systems biology.},
author = {Fukushima, Atsushi and Kusano, Miyako and Redestig, Henning and Arita, Masanori and Saito, Kazuki},
pages = {532-538},
url = {http://dx.doi.org/10.1016/j.cbpa.2009.09.022},
year = {2009},
month = {dec},
urldate = {2019-07-01},
journal = {Current Opinion in Chemical Biology},
volume = {13},
number = {5-6},
doi = {10.1016/j.cbpa.2009.09.022},
pmid = {19837627},
f1000-projects = {shared citations},
abstract = {Understanding cellular metabolic systems is vital not only for determining the function of enzymatic genes, but also for elucidating the coordination among various metabolic pathways. In this context, high-throughput experiments can provide us with essential, albeit only partial information. Integration of metabolite profiling with other multiple 'omics' data (e.g. transcript profiling), is required to reconstruct complex networks that characterize the phenotypes in the cell. Here, we review recent approaches to integrate multiple omics data in higher plants. We especially focus on metabolomics data management, normalization, meta-omics data analysis, and an integrative approach with other omics data. Further prospects for using metabolomics and the key points to be addressed are discussed. This could be a valuable strategy for a systems-level understanding of plant systems.}
}
@article{yu_2014,
title = {Improving peak detection in high-resolution {LC}/{MS} metabolomics data using preexisting knowledge and machine learning approach.},
author = {Yu, Tianwei and Jones, Dean P},
pages = {2941-2948},
url = {http://dx.doi.org/10.1093/bioinformatics/btu430},
year = {2014},
month = {oct},
day = {15},
urldate = {2018-01-15},
journal = {Bioinformatics},
volume = {30},
number = {20},
doi = {10.1093/bioinformatics/btu430},
pmid = {25005748},
pmcid = {PMC4184266},
f1000-projects = {shared citations},
abstract = {{MOTIVATION}: Peak detection is a key step in the preprocessing of untargeted metabolomics data generated from high-resolution liquid chromatography-mass spectrometry ({LC}/{MS}). The common practice is to use filters with predetermined parameters to select peaks in the {LC}/{MS} profile. This rigid approach can cause suboptimal performance when the choice of peak model and parameters do not suit the data characteristics. {RESULTS}: Here we present a method that learns directly from various data features of the extracted ion chromatograms ({EICs}) to differentiate between true peak regions from noise regions in the {LC}/{MS} profile. It utilizes the knowledge of known metabolites, as well as robust machine learning approaches. Unlike currently available methods, this new approach does not assume a parametric peak shape model and allows maximum flexibility. We demonstrate the superiority of the new approach using real data. Because matching to known metabolites entails uncertainties and cannot be considered a gold standard, we also developed a probabilistic receiver-operating characteristic ({pROC}) approach that can incorporate uncertainties. {AVAILABILITY} {AND} {IMPLEMENTATION}: The new peak detection approach is implemented as part of the {apLCMS} package available at http://web1.sph.emory.edu/{apLCMS}/ {CONTACT}: tyu8@emory.edu {SUPPLEMENTARY} {INFORMATION}: Supplementary data are available at Bioinformatics online. \copyright The Author 2014. Published by Oxford University Press. All rights reserved. For Permissions, please e-mail: journals.permissions@oup.com.}
}
@article{strimmer_2008,
title = {fdrtool: a versatile R package for estimating local and tail area-based false discovery rates.},
author = {Strimmer, Korbinian},
pages = {1461-1462},
url = {http://dx.doi.org/10.1093/bioinformatics/btn209},
year = {2008},
month = {jun},
day = {15},
urldate = {2018-01-13},
journal = {Bioinformatics},
volume = {24},
number = {12},
doi = {10.1093/bioinformatics/btn209},
pmid = {18441000},
f1000-projects = {shared citations},
abstract = {{UNLABELLED}: False discovery rate ({FDR}) methodologies are essential in the study of high-dimensional genomic and proteomic data. The R package 'fdrtool' facilitates such analyses by offering a comprehensive set of procedures for {FDR} estimation. Its distinctive features include: (i) many different types of test statistics are allowed as input data, such as P-values, z-scores, correlations and t-scores; (ii) simultaneously, both local {FDR} and tail area-based {FDR} values are estimated for all test statistics and (iii) empirical null models are fit where possible, thereby taking account of potential over- or underdispersion of the theoretical null. In addition, 'fdrtool' provides readily interpretable graphical output, and can be applied to very large scale (in the order of millions of hypotheses) multiple testing problems. Consequently, 'fdrtool' implements a flexible {FDR} estimation scheme that is unified across different test statistics and variants of {FDR}. {AVAILABILITY}: The program is freely available from the Comprehensive R Archive Network (http://cran.r-project.org/) under the terms of the {GNU} General Public License (version 3 or later). {CONTACT}: strimmer@uni-leipzig.de.}
}
@article{saeys_2007,
title = {A review of feature selection techniques in bioinformatics.},
author = {Saeys, Yvan and Inza, Iñaki and Larrañaga, Pedro},
pages = {2507-2517},
url = {http://dx.doi.org/10.1093/bioinformatics/btm344},
year = {2007},
month = {oct},
day = {1},
urldate = {2016-09-21},
journal = {Bioinformatics},
volume = {23},
number = {19},
doi = {10.1093/bioinformatics/btm344},
pmid = {17720704},
f1000-projects = {shared citations},
abstract = {Feature selection techniques have become an apparent need in many bioinformatics applications. In addition to the large pool of techniques that have already been developed in the machine learning and data mining fields, specific applications in bioinformatics have led to a wealth of newly proposed techniques. In this article, we make the interested reader aware of the possibilities of feature selection, providing a basic taxonomy of feature selection techniques, and discussing their use, variety and potential in a number of both common as well as upcoming bioinformatics applications.}
}
@article{kessner_2008,
title = {{ProteoWizard}: open source software for rapid proteomics tools development.},
author = {Kessner, Darren and Chambers, Matt and Burke, Robert and Agus, David and Mallick, Parag},
pages = {2534-2536},
url = {http://dx.doi.org/10.1093/bioinformatics/btn323},
year = {2008},
month = {nov},
day = {1},
urldate = {2019-07-17},
journal = {Bioinformatics},
volume = {24},
number = {21},
doi = {10.1093/bioinformatics/btn323},
pmid = {18606607},
pmcid = {PMC2732273},
f1000-projects = {shared citations},
abstract = {{SUMMARY}: The {ProteoWizard} software project provides a modular and extensible set of open-source, cross-platform tools and libraries. The tools perform proteomics data analyses; the libraries enable rapid tool creation by providing a robust, pluggable development framework that simplifies and unifies data file access, and performs standard proteomics and {LCMS} dataset computations. The library contains readers and writers of the {mzML} data format, which has been written using modern C++ techniques and design principles and supports a variety of platforms with native compilers. The software has been specifically released under the Apache v2 license to ensure it can be used in both academic and commercial projects. In addition to the library, we also introduce a rapidly growing set of companion tools whose implementation helps to illustrate the simplicity of developing applications on top of the {ProteoWizard} library. {AVAILABILITY}: Cross-platform software that compiles using native compilers (i.e. {GCC} on Linux, {MSVC} on Windows and {XCode} on {OSX}) is available for download free of charge, at http://proteowizard.sourceforge.net. This website also provides code examples, and documentation. It is our hope the {ProteoWizard} project will become a standard platform for proteomics development; consequently, code use, contribution and further development are strongly encouraged.}
}
@article{watrous_2012,
title = {Mass spectral molecular networking of living microbial colonies.},
author = {Watrous, Jeramie and Roach, Patrick and Alexandrov, Theodore and Heath, Brandi S and Yang, Jane Y and Kersten, Roland D and van der Voort, Menno and Pogliano, Kit and Gross, Harald and Raaijmakers, Jos M and Moore, Bradley S and Laskin, Julia and Bandeira, Nuno and Dorrestein, Pieter C},
pages = {E1743-52},
url = {http://dx.doi.org/10.1073/pnas.1203689109},
year = {2012},
month = {jun},
day = {26},
urldate = {2019-05-03},
journal = {Proceedings of the National Academy of Sciences of the United States of America},
volume = {109},
number = {26},
doi = {10.1073/pnas.1203689109},
pmid = {22586093},
pmcid = {PMC3387089},
f1000-projects = {shared citations},
abstract = {Integrating the governing chemistry with the genomics and phenotypes of microbial colonies has been a "holy grail" in microbiology. This work describes a highly sensitive, broadly applicable, and cost-effective approach that allows metabolic profiling of live microbial colonies directly from a Petri dish without any sample preparation. Nanospray desorption electrospray ionization mass spectrometry ({MS}), combined with alignment of {MS} data and molecular networking, enabled monitoring of metabolite production from live microbial colonies from diverse bacterial genera, including Bacillus subtilis, Streptomyces coelicolor, Mycobacterium smegmatis, and Pseudomonas aeruginosa. This work demonstrates that, by using these tools to visualize small molecular changes within bacterial interactions, insights can be gained into bacterial developmental processes as a result of the improved organization of {MS}/{MS} data. To validate this experimental platform, metabolic profiling was performed on Pseudomonas sp. {SH}-C52, which protects sugar beet plants from infections by specific soil-borne fungi [R. Mendes et al. (2011) Science 332:1097-1100]. The antifungal effect of strain {SH}-C52 was attributed to thanamycin, a predicted lipopeptide encoded by a nonribosomal peptide synthetase gene cluster. Our technology, in combination with our recently developed peptidogenomics strategy, enabled the detection and partial characterization of thanamycin and showed that it is a monochlorinated lipopeptide that belongs to the syringomycin family of antifungal agents. In conclusion, the platform presented here provides a significant advancement in our ability to understand the spatiotemporal dynamics of metabolite production in live microbial colonies and communities.}
}
@article{langfelder_2008,
title = {{WGCNA}: an R package for weighted correlation network analysis.},
author = {Langfelder, Peter and Horvath, Steve},
pages = {559},
url = {https://bmcbioinformatics.biomedcentral.com/articles/10.1186/1471-2105-9-559},
year = {2008},
month = {dec},
day = {29},
urldate = {2016-06-10},
journal = {{BMC} Bioinformatics},
volume = {9},
issn = {1471-2105},
doi = {10.1186/1471-2105-9-559},
pmid = {19114008},
pmcid = {PMC2631488},
f1000-projects = {shared citations},
abstract = {{BACKGROUND}: Correlation networks are increasingly being used in bioinformatics applications. For example, weighted gene co-expression network analysis is a systems biology method for describing the correlation patterns among genes across microarray samples. Weighted correlation network analysis ({WGCNA}) can be used for finding clusters (modules) of highly correlated genes, for summarizing such clusters using the module eigengene or an intramodular hub gene, for relating modules to one another and to external sample traits (using eigengene network methodology), and for calculating module membership measures. Correlation networks facilitate network based gene screening methods that can be used to identify candidate biomarkers or therapeutic targets. These methods have been successfully applied in various biological contexts, e.g. cancer, mouse genetics, yeast genetics, and analysis of brain imaging data. While parts of the correlation network methodology have been described in separate publications, there is a need to provide a user-friendly, comprehensive, and consistent software implementation and an accompanying tutorial. {RESULTS}: The {WGCNA} R software package is a comprehensive collection of R functions for performing various aspects of weighted correlation network analysis. The package includes functions for network construction, module detection, gene selection, calculations of topological properties, data simulation, visualization, and interfacing with external software. Along with the R package we also present R software tutorials. While the methods development was motivated by gene expression data, the underlying data mining approach can be applied to a variety of different settings. {CONCLUSION}: The {WGCNA} package provides R functions for weighted correlation network analysis, e.g. co-expression network analysis of gene expression data. The R package along with its source code and additional material are freely available at http://www.genetics.ucla.edu/labs/horvath/{CoexpressionNetwork}/Rpackages/{WGCNA}.}
}
@article{roccaserra_2010,
title = {{ISA} software suite: supporting standards-compliant experimental annotation and enabling curation at the community level.},
author = {Rocca-Serra, Philippe and Brandizi, Marco and Maguire, Eamonn and Sklyar, Nataliya and Taylor, Chris and Begley, Kimberly and Field, Dawn and Harris, Stephen and Hide, Winston and Hofmann, Oliver and Neumann, Steffen and Sterk, Peter and Tong, Weida and Sansone, Susanna-Assunta},
pages = {2354-2356},
url = {http://dx.doi.org/10.1093/bioinformatics/btq415},
year = {2010},
month = {sep},
day = {15},
urldate = {2019-05-13},
journal = {Bioinformatics},
volume = {26},
number = {18},
issn = {1367-4811},
doi = {10.1093/bioinformatics/btq415},
pmid = {20679334},
pmcid = {PMC2935443},
f1000-projects = {shared citations},
abstract = {{UNLABELLED}: The first open source software suite for experimentalists and curators that (i) assists in the annotation and local management of experimental metadata from high-throughput studies employing one or a combination of omics and other technologies; (ii) empowers users to uptake community-defined checklists and ontologies; and (iii) facilitates submission to international public repositories. {AVAILABILITY} {AND} {IMPLEMENTATION}: Software, documentation, case studies and implementations at http://www.isa-tools.org.}
}
@article{tarca_2009,
title = {A novel signaling pathway impact analysis.},
author = {Tarca, Adi Laurentiu and Draghici, Sorin and Khatri, Purvesh and Hassan, Sonia S and Mittal, Pooja and Kim, Jung-Sun and Kim, Chong Jai and Kusanovic, Juan Pedro and Romero, Roberto},
pages = {75-82},
url = {http://dx.doi.org/10.1093/bioinformatics/btn577},
year = {2009},
month = {jan},
day = {1},
urldate = {2019-05-08},
journal = {Bioinformatics},
volume = {25},
number = {1},
doi = {10.1093/bioinformatics/btn577},
pmid = {18990722},
pmcid = {PMC2732297},
f1000-projects = {shared citations},
abstract = {{MOTIVATION}: Gene expression class comparison studies may identify hundreds or thousands of genes as differentially expressed ({DE}) between sample groups. Gaining biological insight from the result of such experiments can be approached, for instance, by identifying the signaling pathways impacted by the observed changes. Most of the existing pathway analysis methods focus on either the number of {DE} genes observed in a given pathway (enrichment analysis methods), or on the correlation between the pathway genes and the class of the samples (functional class scoring methods). Both approaches treat the pathways as simple sets of genes, disregarding the complex gene interactions that these pathways are built to describe. {RESULTS}: We describe a novel signaling pathway impact analysis ({SPIA}) that combines the evidence obtained from the classical enrichment analysis with a novel type of evidence, which measures the actual perturbation on a given pathway under a given condition. A bootstrap procedure is used to assess the significance of the observed total pathway perturbation. Using simulations we show that the evidence derived from perturbations is independent of the pathway enrichment evidence. This allows us to calculate a global pathway significance P-value, which combines the enrichment and perturbation P-values. We illustrate the capabilities of the novel method on four real datasets. The results obtained on these data show that {SPIA} has better specificity and more sensitivity than several widely used pathway analysis methods. {AVAILABILITY}: {SPIA} was implemented as an R package available at http://vortex.cs.wayne.edu/ontoexpress/}
}
@article{kanehisa_2000,
title = {{KEGG}: kyoto encyclopedia of genes and genomes.},
author = {Kanehisa, M and Goto, S},
pages = {27-30},
url = {http://dx.doi.org/10.1093/nar/28.1.27},
year = {2000},
month = {jan},
day = {1},
urldate = {2019-07-23},
journal = {Nucleic Acids Research},
volume = {28},
number = {1},
doi = {10.1093/nar/28.1.27},
pmid = {10592173},
pmcid = {PMC102409},
f1000-projects = {shared citations},
abstract = {{KEGG} (Kyoto Encyclopedia of Genes and Genomes) is a knowledge base for systematic analysis of gene functions, linking genomic information with higher order functional information. The genomic information is stored in the {GENES} database, which is a collection of gene catalogs for all the completely sequenced genomes and some partial genomes with up-to-date annotation of gene functions. The higher order functional information is stored in the {PATHWAY} database, which contains graphical representations of cellular processes, such as metabolism, membrane transport, signal transduction and cell cycle. The {PATHWAY} database is supplemented by a set of ortholog group tables for the information about conserved subpathways (pathway motifs), which are often encoded by positionally coupled genes on the chromosome and which are especially useful in predicting gene functions. A third database in {KEGG} is {LIGAND} for the information about chemical compounds, enzyme molecules and enzymatic reactions. {KEGG} provides Java graphics tools for browsing genome maps, comparing two genome maps and manipulating expression maps, as well as computational tools for sequence comparison, graph comparison and path computation. The {KEGG} databases are daily updated and made freely available (http://www. genome.ad.jp/kegg/).}
}
@article{xia_2015,
title = {{MetaboAnalyst} 3.0--making metabolomics more meaningful.},
author = {Xia, Jianguo and Sinelnikov, Igor V and Han, Beomsoo and Wishart, David S},
pages = {W251-7},
url = {http://dx.doi.org/10.1093/nar/gkv380},
year = {2015},
month = {jul},
day = {1},
urldate = {2018-01-13},
journal = {Nucleic Acids Research},
volume = {43},
number = {W1},
doi = {10.1093/nar/gkv380},
pmid = {25897128},
pmcid = {PMC4489235},
f1000-projects = {shared citations},
abstract = {{MetaboAnalyst} (www.metaboanalyst.ca) is a web server designed to permit comprehensive metabolomic data analysis, visualization and interpretation. It supports a wide range of complex statistical calculations and high quality graphical rendering functions that require significant computational resources. First introduced in 2009, {MetaboAnalyst} has experienced more than a {50X} growth in user traffic (\textgreater50 000 jobs processed each month). In order to keep up with the rapidly increasing computational demands and a growing number of requests to support translational and systems biology applications, we performed a substantial rewrite and major feature upgrade of the server. The result is {MetaboAnalyst} 3.0. By completely re-implementing the {MetaboAnalyst} suite using the latest web framework technologies, we have been able substantially improve its performance, capacity and user interactivity. Three new modules have also been added including: (i) a module for biomarker analysis based on the calculation of receiver operating characteristic curves; (ii) a module for sample size estimation and power analysis for improved planning of metabolomics studies and (iii) a module to support integrative pathway analysis for both genes and metabolites. In addition, popular features found in existing modules have been significantly enhanced by upgrading the graphical output, expanding the compound libraries and by adding support for more diverse organisms. \copyright The Author(s) 2015. Published by Oxford University Press on behalf of Nucleic Acids Research.}
}
@article{grapov_2015,
title = {{MetaMapR}: pathway independent metabolomic network analysis incorporating unknowns.},
author = {Grapov, Dmitry and Wanichthanarak, Kwanjeera and Fiehn, Oliver},
pages = {2757-2760},
url = {http://dx.doi.org/10.1093/bioinformatics/btv194},
year = {2015},
month = {aug},
day = {15},
urldate = {2018-01-13},
journal = {Bioinformatics},
volume = {31},
number = {16},
doi = {10.1093/bioinformatics/btv194},
pmid = {25847005},
pmcid = {PMC4528626},
f1000-projects = {shared citations},
abstract = {{UNLABELLED}: Metabolic network mapping is a widely used approach for integration of metabolomic experimental results with biological domain knowledge. However, current approaches can be limited by biochemical domain or pathway knowledge which results in sparse disconnected graphs for real world metabolomic experiments. {MetaMapR} integrates enzymatic transformations with metabolite structural similarity, mass spectral similarity and empirical associations to generate richly connected metabolic networks. This open source, web-based or desktop software, written in the R programming language, leverages {KEGG} and {PubChem} databases to derive associations between metabolites even in cases where biochemical domain or molecular annotations are unknown. Network calculation is enhanced through an interface to the Chemical Translation System, which allows metabolite identifier translation between \textgreater200 common biochemical databases. Analysis results are presented as interactive visualizations or can be exported as high-quality graphics and numerical tables which can be imported into common network analysis and visualization tools. {AVAILABILITY} {AND} {IMPLEMENTATION}: Freely available at http://dgrapov.github.io/{MetaMapR}/. Requires R and a modern web browser. Installation instructions, tutorials and application examples are available at http://dgrapov.github.io/{MetaMapR}/. {CONTACT}: ofiehn@ucdavis.edu. \copyright The Author 2015. Published by Oxford University Press. All rights reserved. For Permissions, please e-mail: journals.permissions@oup.com.}
}
@article{buuren_2011,
title = {mice: Multivariate Imputation by Chained Equations in {R}},
author = {Buuren, Stef van and Groothuis-Oudshoorn, Karin},
url = {http://www.jstatsoft.org/v45/i03/},
year = {2011},
urldate = {2019-07-30},
journal = {Journal of Statistical Software},
volume = {45},
number = {3},
issn = {1548-7660},
doi = {10.18637/jss.v045.i03},
f1000-projects = {shared citations}
}
@article{cuadrosinostroza_2009,
title = {{TargetSearch}--a Bioconductor package for the efficient preprocessing of {GC}-{MS} metabolite profiling data.},
author = {Cuadros-Inostroza, Alvaro and Caldana, Camila and Redestig, Henning and Kusano, Miyako and Lisec, Jan and Peña-Cortés, Hugo and Willmitzer, Lothar and Hannah, Matthew A},
pages = {428},
url = {http://dx.doi.org/10.1186/1471-2105-10-428},
year = {2009},
month = {dec},
day = {16},
urldate = {2018-01-15},
journal = {{BMC} Bioinformatics},
volume = {10},
doi = {10.1186/1471-2105-10-428},
pmid = {20015393},
pmcid = {PMC3087348},
f1000-projects = {shared citations},
abstract = {{BACKGROUND}: Metabolite profiling, the simultaneous quantification of multiple metabolites in an experiment, is becoming increasingly popular, particularly with the rise of systems-level biology. The workhorse in this field is gas-chromatography hyphenated with mass spectrometry ({GC}-{MS}). The high-throughput of this technology coupled with a demand for large experiments has led to data pre-processing, i.e. the quantification of metabolites across samples, becoming a major bottleneck. Existing software has several limitations, including restricted maximum sample size, systematic errors and low flexibility. However, the biggest limitation is that the resulting data usually require extensive hand-curation, which is subjective and can typically take several days to weeks. {RESULTS}: We introduce the {TargetSearch} package, an open source tool which is a flexible and accurate method for pre-processing even very large numbers of {GC}-{MS} samples within hours. We developed a novel strategy to iteratively correct and update retention time indices for searching and identifying metabolites. The package is written in the R programming language with computationally intensive functions written in C for speed and performance. The package includes a graphical user interface to allow easy use by those unfamiliar with R. {CONCLUSIONS}: {TargetSearch} allows fast and accurate data pre-processing for {GC}-{MS} experiments and overcomes the sample number limitations and manual curation requirements of existing software. We validate our method by carrying out an analysis against both a set of known chemical standard mixtures and of a biological experiment. In addition we demonstrate its capabilities and speed by comparing it with other {GC}-{MS} pre-processing tools. We believe this package will greatly ease current bottlenecks and facilitate the analysis of metabolic profiling data.}
}
@article{delivera_2012,
title = {Normalizing and integrating metabolomics data.},
author = {De Livera, Alysha M and Dias, Daniel A and De Souza, David and Rupasinghe, Thusitha and Pyke, James and Tull, Dedreia and Roessner, Ute and {McConville}, Malcolm and Speed, Terence P},
pages = {10768-10776},
url = {http://dx.doi.org/10.1021/ac302748b},
year = {2012},
month = {dec},
day = {18},
urldate = {2018-01-13},
journal = {Analytical Chemistry},
volume = {84},
number = {24},
doi = {10.1021/ac302748b},
pmid = {23150939},
f1000-projects = {shared citations},
abstract = {Metabolomics research often requires the use of multiple analytical platforms, batches of samples, and laboratories, any of which can introduce a component of unwanted variation. In addition, every experiment is subject to within-platform and other experimental variation, which often includes unwanted biological variation. Such variation must be removed in order to focus on the biological information of interest. We present a broadly applicable method for the removal of unwanted variation arising from various sources for the identification of differentially abundant metabolites and, hence, for the systematic integration of data on the same quantities from different sources. We illustrate the versatility and the performance of the approach in four applications, and we show that it has several advantages over the existing normalization methods.}
}
@article{delivera_2015,
title = {Statistical methods for handling unwanted variation in metabolomics data.},
author = {De Livera, Alysha M and Sysi-Aho, Marko and Jacob, Laurent and Gagnon-Bartsch, Johann A and Castillo, Sandra and Simpson, Julie A and Speed, Terence P},
pages = {3606-3615},
url = {http://dx.doi.org/10.1021/ac502439y},
year = {2015},
month = {apr},
day = {7},
urldate = {2018-01-13},
journal = {Analytical Chemistry},
volume = {87},
number = {7},
doi = {10.1021/ac502439y},
pmid = {25692814},
pmcid = {PMC4544854},
f1000-projects = {shared citations},
abstract = {Metabolomics experiments are inevitably subject to a component of unwanted variation, due to factors such as batch effects, long runs of samples, and confounding biological variation. Although the removal of this unwanted variation is a vital step in the analysis of metabolomics data, it is considered a gray area in which there is a recognized need to develop a better understanding of the procedures and statistical methods required to achieve statistically relevant optimal biological outcomes. In this paper, we discuss the causes of unwanted variation in metabolomics experiments, review commonly used metabolomics approaches for handling this unwanted variation, and present a statistical approach for the removal of unwanted variation to obtain normalized metabolomics data. The advantages and performance of the approach relative to several widely used metabolomics normalization approaches are illustrated through two metabolomics studies, and recommendations are provided for choosing and assessing the most suitable normalization method for a given metabolomics experiment. Software for the approach is made freely available.}
}
@article{gu_2013,
title = {{CePa}: an R package for finding significant pathways weighted by multiple network centralities.},
author = {Gu, Zuguang and Wang, Jin},
pages = {658-660},
url = {http://dx.doi.org/10.1093/bioinformatics/btt008},
year = {2013},
month = {mar},
day = {1},
urldate = {2019-05-08},
journal = {Bioinformatics},
volume = {29},
number = {5},
doi = {10.1093/bioinformatics/btt008},
pmid = {23314125},
f1000-projects = {shared citations},
abstract = {{SUMMARY}: {CePa} is an R package aiming to find significant pathways through network topology information. The package has several advantages compared with current pathway enrichment tools. First, pathway node instead of single gene is taken as the basic unit when analysing networks to meet the fact that genes must be constructed into complexes to hold normal functions. Second, multiple network centralities are applied simultaneously to measure importance of nodes from different aspects to make a full view on the biological system. {CePa} extends standard pathway enrichment methods, which include both over-representation analysis procedure and gene-set analysis procedure. {CePa} has been evaluated with high performance on real-world data, and it can provide more information directly related to current biological problems. {AVAILABILITY}: {CePa} is available at the Comprehensive R Archive Network ({CRAN}): http://cran.r-project.org/web/packages/{CePa}/}
}
@article{hughes_2014,
title = {{MSPrep}--summarization, normalization and diagnostics for processing of mass spectrometry-based metabolomic data.},
author = {Hughes, Grant and Cruickshank-Quinn, Charmion and Reisdorph, Richard and Lutz, Sharon and Petrache, Irina and Reisdorph, Nichole and Bowler, Russell and Kechris, Katerina},
pages = {133-134},
url = {http://dx.doi.org/10.1093/bioinformatics/btt589},
year = {2014},
month = {jan},
day = {1},
urldate = {2018-01-13},
journal = {Bioinformatics},
volume = {30},
number = {1},
doi = {10.1093/bioinformatics/btt589},
pmid = {24174567},
pmcid = {PMC3866554},
f1000-projects = {shared citations},
abstract = {{MOTIVATION}: Although R packages exist for the pre-processing of metabolomic data, they currently do not incorporate additional analysis steps of summarization, filtering and normalization of aligned data. We developed the {MSPrep} R package to complement other packages by providing these additional steps, implementing a selection of popular normalization algorithms and generating diagnostics to help guide investigators in their analyses. {AVAILABILITY}: http://www.sourceforge.net/projects/msprep}
}
@article{jauhiainen_2014,
title = {Normalization of metabolomics data with applications to correlation maps.},
author = {Jauhiainen, Alexandra and Madhu, Basetti and Narita, Masako and Narita, Masashi and Griffiths, John and Tavaré, Simon},
pages = {2155-2161},
url = {http://dx.doi.org/10.1093/bioinformatics/btu175},
year = {2014},
month = {aug},
day = {1},
urldate = {2018-01-13},
journal = {Bioinformatics},
volume = {30},
number = {15},
doi = {10.1093/bioinformatics/btu175},
pmid = {24711654},
f1000-projects = {shared citations},
abstract = {{MOTIVATION}: In metabolomics, the goal is to identify and measure the concentrations of different metabolites (small molecules) in a cell or a biological system. The metabolites form an important layer in the complex metabolic network, and the interactions between different metabolites are often of interest. It is crucial to perform proper normalization of metabolomics data, but current methods may not be applicable when estimating interactions in the form of correlations between metabolites. We propose a normalization approach based on a mixed model, with simultaneous estimation of a correlation matrix. We also investigate how the common use of a calibration standard in nuclear magnetic resonance ({NMR}) experiments affects the estimation of correlations. {RESULTS}: We show with both real and simulated data that our proposed normalization method is robust and has good performance when discovering true correlations between metabolites. The standardization of {NMR} data is shown in simulation studies to affect our ability to discover true correlations to a small extent. However, comparing standardized and non-standardized real data does not result in any large differences in correlation estimates. {AVAILABILITY} {AND} {IMPLEMENTATION}: Source code is freely available at https://sourceforge.net/projects/metabnorm/ {CONTACT}: alexandra.jauhiainen@ki.se {SUPPLEMENTARY} {INFORMATION}: Supplementary data are available at Bioinformatics online. \copyright The Author 2014. Published by Oxford University Press. All rights reserved. For Permissions, please e-mail: journals.permissions@oup.com.}
}
@article{kramer_2013,
title = {{rBiopaxParser}--an R package to parse, modify and visualize {BioPAX} data.},
author = {Kramer, Frank and Bayerlová, Michaela and Klemm, Florian and Bleckmann, Annalen and Beissbarth, Tim},
pages = {520-522},
url = {http://dx.doi.org/10.1093/bioinformatics/bts710},
year = {2013},
month = {feb},
day = {15},
urldate = {2019-05-08},
journal = {Bioinformatics},
volume = {29},
number = {4},
doi = {10.1093/bioinformatics/bts710},
pmid = {23274212},
f1000-projects = {shared citations},
abstract = {{MOTIVATION}: Biological pathway data, stored in structured databases, is a useful source of knowledge for a wide range of bioinformatics algorithms and tools. The Biological Pathway Exchange ({BioPAX}) language has been established as a standard to store and annotate pathway information. However, use of these data within statistical analyses can be tedious. On the other hand, the statistical computing environment R has become the standard for bioinformatics analysis of large-scale genomics data. With this package, we hope to enable R users to work with {BioPAX} data and make use of the always increasing amount of biological pathway knowledge within data analysis methods. {RESULTS}: {rBiopaxParser} is a software package that provides a comprehensive set of functions for parsing, viewing and modifying {BioPAX} pathway data within R. These functions enable the user to access and modify specific parts of the {BioPAX} model. Furthermore, it allows to generate and layout regulatory graphs of controlling interactions and to visualize {BioPAX} pathways. {AVAILABILITY}: {rBiopaxParser} is an open-source R package and has been submitted to Bioconductor.}
}
@article{nyamundanda_2013,
title = {{MetSizeR}: selecting the optimal sample size for metabolomic studies using an analysis based approach.},
author = {Nyamundanda, Gift and Gormley, Isobel Claire and Fan, Yue and Gallagher, William M and Brennan, Lorraine},
pages = {338},
url = {http://dx.doi.org/10.1186/1471-2105-14-338},
year = {2013},
month = {nov},
day = {21},
urldate = {2018-01-13},
journal = {{BMC} Bioinformatics},
volume = {14},
doi = {10.1186/1471-2105-14-338},
pmid = {24261687},
pmcid = {PMC4222287},
f1000-projects = {shared citations},
abstract = {{BACKGROUND}: Determining sample sizes for metabolomic experiments is important but due to the complexity of these experiments, there are currently no standard methods for sample size estimation in metabolomics. Since pilot studies are rarely done in metabolomics, currently existing sample size estimation approaches which rely on pilot data can not be applied. {RESULTS}: In this article, an analysis based approach called {MetSizeR} is developed to estimate sample size for metabolomic experiments even when experimental pilot data are not available. The key motivation for {MetSizeR} is that it considers the type of analysis the researcher intends to use for data analysis when estimating sample size. {MetSizeR} uses information about the data analysis technique and prior expert knowledge of the metabolomic experiment to simulate pilot data from a statistical model. Permutation based techniques are then applied to the simulated pilot data to estimate the required sample size. {CONCLUSIONS}: The {MetSizeR} methodology, and a publicly available software package which implements the approach, are illustrated through real metabolomic applications. Sample size estimates, informed by the intended statistical analysis technique, and the associated uncertainty are provided.}
}
@article{redestig_2009,
title = {Compensation for systematic cross-contribution improves normalization of mass spectrometry based metabolomics data.},
author = {Redestig, Henning and Fukushima, Atsushi and Stenlund, Hans and Moritz, Thomas and Arita, Masanori and Saito, Kazuki and Kusano, Miyako},
pages = {7974-7980},
url = {http://dx.doi.org/10.1021/ac901143w},
year = {2009},
month = {oct},
day = {1},
urldate = {2018-01-13},
journal = {Analytical Chemistry},
volume = {81},
number = {19},
doi = {10.1021/ac901143w},
pmid = {19743813},
f1000-projects = {shared citations},
abstract = {Most mass spectrometry based metabolomics studies are semiquantitative and depend on efficient normalization techniques to suppress systematic error. A common approach is to include isotope-labeled internal standards ({ISs}) and then express the estimated metabolite abundances relative to the {IS}. Because of problems such as insufficient chromatographic resolution, however, the analytes may directly influence estimates of the {IS}, a phenomenon known as cross-contribution ({CC}). Normalization using {ISs} that suffer from {CC} effects will cause significant loss of information if the interfering analytes are associated with the studied factors. We present a novel normalization algorithm, which compensates for systematic {CC} effects that can be traced back to a linear association with the experimental design. The proposed method was found to be superior at purifying the signal of interest compared to current normalization methods when applied to two biological data sets and a multicomponent dilution mixture. Our method is applicable to data from randomized and designed experiments that use {ISs} to monitor the systematic error.}
}
@article{stacklies_2007,
title = {{pcaMethods}--a bioconductor package providing {PCA} methods for incomplete data.},
author = {Stacklies, Wolfram and Redestig, Henning and Scholz, Matthias and Walther, Dirk and Selbig, Joachim},
pages = {1164-1167},
url = {http://dx.doi.org/10.1093/bioinformatics/btm069},
year = {2007},
month = {may},
day = {1},
urldate = {2018-01-13},
journal = {Bioinformatics},
volume = {23},
number = {9},
doi = {10.1093/bioinformatics/btm069},
pmid = {17344241},
f1000-projects = {shared citations},
abstract = {{UNLABELLED}: {pcaMethods} is a Bioconductor compliant library for computing principal component analysis ({PCA}) on incomplete data sets. The results can be analyzed directly or used to estimate missing values to enable the use of missing value sensitive statistical methods. The package was mainly developed with microarray and metabolite data sets in mind, but can be applied to any other incomplete data set as well. {AVAILABILITY}: http://www.bioconductor.org}
}
@article{meng_2014,
title = {A multivariate approach to the integration of multi-omics datasets.},
author = {Meng, Chen and Kuster, Bernhard and Culhane, Aedín C and Gholami, Amin Moghaddas},
pages = {162},
url = {http://dx.doi.org/10.1186/1471-2105-15-162},
year = {2014},
month = {may},
day = {29},
urldate = {2018-01-13},
journal = {{BMC} Bioinformatics},
volume = {15},
doi = {10.1186/1471-2105-15-162},
pmid = {24884486},
pmcid = {PMC4053266},
f1000-projects = {shared citations},
abstract = {{BACKGROUND}: To leverage the potential of multi-omics studies, exploratory data analysis methods that provide systematic integration and comparison of multiple layers of omics information are required. We describe multiple co-inertia analysis ({MCIA}), an exploratory data analysis method that identifies co-relationships between multiple high dimensional datasets. Based on a covariance optimization criterion, {MCIA} simultaneously projects several datasets into the same dimensional space, transforming diverse sets of features onto the same scale, to extract the most variant from each dataset and facilitate biological interpretation and pathway analysis. {RESULTS}: We demonstrate integration of multiple layers of information using {MCIA}, applied to two typical "omics" research scenarios. The integration of transcriptome and proteome profiles of cells in the {NCI}-60 cancer cell line panel revealed distinct, complementary features, which together increased the coverage and power of pathway analysis. Our analysis highlighted the importance of the leukemia extravasation signaling pathway in leukemia that was not highly ranked in the analysis of any individual dataset. Secondly, we compared transcriptome profiles of high grade serous ovarian tumors that were obtained, on two different microarray platforms and next generation {RNA}-sequencing, to identify the most informative platform and extract robust biomarkers of molecular subtypes. We discovered that the variance of {RNA}-sequencing data processed using {RPKM} had greater variance than that with {MapSplice} and {RSEM}. We provided novel markers highly associated to tumor molecular subtype combined from four data platforms. {MCIA} is implemented and available in the R/Bioconductor "omicade4" package. {CONCLUSION}: We believe {MCIA} is an attractive method for data integration and visualization of several datasets of multi-omics features observed on the same set of individuals. The method is not dependent on feature annotation, and thus it can extract important features even when there are not present across all datasets. {MCIA} provides simple graphical representations for the identification of relationships between large datasets.}
}
@article{yamamoto_2014,
title = {Statistical hypothesis testing of factor loading in principal component analysis and its application to metabolite set enrichment analysis.},
author = {Yamamoto, Hiroyuki and Fujimori, Tamaki and Sato, Hajime and Ishikawa, Gen and Kami, Kenjiro and Ohashi, Yoshiaki},
pages = {51},
url = {http://dx.doi.org/10.1186/1471-2105-15-51},
year = {2014},
month = {feb},
day = {21},
urldate = {2018-01-13},
journal = {{BMC} Bioinformatics},
volume = {15},
doi = {10.1186/1471-2105-15-51},
pmid = {24555693},
pmcid = {PMC4015128},
f1000-projects = {shared citations},
abstract = {{BACKGROUND}: Principal component analysis ({PCA}) has been widely used to visualize high-dimensional metabolomic data in a two- or three-dimensional subspace. In metabolomics, some metabolites (e.g., the top 10 metabolites) have been subjectively selected when using factor loading in {PCA}, and biological inferences are made for these metabolites. However, this approach may lead to biased biological inferences because these metabolites are not objectively selected with statistical criteria. {RESULTS}: We propose a statistical procedure that selects metabolites with statistical hypothesis testing of the factor loading in {PCA} and makes biological inferences about these significant metabolites with a metabolite set enrichment analysis ({MSEA}). This procedure depends on the fact that the eigenvector in {PCA} for autoscaled data is proportional to the correlation coefficient between the {PC} score and each metabolite level. We applied this approach to two sets of metabolomic data from mouse liver samples: 136 of 282 metabolites in the first case study and 66 of 275 metabolites in the second case study were statistically significant. This result suggests that to set the number of metabolites before the analysis is inappropriate because the number of significant metabolites differs in each study when factor loading is used in {PCA}. Moreover, when an {MSEA} of these significant metabolites was performed, significant metabolic pathways were detected, which were acceptable in terms of previous biological knowledge. {CONCLUSIONS}: It is essential to select metabolites statistically to make unbiased biological inferences from metabolomic data when using factor loading in {PCA}. We propose a statistical procedure to select metabolites with statistical hypothesis testing of the factor loading in {PCA}, and to draw biological inferences about these significant metabolites with {MSEA}. We have developed an R package "mseapca" to facilitate this approach. The "mseapca" package is publicly available at the {CRAN} website.}
}
@article{silva_2014,
title = {{ProbMetab}: an R package for Bayesian probabilistic annotation of {LC}-{MS}-based metabolomics.},
author = {Silva, Ricardo R and Jourdan, Fabien and Salvanha, Diego M and Letisse, Fabien and Jamin, Emilien L and Guidetti-Gonzalez, Simone and Labate, Carlos A and Vêncio, Ricardo Z N},
pages = {1336-1337},
url = {http://dx.doi.org/10.1093/bioinformatics/btu019},
year = {2014},
month = {may},
day = {1},
urldate = {2018-01-13},
journal = {Bioinformatics},
volume = {30},
number = {9},
doi = {10.1093/bioinformatics/btu019},
pmid = {24443383},
pmcid = {PMC3998140},
f1000-projects = {shared citations},
abstract = {We present {ProbMetab}, an R package that promotes substantial improvement in automatic probabilistic liquid chromatography-mass spectrometry-based metabolome annotation. The inference engine core is based on a Bayesian model implemented to (i) allow diverse source of experimental data and metadata to be systematically incorporated into the model with alternative ways to calculate the likelihood function and (ii) allow sensitive selection of biologically meaningful biochemical reaction databases as Dirichlet-categorical prior distribution. Additionally, to ensure result interpretation by system biologists, we display the annotation in a network where observed mass peaks are connected if their candidate metabolites are substrate/product of known biochemical reactions. This graph can be overlaid with other graph-based analysis, such as partial correlation networks, in a visualization scheme exported to Cytoscape, with web and stand-alone versions.}
}
@article{luo_2013,
title = {{Pathview}: an R/Bioconductor package for pathway-based data integration and visualization.},
author = {Luo, Weijun and Brouwer, Cory},
pages = {1830-1831},
url = {http://dx.doi.org/10.1093/bioinformatics/btt285},
year = {2013},
month = {jul},
day = {15},
urldate = {2017-04-19},
journal = {Bioinformatics},
volume = {29},
number = {14},
doi = {10.1093/bioinformatics/btt285},
pmid = {23740750},
pmcid = {PMC3702256},
f1000-projects = {shared citations},
abstract = {{SUMMARY}: Pathview is a novel tool set for pathway-based data integration and visualization. It maps and renders user data on relevant pathway graphs. Users only need to supply their data and specify the target pathway. Pathview automatically downloads the pathway graph data, parses the data file, maps and integrates user data onto the pathway and renders pathway graphs with the mapped data. Although built as a stand-alone program, Pathview may seamlessly integrate with pathway and functional analysis tools for large-scale and fully automated analysis pipelines. {AVAILABILITY}: The package is freely available under the {GPLv3} license through Bioconductor and R-Forge. It is available at http://bioconductor.org/packages/release/bioc/html/pathview.html and at http://Pathview.r-forge.r-project.org/. {CONTACT}: luo\_weijun@yahoo.com {SUPPLEMENTARY} {INFORMATION}: Supplementary data are available at Bioinformatics online.}
}
@article{tsugawa_2015,
title = {{MS}-{DIAL}: data-independent {MS}/{MS} deconvolution for comprehensive metabolome analysis.},
author = {Tsugawa, Hiroshi and Cajka, Tomas and Kind, Tobias and Ma, Yan and Higgins, Brendan and Ikeda, Kazutaka and Kanazawa, Mitsuhiro and {VanderGheynst}, Jean and Fiehn, Oliver and Arita, Masanori},
pages = {523-526},
url = {http://dx.doi.org/10.1038/nmeth.3393},
year = {2015},
month = {jun},
urldate = {2019-08-10},
journal = {Nature Methods},
volume = {12},
number = {6},
doi = {10.1038/nmeth.3393},
pmid = {25938372},
pmcid = {PMC4449330},
f1000-projects = {shared citations},
abstract = {Data-independent acquisition ({DIA}) in liquid chromatography ({LC}) coupled to tandem mass spectrometry ({MS}/{MS}) provides comprehensive untargeted acquisition of molecular data. We provide an open-source software pipeline, which we call {MS}-{DIAL}, for {DIA}-based identification and quantification of small molecules by mass spectral deconvolution. For a reversed-phase {LC}-{MS}/{MS} analysis of nine algal strains, {MS}-{DIAL} using an enriched {LipidBlast} library identified 1,023 lipid compounds, highlighting the chemotaxonomic relationships between the algal strains.}
}
@book{james_2013,
title = {An Introduction to Statistical Learning},
author = {James, Gareth and Witten, Daniela and Hastie, Trevor and Tibshirani, Robert},
series = {Springer Texts in Statistics},
publisher = {Springer New York},
url = {http://link.springer.com/10.1007/978-1-4614-7138-7},
year = {2013},
urldate = {2018-05-22},
volume = {103},
isbn = {978-1-4614-7137-0},
issn = {1431-875X},
doi = {10.1007/978-1-4614-7138-7},
address = {New York, {NY}},
f1000-projects = {shared citations}
}
@article{wishart_2013,
title = {{HMDB} 3.0--The Human Metabolome Database in 2013.},
author = {Wishart, David S and Jewison, Timothy and Guo, An Chi and Wilson, Michael and Knox, Craig and Liu, Yifeng and Djoumbou, Yannick and Mandal, Rupasri and Aziat, Farid and Dong, Edison and Bouatra, Souhaila and Sinelnikov, Igor and Arndt, David and Xia, Jianguo and Liu, Philip and Yallou, Faizath and Bjorndahl, Trent and Perez-Pineiro, Rolando and Eisner, Roman and Allen, Felicity and Neveu, Vanessa and Greiner, Russ and Scalbert, Augustin},
pages = {D801-7},
url = {http://dx.doi.org/10.1093/nar/gks1065},
year = {2013},
month = {jan},
urldate = {2019-07-23},
journal = {Nucleic Acids Research},
volume = {41},
number = {Database issue},
doi = {10.1093/nar/gks1065},
pmid = {23161693},
pmcid = {PMC3531200},
f1000-projects = {shared citations},
abstract = {The Human Metabolome Database ({HMDB}) (www.hmdb.ca) is a resource dedicated to providing scientists with the most current and comprehensive coverage of the human metabolome. Since its first release in 2007, the {HMDB} has been used to facilitate research for nearly 1000 published studies in metabolomics, clinical biochemistry and systems biology. The most recent release of {HMDB} (version 3.0) has been significantly expanded and enhanced over the 2009 release (version 2.0). In particular, the number of annotated metabolite entries has grown from 6500 to more than 40,000 (a 600\% increase). This enormous expansion is a result of the inclusion of both 'detected' metabolites (those with measured concentrations or experimental confirmation of their existence) and 'expected' metabolites (those for which biochemical pathways are known or human intake/exposure is frequent but the compound has yet to be detected in the body). The latest release also has greatly increased the number of metabolites with biofluid or tissue concentration data, the number of compounds with reference spectra and the number of data fields per entry. In addition to this expansion in data quantity, new database visualization tools and new data content have been added or enhanced. These include better spectral viewing tools, more powerful chemical substructure searches, an improved chemical taxonomy and better, more interactive pathway maps. This article describes these enhancements to the {HMDB}, which was previously featured in the 2009 {NAR} Database Issue. (Note to referees, {HMDB} 3.0 will go live on 18 September 2012.).}
}
@article{ernest_2012,
title = {{MetabR}: an R script for linear model analysis of quantitative metabolomic data.},
author = {Ernest, Ben and Gooding, Jessica R and Campagna, Shawn R and Saxton, Arnold M and Voy, Brynn H},
pages = {596},
url = {http://dx.doi.org/10.1186/1756-0500-5-596},
year = {2012},
month = {oct},
day = {30},
urldate = {2018-01-13},
journal = {{BMC} Research Notes},
volume = {5},
doi = {10.1186/1756-0500-5-596},
pmid = {23111096},
pmcid = {PMC3532230},
f1000-projects = {shared citations},
abstract = {{BACKGROUND}: Metabolomics is an emerging high-throughput approach to systems biology, but data analysis tools are lacking compared to other systems level disciplines such as transcriptomics and proteomics. Metabolomic data analysis requires a normalization step to remove systematic effects of confounding variables on metabolite measurements. Current tools may not correctly normalize every metabolite when the relationships between each metabolite quantity and fixed-effect confounding variables are different, or for the effects of random-effect confounding variables. Linear mixed models, an established methodology in the microarray literature, offer a standardized and flexible approach for removing the effects of fixed- and random-effect confounding variables from metabolomic data. {FINDINGS}: Here we present a simple menu-driven program, "{MetabR}", designed to aid researchers with no programming background in statistical analysis of metabolomic data. Written in the open-source statistical programming language R, {MetabR} implements linear mixed models to normalize metabolomic data and analysis of variance ({ANOVA}) to test treatment differences. {MetabR} exports normalized data, checks statistical model assumptions, identifies differentially abundant metabolites, and produces output files to help with data interpretation. Example data are provided to illustrate normalization for common confounding variables and to demonstrate the utility of the {MetabR} program. {CONCLUSIONS}: We developed {MetabR} as a simple and user-friendly tool for implementing linear mixed model-based normalization and statistical analysis of targeted metabolomic data, which helps to fill a lack of available data analysis tools in this field. The program, user guide, example data, and any future news or updates related to the program may be found at http://metabr.r-forge.r-project.org/.}
}
@article{lcao_2009,
title = {{integrOmics}: an R package to unravel relationships between two omics datasets.},
author = {Lê Cao, Kim-Anh and González, Ignacio and Déjean, Sébastien},
pages = {2855-2856},
url = {http://dx.doi.org/10.1093/bioinformatics/btp515},
year = {2009},
month = {nov},
day = {1},
urldate = {2018-01-13},
journal = {Bioinformatics},
volume = {25},
number = {21},
doi = {10.1093/bioinformatics/btp515},
pmid = {19706745},
pmcid = {PMC2781751},
f1000-projects = {shared citations},
abstract = {{MOTIVATION}: With the availability of many 'omics' data, such as transcriptomics, proteomics or metabolomics, the integrative or joint analysis of multiple datasets from different technology platforms is becoming crucial to unravel the relationships between different biological functional levels. However, the development of such an analysis is a major computational and technical challenge as most approaches suffer from high data dimensionality. New methodologies need to be developed and validated. {RESULTS}: {integrOmics} efficiently performs integrative analyses of two types of 'omics' variables that are measured on the same samples. It includes a regularized version of canonical correlation analysis to enlighten correlations between two datasets, and a sparse version of partial least squares ({PLS}) regression that includes simultaneous variable selection in both datasets. The usefulness of both approaches has been demonstrated previously and successfully applied in various integrative studies. {AVAILABILITY}: {integrOmics} is freely available from http://{CRAN}.R-project.org/ or from the web site companion (http://math.univ-toulouse.fr/biostat) that provides full documentation and tutorials. {CONTACT}: k.lecao@uq.edu.au {SUPPLEMENTARY} {INFORMATION}: Supplementary data are available at Bioinformatics online.}
}
@article{luna_2016,
title = {{PaxtoolsR}: pathway analysis in R using Pathway Commons.},
author = {Luna, Augustin and Babur, Özgün and Aksoy, Bülent Arman and Demir, Emek and Sander, Chris},
pages = {1262-1264},
url = {http://dx.doi.org/10.1093/bioinformatics/btv733},
year = {2016},
month = {apr},
day = {15},
urldate = {2018-01-13},
journal = {Bioinformatics},
volume = {32},
number = {8},
doi = {10.1093/bioinformatics/btv733},
pmid = {26685306},
pmcid = {PMC4824129},
f1000-projects = {shared citations},
abstract = {{PURPOSE}: {PaxtoolsR} package enables access to pathway data represented in the {BioPAX} format and made available through the Pathway Commons webservice for users of the R language to aid in advanced pathway analyses. Features include the extraction, merging and validation of pathway data represented in the {BioPAX} format. This package also provides novel pathway datasets and advanced querying features for R users through the Pathway Commons webservice allowing users to query, extract and retrieve data and integrate these data with local {BioPAX} datasets. {AVAILABILITY} {AND} {IMPLEMENTATION}: The {PaxtoolsR} package is compatible with versions of R 3.1.1 (and higher) on Windows, Mac {OS} X and Linux using Bioconductor 3.0 and is available through the Bioconductor R package repository along with source code and a tutorial vignette describing common tasks, such as data visualization and gene set enrichment analysis. Source code and documentation are at http://www.bioconductor.org/packages/paxtoolsr This plugin is free, open-source and licensed under the {LGPL}-3. {CONTACT}: paxtools@cbio.mskcc.org or lunaa@cbio.mskcc.org. \copyright The Author 2015. Published by Oxford University Press.}
}
@article{enot_2008,
title = {Preprocessing, classification modeling and feature selection using flow injection electrospray mass spectrometry metabolite fingerprint data.},
author = {Enot, David P and Lin, Wanchang and Beckmann, Manfred and Parker, David and Overy, David P and Draper, John},
pages = {446-470},
url = {http://dx.doi.org/10.1038/nprot.2007.511},
year = {2008},
urldate = {2019-09-06},
journal = {Nature Protocols},
volume = {3},
number = {3},
doi = {10.1038/nprot.2007.511},
pmid = {18323816},
f1000-projects = {shared citations},
abstract = {Metabolome analysis by flow injection electrospray mass spectrometry ({FIE}-{MS}) fingerprinting generates measurements relating to large numbers of m/z signals. Such data sets often exhibit high variance with a paucity of replicates, thus providing a challenge for data mining. We describe data preprocessing and modeling methods that have proved reliable in projects involving samples from a range of organisms. The protocols interact with software resources specifically for metabolomics provided in a Web-accessible data analysis package {FIEmspro} (http://users.aber.ac.uk/jhd) written in the R environment and requiring a moderate knowledge of R command-line usage. Specific emphasis is placed on describing the outcome of modeling experiments using {FIE}-{MS} data that require further preprocessing to improve quality. The salient features of both poor and robust (i.e., highly generalizable) multivariate models are outlined together with advice on validating classifiers and avoiding false discovery when seeking explanatory variables.}
}
@article{schaefer_2009,
title = {{PID}: the pathway interaction database.},
author = {Schaefer, Carl F and Anthony, Kira and Krupa, Shiva and Buchoff, Jeffrey and Day, Matthew and Hannay, Timo and Buetow, Kenneth H},
pages = {D674-9},
url = {http://dx.doi.org/10.1093/nar/gkn653},
year = {2009},
month = {jan},
urldate = {2019-05-03},
journal = {Nucleic Acids Research},
volume = {37},
number = {Database issue},
doi = {10.1093/nar/gkn653},
pmid = {18832364},
pmcid = {PMC2686461},
f1000-projects = {shared citations},
abstract = {The Pathway Interaction Database ({PID}, http://pid.nci.nih.gov) is a freely available collection of curated and peer-reviewed pathways composed of human molecular signaling and regulatory events and key cellular processes. Created in a collaboration between the {US} National Cancer Institute and Nature Publishing Group, the database serves as a research tool for the cancer research community and others interested in cellular pathways, such as neuroscientists, developmental biologists and immunologists. {PID} offers a range of search features to facilitate pathway exploration. Users can browse the predefined set of pathways or create interaction network maps centered on a single molecule or cellular process of interest. In addition, the batch query tool allows users to upload long list(s) of molecules, such as those derived from microarray experiments, and either overlay these molecules onto predefined pathways or visualize the complete molecular connectivity map. Users can also download molecule lists, citation lists and complete database content in extensible markup language ({XML}) and Biological Pathways Exchange ({BioPAX}) Level 2 format. The database is updated with new pathway content every month and supplemented by specially commissioned articles on the practical uses of other relevant online tools.}
}
@article{wachter_2015,
title = {{pwOmics}: an R package for pathway-based integration of time-series omics data using public database knowledge.},
author = {Wachter, Astrid and Beißbarth, Tim},
pages = {3072-3074},
url = {http://dx.doi.org/10.1093/bioinformatics/btv323},
year = {2015},
month = {sep},
day = {15},
urldate = {2018-01-13},
journal = {Bioinformatics},
volume = {31},
number = {18},
doi = {10.1093/bioinformatics/btv323},
pmid = {26002883},
f1000-projects = {shared citations},
abstract = {{UNLABELLED}: Characterization of biological processes is progressively enabled with the increased generation of omics data on different signaling levels. Here we present a straightforward approach for the integrative analysis of data from different high-throughput technologies based on pathway and interaction models from public databases. {pwOmics} performs pathway-based level-specific data comparison of coupled human proteomic and genomic/transcriptomic datasets based on their log fold changes. Separate downstream and upstream analyses results on the functional levels of pathways, transcription factors and genes/transcripts are performed in the cross-platform consensus analysis. These provide a basis for the combined interpretation of regulatory effects over time. Via network reconstruction and inference methods (Steiner tree, dynamic Bayesian network inference) consensus graphical networks can be generated for further analyses and visualization. {AVAILABILITY} {AND} {IMPLEMENTATION}: The R package {pwOmics} is freely available on Bioconductor (http://www.bioconductor.org/). {CONTACT}: astrid.wachter@med.uni-goettingen.de. \copyright The Author 2015. Published by Oxford University Press. All rights reserved. For Permissions, please e-mail: journals.permissions@oup.com.}
}
@article{saghatelian_2004,
title = {Assignment of endogenous substrates to enzymes by global metabolite profiling.},
author = {Saghatelian, Alan and Trauger, Sunia A and Want, Elizabeth J and Hawkins, Edward G and Siuzdak, Gary and Cravatt, Benjamin F},
pages = {14332-14339},
url = {http://dx.doi.org/10.1021/bi0480335},
year = {2004},
month = {nov},
day = {16},
urldate = {2018-01-13},
journal = {Biochemistry},
volume = {43},
number = {45},
issn = {0006-2960},
doi = {10.1021/bi0480335},
pmid = {15533037},
f1000-projects = {shared citations},
abstract = {Enzymes regulate biological processes through the conversion of specific substrates to products. Therefore, of fundamental interest for every enzyme is the elucidation of its natural substrates. Here, we describe a general strategy for identifying endogenous substrates of enzymes by untargeted liquid chromatography-mass spectrometry ({LC}-{MS}) analysis of tissue metabolomes from wild-type and enzyme-inactivated organisms. We use this method to discover several brain lipids regulated by the mammalian enzyme fatty acid amide hydrolase ({FAAH}) in vivo, including known signaling molecules (e.g., the endogenous cannabinoid anandamide) and a novel family of nervous system-enriched natural products, the taurine-conjugated fatty acids. Remarkably, the relative hydrolytic activity that {FAAH} exhibited for lipid metabolites in vitro was not predictive of the identity of specific {FAAH} substrates in vivo. Thus, global metabolite profiling establishes unanticipated connections between the proteome and metabolome that enable assignment of an enzyme's unique biochemical functions in vivo.}
}
@article{lim_2010,
title = {{T3DB}: a comprehensively annotated database of common toxins and their targets.},
author = {Lim, Emilia and Pon, Allison and Djoumbou, Yannick and Knox, Craig and Shrivastava, Savita and Guo, An Chi and Neveu, Vanessa and Wishart, David S},
pages = {D781-6},
url = {http://dx.doi.org/10.1093/nar/gkp934},
year = {2010},
month = {jan},
urldate = {2019-07-23},
journal = {Nucleic Acids Research},
volume = {38},
number = {Database issue},
doi = {10.1093/nar/gkp934},
pmid = {19897546},
pmcid = {PMC2808899},
f1000-projects = {shared citations},
abstract = {In an effort to capture meaningful biological, chemical and mechanistic information about clinically relevant, commonly encountered or important toxins, we have developed the Toxin and Toxin-Target Database ({T3DB}). The {T3DB} is a unique bioinformatics resource that compiles comprehensive information about common or ubiquitous toxins and their toxin-targets into a single electronic repository. The database currently contains over 2900 small molecule and peptide toxins, 1300 toxin-targets and more than 33,000 toxin-target associations. Each {T3DB} record ({ToxCard}) contains over 80 data fields providing detailed information on chemical properties and descriptors, toxicity values, protein and gene sequences (for both targets and toxins), molecular and cellular interaction data, toxicological data, mechanistic information and references. This information has been manually extracted and manually verified from numerous sources, including other electronic databases, government documents, textbooks and scientific journals. A key focus of the {T3DB} is on providing 'depth' over 'breadth' with detailed descriptions, mechanisms of action, and information on toxins and toxin-targets. {T3DB} is fully searchable and supports extensive text, sequence, chemical structure and relational query searches, similar to those found in the Human Metabolome Database ({HMDB}) and {DrugBank}. Potential applications of the {T3DB} include clinical metabolomics, toxin target prediction, toxicity prediction and toxicology education. The {T3DB} is available online at http://www.t3db.org.}
}
@article{smith_2006,
title = {{XCMS}: processing mass spectrometry data for metabolite profiling using nonlinear peak alignment, matching, and identification.},
author = {Smith, Colin A and Want, Elizabeth J and O'Maille, Grace and Abagyan, Ruben and Siuzdak, Gary},
pages = {779-787},
url = {http://dx.doi.org/10.1021/ac051437y},
year = {2006},
month = {feb},
day = {1},
urldate = {2018-01-13},
journal = {Analytical Chemistry},
volume = {78},
number = {3},
doi = {10.1021/ac051437y},
pmid = {16448051},
f1000-projects = {shared citations},
abstract = {Metabolite profiling in biomarker discovery, enzyme substrate assignment, drug activity/specificity determination, and basic metabolic research requires new data preprocessing approaches to correlate specific metabolites to their biological origin. Here we introduce an {LC}/{MS}-based data analysis approach, {XCMS}, which incorporates novel nonlinear retention time alignment, matched filtration, peak detection, and peak matching. Without using internal standards, the method dynamically identifies hundreds of endogenous metabolites for use as standards, calculating a nonlinear retention time correction profile for each sample. Following retention time correction, the relative metabolite ion intensities are directly compared to identify changes in specific endogenous metabolites, such as potential biomarkers. The software is demonstrated using data sets from a previously reported enzyme knockout study and a large-scale study of plasma samples. {XCMS} is freely available under an open-source license at http://metlin.scripps.edu/download/.}
}
@article{scheltema_2011,
title = {{PeakML}/{mzMatch}: a file format, Java library, R library, and tool-chain for mass spectrometry data analysis.},
author = {Scheltema, Richard A and Jankevics, Andris and Jansen, Ritsert C and Swertz, Morris A and Breitling, Rainer},
pages = {2786-2793},
url = {http://dx.doi.org/10.1021/ac2000994},
year = {2011},
month = {apr},
day = {1},
urldate = {2018-01-13},
journal = {Analytical Chemistry},
volume = {83},
number = {7},
doi = {10.1021/ac2000994},
pmid = {21401061},
f1000-projects = {shared citations},
abstract = {The recent proliferation of high-resolution mass spectrometers has generated a wealth of new data analysis methods. However, flexible integration of these methods into configurations best suited to the research question is hampered by heterogeneous file formats and monolithic software development. The {mzXML}, {mzData}, and {mzML} file formats have enabled uniform access to unprocessed raw data. In this paper we present our efforts to produce an equally simple and powerful format, {PeakML}, to uniformly exchange processed intermediary and result data. To demonstrate the versatility of {PeakML}, we have developed an open source Java toolkit for processing, filtering, and annotating mass spectra in a customizable pipeline ({mzMatch}), as well as a user-friendly data visualization environment ({PeakML} Viewer). The {PeakML} format in particular enables the flexible exchange of processed data between software created by different groups or companies, as we illustrate by providing a {PeakML}-based integration of the widely used {XCMS} package with {mzMatch} data processing tools. As an added advantage, downstream analysis can benefit from direct access to the full mass trace information underlying summarized mass spectrometry results, providing the user with the means to rapidly verify results. The {PeakML}/{mzMatch} software is freely available at http://mzmatch.sourceforge.net, with documentation, tutorials, and a community forum.}
}
@article{chambers_2012,
title = {A cross-platform toolkit for mass spectrometry and proteomics.},
author = {Chambers, Matthew C and Maclean, Brendan and Burke, Robert and Amodei, Dario and Ruderman, Daniel L and Neumann, Steffen and Gatto, Laurent and Fischer, Bernd and Pratt, Brian and Egertson, Jarrett and Hoff, Katherine and Kessner, Darren and Tasman, Natalie and Shulman, Nicholas and Frewen, Barbara and Baker, Tahmina A and Brusniak, Mi-Youn and Paulse, Christopher and Creasy, David and Flashner, Lisa and Kani, Kian and Moulding, Chris and Seymour, Sean L and Nuwaysir, Lydia M and Lefebvre, Brent and Kuhlmann, Frank and Roark, Joe and Paape, Rainer and Suckau, Detlev and Hemenway, Tina and Huhmer, Andreas and Langridge, James and Connolly, Brian and Chadick, Trey and Holly, Krisztina and Eckels, Josh and Deutsch, Eric W and Moritz, Robert L and Katz, Jonathan E and Agus, David B and {MacCoss}, Michael and Tabb, David L and Mallick, Parag},
pages = {918-920},
url = {http://www.nature.com/doifinder/10.1038/nbt.2377},
year = {2012},
month = {oct},
urldate = {2018-01-13},
journal = {Nature Biotechnology},
volume = {30},
number = {10},
issn = {1087-0156},
doi = {10.1038/nbt.2377},
pmid = {23051804},
pmcid = {PMC3471674},
f1000-projects = {shared citations}
}
@article{kuhl_2012,
title = {{CAMERA}: an integrated strategy for compound spectra extraction and annotation of liquid chromatography/mass spectrometry data sets.},
author = {Kuhl, Carsten and Tautenhahn, Ralf and Böttcher, Christoph and Larson, Tony R and Neumann, Steffen},
pages = {283-289},
url = {http://dx.doi.org/10.1021/ac202450g},
year = {2012},
month = {jan},
day = {3},
urldate = {2018-01-13},
journal = {Analytical Chemistry},
volume = {84},
number = {1},
doi = {10.1021/ac202450g},
pmid = {22111785},
pmcid = {PMC3658281},
f1000-projects = {shared citations},
abstract = {Liquid chromatography coupled to mass spectrometry is routinely used for metabolomics experiments. In contrast to the fairly routine and automated data acquisition steps, subsequent compound annotation and identification require extensive manual analysis and thus form a major bottleneck in data interpretation. Here we present {CAMERA}, a Bioconductor package integrating algorithms to extract compound spectra, annotate isotope and adduct peaks, and propose the accurate compound mass even in highly complex data. To evaluate the algorithms, we compared the annotation of {CAMERA} against a manually defined annotation for a mixture of known compounds spiked into a complex matrix at different concentrations. {CAMERA} successfully extracted accurate masses for 89.7\% and 90.3\% of the annotatable compounds in positive and negative ion modes, respectively. Furthermore, we present a novel annotation approach that combines spectral information of data acquired in opposite ion modes to further improve the annotation rate. We demonstrate the utility of {CAMERA} in two different, easily adoptable plant metabolomics experiments, where the application of {CAMERA} drastically reduced the amount of manual analysis. \copyright 2011 American Chemical Society}
}
@article{wilkinson_2016,
title = {The {FAIR} Guiding Principles for scientific data management and stewardship.},
author = {Wilkinson, Mark D and Dumontier, Michel and Aalbersberg, IJsbrand Jan and Appleton, Gabrielle and Axton, Myles and Baak, Arie and Blomberg, Niklas and Boiten, Jan-Willem and da Silva Santos, Luiz Bonino and Bourne, Philip E and Bouwman, Jildau and Brookes, Anthony J and Clark, Tim and Crosas, Mercè and Dillo, Ingrid and Dumon, Olivier and Edmunds, Scott and Evelo, Chris T and Finkers, Richard and Gonzalez-Beltran, Alejandra and Gray, Alasdair J G and Groth, Paul and Goble, Carole and Grethe, Jeffrey S and Heringa, Jaap and 't Hoen, Peter A C and Hooft, Rob and Kuhn, Tobias and Kok, Ruben and Kok, Joost and Lusher, Scott J and Martone, Maryann E and Mons, Albert and Packer, Abel L and Persson, Bengt and Rocca-Serra, Philippe and Roos, Marco and van Schaik, Rene and Sansone, Susanna-Assunta and Schultes, Erik and Sengstag, Thierry and Slater, Ted and Strawn, George and Swertz, Morris A and Thompson, Mark and van der Lei, Johan and van Mulligen, Erik and Velterop, Jan and Waagmeester, Andra and Wittenburg, Peter and Wolstencroft, Katherine and Zhao, Jun and Mons, Barend},
pages = {160018},
url = {http://www.nature.com/articles/sdata201618},
year = {2016},
month = {mar},
day = {15},
urldate = {2018-07-13},
journal = {Scientific Data},
volume = {3},
issn = {2052-4463},
doi = {10.1038/sdata.2016.18},
pmid = {26978244},
pmcid = {PMC4792175},
f1000-projects = {shared citations},
abstract = {There is an urgent need to improve the infrastructure supporting the reuse of scholarly data. A diverse set of stakeholders-representing academia, industry, funding agencies, and scholarly publishers-have come together to design and jointly endorse a concise and measureable set of principles that we refer to as the {FAIR} Data Principles. The intent is that these may act as a guideline for those wishing to enhance the reusability of their data holdings. Distinct from peer initiatives that focus on the human scholar, the {FAIR} Principles put specific emphasis on enhancing the ability of machines to automatically find and use the data, in addition to supporting its reuse by individuals. This Comment is the first formal publication of the {FAIR} Principles, and includes the rationale behind them, and some exemplar implementations in the community.}
}
@article{kim_2016,
title = {{PubChem} Substance and Compound databases.},
author = {Kim, Sunghwan and Thiessen, Paul A and Bolton, Evan E and Chen, Jie and Fu, Gang and Gindulyte, Asta and Han, Lianyi and He, Jane and He, Siqian and Shoemaker, Benjamin A and Wang, Jiyao and Yu, Bo and Zhang, Jian and Bryant, Stephen H},
pages = {D1202-13},
url = {http://dx.doi.org/10.1093/nar/gkv951},
year = {2016},
month = {jan},
day = {4},
urldate = {2018-01-29},
journal = {Nucleic Acids Research},
volume = {44},
number = {D1},
doi = {10.1093/nar/gkv951},
pmid = {26400175},
pmcid = {PMC4702940},
f1000-projects = {shared citations},
abstract = {{PubChem} (https://pubchem.ncbi.nlm.nih.gov) is a public repository for information on chemical substances and their biological activities, launched in 2004 as a component of the Molecular Libraries Roadmap Initiatives of the {US} National Institutes of Health ({NIH}). For the past 11 years, {PubChem} has grown to a sizable system, serving as a chemical information resource for the scientific research community. {PubChem} consists of three inter-linked databases, Substance, Compound and {BioAssay}. The Substance database contains chemical information deposited by individual data contributors to {PubChem}, and the Compound database stores unique chemical structures extracted from the Substance database. Biological activity data of chemical substances tested in assay experiments are contained in the {BioAssay} database. This paper provides an overview of the {PubChem} Substance and Compound databases, including data sources and contents, data organization, data submission using {PubChem} Upload, chemical structure standardization, web-based interfaces for textual and non-textual searches, and programmatic access. It also gives a brief description of {PubChem3D}, a resource derived from theoretical three-dimensional structures of compounds in {PubChem}, as well as {PubChemRDF}, Resource Description Framework ({RDF})-formatted {PubChem} data for data sharing, analysis and integration with information contained in other databases. Published by Oxford University Press on behalf of Nucleic Acids Research 2015. This work is written by (a) {US} Government employee(s) and is in the public domain in the {US}.}
}
@article{collberg_2016,
title = {Repeatability in computer systems research.},
author = {Collberg, Christian and Proebsting, Todd A.},
pages = {62-69},
url = {http://dl.acm.org/citation.cfm?doid=2897191.2812803},
year = {2016},
month = {feb},
day = {25},
urldate = {2019-05-13},
journal = {Communications of the {ACM}},
volume = {59},
number = {3},
issn = {0001-0782},
doi = {10.1145/2812803},
f1000-projects = {shared citations},
abstract = {To encourage repeatable research, fund repeatability engineering and reward commitments to sharing research artifacts.}
}
@article{zhang_2009,
title = {{KEGGgraph}: a graph approach to {KEGG} {PATHWAY} in R and Bioconductor.},
author = {Zhang, Jitao David and Wiemann, Stefan},
pages = {1470-1471},
url = {http://dx.doi.org/10.1093/bioinformatics/btp167},
year = {2009},
month = {jun},
day = {1},
urldate = {2019-05-09},
journal = {Bioinformatics},
volume = {25},
number = {11},
doi = {10.1093/bioinformatics/btp167},
pmid = {19307239},
pmcid = {PMC2682514},
f1000-projects = {shared citations},
abstract = {{MOTIVATION}: {KEGG} {PATHWAY} is a service of Kyoto Encyclopedia of Genes and Genomes ({KEGG}), constructing manually curated pathway maps that represent current knowledge on biological networks in graph models. While valuable graph tools have been implemented in R/Bioconductor, to our knowledge there is currently no software package to parse and analyze {KEGG} pathways with graph theory. {RESULTS}: We introduce the software package {KEGGgraph} in R and Bioconductor, an interface between {KEGG} pathways and graph models as well as a collection of tools for these graphs. Superior to existing approaches, {KEGGgraph} captures the pathway topology and allows further analysis or dissection of pathway graphs. We demonstrate the use of the package by the case study of analyzing human pancreatic cancer pathway. {AVAILABILITY}: {KEGGgraph} is freely available at the Bioconductor web site (http://www.bioconductor.org). {KGML} files can be downloaded from {KEGG} {FTP} site (ftp://ftp.genome.jp/pub/kegg/xml).}
}
@article{lewis_2009,
title = {{rNMR}: open source software for identifying and quantifying metabolites in {NMR} spectra.},
author = {Lewis, Ian A and Schommer, Seth C and Markley, John L},
pages = {S123-6},
url = {http://dx.doi.org/10.1002/mrc.2526},
year = {2009},
month = {dec},
urldate = {2018-01-15},
journal = {Magnetic Resonance in Chemistry},
volume = {47 Suppl 1},
doi = {10.1002/mrc.2526},
pmid = {19821464},
pmcid = {PMC2798074},
f1000-projects = {shared citations},
abstract = {Despite the extensive use of nuclear magnetic resonance ({NMR}) for metabolomics, no publicly available tools have been designed for identifying and quantifying metabolites across multiple spectra. We introduce here a new open source software tool, {rNMR}, which provides a simple graphics-based method for visualizing, identifying, and quantifying metabolites across multiple one- or two-dimensional {NMR} spectra. {rNMR} differs from existing software tools for {NMR} spectroscopy in that analyses are based on regions of interest ({ROIs}) rather than peak lists. {ROIs} contain all of the underlying {NMR} data within user-defined chemical shift ranges. {ROIs} can be inspected visually, and they support robust quantification of {NMR} signals. {ROI}-based analyses support simultaneous views of metabolite signals from up to hundreds of spectra, and {ROI} boundaries can be adjusted dynamically to ensure that signals corresponding to assigned atoms are analyzed consistently throughout the dataset. We describe how {rNMR} greatly reduces the time required for robust bioanalytical analysis of complex {NMR} data. An {rNMR} analysis yields a compact and transparent way of archiving the results from a metabolomics study so that it can be examined and evaluated by others. The {rNMR} website at http://rnmr.nmrfam.wisc.edu offers downloadable versions of {rNMR} for Windows, Macintosh, and Linux platforms along with extensive help documentation, instructional videos, and sample data.}
}
@article{wishart_2009,
title = {{HMDB}: a knowledgebase for the human metabolome.},
author = {Wishart, David S and Knox, Craig and Guo, An Chi and Eisner, Roman and Young, Nelson and Gautam, Bijaya and Hau, David D and Psychogios, Nick and Dong, Edison and Bouatra, Souhaila and Mandal, Rupasri and Sinelnikov, Igor and Xia, Jianguo and Jia, Leslie and Cruz, Joseph A and Lim, Emilia and Sobsey, Constance A and Shrivastava, Savita and Huang, Paul and Liu, Philip and Fang, Lydia and Peng, Jun and Fradette, Ryan and Cheng, Dean and Tzur, Dan and Clements, Melisa and Lewis, Avalyn and De Souza, Andrea and Zuniga, Azaret and Dawe, Margot and Xiong, Yeping and Clive, Derrick and Greiner, Russ and Nazyrova, Alsu and Shaykhutdinov, Rustem and Li, Liang and Vogel, Hans J and Forsythe, Ian},
pages = {D603-10},
url = {http://dx.doi.org/10.1093/nar/gkn810},
year = {2009},
month = {jan},
urldate = {2019-06-11},
journal = {Nucleic Acids Research},
volume = {37},
number = {Database issue},
doi = {10.1093/nar/gkn810},
pmid = {18953024},
pmcid = {PMC2686599},
f1000-projects = {shared citations},
abstract = {The Human Metabolome Database ({HMDB}, http://www.hmdb.ca) is a richly annotated resource that is designed to address the broad needs of biochemists, clinical chemists, physicians, medical geneticists, nutritionists and members of the metabolomics community. Since its first release in 2007, the {HMDB} has been used to facilitate the research for nearly 100 published studies in metabolomics, clinical biochemistry and systems biology. The most recent release of {HMDB} (version 2.0) has been significantly expanded and enhanced over the previous release (version 1.0). In particular, the number of fully annotated metabolite entries has grown from 2180 to more than 6800 (a 300\% increase), while the number of metabolites with biofluid or tissue concentration data has grown by a factor of five (from 883 to 4413). Similarly, the number of purified compounds with reference to {NMR}, {LC}-{MS} and {GC}-{MS} spectra has more than doubled (from 380 to more than 790 compounds). In addition to this significant expansion in database size, many new database searching tools and new data content has been added or enhanced. These include better algorithms for spectral searching and matching, more powerful chemical substructure searches, faster text searching software, as well as dedicated pathway searching tools and customized, clickable metabolic maps. Changes to the user-interface have also been implemented to accommodate future expansion and to make database navigation much easier. These improvements should make the {HMDB} much more useful to a much wider community of users.}
}
@article{sales_2012,
title = {{graphite} - a Bioconductor package to convert pathway topology to gene network.},
author = {Sales, Gabriele and Calura, Enrica and Cavalieri, Duccio and Romualdi, Chiara},
pages = {20},
url = {http://dx.doi.org/10.1186/1471-2105-13-20},
year = {2012},
month = {jan},
day = {31},
urldate = {2019-05-09},
journal = {{BMC} Bioinformatics},
volume = {13},
doi = {10.1186/1471-2105-13-20},
pmid = {22292714},
pmcid = {PMC3296647},
f1000-projects = {shared citations},
abstract = {{BACKGROUND}: Gene set analysis is moving towards considering pathway topology as a crucial feature. Pathway elements are complex entities such as protein complexes, gene family members and chemical compounds. The conversion of pathway topology to a gene/protein networks (where nodes are a simple element like a gene/protein) is a critical and challenging task that enables topology-based gene set analyses.Unfortunately, currently available R/Bioconductor packages provide pathway networks only from single databases. They do not propagate signals through chemical compounds and do not differentiate between complexes and gene families. {RESULTS}: Here we present graphite, a Bioconductor package addressing these issues. Pathway information from four different databases is interpreted following specific biologically-driven rules that allow the reconstruction of gene-gene networks taking into account protein complexes, gene families and sensibly removing chemical compounds from the final graphs. The resulting networks represent a uniform resource for pathway analyses. Indeed, graphite provides easy access to three recently proposed topological methods. The graphite package is available as part of the Bioconductor software suite. {CONCLUSIONS}: graphite is an innovative package able to gather and make easily available the contents of the four major pathway databases. In the field of topological analysis graphite acts as a provider of biological information by reducing the pathway complexity considering the biological meaning of the pathway elements.}
}
@article{cao_2008,
title = {{ChemmineR}: a compound mining framework for R.},
author = {Cao, Yiqun and Charisi, Anna and Cheng, Li-Chang and Jiang, Tao and Girke, Thomas},
pages = {1733-1734},
url = {http://dx.doi.org/10.1093/bioinformatics/btn307},
year = {2008},
month = {aug},
day = {1},
urldate = {2018-01-13},
journal = {Bioinformatics},
volume = {24},
number = {15},
doi = {10.1093/bioinformatics/btn307},
pmid = {18596077},
pmcid = {PMC2638865},
f1000-projects = {shared citations},
abstract = {{MOTIVATION}: Software applications for structural similarity searching and clustering of small molecules play an important role in drug discovery and chemical genomics. Here, we present the first open-source compound mining framework for the popular statistical programming environment R. The integration with a powerful statistical environment maximizes the flexibility, expandability and programmability of the provided analysis functions. {RESULTS}: We discuss the algorithms and compound mining utilities provided by the R package {ChemmineR}. It contains functions for structural similarity searching, clustering of compound libraries with a wide spectrum of classification algorithms and various utilities for managing complex compound data. It also offers a wide range of visualization functions for compound clusters and chemical structures. The package is well integrated with the online {ChemMine} environment and allows bidirectional communications between the two services. {AVAILABILITY}: {ChemmineR} is freely available as an R package from the {ChemMine} project site: http://bioweb.ucr.edu/{ChemMineV2}/chemminer}
}
@article{heller_2015,
title = {{InChI}, the {IUPAC} International Chemical Identifier.},
author = {Heller, Stephen R and {McNaught}, Alan and Pletnev, Igor and Stein, Stephen and Tchekhovskoi, Dmitrii},
pages = {23},
url = {http://dx.doi.org/10.1186/s13321-015-0068-4},
year = {2015},
month = {may},
day = {30},
urldate = {2018-01-29},
journal = {Journal of Cheminformatics},
volume = {7},
doi = {10.1186/s13321-015-0068-4},
pmid = {26136848},
pmcid = {PMC4486400},
f1000-projects = {shared citations},
abstract = {This paper documents the design, layout and algorithms of the {IUPAC} International Chemical Identifier, {InChI}.}
}
@article{jacob_2016,
title = {Correcting gene expression data when neither the unwanted variation nor the factor of interest are observed.},
author = {Jacob, Laurent and Gagnon-Bartsch, Johann A and Speed, Terence P},
pages = {16-28},
url = {http://dx.doi.org/10.1093/biostatistics/kxv026},
year = {2016},
month = {jan},
urldate = {2017-05-28},
journal = {Biostatistics},
volume = {17},
number = {1},
doi = {10.1093/biostatistics/kxv026},
pmid = {26286812},
pmcid = {PMC4679071},
f1000-projects = {shared citations},