-
Notifications
You must be signed in to change notification settings - Fork 0
/
demo.bib
14941 lines (14115 loc) · 785 KB
/
demo.bib
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
%% This BibTeX bibliography file was created using BibDesk.
%% http://bibdesk.sourceforge.net/
%% Created for Jérémie at 2011-09-13 15:24:45 +0200
%% Saved with string encoding Unicode (UTF-8)
@article{breiman2001random,
  author    = {Breiman, Leo},
  title     = {Random forests},
  journal   = {Machine Learning},
  volume    = {45},
  number    = {1},
  pages     = {5--32},
  year      = {2001},
  publisher = {Springer}
}
@book{james2013introductiontostatlearning,
  author    = {James, Gareth and Witten, Daniela and Hastie, Trevor and Tibshirani, Robert},
  title     = {An introduction to statistical learning},
  publisher = {Springer},
  volume    = {112},
  year      = {2013}
}
% Generated by Paperpile. Check out http://paperpile.com for more information.
% BibTeX export options can be customized via Settings -> BibTeX.
@incollection{kufareva2011methods,
  author    = {Kufareva, Irina and Abagyan, Ruben},
  title     = {Methods of protein structure comparison},
  booktitle = {Homology Modeling},
  publisher = {Springer},
  pages     = {231--257},
  year      = {2011}
}
@article{dong2018structural,
  author    = {Dong, Yun-wei and Liao, Ming-ling and Meng, Xian-liang and Somero, George N},
  title     = {Structural flexibility and protein adaptation to temperature: Molecular dynamics analysis of malate dehydrogenases of marine molluscs},
  journal   = {Proceedings of the National Academy of Sciences},
  volume    = {115},
  number    = {6},
  pages     = {1274--1279},
  year      = {2018},
  publisher = {National Academy of Sciences}
}
@article{mauger2019mrna,
  author    = {Mauger, David M and Cabral, B Joseph and Presnyak, Vladimir and Su, Stephen V and Reid, David W and Goodman, Brooke and Link, Kristian and Khatwani, Nikhil and Reynders, John and Moore, Melissa J and others},
  title     = {{mRNA} structure regulates protein expression through changes in functional half-life},
  journal   = {Proceedings of the National Academy of Sciences},
  volume    = {116},
  number    = {48},
  pages     = {24075--24083},
  year      = {2019},
  publisher = {National Academy of Sciences}
}
% Author names corrected (Press, Vetterling); the press moved from the
% invalid journal field to publisher. Citation key kept for existing cites.
@book{presse1988numerical,
  author    = {Press, William H. and Flannery, Brian P. and Teukolsky, Saul A. and Vetterling, William T.},
  title     = {Numerical recipes in {C}},
  publisher = {Cambridge University Press},
  year      = {1988}
}
% Statement of responsibility removed from the title; duplicate author and
% spurious "and others" dropped. Publisher/edition: please confirm against
% the physical copy.
@book{hames2005biochemistry,
  author    = {Hames, B. D. and Hooper, N. M.},
  title     = {Biochemistry},
  edition   = {Third},
  publisher = {Taylor \& Francis},
  year      = {2005}
}
@article{lagarias1998convergence,
  author    = {Lagarias, Jeffrey C and Reeds, James A and Wright, Margaret H and Wright, Paul E},
  title     = {Convergence properties of the {Nelder--Mead} simplex method in low dimensions},
  journal   = {SIAM Journal on Optimization},
  volume    = {9},
  number    = {1},
  pages     = {112--147},
  year      = {1998},
  publisher = {SIAM}
}
@article{liu2018fermented,
  author    = {Liu, Li and Wang, Jiajing and Rosenberg, Danny and Zhao, Hao and Lengyel, Gy{\"o}rgy and Nadel, Dani},
  title     = {Fermented beverage and food storage in 13,000 y-old stone mortars at {Raqefet Cave}, {Israel}: Investigating {Natufian} ritual feasting},
  journal   = {Journal of Archaeological Science: Reports},
  volume    = {21},
  pages     = {783--793},
  year      = {2018},
  publisher = {Elsevier}
}
@article{puetz2019recombinant,
  author    = {Puetz, John and Wurm, Florian M},
  title     = {Recombinant proteins for industrial versus pharmaceutical purposes: a review of process and pricing},
  journal   = {Processes},
  publisher = {Multidisciplinary Digital Publishing Institute},
  volume    = {7},
  number    = {8},
  pages     = {476},
  year      = {2019}
}
@article{demain2009production,
  author    = {Demain, Arnold L and Vaishnav, Preeti},
  title     = {Production of recombinant proteins by microbes and higher organisms},
  journal   = {Biotechnology Advances},
  publisher = {Elsevier},
  volume    = {27},
  number    = {3},
  pages     = {297--306},
  year      = {2009}
}
@article{jia2016high,
  author    = {Jia, Baolei and Jeon, Che Ok},
  title     = {High-throughput recombinant protein expression in \textit{Escherichia coli}: current status and future perspectives},
  journal   = {Open Biology},
  publisher = {The Royal Society},
  volume    = {6},
  number    = {8},
  pages     = {160196},
  year      = {2016}
}
@article{braun2003high,
  author    = {Braun, Pascal and LaBaer, Josh},
  title     = {High throughput protein production for functional proteomics},
  journal   = {Trends in Biotechnology},
  publisher = {Elsevier},
  volume    = {21},
  number    = {9},
  pages     = {383--388},
  year      = {2003}
}
@article{stevens2000design,
  author    = {Stevens, Raymond C},
  title     = {Design of high-throughput methods of protein production for structural biology},
  journal   = {Structure},
  publisher = {Elsevier},
  volume    = {8},
  number    = {9},
  pages     = {R177--R185},
  year      = {2000}
}
@article{walsh2014biopharmaceutical,
  author    = {Walsh, Gary},
  title     = {Biopharmaceutical benchmarks 2014},
  journal   = {Nature Biotechnology},
  publisher = {Nature Publishing Group},
  volume    = {32},
  number    = {10},
  pages     = {992--1000},
  year      = {2014}
}
% Volume 38 (Database issue) is dated January 2010; the article appeared
% online in 2009, which is why the citation key says 2009.
@article{turner2009nndb,
  author    = {Turner, Douglas H and Mathews, David H},
  title     = {{NNDB}: the nearest neighbor parameter database for predicting stability of nucleic acid secondary structure},
  journal   = {Nucleic Acids Research},
  volume    = {38},
  number    = {suppl\_1},
  pages     = {D280--D282},
  year      = {2010},
  publisher = {Oxford University Press}
}
@article{zuker1981optimal,
  author    = {Zuker, Michael and Stiegler, Patrick},
  title     = {Optimal computer folding of large {RNA} sequences using thermodynamics and auxiliary information},
  journal   = {Nucleic Acids Research},
  volume    = {9},
  number    = {1},
  pages     = {133--148},
  year      = {1981},
  publisher = {Oxford University Press}
}
@article{zuker1989finding,
  author    = {Zuker, Michael},
  title     = {On finding all suboptimal foldings of an {RNA} molecule},
  journal   = {Science},
  volume    = {244},
  number    = {4900},
  pages     = {48--52},
  year      = {1989},
  publisher = {American Association for the Advancement of Science}
}
@article{waterman1985dynamic,
  author    = {Waterman, Michael S and Byers, Thomas H},
  title     = {A dynamic programming algorithm to find all solutions in a neighborhood of the optimum},
  journal   = {Mathematical Biosciences},
  volume    = {77},
  number    = {1--2},
  pages     = {179--188},
  year      = {1985},
  publisher = {Elsevier}
}
@article{valencia2006control,
  author    = {Valencia-Sanchez, Marco Antonio and Liu, Jidong and Hannon, Gregory J and Parker, Roy},
  title     = {Control of translation and {mRNA} degradation by {miRNAs} and {siRNAs}},
  journal   = {Genes \& Development},
  volume    = {20},
  number    = {5},
  pages     = {515--524},
  year      = {2006},
  publisher = {Cold Spring Harbor Lab}
}
@article{catalanotto2016microrna,
  author    = {Catalanotto, Caterina and Cogoni, Carlo and Zardo, Giuseppe},
  title     = {{MicroRNA} in control of gene expression: an overview of nuclear functions},
  journal   = {International Journal of Molecular Sciences},
  volume    = {17},
  number    = {10},
  pages     = {1712},
  year      = {2016},
  publisher = {Multidisciplinary Digital Publishing Institute}
}
@article{mccaskill1990equilibrium,
  author    = {McCaskill, John S},
  title     = {The equilibrium partition function and base pair binding probabilities for {RNA} secondary structure},
  journal   = {Biopolymers},
  volume    = {29},
  number    = {6--7},
  pages     = {1105--1119},
  year      = {1990},
  publisher = {Wiley Online Library}
}
@article{ding2005rna,
  author    = {Ding, Ye and Chan, Chi Yu and Lawrence, Charles E},
  title     = {{RNA} secondary structure prediction by centroids in a {Boltzmann} weighted ensemble},
  journal   = {{RNA}},
  volume    = {11},
  number    = {8},
  pages     = {1157--1166},
  year      = {2005},
  publisher = {Cold Spring Harbor Lab}
}
@article{eddy2004rna,
  author    = {Eddy, Sean R},
  title     = {How do {RNA} folding algorithms work?},
  journal   = {Nature Biotechnology},
  volume    = {22},
  number    = {11},
  pages     = {1457--1458},
  year      = {2004},
  publisher = {Nature Publishing Group}
}
@article{flamm2000rna,
  author    = {Flamm, Christoph and Fontana, Walter and Hofacker, Ivo L and Schuster, Peter},
  title     = {{RNA} folding at elementary step resolution},
  journal   = {{RNA}},
  volume    = {6},
  number    = {3},
  pages     = {325--338},
  year      = {2000},
  publisher = {Cambridge University Press}
}
@article{flamm2002barrier,
  author    = {Flamm, Christoph and Hofacker, Ivo L and Stadler, Peter F and Wolfinger, Michael T},
  title     = {Barrier trees of degenerate landscapes},
  journal   = {Zeitschrift f{\"u}r Physikalische Chemie},
  volume    = {216},
  number    = {2},
  pages     = {155--173},
  year      = {2002},
  publisher = {De Gruyter Oldenbourg}
}
@article{lorenz2011computing,
  author    = {Lorenz, William A and Clote, Peter},
  title     = {Computing the partition function for kinetically trapped {RNA} secondary structures},
  journal   = {PLoS One},
  volume    = {6},
  number    = {1},
  pages     = {e16178},
  year      = {2011},
  publisher = {Public Library of Science}
}
@article{bhandari2019highly,
  author    = {Bhandari, Bikash K and Lim, Chun Shen and Gardner, Paul P},
  title     = {Highly accessible translation initiation sites are predictive of successful heterologous protein expression},
  journal   = {BioRxiv},
  publisher = {Cold Spring Harbor Laboratory},
  pages     = {726752},
  year      = {2019}
}
@article{guruprasad1990correlation,
  author    = {Guruprasad, Kunchur and Reddy, BV Bhasker and Pandit, Madhusudan W},
  title     = {Correlation between stability of a protein and its dipeptide composition: a novel approach for predicting in vivo stability of a protein from its primary sequence},
  journal   = {Protein Engineering, Design and Selection},
  publisher = {Oxford University Press},
  volume    = {4},
  number    = {2},
  pages     = {155--161},
  year      = {1990}
}
@article{vihinen1994accuracy,
  author    = {Vihinen, Mauno and Torkkila, Esa and Riikonen, Pentti},
  title     = {Accuracy of protein flexibility predictions},
  journal   = {Proteins: Structure, Function, and Bioinformatics},
  publisher = {Wiley Online Library},
  volume    = {19},
  number    = {2},
  pages     = {141--149},
  year      = {1994}
}
@article{mann2017intarna,
  author    = {Mann, Martin and Wright, Patrick R and Backofen, Rolf},
  title     = {{IntaRNA} 2.0: enhanced and customizable prediction of {RNA--RNA} interactions},
  journal   = {Nucleic Acids Research},
  volume    = {45},
  number    = {W1},
  pages     = {W435--W439},
  year      = {2017},
  publisher = {Oxford University Press}
}
@article{ikemura1985codon,
  author  = {Ikemura, Toshimichi},
  title   = {Codon usage and {tRNA} content in unicellular and multicellular organisms},
  journal = {Molecular Biology and Evolution},
  volume  = {2},
  number  = {1},
  pages   = {13--34},
  year    = {1985}
}
@article{pelletier1987involvement,
  author    = {Pelletier, Jerry and Sonenberg, Nahum},
  title     = {The involvement of {mRNA} secondary structure in protein synthesis},
  journal   = {Biochemistry and Cell Biology},
  volume    = {65},
  number    = {6},
  pages     = {576--581},
  year      = {1987},
  publisher = {NRC Research Press}
}
@article{ding2008ab,
  author    = {Ding, Feng and Sharma, Shantanu and Chalasani, Poornima and Demidov, Vadim V and Broude, Natalia E and Dokholyan, Nikolay V},
  title     = {Ab initio {RNA} folding by discrete molecular dynamics: from structure prediction to folding mechanisms},
  journal   = {{RNA}},
  volume    = {14},
  number    = {6},
  pages     = {1164--1173},
  year      = {2008},
  publisher = {Cold Spring Harbor Lab}
}
@article{mcdowell2007molecular,
  author    = {McDowell, S Elizabeth and {\v{S}}pa{\v{c}}kov{\'a}, Na{\v{d}}a and {\v{S}}poner, Ji{\v{r}}{\'\i} and Walter, Nils G},
  title     = {Molecular dynamics simulations of {RNA}: an in silico single molecule approach},
  journal   = {Biopolymers},
  volume    = {85},
  number    = {2},
  pages     = {169--184},
  year      = {2007},
  publisher = {Wiley Online Library}
}
@article{kondo2010reaction,
  author    = {Kondo, Shigeru and Miura, Takashi},
  title     = {Reaction-diffusion model as a framework for understanding biological pattern formation},
  journal   = {Science},
  volume    = {329},
  number    = {5999},
  pages     = {1616--1620},
  year      = {2010},
  publisher = {American Association for the Advancement of Science}
}
@article{turing1990chemical,
  author    = {Turing, Alan Mathison},
  title     = {The chemical basis of morphogenesis},
  journal   = {Bulletin of Mathematical Biology},
  volume    = {52},
  number    = {1--2},
  pages     = {153--197},
  year      = {1990},
  publisher = {Springer}
}
@article{dong2017shaping,
  author    = {Dong, Peng and Liu, Zhe},
  title     = {Shaping development by stochasticity and dynamics in gene regulation},
  journal   = {Open Biology},
  publisher = {The Royal Society},
  volume    = {7},
  number    = {5},
  pages     = {170030},
  year      = {2017}
}
@article{banani2017biomolecular,
  author    = {Banani, Salman F and Lee, Hyun O and Hyman, Anthony A and Rosen, Michael K},
  title     = {Biomolecular condensates: organizers of cellular biochemistry},
  journal   = {Nature Reviews Molecular Cell Biology},
  volume    = {18},
  number    = {5},
  pages     = {285--298},
  year      = {2017},
  publisher = {Nature Publishing Group}
}
@article{stoeger2016passive,
  author    = {Stoeger, Thomas and Battich, Nico and Pelkmans, Lucas},
  title     = {Passive noise filtering by cellular compartmentalization},
  journal   = {Cell},
  publisher = {Elsevier},
  volume    = {164},
  number    = {6},
  pages     = {1151--1161},
  year      = {2016}
}
@article{rao2002control,
  author    = {Rao, Christopher V and Wolf, Denise M and Arkin, Adam P},
  title     = {Control, exploitation and tolerance of intracellular noise},
  journal   = {Nature},
  volume    = {420},
  number    = {6912},
  pages     = {231--237},
  year      = {2002},
  publisher = {Nature Publishing Group}
}
@article{itakura1977expression,
  author    = {Itakura, Keiichi and Hirose, Tadaaki and Crea, Roberto and Riggs, Arthur D and Heyneker, Herbert L and Bolivar, Francisco and Boyer, Herbert W},
  title     = {Expression in \textit{Escherichia coli} of a chemically synthesized gene for the hormone somatostatin},
  journal   = {Science},
  publisher = {American Association for the Advancement of Science},
  volume    = {198},
  number    = {4321},
  pages     = {1056--1063},
  year      = {1977}
}
@article{dubendorf1991controlling,
  author    = {Dubendorf, John W and Studier, F William},
  title     = {Controlling basal expression in an inducible {T7} expression system by blocking the target {T7} promoter with lac repressor},
  journal   = {Journal of Molecular Biology},
  publisher = {Elsevier},
  volume    = {219},
  number    = {1},
  pages     = {45--59},
  year      = {1991}
}
@article{Tunney2018-sr,
  author   = "Tunney, Robert and McGlincy, Nicholas J and Graham, Monica E and
              Naddaf, Nicki and Pachter, Lior and Lareau, Liana F",
  title    = "Accurate design of translational output by a neural network model
              of ribosome distribution",
  abstract = "Synonymous codon choice can have dramatic effects on ribosome
              speed and protein expression. Ribosome profiling experiments have
              underscored that ribosomes do not move uniformly along mRNAs.
              Here, we have modeled this variation in translation elongation by
              using a feed-forward neural network to predict the ribosome
              density at each codon as a function of its sequence neighborhood.
              Our approach revealed sequence features affecting translation
              elongation and characterized large technical biases in ribosome
              profiling. We applied our model to design synonymous variants of
              a fluorescent protein spanning the range of translation speeds
              predicted with our model. Levels of the fluorescent protein in
              budding yeast closely tracked the predicted translation speeds
              across their full range. We therefore demonstrate that our model
              captures information determining translation dynamics in vivo;
              that this information can be harnessed to design coding
              sequences; and that control of translation elongation alone is
              sufficient to produce large quantitative differences in protein
              output.",
  journal  = "Nature Structural \& Molecular Biology",
  volume   = 25,
  number   = 7,
  pages    = "577--582",
  month    = jul,
  year     = 2018,
  language = "en"
}
% Abstract repaired: spacing around the italicised species name, "and?or"
% restored to "and/or", and the dangling ", )" left by a stripped link removed.
@article{Acton2005-ng,
  author   = "Acton, Thomas B and Gunsalus, Kristin C and Xiao, Rong and Ma, Li
              Chung and Aramini, James and Baran, Michael C and Chiang, Yi-Wen
              and Climent, Teresa and Cooper, Bonnie and Denissova, Natalia G
              and Douglas, Shawn M and Everett, John K and Ho, Chi Kent and
              Macapagal, Daphne and Rajan, Paranji K and Shastry, Ritu and
              Shih, Liang-Yu and Swapna, G V T and Wilson, Michael and Wu,
              Margaret and Gerstein, Mark and Inouye, Masayori and Hunt, John F
              and Montelione, Gaetano T",
  title    = "Robotic cloning and Protein Production Platform of the Northeast
              Structural Genomics Consortium",
  abstract = "In this chapter we describe the core Protein Production Platform
              of the Northeast Structural Genomics Consortium (NESG) and
              outline the strategies used for producing high-quality protein
              samples using \textit{Escherichia coli} host vectors. The platform is
              centered on 6X-His affinity-tagged protein constructs, allowing
              for a similar purification procedure for most targets, and the
              implementation of high-throughput parallel methods. In most
              cases, these affinity-purified proteins are sufficiently
              homogeneous that a single subsequent gel filtration
              chromatography step is adequate to produce protein preparations
              that are greater than 98\% pure. Using this platform, over 1000
              different proteins have been cloned, expressed, and purified in
              tens of milligram quantities over the last 36-month period (see
              Summary Statistics for All Targets). Our experience using a
              hierarchical multiplex expression and purification strategy, also
              described in this chapter, has allowed us to achieve success in
              producing not only protein samples but also many
              three-dimensional structures. As of December 2004, the NESG
              Consortium has deposited over 145 new protein structures to the
              Protein Data Bank (PDB); about two-thirds of these protein
              samples were produced by the NESG Protein Production Facility
              described here. The methods described here have proven effective
              in producing quality samples of both eukaryotic and prokaryotic
              proteins. These improved robotic and/or parallel cloning,
              expression, protein production, and biophysical screening
              technologies will be of broad value to the structural biology,
              functional proteomics, and structural genomics communities.",
  journal  = "Methods in Enzymology",
  volume   = 394,
  pages    = "210--243",
  year     = 2005,
  language = "en"
}
@article{Wang2015-ky,
  author   = {Wang, Mingcong and Herrmann, Christina J and Simonovic, Milan and
              Szklarczyk, Damian and von Mering, Christian},
  title    = {Version 4.0 of {PaxDb}: Protein abundance data, integrated across
              model organisms, tissues, and cell-lines},
  journal  = {Proteomics},
  volume   = {15},
  number   = {18},
  pages    = {3163--3168},
  month    = sep,
  year     = {2015},
  keywords = {Absolute protein abundance; Bioinformatics; Evolution; Spectral
              counting},
  language = {en},
  abstract = {Protein quantification at proteome-wide scale is an important
              aim, enabling insights into fundamental cellular biology and
              serving to constrain experiments and theoretical models. While
              proteome-wide quantification is not yet fully routine, many
              datasets approaching proteome-wide coverage are becoming
              available through biophysical and MS techniques. Data of this
              type can be accessed via a variety of sources, including
              publication supplements and online data repositories. However,
              access to the data is still fragmentary, and comparisons across
              experiments and organisms are not straightforward. Here, we
              describe recent updates to our database resource ``PaxDb''
              (Protein Abundances Across Organisms). PaxDb focuses on protein
              abundance information at proteome-wide scope, irrespective of the
              underlying measurement technique. Quantification data is
              reprocessed, unified, and quality-scored, and then integrated to
              build a meta-resource. PaxDb also allows evolutionary comparisons
              through precomputed gene orthology relations. Recently, we have
              expanded the scope of the database to include cell-line samples,
              and more systematically scan the literature for suitable
              datasets. We report that a significant fraction of published
              experiments cannot readily be accessed and/or parsed for
              quantitative information, requiring additional steps and efforts.
              The current update brings PaxDb to 414 datasets in 53 organisms,
              with (semi-) quantitative abundance information covering more
              than 300,000 proteins.}
}
@article{Sharp1987-ed,
  author   = "Sharp, Paul M. and Li, Wen-Hsiung",
  title    = "The {Codon Adaptation Index}---a measure of directional synonymous
              codon usage bias, and its potential applications",
  abstract = "A simple, effective measure of synonymous codon usage bias, the
              Codon Adaptation Index, is detailed. The index uses a reference
              set of highly expressed genes from a species to assess the
              relative merits of each codon, and a score for a gene is
              calculated from the frequency of use of all codons in that gene.
              The index assesses the extent to which selection has been
              effective in moulding the pattern of codon usage. In that respect
              it is useful for predicting the level of expression of a gene,
              for assessing the adaptation of viral genes to their hosts, and
              for making comparisons of codon usage in different organisms. The
              index may also give an approximate indication of the likely
              success of heterologous gene expression.",
  journal  = "Nucleic Acids Research",
  volume   = 15,
  number   = 3,
  pages    = "1281--1295",
  month    = feb,
  year     = 1987,
  language = "en"
}
@article{mittal2018codon,
  author    = {Mittal, Pragya and Brindle, James and Stephen, Julie and Plotkin, Joshua B and Kudla, Grzegorz},
  title     = {Codon usage influences fitness through {RNA} toxicity},
  journal   = {Proceedings of the National Academy of Sciences},
  volume    = {115},
  number    = {34},
  pages     = {8639--8644},
  year      = {2018},
  publisher = {National Academy of Sciences}
}
% The entry below contains non-ASCII chars that could not be converted
% to a LaTeX equivalent.
@article{Fu2012-ng,
  author   = "Fu, Limin and Niu, Beifang and Zhu, Zhengwei and Wu, Sitao and
              Li, Weizhong",
  title    = "{CD-HIT}: accelerated for clustering the next-generation
              sequencing data",
  abstract = "SUMMARY: CD-HIT is a widely used program for clustering
              biological sequences to reduce sequence redundancy and improve
              the performance of other sequence analyses. In response to the
              rapid increase in the amount of sequencing data produced by the
              next-generation sequencing technologies, we have developed a new
              CD-HIT program accelerated with a novel parallelization strategy
              and some other techniques to allow efficient clustering of such
              datasets. Our tests demonstrated very good speedup derived from
              the parallelization for up to {$\sim$}24 cores and a quasi-linear
              speedup for up to {$\sim$}8 cores. The enhanced CD-HIT is capable of
              handling very large datasets in much shorter time than previous
              versions. AVAILABILITY: http://cd-hit.org. CONTACT: liwz@sdsc.edu
              SUPPLEMENTARY INFORMATION: Supplementary data are available at
              Bioinformatics online.",
  journal  = "Bioinformatics",
  volume   = 28,
  number   = 23,
  pages    = "3150--3152",
  month    = dec,
  year     = 2012,
  language = "en"
}
% Same work as ikemura1985codon; the key is kept so existing citations
% resolve, but the record is completed so both keys render identically.
@article{noauthor_1985-wm,
  author  = "Ikemura, Toshimichi",
  title   = "Codon usage and {tRNA} content in unicellular and multicellular
             organisms",
  journal = "Molecular Biology and Evolution",
  volume  = 2,
  number  = 1,
  pages   = "13--34",
  year    = 1985
}
@article{Hofacker1994-vu,
  author   = "Hofacker, Ivo L. and Fontana, Walter and Stadler, Peter F. and
              Bonhoeffer, L. Sebastian and Tacker, Manfred and Schuster, Peter",
  title    = "Fast folding and comparison of {RNA} secondary structures",
  abstract = "Computer codes for computation and comparison of RNA secondary
              structures, the Vienna RNA package, are presented, that are based
              on dynamic programming algorithms and aim at predictions of
              structures with minimum free energies as well as at computations
              of the equilibrium partition functions and base pairing
              probabilities.",
  journal  = "Monatshefte f{\"u}r Chemie / Chemical Monthly",
  volume   = 125,
  number   = 2,
  pages    = "167--188",
  month    = feb,
  year     = 1994
}
@misc{noauthor_undated-gm,
  title        = {Codon Optimization ({ExpOptimizer}) - Online Tools},
  howpublished = {\url{https://www.novoprolabs.com/tools/codon-optimization}},
  note         = {Accessed: 2019-6-14},
  abstract     = {A free-to-use tool for scientists to optimize DNA sequence
                  for recombinant protein expression}
}
@misc{Lareaulab_undated-uh,
  author       = {{lareaulab}},
  title        = {lareaulab/iXnos},
  booktitle    = {{GitHub}},
  howpublished = {\url{https://github.com/lareaulab/iXnos}},
  note         = {Accessed: 2019-6-17},
  abstract     = {Neural network regression model of translation elongation
                  rate, with DP algorithm to optimize fast coding sequences
                  under model - lareaulab/iXnos}
}
@article{Ang2016-rv,
  author  = "Ang, Kok Siong and Kyriakopoulos, Sarantos and Li, Wei and Lee,
             Dong-Yup",
  title   = "Multi-omics data driven analysis establishes reference codon
             biases for synthetic gene design in microbial and mammalian cells",
  journal = "Methods",
  volume  = 102,
  pages   = "26--35",
  year    = 2016
}
@article{gustafsson2004codon,
  author    = {Gustafsson, Claes and Govindarajan, Sridhar and Minshull, Jeremy},
  title     = {Codon bias and heterologous protein expression},
  journal   = {Trends in Biotechnology},
  publisher = {Elsevier},
  volume    = {22},
  number    = {7},
  pages     = {346--353},
  year      = {2004}
}
@article{rosano2009rare,
  author    = {Rosano, Germ{\'a}n L and Ceccarelli, Eduardo A},
  title     = {Rare codon content affects the solubility of recombinant proteins in a codon bias-adjusted \textit{Escherichia coli} strain},
  journal   = {Microbial Cell Factories},
  publisher = {Springer},
  volume    = {8},
  number    = {1},
  pages     = {41},
  year      = {2009}
}
@article{Tuller2010-ub,
  author   = "Tuller, Tamir and Waldman, Yedael Y and Kupiec, Martin and
              Ruppin, Eytan",
  title    = "Translation efficiency is determined by both codon bias and
              folding energy",
  abstract = "Synonymous mutations do not alter the protein produced yet can
              have a significant effect on protein levels. The mechanisms by
              which this effect is achieved are controversial; although some
              previous studies have suggested that codon bias is the most
              important determinant of translation efficiency, a recent study
              suggested that mRNA folding at the beginning of genes is the
              dominant factor via its effect on translation initiation. Using
              the \textit{Escherichia coli} and Saccharomyces cerevisiae transcriptomes,
              we conducted a genome-scale study aiming at dissecting the
              determinants of translation efficiency. There is a significant
              association between codon bias and translation efficiency across
              all endogenous genes in E. coli and S. cerevisiae but no
              association between folding energy and translation efficiency,
              demonstrating the role of codon bias as an important determinant
              of translation efficiency. However, folding energy does modulate
              the strength of association between codon bias and translation
              efficiency, which is maximized at very weak mRNA folding (i.e.,
              high folding energy) levels. We find a strong correlation between
              the genomic profiles of ribosomal density and genomic profiles of
              folding energy across mRNA, suggesting that lower folding
              energies slow down the ribosomes and decrease translation
              efficiency. Accordingly, we find that selection forces act near
              uniformly to decrease the folding energy at the beginning of
              genes. In summary, these findings testify that in endogenous
              genes, folding energy affects translation efficiency in a global
              manner that is not related to the expression levels of individual
              genes, and thus cannot be detected by correlation with their
              expression levels.",
  journal  = "Proceedings of the National Academy of Sciences",
  volume   = 107,
  number   = 8,
  pages    = "3645--3650",
  month    = feb,
  year     = 2010,
  language = "en"
}
@misc{noauthor_undated-lc,
  title        = {{SciPy.org} --- {SciPy.org}},
  note         = {Accessed: 2019-6-17},
  howpublished = {\url{http://www.scipy.org/}}
}
@article{Raab2010-eg,
  author   = "Raab, David and Graf, Marcus and Notka, Frank and Sch{\"o}dl,
              Thomas and Wagner, Ralf",
  title    = "The {GeneOptimizer} Algorithm: using a sliding window approach to
              cope with the vast sequence space in multiparameter {DNA}
              sequence optimization",
  abstract = "One of the main advantages of de novo gene synthesis is the fact
              that it frees the researcher from any limitations imposed by the
              use of natural templates. To make the most out of this
              opportunity, efficient algorithms are needed to calculate a
              coding sequence, combining different requirements, such as
              adapted codon usage or avoidance of restriction sites, in the
              best possible way. We present an algorithm where a ``variation
              window'' covering several amino acid positions slides along the
              coding sequence. Candidate sequences are built comprising the
              already optimized part of the complete sequence and all possible
              combinations of synonymous codons representing the amino acids
              within the window. The candidate sequences are assessed with a
              quality function, and the first codon of the best candidates'
              variation window is fixed. Subsequently the window is shifted by
              one codon position. As an example of a freely accessible software
              implementing the algorithm, we present the Mr. Gene
              web-application. Additionally two experimental applications of
              the algorithm are shown.",
  journal  = "Systems and Synthetic Biology",
  volume   = 4,
  number   = 3,
  pages    = "215--225",
  month    = sep,
  year     = 2010,
  keywords = "Codon optimization; Expression optimization; Gene synthesis;
              Sequence optimization algorithm; Synthetic genes",
  language = "en"
}
% ExpOptimizer codon-optimization web tool hosted at novoprolabs.com
% (web resource with no author or year).
% The key field gives classic BibTeX styles something to sort and label by
% when author is absent.
@MISC{noauthor_undated-nk,
  key          = "ExpOptimizer",
  title        = "Codon Optimization ({ExpOptimizer}) - Online Tools",
  abstract     = "A free-to-use tool for scientists to optimize DNA sequence
                  for recombinant protein expression",
  howpublished = "\url{https://www.novoprolabs.com/tools/codon-optimization}",
  note         = "Accessed: 2019-6-14"
}
% Scikit-learn reference paper (J. Mach. Learn. Res., volume 12, 2011).
% The issue label "Oct" is a month, so it is stored with the month macro
% rather than in the number field; Python is braced so sentence-casing
% styles keep its capital.
@ARTICLE{Pedregosa2011-cd,
  title   = "Scikit-learn: Machine Learning in {Python}",
  author  = "Pedregosa, Fabian and Varoquaux, Ga{\"e}l and Gramfort, Alexandre
             and Michel, Vincent and Thirion, Bertrand and Grisel, Olivier and
             Blondel, Mathieu and Prettenhofer, Peter and Weiss, Ron and
             Dubourg, Vincent and Vanderplas, Jake and Passos, Alexandre and
             Cournapeau, David and Brucher, Matthieu and Perrot, Matthieu and
             Duchesnay, {\'E}douard",
  journal = "J. Mach. Learn. Res.",
  volume  = 12,
  pages   = "2825--2830",
  month   = oct,
  year    = 2011
}
% Terai et al. (2016): CDSfold, design of a protein-coding sequence with
% the most stable secondary structure.
% The complexity bounds in the abstract are written in math mode; the
% exported text had the exponents flattened into parentheses.
@ARTICLE{Terai2016-vp,
  title    = "{CDSfold}: an algorithm for designing a protein-coding sequence
              with the most stable secondary structure",
  author   = "Terai, Goro and Kamegai, Satoshi and Asai, Kiyoshi",
  abstract = "MOTIVATION: An important problem in synthetic biology is to
              design a nucleotide sequence of an mRNA that confers a desirable
              expression level of a target protein. The secondary structure of
              protein-coding sequences (CDSs) is one potential factor that
              could have both positive and negative effects on protein
              production. To elucidate the role of secondary structure in CDSs,
              algorithms for manipulating secondary structure should be
              developed. RESULTS: We developed an algorithm for designing a CDS
              with the most stable secondary structure among all possible ones
              translated into the same protein, and implemented it as the
              program CDSfold. The algorithm runs the Zuker algorithm under the
              constraint of a given amino acid sequence. The time and space
              complexity is $O(L^3)$ and $O(L^2)$, respectively, where L is the
              length of the CDS to be designed. Although our algorithm is
              slower than the original Zuker algorithm, it could design a
              relatively long (2.7-kb) CDS in approximately 1 h. AVAILABILITY
              AND IMPLEMENTATION: The CDSfold program is freely available for
              non-commercial users as stand-alone and web-based software from
              http://cdsfold.trahed.jp/cdsfold/ CONTACTS: terai-goro@aist.go.jp
              or asai@k.u-tokyo.ac.jp SUPPLEMENTARY INFORMATION: Supplementary
              data are available at Bioinformatics online.",
  journal  = "Bioinformatics",
  volume   = 32,
  number   = 6,
  pages    = "828--834",
  month    = mar,
  year     = 2016,
  language = "en"
}
% Kalvari et al. (2018): Rfam release 13.0 database paper.
% Middle initials carry periods so styles that print given names in full
% render them correctly (e.g. "Eric P. Nawrocki").
@ARTICLE{Kalvari2018-un,
  title    = "Rfam 13.0: shifting to a genome-centric resource for non-coding
              {RNA} families",
  author   = "Kalvari, Ioanna and Argasinska, Joanna and Quinones-Olvera,
              Natalia and Nawrocki, Eric P. and Rivas, Elena and Eddy, Sean R.
              and Bateman, Alex and Finn, Robert D. and Petrov, Anton I.",
  abstract = "The Rfam database is a collection of RNA families in which each
              family is represented by a multiple sequence alignment, a
              consensus secondary structure, and a covariance model. In this
              paper we introduce Rfam release 13.0, which switches to a new
              genome-centric approach that annotates a non-redundant set of
              reference genomes with RNA families. We describe new web
              interface features including faceted text search and R-scape
              secondary structure visualizations. We discuss a new literature
              curation workflow and a pipeline for building families based on
              RNAcentral. There are 236 new families in release 13.0, bringing
              the total number of families to 2687. The Rfam website is
              http://rfam.org.",
  journal  = "Nucleic Acids Res.",
  volume   = 46,
  number   = "D1",
  pages    = "D335--D342",
  month    = jan,
  year     = 2018,
  language = "en"
}
% Shine and Dalgarno (1974): 3'-terminal sequence of E. coli 16S rRNA.
% Author initials carry periods so full-name styles render them correctly.
% NOTE(review): "mRNA.30S" in the exported abstract is taken to be a
% flattened centred dot for the mRNA-30S complex; confirm against the paper.
@ARTICLE{Shine1974-kl,
  title    = "The 3'-terminal sequence of \textit{Escherichia coli} {16S} ribosomal
              {RNA}: complementarity to nonsense triplets and ribosome binding
              sites",
  author   = "Shine, J. and Dalgarno, L.",
  abstract = "With a stepwise degradation and terminal labeling procedure the
              3'-terminal sequence of E. coli 16S ribosomal RNA is shown to be
              Pyd-A-C-C-U-C-C-U-U-A(OH). It is suggested that this region of
              the RNA is able to interact with mRNA and that the 3'-terminal
              U-U-A(OH) is involved in the termination of protein synthesis
              through base-pairing with terminator codons. The sequence
              A-C-C-U-C-C could recognize a conserved sequence found in the
              ribosome binding sites of various coliphage mRNAs; it may thus be
              involved in the formation of the mRNA$\cdot$30S subunit complex.",
  journal  = "Proc. Natl. Acad. Sci. U. S. A.",
  volume   = 71,
  number   = 4,
  pages    = "1342--1346",
  month    = apr,
  year     = 1974,
  language = "en"
}
@ARTICLE{Tegel2011-gy,
title = {Enhancing the protein production levels in \textit{Escherichia coli} with
a strong promoter},
author = "Tegel, Hanna and Ottosson, Jenny and Hober, Sophia",
abstract = "In biotechnology, the use of \textit{Escherichia coli} for recombinant
protein production has a long tradition, although the optimal
production conditions for certain proteins are still not evident.
The most favorable conditions for protein production vary with
the gene product. Temperature and induction conditions represent
parameters that affect total protein production, as well as the
amount of soluble protein. Furthermore, the choice of promoter
and bacterial strain will have large effects on the production of
the target protein. In the present study, the effects of three
different promoters (T7, trc and lacUV5) on E. coli production of
target proteins with different characteristics are presented. The
total amount of target protein as well as the amount of soluble
protein were analyzed, demonstrating the benefits of using a
strong promoter such as T7. To understand the underlying causes,
transcription levels have been correlated with the total amount
of target protein and protein solubility in vitro has been
correlated with the amount of soluble protein that is produced.
In addition, the effects of two different E. coli strains,
BL21(DE3) and Rosetta(DE3), on the expression pattern were
analyzed. It is concluded that the regulation of protein
production is a combination of the transcription and translation
efficiencies. Other important parameters include the
nucleotide-sequence itself and the solubility of the target
protein.",
journal = "FEBS J.",
volume = 278,
number = 5,
pages = "729--739",
month = mar,
year = 2011,