@article {45867,
  title = {Automated ensemble assembly and validation of microbial genomes.},
  journal = {BMC Bioinformatics},
  volume = {15},
  year = {2014},
  pages = {126},
  abstract = {

BACKGROUND: The continued democratization of DNA sequencing has sparked a new wave of development of genome assembly and assembly validation methods. As individual research labs, rather than centralized centers, begin to sequence the majority of new genomes, it is important to establish best practices for genome assembly. However, recent evaluations such as GAGE and the Assemblathon have concluded that there is no single best approach to genome assembly. Instead, it is preferable to generate multiple assemblies and validate them to determine which is most useful for the desired analysis; this is a labor-intensive process that is often impossible or unfeasible.

RESULTS: To encourage best practices supported by the community, we present iMetAMOS, an automated ensemble assembly pipeline; iMetAMOS encapsulates the process of running, validating, and selecting a single assembly from multiple assemblies. iMetAMOS packages several leading open-source tools into a single binary that automates parameter selection and execution of multiple assemblers, scores the resulting assemblies based on multiple validation metrics, and annotates the assemblies for genes and contaminants. We demonstrate the utility of the ensemble process on 225 previously unassembled Mycobacterium tuberculosis genomes as well as a Rhodobacter sphaeroides benchmark dataset. On these real data, iMetAMOS reliably produces validated assemblies and identifies potential contamination without user intervention. In addition, intelligent parameter selection produces assemblies of R. sphaeroides comparable to or exceeding the quality of those from the GAGE-B evaluation, affecting the relative ranking of some assemblers.

CONCLUSIONS: Ensemble assembly with iMetAMOS provides users with multiple, validated assemblies for each genome. Although computationally limited to small or mid-sized genomes, this approach is the most effective and reproducible means for generating high-quality assemblies and enables users to select an assembly best tailored to their specific needs.

},
  keywords = {Genome, Bacterial, Genome, Microbial, Genomics, Mycobacterium tuberculosis, Rhodobacter sphaeroides, Sequence Analysis, DNA, software},
  issn = {1471-2105},
  doi = {10.1186/1471-2105-15-126},
  author = {Koren, Sergey and Treangen, Todd and Hill, Christopher M and Pop, Mihai and Phillippy, Adam M}
}

@article {49599,
  title = {{BlindCall}: ultra-fast base-calling of high-throughput sequencing data by blind deconvolution.},
  journal = {Bioinformatics},
  volume = {30},
  year = {2014},
  month = may,
  pages = {1214--1219},
  abstract = {

MOTIVATION: Base-calling of sequencing data produced by high-throughput sequencing platforms is a fundamental process in current bioinformatics analysis. However, existing third-party probabilistic or machine-learning methods that significantly improve the accuracy of base-calls on these platforms are impractical for production use due to their computational inefficiency.

RESULTS: We directly formulate base-calling as a blind deconvolution problem and implemented BlindCall as an efficient solver to this inverse problem. BlindCall produced base-calls at accuracy comparable to state-of-the-art probabilistic methods while processing data at rates 10 times faster in most cases. The computational complexity of BlindCall scales linearly with read length making it better suited for new long-read sequencing technologies.

},
  keywords = {algorithms, High-Throughput Nucleotide Sequencing, HUMANS, Probability, Reproducibility of Results, Sequence Analysis, DNA, software, Time factors},
  issn = {1367-4811},
  doi = {10.1093/bioinformatics/btu010},
  author = {Ye, Chengxi and Hsiao, Chiaowen and Corrada Bravo, Hector}
}

@article {49776,
  title = {Whole genome analysis of {Leptospira} licerasiae provides insight into leptospiral evolution and pathogenicity.},
  journal = {PLoS Negl Trop Dis},
  volume = {6},
  year = {2012},
  pages = {e1853},
  abstract = {

The whole genome analysis of two strains of the first intermediately pathogenic leptospiral species to be sequenced (Leptospira licerasiae strains VAR010 and MMD0835) provides insight into their pathogenic potential and deepens our understanding of leptospiral evolution. Comparative analysis of eight leptospiral genomes shows the existence of a core leptospiral genome comprising 1547 genes and 452 conserved genes restricted to infectious species (including L. licerasiae) that are likely to be pathogenicity-related. Comparisons of the functional content of the genomes suggests that L. licerasiae retains several proteins related to nitrogen, amino acid and carbohydrate metabolism which might help to explain why these Leptospira grow well in artificial media compared with pathogenic species. L. licerasiae strains VAR010(T) and MMD0835 possess two prophage elements. While one element is circular and shares homology with LE1 of L. biflexa, the second is cryptic and homologous to a previously identified but unnamed region in L. interrogans serovars Copenhageni and Lai. We also report a unique O-antigen locus in L. licerasiae comprised of a 6-gene cluster that is unexpectedly short compared with L. interrogans in which analogous regions may include >90 such genes. Sequence homology searches suggest that these genes were acquired by lateral gene transfer (LGT). Furthermore, seven putative genomic islands ranging in size from 5 to 36 kb are present also suggestive of antecedent LGT. How Leptospira become naturally competent remains to be determined, but considering the phylogenetic origins of the genes comprising the O-antigen cluster and other putative laterally transferred genes, L. licerasiae must be able to exchange genetic material with non-invasive environmental bacteria. The data presented here demonstrate that L. 
licerasiae is genetically more closely related to pathogenic than to saprophytic Leptospira and provide insight into the genomic bases for its infectiousness and its unique antigenic characteristics.

},
  keywords = {DNA, Bacterial, Evolution, Molecular, Gene Transfer, Horizontal, Genome, Bacterial, Genomic islands, HUMANS, Leptospira, Molecular Sequence Data, Multigene Family, Prophages, Sequence Analysis, DNA, Virulence factors},
  issn = {1935-2735},
  doi = {10.1371/journal.pntd.0001853},
  author = {Ricaldi, Jessica N and Fouts, Derrick E and Selengut, Jeremy D and Harkins, Derek M and Patra, Kailash P and Moreno, Angelo and Lehmann, Jason S and Purushe, Janaki and Sanka, Ravi and Torres, Michael and Webster, Nicholas J and Vinetz, Joseph M and Matthias, Michael A}
}

@comment{Entry 38573 describes the same article as entry 49776 (identical DOI). Both keys are kept so that existing citations of either key keep resolving.}

@article {38573,
  title = {Whole genome analysis of {Leptospira} licerasiae provides insight into leptospiral evolution and pathogenicity},
  journal = {PLoS neglected tropical diseases},
  volume = {6},
  year = {2012},
  note = {http://www.ncbi.nlm.nih.gov/pubmed/23145189?dopt=Abstract},
  doi = {10.1371/journal.pntd.0001853},
  abstract = {The whole genome analysis of two strains of the first intermediately pathogenic leptospiral species to be sequenced (Leptospira licerasiae strains VAR010 and MMD0835) provides insight into their pathogenic potential and deepens our understanding of leptospiral evolution. Comparative analysis of eight leptospiral genomes shows the existence of a core leptospiral genome comprising 1547 genes and 452 conserved genes restricted to infectious species (including L. licerasiae) that are likely to be pathogenicity-related. Comparisons of the functional content of the genomes suggests that L. licerasiae retains several proteins related to nitrogen, amino acid and carbohydrate metabolism which might help to explain why these Leptospira grow well in artificial media compared with pathogenic species. L. licerasiae strains VAR010(T) and MMD0835 possess two prophage elements. While one element is circular and shares homology with LE1 of L. biflexa, the second is cryptic and homologous to a previously identified but unnamed region in L. interrogans serovars Copenhageni and Lai. We also report a unique O-antigen locus in L. licerasiae comprised of a 6-gene cluster that is unexpectedly short compared with L. interrogans in which analogous regions may include >90 such genes. Sequence homology searches suggest that these genes were acquired by lateral gene transfer (LGT). Furthermore, seven putative genomic islands ranging in size from 5 to 36 kb are present also suggestive of antecedent LGT. How Leptospira become naturally competent remains to be determined, but considering the phylogenetic origins of the genes comprising the O-antigen cluster and other putative laterally transferred genes, L. licerasiae must be able to exchange genetic material with non-invasive environmental bacteria. The data presented here demonstrate that L. licerasiae is genetically more closely related to pathogenic than to saprophytic Leptospira and provide insight into the genomic bases for its infectiousness and its unique antigenic characteristics.},
  keywords = {DNA, Bacterial, Evolution, Molecular, Gene Transfer, Horizontal, Genome, Bacterial, Genomic islands, HUMANS, Leptospira, Molecular Sequence Data, Multigene Family, Prophages, Sequence Analysis, DNA, Virulence factors},
  author = {Ricaldi, Jessica N. and Fouts, Derrick E. and Selengut, Jeremy D. and Harkins, Derek M. and Patra, Kailash P. and Moreno, Angelo and Lehmann, Jason S. and Purushe, Janaki and Sanka, Ravi and Torres, Michael and Webster, Nicholas J. and Vinetz, Joseph M. and Matthias, Michael A.}
}

@article {38522,
  title = {Tackling the widespread and critical impact of batch effects in high-throughput data},
  journal = {Nature reviews. Genetics},
  volume = {11},
  year = {2010},
  note = {http://www.ncbi.nlm.nih.gov/pubmed/20838408?dopt=Abstract},
  doi = {10.1038/nrg2825},
  abstract = {High-throughput technologies are widely used, for example to assay genetic variants, gene and protein expression, and epigenetic modifications.
One often overlooked complication with such studies is batch effects, which occur because measurements are affected by laboratory conditions, reagent lots and personnel differences. This becomes a major problem when batch effects are correlated with an outcome of interest and lead to incorrect conclusions. Using both published studies and our own analyses, we argue that batch effects (as well as other technical and biological artefacts) are widespread and critical to address. We review experimental and computational approaches for doing so.},
  keywords = {biotechnology, Computational Biology, Genomics, Oligonucleotide Array Sequence Analysis, Periodicals as Topic, Research Design, Sequence Analysis, DNA},
  author = {Leek, Jeffrey T. and Scharpf, Robert B. and Corrada Bravo, H{\'e}ctor and Simcha, David and Langmead, Benjamin and Johnson, W. Evan and Geman, Donald and Baggerly, Keith and Irizarry, Rafael A.}
}

@article {49749,
  title = {Measuring differential gene expression by short read sequencing: quantitative comparison to 2-channel gene expression microarrays.},
  journal = {BMC Genomics},
  volume = {10},
  year = {2009},
  pages = {221},
  abstract = {

BACKGROUND: High-throughput cDNA synthesis and sequencing of poly(A)-enriched RNA is rapidly emerging as a technology competing to replace microarrays as a quantitative platform for measuring gene expression.

RESULTS: Consequently, we compared full length cDNA sequencing to 2-channel gene expression microarrays in the context of measuring differential gene expression. Because of its comparable cost to a gene expression microarray, our study focused on the data obtainable from a single lane of an Illumina 1 G sequencer. We compared sequencing data to a highly replicated microarray experiment profiling two divergent strains of S. cerevisiae.

CONCLUSION: Using a large number of quantitative PCR (qPCR) assays, more than previous studies, we found that neither technology is decisively better at measuring differential gene expression. Further, we report sequencing results from a diploid hybrid of two strains of S. cerevisiae that indicate full length cDNA sequencing can discover heterozygosity and measure quantitative allele-specific expression simultaneously.

},
  keywords = {algorithms, DNA, Complementary, DNA, Fungal, Gene Expression Profiling, Oligonucleotide Array Sequence Analysis, Saccharomyces cerevisiae, sequence alignment, Sequence Analysis, DNA},
  issn = {1471-2164},
  doi = {10.1186/1471-2164-10-221},
  author = {Bloom, Joshua S and Khan, Zia and Kruglyak, Leonid and Singh, Mona and Caudy, Amy A}
}

@article {49748,
  title = {A practical algorithm for finding maximal exact matches in large sequence datasets using sparse suffix arrays.},
  journal = {Bioinformatics},
  volume = {25},
  year = {2009},
  month = jul,
  pages = {1609--1616},
  abstract = {

MOTIVATION: High-throughput sequencing technologies place ever increasing demands on existing algorithms for sequence analysis. Algorithms for computing maximal exact matches (MEMs) between sequences appear in two contexts where high-throughput sequencing will vastly increase the volume of sequence data: (i) seeding alignments of high-throughput reads for genome assembly and (ii) designating anchor points for genome-genome comparisons.

RESULTS: We introduce a new algorithm for finding MEMs. The algorithm leverages a sparse suffix array (SA), a text index that stores every K-th position of the text. In contrast to a full text index that stores every position of the text, a sparse SA occupies much less memory. Even though we use a sparse index, the output of our algorithm is the same as a full text index algorithm as long as the space between the indexed suffixes is not greater than a minimum length of a MEM. By relying on partial matches and additional text scanning between indexed positions, the algorithm trades memory for extra computation. The reduced memory usage makes it possible to determine MEMs between significantly longer sequences.

AVAILABILITY: Source code for the algorithm is available under a BSD open source license at http://compbio.cs.princeton.edu/mems. The implementation can serve as a drop-in replacement for the MEMs algorithm in MUMmer 3.

},
  keywords = {algorithms, Base Sequence, Genomics, sequence alignment, Sequence Analysis, DNA},
  issn = {1367-4811},
  doi = {10.1093/bioinformatics/btp275},
  author = {Khan, Zia and Bloom, Joshua S and Kruglyak, Leonid and Singh, Mona}
}

@comment{Entry 38528 describes the same article as entry 49780 (identical DOI). Both keys are kept so that existing citations of either key keep resolving.}

@article {38528,
  title = {Three genomes from the phylum {Acidobacteria} provide insight into the lifestyles of these microorganisms in soils},
  journal = {Applied and environmental microbiology},
  volume = {75},
  year = {2009},
  note = {http://www.ncbi.nlm.nih.gov/pubmed/19201974?dopt=Abstract},
  doi = {10.1128/AEM.02294-08},
  abstract = {The complete genomes of three strains from the phylum Acidobacteria were compared. Phylogenetic analysis placed them as a unique phylum. They share genomic traits with members of the Proteobacteria, the Cyanobacteria, and the Fungi. The three strains appear to be versatile heterotrophs. Genomic and culture traits indicate the use of carbon sources that span simple sugars to more complex substrates such as hemicellulose, cellulose, and chitin. The genomes encode low-specificity major facilitator superfamily transporters and high-affinity ABC transporters for sugars, suggesting that they are best suited to low-nutrient conditions. They appear capable of nitrate and nitrite reduction but not N(2) fixation or denitrification. The genomes contained numerous genes that encode siderophore receptors, but no evidence of siderophore production was found, suggesting that they may obtain iron via interaction with other microorganisms. The presence of cellulose synthesis genes and a large class of novel high-molecular-weight excreted proteins suggests potential traits for desiccation resistance, biofilm formation, and/or contribution to soil structure. Polyketide synthase and macrolide glycosylation genes suggest the production of novel antimicrobial compounds. Genes that encode a variety of novel proteins were also identified. The abundance of acidobacteria in soils worldwide and the breadth of potential carbon use by the sequenced strains suggest significant and previously unrecognized contributions to the terrestrial carbon cycle. Combining our genomic evidence with available culture traits, we postulate that cells of these isolates are long-lived, divide slowly, exhibit slow metabolic rates under low-nutrient conditions, and are well equipped to tolerate fluctuations in soil hydration.},
  keywords = {Anti-Bacterial Agents, bacteria, Biological Transport, Carbohydrate Metabolism, Cyanobacteria, DNA, Bacterial, Fungi, Genome, Bacterial, Macrolides, Molecular Sequence Data, Nitrogen, Phylogeny, Proteobacteria, Sequence Analysis, DNA, Sequence Homology, Soil Microbiology},
  author = {Ward, Naomi L. and Challacombe, Jean F. and Janssen, Peter H. and Henrissat, Bernard and Coutinho, Pedro M. and Wu, Martin and Xie, Gary and Haft, Daniel H. and Sait, Michelle and Badger, Jonathan and Barabote, Ravi D. and Bradley, Brent and Brettin, Thomas S. and Brinkac, Lauren M. and Bruce, David and Creasy, Todd and Daugherty, Sean C. and Davidsen, Tanja M. and DeBoy, Robert T. and Detter, J. Chris and Dodson, Robert J. and Durkin, A. Scott and Ganapathy, Anuradha and Gwinn-Giglio, Michelle and Han, Cliff S. and Khouri, Hoda and Kiss, Hajnalka and Kothari, Sagar P. and Madupu, Ramana and Nelson, Karen E. and Nelson, William C. and Paulsen, Ian and Penn, Kevin and Ren, Qinghu and Rosovitz, M. J. and Selengut, Jeremy D. and Shrivastava, Susmita and Sullivan, Steven A. and Tapia, Roxanne and Thompson, L. Sue and Watkins, Kisha L. and Yang, Qi and Yu, Chunhui and Zafar, Nikhat and Zhou, Liwei and Kuske, Cheryl R.}
}

@article {49780,
  title = {Three genomes from the phylum {Acidobacteria} provide insight into the lifestyles of these microorganisms in soils.},
  journal = {Appl Environ Microbiol},
  volume = {75},
  year = {2009},
  month = apr,
  pages = {2046--2056},
  abstract = {

The complete genomes of three strains from the phylum Acidobacteria were compared. Phylogenetic analysis placed them as a unique phylum. They share genomic traits with members of the Proteobacteria, the Cyanobacteria, and the Fungi. The three strains appear to be versatile heterotrophs. Genomic and culture traits indicate the use of carbon sources that span simple sugars to more complex substrates such as hemicellulose, cellulose, and chitin. The genomes encode low-specificity major facilitator superfamily transporters and high-affinity ABC transporters for sugars, suggesting that they are best suited to low-nutrient conditions. They appear capable of nitrate and nitrite reduction but not N(2) fixation or denitrification. The genomes contained numerous genes that encode siderophore receptors, but no evidence of siderophore production was found, suggesting that they may obtain iron via interaction with other microorganisms. The presence of cellulose synthesis genes and a large class of novel high-molecular-weight excreted proteins suggests potential traits for desiccation resistance, biofilm formation, and/or contribution to soil structure. Polyketide synthase and macrolide glycosylation genes suggest the production of novel antimicrobial compounds. Genes that encode a variety of novel proteins were also identified. The abundance of acidobacteria in soils worldwide and the breadth of potential carbon use by the sequenced strains suggest significant and previously unrecognized contributions to the terrestrial carbon cycle. Combining our genomic evidence with available culture traits, we postulate that cells of these isolates are long-lived, divide slowly, exhibit slow metabolic rates under low-nutrient conditions, and are well equipped to tolerate fluctuations in soil hydration.

},
  keywords = {Anti-Bacterial Agents, bacteria, Biological Transport, Carbohydrate Metabolism, Cyanobacteria, DNA, Bacterial, Fungi, Genome, Bacterial, Macrolides, Molecular Sequence Data, Nitrogen, Phylogeny, Proteobacteria, Sequence Analysis, DNA, Sequence Homology, Soil Microbiology},
  issn = {1098-5336},
  doi = {10.1128/AEM.02294-08},
  author = {Ward, Naomi L and Challacombe, Jean F and Janssen, Peter H and Henrissat, Bernard and Coutinho, Pedro M and Wu, Martin and Xie, Gary and Haft, Daniel H and Sait, Michelle and Badger, Jonathan and Barabote, Ravi D and Bradley, Brent and Brettin, Thomas S and Brinkac, Lauren M and Bruce, David and Creasy, Todd and Daugherty, Sean C and Davidsen, Tanja M and DeBoy, Robert T and Detter, J Chris and Dodson, Robert J and Durkin, A Scott and Ganapathy, Anuradha and Gwinn-Giglio, Michelle and Han, Cliff S and Khouri, Hoda and Kiss, Hajnalka and Kothari, Sagar P and Madupu, Ramana and Nelson, Karen E and Nelson, William C and Paulsen, Ian and Penn, Kevin and Ren, Qinghu and Rosovitz, M J and Selengut, Jeremy D and Shrivastava, Susmita and Sullivan, Steven A and Tapia, Roxanne and Thompson, L Sue and Watkins, Kisha L and Yang, Qi and Yu, Chunhui and Zafar, Nikhat and Zhou, Liwei and Kuske, Cheryl R}
}

@article {49676,
  title = {The draft genome of the transgenic tropical fruit tree papaya ({Carica} papaya {Linnaeus}).},
  journal = {Nature},
  volume = {452},
  year = {2008},
  month = apr,
  pages = {991--996},
  abstract = {

Papaya, a fruit crop cultivated in tropical and subtropical regions, is known for its nutritional benefits and medicinal applications. Here we report a 3x draft genome sequence of {\textquoteright}SunUp{\textquoteright} papaya, the first commercial virus-resistant transgenic fruit tree to be sequenced. The papaya genome is three times the size of the Arabidopsis genome, but contains fewer genes, including significantly fewer disease-resistance gene analogues. Comparison of the five sequenced genomes suggests a minimal angiosperm gene set of 13,311. A lack of recent genome duplication, atypical of other angiosperm genomes sequenced so far, may account for the smaller papaya gene number in most functional groups. Nonetheless, striking amplifications in gene number within particular functional groups suggest roles in the evolution of tree-like habit, deposition and remobilization of starch reserves, attraction of seed dispersal agents, and adaptation to tropical daylengths. Transgenesis at three locations is closely associated with chloroplast insertions into the nuclear genome, and with topoisomerase I recognition sites. Papaya offers numerous advantages as a system for fruit-tree functional genomics, and this draft genome sequence provides the foundation for revealing the basis of Carica{\textquoteright}s distinguishing morpho-physiological, medicinal and nutritional properties.

}, keywords = {Arabidopsis, Carica, Contig Mapping, Databases, Genetic, Genes, Plant, Genome, Plant, Molecular Sequence Data, Plants, Genetically Modified, sequence alignment, Sequence Analysis, DNA, Transcription Factors, Tropical Climate}, issn = {1476-4687}, doi = {10.1038/nature06856}, author = {Ming, Ray and Hou, Shaobin and Feng, Yun and Yu, Qingyi and Dionne-Laporte, Alexandre and Saw, Jimmy H and Senin, Pavel and Wang, Wei and Ly, Benjamin V and Lewis, Kanako L T and Salzberg, Steven L and Feng, Lu and Jones, Meghan R and Skelton, Rachel L and Murray, Jan E and Chen, Cuixia and Qian, Wubin and Shen, Junguo and Du, Peng and Eustice, Moriah and Tong, Eric and Tang, Haibao and Lyons, Eric and Paull, Robert E and Michael, Todd P and Wall, Kerr and Rice, Danny W and Albert, Henrik and Wang, Ming-Li and Zhu, Yun J and Schatz, Michael and Nagarajan, Niranjan and Acob, Ricelle A and Guan, Peizhu and Blas, Andrea and Wai, Ching Man and Ackerman, Christine M and Ren, Yan and Liu, Chao and Wang, Jianmei and Wang, Jianping and Na, Jong-Kuk and Shakirov, Eugene V and Haas, Brian and Thimmapuram, Jyothi and Nelson, David and Wang, Xiyin and Bowers, John E and Gschwend, Andrea R and Delcher, Arthur L and Singh, Ratnesh and Suzuki, Jon Y and Tripathi, Savarni and Neupane, Kabi and Wei, Hairong and Irikura, Beth and Paidi, Maya and Jiang, Ning and Zhang, Wenli and Presting, Gernot and Windsor, Aaron and Navajas-P{\'e}rez, Rafael and Torres, Manuel J and Feltus, F Alex and Porter, Brad and Li, Yingjun and Burroughs, A Max and Luo, Ming-Cheng and Liu, Lei and Christopher, David A and Mount, Stephen M and Moore, Paul H and Sugimura, Tak and Jiang, Jiming and Schuler, Mary A and Friedman, Vikki and Mitchell-Olds, Thomas and Shippen, Dorothy E and dePamphilis, Claude W and Palmer, Jeffrey D and Freeling, Michael and Paterson, Andrew H and Gonsalves, Dennis and Wang, Lei and Alam, Maqsudul} } @article {49677, title = {Evolution of genes and genomes on the Drosophila phylogeny.}, 
journal = {Nature}, volume = {450}, year = {2007}, month = nov, pages = {203--218}, abstract = {

Comparative analysis of multiple genomes in a phylogenetic framework dramatically improves the precision and sensitivity of evolutionary inference, producing more robust results than single-genome analyses can provide. The genomes of 12 Drosophila species, ten of which are presented here for the first time (sechellia, simulans, yakuba, erecta, ananassae, persimilis, willistoni, mojavensis, virilis and grimshawi), illustrate how rates and patterns of sequence divergence across taxa can illuminate evolutionary processes on a genomic scale. These genome sequences augment the formidable genetic tools that have made Drosophila melanogaster a pre-eminent model for animal genetics, and will further catalyse fundamental research on mechanisms of development, cell biology, genetics, disease, neurobiology, behaviour, physiology and evolution. Despite remarkable similarities among these Drosophila species, we identified many putatively non-neutral changes in protein-coding genes, non-coding RNA genes, and cis-regulatory regions. These may prove to underlie differences in the ecology and behaviour of these diverse species.

}, keywords = {Animals, Codon, DNA Transposable Elements, Drosophila, Drosophila Proteins, Evolution, Molecular, Gene Order, Genes, Insect, Genome, Insect, Genome, Mitochondrial, Genomics, Immunity, Multigene Family, Phylogeny, Reproduction, RNA, Untranslated, sequence alignment, Sequence Analysis, DNA, Synteny}, issn = {1476-4687}, doi = {10.1038/nature06341}, author = {Clark, Andrew G and Eisen, Michael B and Smith, Douglas R and Bergman, Casey M and Oliver, Brian and Markow, Therese A and Kaufman, Thomas C and Kellis, Manolis and Gelbart, William and Iyer, Venky N and Pollard, Daniel A and Sackton, Timothy B and Larracuente, Amanda M and Singh, Nadia D and Abad, Jose P and Abt, Dawn N and Adryan, Boris and Aguade, Montserrat and Akashi, Hiroshi and Anderson, Wyatt W and Aquadro, Charles F and Ardell, David H and Arguello, Roman and Artieri, Carlo G and Barbash, Daniel A and Barker, Daniel and Barsanti, Paolo and Batterham, Phil and Batzoglou, Serafim and Begun, Dave and Bhutkar, Arjun and Blanco, Enrico and Bosak, Stephanie A and Bradley, Robert K and Brand, Adrianne D and Brent, Michael R and Brooks, Angela N and Brown, Randall H and Butlin, Roger K and Caggese, Corrado and Calvi, Brian R and Bernardo de Carvalho, A and Caspi, Anat and Castrezana, Sergio and Celniker, Susan E and Chang, Jean L and Chapple, Charles and Chatterji, Sourav and Chinwalla, Asif and Civetta, Alberto and Clifton, Sandra W and Comeron, Josep M and Costello, James C and Coyne, Jerry A and Daub, Jennifer and David, Robert G and Delcher, Arthur L and Delehaunty, Kim and Do, Chuong B and Ebling, Heather and Edwards, Kevin and Eickbush, Thomas and Evans, Jay D and Filipski, Alan and Findeiss, Sven and Freyhult, Eva and Fulton, Lucinda and Fulton, Robert and Garcia, Ana C L and Gardiner, Anastasia and Garfield, David A and Garvin, Barry E and Gibson, Greg and Gilbert, Don and Gnerre, Sante and Godfrey, Jennifer and Good, Robert and Gotea, Valer and Gravely, Brenton and Greenberg, Anthony J 
and Griffiths-Jones, Sam and Gross, Samuel and Guigo, Roderic and Gustafson, Erik A and Haerty, Wilfried and Hahn, Matthew W and Halligan, Daniel L and Halpern, Aaron L and Halter, Gillian M and Han, Mira V and Heger, Andreas and Hillier, LaDeana and Hinrichs, Angie S and Holmes, Ian and Hoskins, Roger A and Hubisz, Melissa J and Hultmark, Dan and Huntley, Melanie A and Jaffe, David B and Jagadeeshan, Santosh and Jeck, William R and Johnson, Justin and Jones, Corbin D and Jordan, William C and Karpen, Gary H and Kataoka, Eiko and Keightley, Peter D and Kheradpour, Pouya and Kirkness, Ewen F and Koerich, Leonardo B and Kristiansen, Karsten and Kudrna, Dave and Kulathinal, Rob J and Kumar, Sudhir and Kwok, Roberta and Lander, Eric and Langley, Charles H and Lapoint, Richard and Lazzaro, Brian P and Lee, So-Jeong and Levesque, Lisa and Li, Ruiqiang and Lin, Chiao-Feng and Lin, Michael F and Lindblad-Toh, Kerstin and Llopart, Ana and Long, Manyuan and Low, Lloyd and Lozovsky, Elena and Lu, Jian and Luo, Meizhong and Machado, Carlos A and Makalowski, Wojciech and Marzo, Mar and Matsuda, Muneo and Matzkin, Luciano and McAllister, Bryant and McBride, Carolyn S and McKernan, Brendan and McKernan, Kevin and Mendez-Lago, Maria and Minx, Patrick and Mollenhauer, Michael U and Montooth, Kristi and Mount, Stephen M and Mu, Xu and Myers, Eugene and Negre, Barbara and Newfeld, Stuart and Nielsen, Rasmus and Noor, Mohamed A F and O{\textquoteright}Grady, Patrick and Pachter, Lior and Papaceit, Montserrat and Parisi, Matthew J and Parisi, Michael and Parts, Leopold and Pedersen, Jakob S and Pesole, Graziano and Phillippy, Adam M and Ponting, Chris P and Pop, Mihai and Porcelli, Damiano and Powell, Jeffrey R and Prohaska, Sonja and Pruitt, Kim and Puig, Marta and Quesneville, Hadi and Ram, Kristipati Ravi and Rand, David and Rasmussen, Matthew D and Reed, Laura K and Reenan, Robert and Reily, Amy and Remington, Karin A and Rieger, Tania T and Ritchie, Michael G and Robin, Charles 
and Rogers, Yu-Hui and Rohde, Claudia and Rozas, Julio and Rubenfield, Marc J and Ruiz, Alfredo and Russo, Susan and Salzberg, Steven L and Sanchez-Gracia, Alejandro and Saranga, David J and Sato, Hajime and Schaeffer, Stephen W and Schatz, Michael C and Schlenke, Todd and Schwartz, Russell and Segarra, Carmen and Singh, Rama S and Sirot, Laura and Sirota, Marina and Sisneros, Nicholas B and Smith, Chris D and Smith, Temple F and Spieth, John and Stage, Deborah E and Stark, Alexander and Stephan, Wolfgang and Strausberg, Robert L and Strempel, Sebastian and Sturgill, David and Sutton, Granger and Sutton, Granger G and Tao, Wei and Teichmann, Sarah and Tobari, Yoshiko N and Tomimura, Yoshihiko and Tsolas, Jason M and Valente, Vera L S and Venter, Eli and Venter, J Craig and Vicario, Saverio and Vieira, Filipe G and Vilella, Albert J and Villasante, Alfredo and Walenz, Brian and Wang, Jun and Wasserman, Marvin and Watts, Thomas and Wilson, Derek and Wilson, Richard K and Wing, Rod A and Wolfner, Mariana F and Wong, Alex and Wong, Gane Ka-Shu and Wu, Chung-I and Wu, Gabriel and Yamamoto, Daisuke and Yang, Hsiao-Pei and Yang, Shiaw-Pyng and Yorke, James A and Yoshida, Kiyohito and Zdobnov, Evgeny and Zhang, Peili and Zhang, Yu and Zimin, Aleksey V and Baldwin, Jennifer and Abdouelleil, Amr and Abdulkadir, Jamal and Abebe, Adal and Abera, Brikti and Abreu, Justin and Acer, St Christophe and Aftuck, Lynne and Alexander, Allen and An, Peter and Anderson, Erica and Anderson, Scott and Arachi, Harindra and Azer, Marc and Bachantsang, Pasang and Barry, Andrew and Bayul, Tashi and Berlin, Aaron and Bessette, Daniel and Bloom, Toby and Blye, Jason and Boguslavskiy, Leonid and Bonnet, Claude and Boukhgalter, Boris and Bourzgui, Imane and Brown, Adam and Cahill, Patrick and Channer, Sheridon and Cheshatsang, Yama and Chuda, Lisa and Citroen, Mieke and Collymore, Alville and Cooke, Patrick and Costello, Maura and D{\textquoteright}Aco, Katie and Daza, Riza and De Haan, Georgius 
and DeGray, Stuart and DeMaso, Christina and Dhargay, Norbu and Dooley, Kimberly and Dooley, Erin and Doricent, Missole and Dorje, Passang and Dorjee, Kunsang and Dupes, Alan and Elong, Richard and Falk, Jill and Farina, Abderrahim and Faro, Susan and Ferguson, Diallo and Fisher, Sheila and Foley, Chelsea D and Franke, Alicia and Friedrich, Dennis and Gadbois, Loryn and Gearin, Gary and Gearin, Christina R and Giannoukos, Georgia and Goode, Tina and Graham, Joseph and Grandbois, Edward and Grewal, Sharleen and Gyaltsen, Kunsang and Hafez, Nabil and Hagos, Birhane and Hall, Jennifer and Henson, Charlotte and Hollinger, Andrew and Honan, Tracey and Huard, Monika D and Hughes, Leanne and Hurhula, Brian and Husby, M Erii and Kamat, Asha and Kanga, Ben and Kashin, Seva and Khazanovich, Dmitry and Kisner, Peter and Lance, Krista and Lara, Marcia and Lee, William and Lennon, Niall and Letendre, Frances and LeVine, Rosie and Lipovsky, Alex and Liu, Xiaohong and Liu, Jinlei and Liu, Shangtao and Lokyitsang, Tashi and Lokyitsang, Yeshi and Lubonja, Rakela and Lui, Annie and MacDonald, Pen and Magnisalis, Vasilia and Maru, Kebede and Matthews, Charles and McCusker, William and McDonough, Susan and Mehta, Teena and Meldrim, James and Meneus, Louis and Mihai, Oana and Mihalev, Atanas and Mihova, Tanya and Mittelman, Rachel and Mlenga, Valentine and Montmayeur, Anna and Mulrain, Leonidas and Navidi, Adam and Naylor, Jerome and Negash, Tamrat and Nguyen, Thu and Nguyen, Nga and Nicol, Robert and Norbu, Choe and Norbu, Nyima and Novod, Nathaniel and O{\textquoteright}Neill, Barry and Osman, Sahal and Markiewicz, Eva and Oyono, Otero L and Patti, Christopher and Phunkhang, Pema and Pierre, Fritz and Priest, Margaret and Raghuraman, Sujaa and Rege, Filip and Reyes, Rebecca and Rise, Cecil and Rogov, Peter and Ross, Keenan and Ryan, Elizabeth and Settipalli, Sampath and Shea, Terry and Sherpa, Ngawang and Shi, Lu and Shih, Diana and Sparrow, Todd and Spaulding, Jessica and Stalker, 
John and Stange-Thomann, Nicole and Stavropoulos, Sharon and Stone, Catherine and Strader, Christopher and Tesfaye, Senait and Thomson, Talene and Thoulutsang, Yama and Thoulutsang, Dawa and Topham, Kerri and Topping, Ira and Tsamla, Tsamla and Vassiliev, Helen and Vo, Andy and Wangchuk, Tsering and Wangdi, Tsering and Weiand, Michael and Wilkinson, Jane and Wilson, Adam and Yadav, Shailendra and Young, Geneva and Yu, Qing and Zembek, Lisa and Zhong, Danni and Zimmer, Andrew and Zwirko, Zac and Jaffe, David B and Alvarez, Pablo and Brockman, Will and Butler, Jonathan and Chin, CheeWhye and Gnerre, Sante and Grabherr, Manfred and Kleber, Michael and Mauceli, Evan and MacCallum, Iain} }

@article {49782,
  title = {Genome sequence and identification of candidate vaccine antigens from the animal pathogen {Dichelobacter} nodosus},
  journal = {Nature Biotechnology},
  volume = {25},
  year = {2007},
  month = may,
  pages = {569--575},
  abstract = {

Dichelobacter nodosus causes ovine footrot, a disease that leads to severe economic losses in the wool and meat industries. We sequenced its 1.4-Mb genome, the smallest known genome of an anaerobe. It differs markedly from small genomes of intracellular bacteria, retaining greater biosynthetic capabilities and lacking any evidence of extensive ongoing genome reduction. Comparative genomic microarray studies and bioinformatic analysis suggested that, despite its small size, almost 20\% of the genome is derived from lateral gene transfer. Most of these regions seem to be associated with virulence. Metabolic reconstruction indicated unsuspected capabilities, including carbohydrate utilization, electron transfer and several aerobic pathways. Global transcriptional profiling and bioinformatic analysis enabled the prediction of virulence factors and cell surface proteins. Screening of these proteins against ovine antisera identified eight immunogenic proteins that are candidate antigens for a cross-protective vaccine.

},
  keywords = {Animals, Antigens, Chromosome mapping, Dichelobacter nodosus, Foot Rot, Genome, Bacterial, Sequence Analysis, DNA},
  issn = {1087-0156},
  doi = {10.1038/nbt1302},
  author = {Myers, Garry S A and Parker, Dane and Al-Hasani, Keith and Kennan, Ruth M and Seemann, Torsten and Ren, Qinghu and Badger, Jonathan H and Selengut, Jeremy D and DeBoy, Robert T and Tettelin, Herv{\'e} and Boyce, John D and McCarl, Victoria P and Han, Xiaoyan and Nelson, William C and Madupu, Ramana and Mohamoud, Yasmin and Holley, Tara and Fedorova, Nadia and Khouri, Hoda and Bottomley, Steven P and Whittington, Richard J and Adler, Ben and Songer, J Glenn and Rood, Julian I and Paulsen, Ian T}
}

@article {38296,
  title = {Genome sequence and identification of candidate vaccine antigens from the animal pathogen {Dichelobacter} nodosus},
  journal = {Nature Biotechnology},
  volume = {25},
  pages = {569--575},
  year = {2007},
  url = {http://www.ncbi.nlm.nih.gov/pubmed/17468768?dopt=Abstract},
  doi = {10.1038/nbt1302},
  abstract = {Dichelobacter nodosus causes ovine footrot, a disease that leads to severe economic losses in the wool and meat industries. We sequenced its 1.4-Mb genome, the smallest known genome of an anaerobe. It differs markedly from small genomes of intracellular bacteria, retaining greater biosynthetic capabilities and lacking any evidence of extensive ongoing genome reduction. Comparative genomic microarray studies and bioinformatic analysis suggested that, despite its small size, almost 20\% of the genome is derived from lateral gene transfer. Most of these regions seem to be associated with virulence. Metabolic reconstruction indicated unsuspected capabilities, including carbohydrate utilization, electron transfer and several aerobic pathways. Global transcriptional profiling and bioinformatic analysis enabled the prediction of virulence factors and cell surface proteins. 
Screening of these proteins against ovine antisera identified eight immunogenic proteins that are candidate antigens for a cross-protective vaccine.},
  keywords = {Animals, Antigens, Chromosome mapping, Dichelobacter nodosus, Foot Rot, Genome, Bacterial, Sequence Analysis, DNA},
  author = {Myers, Garry S. A. and Parker, Dane and Al-Hasani, Keith and Kennan, Ruth M. and Seemann, Torsten and Ren, Qinghu and Badger, Jonathan H. and Selengut, Jeremy D. and DeBoy, Robert T. and Tettelin, Herv{\'e} and Boyce, John D. and McCarl, Victoria P. and Han, Xiaoyan and Nelson, William C. and Madupu, Ramana and Mohamoud, Yasmin and Holley, Tara and Fedorova, Nadia and Khouri, Hoda and Bottomley, Steven P. and Whittington, Richard J. and Adler, Ben and Songer, J. Glenn and Rood, Julian I. and Paulsen, Ian T.}
}

@article {49679,
  title = {{SplicePort}---an interactive splice-site analysis tool},
  journal = {Nucleic Acids Research},
  volume = {35},
  year = {2007},
  month = jul,
  pages = {W285--W291},
  abstract = {

SplicePort is a web-based tool for splice-site analysis that allows the user to make splice-site predictions for submitted sequences. In addition, the user can also browse the rich catalog of features that underlies these predictions, and which we have found capable of providing high classification accuracy on human splice sites. Feature selection is optimized for human splice sites, but the selected features are likely to be predictive for other mammals as well. With our interactive feature browsing and visualization tool, the user can view and explore subsets of features used in splice-site prediction (either the features that account for the classification of a specific input sequence or the complete collection of features). Selected feature sets can be searched, ranked or displayed easily. The user can group features into clusters and frequency plot WebLogos can be generated for each cluster. The user can browse the identified clusters and their contributing elements, looking for new interesting signals, or can validate previously observed signals. The SplicePort web server can be accessed at http://www.cs.umd.edu/projects/SplicePort and http://www.spliceport.org.

},
  keywords = {Base Sequence, Chromosome mapping, Computational Biology, Computer simulation, DNA, Genome, HUMANS, Internet, Models, Genetic, Molecular Sequence Data, Pattern Recognition, Automated, RNA Splice Sites, sequence alignment, Sequence Analysis, DNA, User-Computer Interface},
  issn = {1362-4962},
  doi = {10.1093/nar/gkm407},
  author = {Dogan, Rezarta Islamaj and Getoor, Lise and Wilbur, W John and Mount, Stephen M}
}

@article {49641,
  title = {Analysis of fat body transcriptome from the adult tsetse fly, {Glossina} morsitans morsitans},
  journal = {Insect Molecular Biology},
  volume = {15},
  year = {2006},
  month = aug,
  pages = {411--424},
  abstract = {

Tsetse flies (Diptera: Glossinidia) are vectors of pathogenic African trypanosomes. To develop a foundation for tsetse physiology, a normalized expressed sequence tag (EST) library was constructed from fat body tissue of immune-stimulated Glossina morsitans morsitans. Analysis of 20,257 high-quality ESTs yielded 6372 unique genes comprised of 3059 tentative consensus (TC) sequences and 3313 singletons (available at http://aksoylab.yale.edu). We analysed the putative fat body transcriptome based on homology to other gene products with known functions available in the public domain. In particular, we describe the immune-related products, reproductive function related yolk proteins and milk-gland protein, iron metabolism regulating ferritins and transferrin, and tsetse{\textquoteright}s major energy source proline biosynthesis. Expression analysis of the three yolk proteins indicates that all are detected in females, while only the yolk protein with similarity to lipases, is expressed in males. Milk gland protein, apparently important for larval nutrition, however, is primarily synthesized by accessory milk gland tissue.

},
  keywords = {Adipose Tissue, Animals, Base Sequence, Computational Biology, DNA Primers, Egg Proteins, Expressed Sequence Tags, Female, Gene Expression Profiling, Insect Vectors, Male, Molecular Sequence Data, Reverse Transcriptase Polymerase Chain Reaction, Sequence Analysis, DNA, Sex Factors, Tsetse Flies},
  issn = {0962-1075},
  doi = {10.1111/j.1365-2583.2006.00649.x},
  author = {Attardo, G M and Strickler-Dinglasan, P and Perkin, S A H and Caler, E and Bonaldo, M F and Soares, M B and El-Sayeed, N and Aksoy, S}
}

@article {38159,
  title = {Comparative genomic evidence for a close relationship between the dimorphic prosthecate bacteria {Hyphomonas} neptunium and {Caulobacter} crescentus},
  journal = {Journal of Bacteriology},
  volume = {188},
  year = {2006},
  url = {http://www.ncbi.nlm.nih.gov/pubmed/16980487?dopt=Abstract},
  doi = {10.1128/JB.00111-06},
  abstract = {The dimorphic prosthecate bacteria (DPB) are alpha-proteobacteria that reproduce in an asymmetric manner rather than by binary fission and are of interest as simple models of development. Prior to this work, the only member of this group for which genome sequence was available was the model freshwater organism Caulobacter crescentus. Here we describe the genome sequence of Hyphomonas neptunium, a marine member of the DPB that differs from C. crescentus in that H. neptunium uses its stalk as a reproductive structure. Genome analysis indicates that this organism shares more genes with C. crescentus than it does with Silicibacter pomeroyi (a closer relative according to 16S rRNA phylogeny), that it relies upon a heterotrophic strategy utilizing a wide range of substrates, that its cell cycle is likely to be regulated in a similar manner to that of C. crescentus, and that the outer membrane complements of H. neptunium and C. crescentus are remarkably similar. H. neptunium swarmer cells are highly motile via a single polar flagellum. 
With the exception of cheY and cheR, genes required for chemotaxis were absent in the H. neptunium genome. Consistent with this observation, H. neptunium swarmer cells did not respond to any chemotactic stimuli that were tested, which suggests that H. neptunium motility is a random dispersal mechanism for swarmer cells rather than a stimulus-controlled navigation system for locating specific environments. In addition to providing insights into bacterial development, the H. neptunium genome will provide an important resource for the study of other interesting biological processes including chromosome segregation, polar growth, and cell aging.},
  keywords = {Alphaproteobacteria, Bacterial Outer Membrane Proteins, Caulobacter crescentus, cell cycle, Chemotaxis, DNA, Bacterial, Flagella, Genome, Bacterial, Microbial Viability, Molecular Sequence Data, Movement, Sequence Analysis, DNA, Sequence Homology, signal transduction},
  author = {Badger, Jonathan H. and Hoover, Timothy R. and Brun, Yves V. and Weiner, Ronald M. and Laub, Michael T. and Alexandre, Gladys and Mr{\'a}zek, Jan and Ren, Qinghu and Paulsen, Ian T. and Nelson, Karen E. and Khouri, Hoda M. and Radune, Diana and Sosa, Julia and Dodson, Robert J. and Sullivan, Steven A. and Rosovitz, M. J. and Madupu, Ramana and Brinkac, Lauren M. and Durkin, A. Scott and Daugherty, Sean C. and Kothari, Sagar P. and Giglio, Michelle Gwinn and Zhou, Liwei and Haft, Daniel H. and Selengut, Jeremy D. and Davidsen, Tanja M. 
and Yang, Qi and Zafar, Nikhat and Ward, Naomi L.}
}

@article {38287,
  title = {Genome analysis of multiple pathogenic isolates of {Streptococcus} agalactiae: implications for the microbial ``pan-genome''},
  journal = {Proceedings of the National Academy of Sciences of the United States of America},
  volume = {102},
  year = {2005},
  url = {http://www.ncbi.nlm.nih.gov/pubmed/16172379?dopt=Abstract},
  doi = {10.1073/pnas.0506758102},
  abstract = {The development of efficient and inexpensive genome sequencing methods has revolutionized the study of human bacterial pathogens and improved vaccine design. Unfortunately, the sequence of a single genome does not reflect how genetic variability drives pathogenesis within a bacterial species and also limits genome-wide screens for vaccine candidates or for antimicrobial targets. We have generated the genomic sequence of six strains representing the five major disease-causing serotypes of Streptococcus agalactiae, the main cause of neonatal infection in humans. Analysis of these genomes and those available in databases showed that the S. agalactiae species can be described by a pan-genome consisting of a core genome shared by all isolates, accounting for approximately 80\% of any single genome, plus a dispensable genome consisting of partially shared and strain-specific genes. Mathematical extrapolation of the data suggests that the gene reservoir available for inclusion in the S. agalactiae pan-genome is vast and that unique genes will continue to be identified even after sequencing hundreds of genomes.},
  keywords = {Amino Acid Sequence, Bacterial Capsules, Base Sequence, Gene expression, Genes, Bacterial, Genetic Variation, Genome, Bacterial, Molecular Sequence Data, Phylogeny, sequence alignment, Sequence Analysis, DNA, Streptococcus agalactiae, virulence},
  author = {Tettelin, Herv{\'e} and Masignani, Vega and Cieslewicz, Michael J. 
and Donati, Claudio and Medini, Duccio and Ward, Naomi L. and Angiuoli, Samuel V. and Crabtree, Jonathan and Jones, Amanda L. and Durkin, A. Scott and DeBoy, Robert T. and Davidsen, Tanja M. and Mora, Marirosa and Scarselli, Maria and {Margarit y Ros}, Immaculada and Peterson, Jeremy D. and Hauser, Christopher R. and Sundaram, Jaideep P. and Nelson, William C. and Madupu, Ramana and Brinkac, Lauren M. and Dodson, Robert J. and Rosovitz, Mary J. and Sullivan, Steven A. and Daugherty, Sean C. and Haft, Daniel H. and Selengut, Jeremy D. and Gwinn, Michelle L. and Zhou, Liwei and Zafar, Nikhat and Khouri, Hoda and Radune, Diana and Dimitrov, George and Watkins, Kisha and O{\textquoteright}Connor, Kevin J. B. and Smith, Shannon and Utterback, Teresa R. and White, Owen and Rubens, Craig E. and Grandi, Guido and Madoff, Lawrence C. and Kasper, Dennis L. and Telford, John L. and Wessels, Michael R. and Rappuoli, Rino and Fraser, Claire M.}
}

@article {49633,
  title = {The sequence and analysis of {Trypanosoma} brucei chromosome {II}},
  journal = {Nucleic Acids Research},
  volume = {31},
  year = {2003},
  month = aug,
  day = {15},
  pages = {4856--4863},
  abstract = {

We report here the sequence of chromosome II from Trypanosoma brucei, the causative agent of African sleeping sickness. The 1.2-Mb pairs encode about 470 predicted genes organised in 17 directional clusters on either strand, the largest cluster of which has 92 genes lined up over a 284-kb region. An analysis of the GC skew reveals strand compositional asymmetries that coincide with the distribution of protein-coding genes, suggesting these asymmetries may be the result of transcription-coupled repair on coding versus non-coding strand. A 5-cM genetic map of the chromosome reveals recombinational {\textquoteright}hot{\textquoteright} and {\textquoteright}cold{\textquoteright} regions, the latter of which is predicted to include the putative centromere. One end of the chromosome consists of a 250-kb region almost exclusively composed of RHS (pseudo)genes that belong to a newly characterised multigene family containing a hot spot of insertion for retroelements. Interspersed with the RHS genes are a few copies of truncated RNA polymerase pseudogenes as well as expression site associated (pseudo)genes (ESAGs) 3 and 4, and 76 bp repeats. These features are reminiscent of a vestigial variant surface glycoprotein (VSG) gene expression site. The other end of the chromosome contains a 30-kb array of VSG genes, the majority of which are pseudogenes, suggesting that this region may be a site for modular de novo construction of VSG gene diversity during transposition/gene conversion events.

},
  keywords = {Animals, Antigens, Protozoan, Chromosome mapping, Chromosomes, DNA, Protozoan, Gene Duplication, Genes, Protozoan, Molecular Sequence Data, Pseudogenes, Recombination, Genetic, Sequence Analysis, DNA, Trypanosoma brucei brucei},
  issn = {1362-4962},
  author = {el-Sayed, Najib M A and Ghedin, Elodie and Song, Jinming and MacLeod, Annette and Bringaud, Frederic and Larkin, Christopher and Wanless, David and Peterson, Jeremy and Hou, Lihua and Taylor, Sonya and Tweedie, Alison and Biteau, Nicolas and Khalak, Hanif G and Lin, Xiaoying and Mason, Tanya and Hannick, Linda and Caler, Elisabet and Blandin, Ga{\"e}lle and Bartholomeu, Daniella and Simpson, Anjana J and Kaul, Samir and Zhao, Hong and Pai, Grace and Van Aken, Susan and Utterback, Teresa and Haas, Brian and Koo, Hean L and Umayam, Lowell and Suh, Bernard and Gerrard, Caroline and Leech, Vanessa and Qi, Rong and Zhou, Shiguo and Schwartz, David and Feldblyum, Tamara and Salzberg, Steven and Tait, Andrew and Turner, C Michael R and Ullu, Elisabetta and White, Owen and Melville, Sara and Adams, Mark D and Fraser, Claire M and Donelson, John E}
}

@article {49687,
  title = {The draft genome of {Ciona} intestinalis: insights into chordate and vertebrate origins},
  journal = {Science},
  volume = {298},
  year = {2002},
  month = dec,
  day = {13},
  pages = {2157--2167},
  abstract = {

The first chordates appear in the fossil record at the time of the Cambrian explosion, nearly 550 million years ago. The modern ascidian tadpole represents a plausible approximation to these ancestral chordates. To illuminate the origins of chordate and vertebrates, we generated a draft of the protein-coding portion of the genome of the most studied ascidian, Ciona intestinalis. The Ciona genome contains approximately 16,000 protein-coding genes, similar to the number in other invertebrates, but only half that found in vertebrates. Vertebrate gene families are typically found in simplified form in Ciona, suggesting that ascidians contain the basic ancestral complement of genes involved in cell signaling and development. The ascidian genome has also acquired a number of lineage-specific innovations, including a group of genes engaged in cellulose metabolism that are related to those in bacteria and fungi.

}, keywords = {Alleles, Animals, Apoptosis, Base Sequence, Cellulose, Central Nervous System, Ciona intestinalis, Computational Biology, Endocrine System, Gene Dosage, Gene Duplication, genes, Genes, Homeobox, Genome, Heart, Immunity, Molecular Sequence Data, Multigene Family, Muscle Proteins, Organizers, Embryonic, Phylogeny, Polymorphism, Genetic, Proteins, Sequence Analysis, DNA, Sequence Homology, Nucleic Acid, Species Specificity, Thyroid Gland, Urochordata, Vertebrates}, issn = {1095-9203}, doi = {10.1126/science.1080049}, author = {Dehal, Paramvir and Satou, Yutaka and Campbell, Robert K and Chapman, Jarrod and Degnan, Bernard and De Tomaso, Anthony and Davidson, Brad and Di Gregorio, Anna and Gelpke, Maarten and Goodstein, David M and Harafuji, Naoe and Hastings, Kenneth E M and Ho, Isaac and Hotta, Kohji and Huang, Wayne and Kawashima, Takeshi and Lemaire, Patrick and Martinez, Diego and Meinertzhagen, Ian A and Necula, Simona and Nonaka, Masaru and Putnam, Nik and Rash, Sam and Saiga, Hidetoshi and Satake, Masanobu and Terry, Astrid and Yamada, Lixy and Wang, Hong-Gang and Awazu, Satoko and Azumi, Kaoru and Boore, Jeffrey and Branno, Margherita and Chin-Bow, Stephen and DeSantis, Rosaria and Doyle, Sharon and Francino, Pilar and Keys, David N and Haga, Shinobu and Hayashi, Hiroko and Hino, Kyosuke and Imai, Kaoru S and Inaba, Kazuo and Kano, Shungo and Kobayashi, Kenji and Kobayashi, Mari and Lee, Byung-In and Makabe, Kazuhiro W and Manohar, Chitra and Matassi, Giorgio and Medina, Monica and Mochizuki, Yasuaki and Mount, Steve and Morishita, Tomomi and Miura, Sachiko and Nakayama, Akie and Nishizaka, Satoko and Nomoto, Hisayo and Ohta, Fumiko and Oishi, Kazuko and Rigoutsos, Isidore and Sano, Masako and Sasaki, Akane and Sasakura, Yasunori and Shoguchi, Eiichi and Shin-i, Tadasu and Spagnuolo, Antoinetta and Stainier, Didier and Suzuki, Miho M and Tassy, Olivier and Takatori, Naohito and Tokuoka, Miki and Yagi, Kasumi and Yoshizaki, Fumiko and Wada, 
Shuichi and Zhang, Cindy and Hyatt, P Douglas and Larimer, Frank and Detter, Chris and Doggett, Norman and Glavina, Tijana and Hawkins, Trevor and Richardson, Paul and Lucas, Susan and Kohara, Yuji and Levine, Michael and Satoh, Nori and Rokhsar, Daniel S}
}

@article {38304,
  title = {Genome sequence of the human malaria parasite {Plasmodium} falciparum},
  journal = {Nature},
  volume = {419},
  year = {2002},
  url = {http://www.ncbi.nlm.nih.gov/pubmed/12368864?dopt=Abstract},
  doi = {10.1038/nature01097},
  abstract = {The parasite Plasmodium falciparum is responsible for hundreds of millions of cases of malaria, and kills more than one million African children annually. Here we report an analysis of the genome sequence of P. falciparum clone 3D7. The 23-megabase nuclear genome consists of 14 chromosomes, encodes about 5,300 genes, and is the most (A + T)-rich genome sequenced to date. Genes involved in antigenic variation are concentrated in the subtelomeric regions of the chromosomes. Compared to the genomes of free-living eukaryotic microbes, the genome of this intracellular parasite encodes fewer enzymes and transporters, but a large proportion of genes are devoted to immune evasion and host-parasite interactions. Many nuclear-encoded proteins are targeted to the apicoplast, an organelle involved in fatty-acid and isoprenoid metabolism. The genome sequence provides the foundation for future studies of this organism, and is being exploited in the search for new drugs and vaccines to fight malaria.},
  keywords = {Animals, Chromosome Structures, DNA Repair, DNA Replication, DNA, Protozoan, Evolution, Molecular, Genome, Protozoan, HUMANS, Malaria Vaccines, Malaria, Falciparum, Membrane Transport Proteins, Molecular Sequence Data, Plasmodium falciparum, Plastids, Proteome, Protozoan Proteins, Recombination, Genetic, Sequence Analysis, DNA},
  author = {Gardner, Malcolm J. and Hall, Neil and Fung, Eula and White, Owen and Berriman, Matthew and Hyman, Richard W. 
and Carlton, Jane M. and Pain, Arnab and Nelson, Karen E. and Bowman, Sharen and Paulsen, Ian T. and James, Keith and Eisen, Jonathan A. and Rutherford, Kim and Salzberg, Steven L. and Craig, Alister and Kyes, Sue and Chan, Man-Suen and Nene, Vishvanath and Shallom, Shamira J. and Suh, Bernard and Peterson, Jeremy and Angiuoli, Sam and Pertea, Mihaela and Allen, Jonathan and Selengut, Jeremy D. and Haft, Daniel and Mather, Michael W. and Vaidya, Akhil B. and Martin, David M. A. and Fairlamb, Alan H. and Fraunholz, Martin J. and Roos, David S. and Ralph, Stuart A. and McFadden, Geoffrey I. and Cummings, Leda M. and Subramanian, G. Mani and Mungall, Chris and Venter, J. Craig and Carucci, Daniel J. and Hoffman, Stephen L. and Newbold, Chris and Davis, Ronald W. and Fraser, Claire M. and Barrell, Bart}
}

@article {38492,
  title = {Sequence of {Plasmodium} falciparum chromosomes 2, 10, 11 and 14},
  journal = {Nature},
  volume = {419},
  year = {2002},
  url = {http://www.ncbi.nlm.nih.gov/pubmed/12368868?dopt=Abstract},
  doi = {10.1038/nature01094},
  abstract = {The mosquito-borne malaria parasite Plasmodium falciparum kills an estimated 0.7--2.7 million people every year, primarily children in sub-Saharan Africa. Without effective interventions, a variety of factors---including the spread of parasites resistant to antimalarial drugs and the increasing insecticide resistance of mosquitoes---may cause the number of malaria cases to double over the next two decades. To stimulate basic research and facilitate the development of new drugs and vaccines, the genome of Plasmodium falciparum clone 3D7 has been sequenced using a chromosome-by-chromosome shotgun strategy. We report here the nucleotide sequences of chromosomes 10, 11 and 14, and a re-analysis of the chromosome 2 sequence. These chromosomes represent about 35\% of the 23-megabase P. 
falciparum genome.},
  keywords = {Animals, Chromosomes, DNA, Protozoan, Genome, Protozoan, Plasmodium falciparum, Proteome, Protozoan Proteins, Sequence Analysis, DNA},
  author = {Gardner, Malcolm J. and Shallom, Shamira J. and Carlton, Jane M. and Salzberg, Steven L. and Nene, Vishvanath and Shoaibi, Azadeh and Ciecko, Anne and Lynn, Jeffery and Rizzo, Michael and Weaver, Bruce and Jarrahi, Behnam and Brenner, Michael and Parvizi, Babak and Tallon, Luke and Moazzez, Azita and Granger, David and Fujii, Claire and Hansen, Cheryl and Pederson, James and Feldblyum, Tamara and Peterson, Jeremy and Suh, Bernard and Angiuoli, Sam and Pertea, Mihaela and Allen, Jonathan and Selengut, Jeremy D. and White, Owen and Cummings, Leda M. and Smith, Hamilton O. and Adams, Mark D. and Venter, J. Craig and Carucci, Daniel J. and Hoffman, Stephen L. and Fraser, Claire M.}
}

@article {49692,
  title = {The genome sequence of {Drosophila} melanogaster},
  journal = {Science},
  volume = {287},
  year = {2000},
  month = mar,
  day = {24},
  pages = {2185--2195},
  abstract = {

The fly Drosophila melanogaster is one of the most intensively studied organisms in biology and serves as a model system for the investigation of many developmental and cellular processes common to higher eukaryotes, including humans. We have determined the nucleotide sequence of nearly all of the approximately 120-megabase euchromatic portion of the Drosophila genome using a whole-genome shotgun sequencing strategy supported by extensive clone-based sequence and a high-quality bacterial artificial chromosome physical map. Efforts are under way to close the remaining gaps; however, the sequence is of sufficient accuracy and contiguity to be declared substantially complete and to support an initial analysis of genome structure and preliminary gene annotation and interpretation. The genome encodes approximately 13,600 genes, somewhat fewer than the smaller Caenorhabditis elegans genome, but with comparable functional diversity.

}, keywords = {Animals, Biological Transport, Chromatin, Cloning, Molecular, Computational Biology, Contig Mapping, Cytochrome P-450 Enzyme System, DNA Repair, DNA Replication, Drosophila melanogaster, Euchromatin, Gene Library, Genes, Insect, Genome, Heterochromatin, Insect Proteins, Nuclear Proteins, Protein Biosynthesis, Sequence Analysis, DNA, Transcription, Genetic}, issn = {0036-8075}, author = {Adams, M D and Celniker, S E and Holt, R A and Evans, C A and Gocayne, J D and Amanatides, P G and Scherer, S E and Li, P W and Hoskins, R A and Galle, R F and George, R A and Lewis, S E and Richards, S and Ashburner, M and Henderson, S N and Sutton, G G and Wortman, J R and Yandell, M D and Zhang, Q and Chen, L X and Brandon, R C and Rogers, Y H and Blazej, R G and Champe, M and Pfeiffer, B D and Wan, K H and Doyle, C and Baxter, E G and Helt, G and Nelson, C R and Gabor, G L and Abril, J F and Agbayani, A and An, H J and Andrews-Pfannkoch, C and Baldwin, D and Ballew, R M and Basu, A and Baxendale, J and Bayraktaroglu, L and Beasley, E M and Beeson, K Y and Benos, P V and Berman, B P and Bhandari, D and Bolshakov, S and Borkova, D and Botchan, M R and Bouck, J and Brokstein, P and Brottier, P and Burtis, K C and Busam, D A and Butler, H and Cadieu, E and Center, A and Chandra, I and Cherry, J M and Cawley, S and Dahlke, C and Davenport, L B and Davies, P and de Pablos, B and Delcher, A and Deng, Z and Mays, A D and Dew, I and Dietz, S M and Dodson, K and Doup, L E and Downes, M and Dugan-Rocha, S and Dunkov, B C and Dunn, P and Durbin, K J and Evangelista, C C and Ferraz, C and Ferriera, S and Fleischmann, W and Fosler, C and Gabrielian, A E and Garg, N S and Gelbart, W M and Glasser, K and Glodek, A and Gong, F and Gorrell, J H and Gu, Z and Guan, P and Harris, M and Harris, N L and Harvey, D and Heiman, T J and Hernandez, J R and Houck, J and Hostin, D and Houston, K A and Howland, T J and Wei, M H and Ibegwam, C and Jalali, M and Kalush, F and Karpen, G H and Ke, 
Z and Kennison, J A and Ketchum, K A and Kimmel, B E and Kodira, C D and Kraft, C and Kravitz, S and Kulp, D and Lai, Z and Lasko, P and Lei, Y and Levitsky, A A and Li, J and Li, Z and Liang, Y and Lin, X and Liu, X and Mattei, B and McIntosh, T C and McLeod, M P and McPherson, D and Merkulov, G and Milshina, N V and Mobarry, C and Morris, J and Moshrefi, A and Mount, S M and Moy, M and Murphy, B and Murphy, L and Muzny, D M and Nelson, D L and Nelson, D R and Nelson, K A and Nixon, K and Nusskern, D R and Pacleb, J M and Palazzolo, M and Pittman, G S and Pan, S and Pollard, J and Puri, V and Reese, M G and Reinert, K and Remington, K and Saunders, R D and Scheeler, F and Shen, H and Shue, B C and Sid{\'e}n-Kiamos, I and Simpson, M and Skupski, M P and Smith, T and Spier, E and Spradling, A C and Stapleton, M and Strong, R and Sun, E and Svirskas, R and Tector, C and Turner, R and Venter, E and Wang, A H and Wang, X and Wang, Z Y and Wassarman, D A and Weinstock, G M and Weissenbach, J and Williams, S M and Worley, K C and Wu, D and Yang, S and Yao, Q A and Ye, J and Yeh, R F and Zaveri, J S and Zhan, M and Zhang, G and Zhao, Q and Zheng, L and Zheng, X H and Zhong, F N and Zhong, W and Zhou, X and Zhu, S and Zhu, X and Smith, H O and Gibbs, R A and Myers, E W and Rubin, G M and Venter, J C} }