@article{49865,
  title = {Draft Genome Sequences from a Novel Clade of Bacillus cereus sensu lato Strains Isolated from the International Space Station},
  volume = {1},
  year = {2017},
  pages = {1},
  author = {Venkateswaran, Kasthuri and Checinska-Sielaff, Aleksandra and Klubnik, Joy and Treangen, Todd and Rosovitz, M. J. and Bergman, Nicholas H.}
}

@article{49839,
  title = {Sequestration of nematocysts by divergent cnidarian predators: mechanism, function, and evolution},
  journal = {Invertebrate Biology},
  volume = {136},
  year = {2017},
  month = {Jan-03-2017},
  pages = {75--91},
  doi = {10.1111/ivb.12154},
  url = {http://doi.wiley.com/10.1111/ivb.12154},
  author = {Goodheart, Jessica and Bely, Alexandra E.}
}

@article{49756,
  title = {Distinct genomic and epigenomic features demarcate hypomethylated blocks in colon cancer},
  journal = {BMC Cancer},
  volume = {16},
  year = {2016},
  month = {Jan-12-2016},
  pages = {88},
  doi = {10.1186/s12885-016-2128-1},
  url = {http://www.biomedcentral.com/1471-2407/16/88},
  author = {Sharmin, Mahfuza and Corrada Bravo, H{\'e}ctor and Hannenhalli, Sridhar}
}

@article{49810,
  title = {Diversity in a Polymicrobial Community Revealed by Analysis of Viromes, Endolysins and CRISPR Spacers.},
  journal = {PLoS One},
  volume = {11},
  year = {2016},
  pages = {e0160574},
  abstract = {

The polymicrobial biofilm communities in Mushroom and Octopus Spring in Yellowstone National Park (YNP) are well characterized, yet little is known about the phage populations. Dominant species, Synechococcus sp. JA-2-3B{\textquoteright}a(2-13), Synechococcus sp. JA-3-3Ab, Chloroflexus sp. Y-400-fl, and Roseiflexus sp. RS-1, contain multiple CRISPR-Cas arrays, suggesting complex interactions with phage predators. To analyze phage populations from Octopus Spring biofilms, we sequenced a viral enriched fraction. To assemble and analyze phage metagenomic data, we developed a custom module, VIRITAS, implemented within the MetAMOS framework. This module bins contigs into groups based on tetranucleotide frequencies and CRISPR spacer-protospacer matching and ORF calling. Using this pipeline we were able to assemble phage sequences into contigs and bin them into three clusters that corroborated with their potential host range. The virome contained 52,348 predicted ORFs; some were clearly phage-like; 9319 ORFs had a recognizable Pfam domain while the rest were hypothetical. Of the recognized domains with CRISPR spacer matches, was the phage endolysin used by lytic phage to disrupt cells. Analysis of the endolysins present in the thermophilic cyanophage contigs revealed a subset of characterized endolysins as well as a Glyco_hydro_108 (PF05838) domain not previously associated with sequenced cyanophages. A search for CRISPR spacer matches to all identified phage endolysins demonstrated that a majority of endolysin domains were targets. This strategy provides a general way to link host and phage as endolysins are known to be widely distributed in bacteriophage. Endolysins can also provide information about host cell wall composition and have the additional potential to be used as targets for novel therapeutics.

},
  issn = {1932-6203},
  doi = {10.1371/journal.pone.0160574},
  author = {Davison, Michelle and Treangen, Todd and Koren, Sergey and Pop, Mihai and Bhaya, Devaki}
}

@article{49827,
  title = {Dual Transcriptome Profiling of Leishmania-Infected Human Macrophages Reveals Distinct Reprogramming Signatures},
  journal = {mBio},
  volume = {7},
  year = {2016},
  month = {Jun-07-2016},
  pages = {e00027-16},
  doi = {10.1128/mBio.00027-16},
  url = {http://mbio.asm.org/lookup/doi/10.1128/mBio.00027-16},
  author = {Fernandes, Maria Cecilia and Dillon, Laura A. L. and Belew, Ashton Trey and Corrada Bravo, H{\'e}ctor and Mosser, David M. and El-Sayed, Najib M.}
}

@article{49668,
  title = {The fruRBA operon is necessary for Group A Streptococcal growth in fructose and for resistance to neutrophil killing during growth in whole human blood.},
  journal = {Infection and Immunity},
  year = {2016},
  month = jan,
  abstract = {

Bacterial pathogens rely on the availability of nutrients for survival in the host environment. The phosphoenolpyruvate-phosphotransferase system (PTS) is a global regulatory network connecting sugar uptake with signal transduction. Since the fructose PTS has been shown to impact virulence in several Streptococci, including the human pathogen S. pyogenes (the group A Streptococcus, GAS), we characterized its role in carbon metabolism and pathogenesis in the M1T1 strain 5448. Growth in fructose as a sole carbon source resulted in 103 genes affected transcriptionally, where the fru locus (fruRBA) was the most induced. RT-PCR showed that fruRBA formed an operon, which was repressed by FruR in the absence of fructose, in addition to being under carbon catabolic repression. Growth assays and carbon utilization profiles revealed that although the entire fru operon was required for growth in fructose, FruA was the main transporter for fructose and was also involved in the utilization of three additional PTS sugars: cellobiose, mannitol, and N-acetyl-D-galactosamine. Inactivation of sloR, a fruA homolog that was also up regulated in presence of fructose, failed to reveal a role as a secondary fructose transporter. Whereas the ability of both ΔfruR and ΔfruB mutants to survive in the presence of whole human blood or neutrophils was impaired, the phenotype was not reproduced in murine whole blood, nor were those mutants attenuated in a mouse intraperitoneal infection. Since the ΔfruA mutant exhibited no phenotype in the human or mouse assays, we propose that FruR and FruB are important for GAS survival in a human-specific environment.

},
  issn = {1098-5522},
  doi = {10.1128/IAI.01296-15},
  author = {Valdes, Kayla M and Sundar, Ganesh S and Vega, Luis A and Belew, Ashton T and Islam, Emrul and Binet, Rachel and El-Sayed, Najib M and Le Breton, Yoann and McIver, Kevin S}
}

@article{49801,
  title = {Heterogeneity of transcription factor binding specificity models within and across cell lines},
  journal = {Genome Research},
  year = {2016},
  month = {Apr-06-2017},
  pages = {gr.199166.115},
  issn = {1088-9051},
  doi = {10.1101/gr.199166.115},
  url = {http://genome.cshlp.org/lookup/doi/10.1101/gr.199166.115},
  author = {Sharmin, Mahfuza and Corrada Bravo, H{\'e}ctor and Hannenhalli, Sridhar}
}

@article{49864,
  title = {Identification and genomic analysis of a novel group C orthobunyavirus isolated from a mosquito captured near Iquitos, Peru},
  journal = {PLoS Negl Trop Dis},
  volume = {10},
  year = {2016},
  pages = {e0004440},
  author = {Treangen, Todd and Schoeler, George and Phillippy, Adam M and Bergman, Nicholas H and Turell, Michael J}
}

@article{49840,
  title = {Identification guide to the heterobranch sea slugs (Mollusca: Gastropoda) from Bocas del Toro, Panama},
  journal = {Marine Biodiversity Records},
  volume = {9},
  year = {2016},
  month = {Jan-12-2016},
  doi = {10.1186/s41200-016-0048-z},
  url = {http://mbr.biomedcentral.com/articles/10.1186/s41200-016-0048-z},
  author = {Goodheart, Jessica and Ellingson, Ryan A. and Vital, Xochitl G. and Galv{\~a}o Filho, Hilton C. and McCarthy, Jennifer B. and Medrano, Sabrina M. and Bhave, Vishal J. and Garc{\'\i}a-M{\'e}ndez, Kimberly and Jim{\'e}nez, Lina M. and L{\'o}pez, Gina and Hoover, Craig A. and Awbrey, Jaymes D. and De Jesus, Jessika M. and Gowacki, William and Krug, Patrick J. and Vald{\'e}s, {\'A}ngel}
}

@article{49795,
  title = {The fruRBA Operon Is Necessary for Group A Streptococcal Growth in Fructose and for Resistance to Neutrophil Killing during Growth in Whole Human Blood},
  journal = {Infection and Immunity},
  volume = {84},
  year = {2016},
  month = {Dec-04-2017},
  pages = {1016--1031},
  issn = {0019-9567},
  doi = {10.1128/IAI.01296-15},
  url = {http://iai.asm.org/lookup/doi/10.1128/IAI.01296-15},
  author = {Valdes, Kayla M. and Sundar, Ganesh S. and Vega, Luis A. and Belew, Ashton T. and Islam, Emrul and Binet, Rachel and El-Sayed, Najib M. and Le Breton, Yoann and McIver, Kevin S.},
  editor = {Camilli, A.}
}

@article{49791,
  title = {Individual-specific changes in the human gut microbiota after challenge with enterotoxigenic Escherichia coli and subsequent ciprofloxacin treatment},
  journal = {BMC Genomics},
  volume = {17},
  year = {2016},
  month = {Jan-12-2016},
  doi = {10.1186/s12864-016-2777-0},
  url = {http://bmcgenomics.biomedcentral.com/articles/10.1186/s12864-016-2777-0},
  author = {Pop, Mihai and Paulson, Joseph N. and Chakraborty, Subhra and Astrovskaya, Irina and Lindsay, Brianna R. and Li, Shan and Corrada Bravo, H{\'e}ctor and Harro, Clayton and Parkhill, Julian and Walker, Alan W. and Walker, Richard I. and Sack, David A. and Stine, O. Colin}
}

@article{49813,
  title = {A joint analysis of transcriptomic and metabolomic data uncovers enhanced enzyme-metabolite coupling in breast cancer.},
  journal = {Sci Rep},
  volume = {6},
  year = {2016},
  month = jul,
  pages = {29662},
  abstract = {

Disrupted regulation of cellular processes is considered one of the hallmarks of cancer. We analyze metabolomic and transcriptomic profiles jointly collected from breast cancer and hepatocellular carcinoma patients to explore the associations between the expression of metabolic enzymes and the levels of the metabolites participating in the reactions they catalyze. Surprisingly, both breast cancer and hepatocellular tumors exhibit an increase in their gene-metabolites associations compared to noncancerous adjacent tissues. Following, we build predictors of metabolite levels from the expression of the enzyme genes catalyzing them. Applying these predictors to a large cohort of breast cancer samples we find that depleted levels of key cancer-related metabolites including glucose, glycine, serine and acetate are significantly associated with improved patient survival. Thus, we show that the levels of a wide range of metabolites in breast cancer can be successfully predicted from the transcriptome, going beyond the limited set of those measured.

},
  issn = {2045-2322},
  doi = {10.1038/srep29662},
  author = {Auslander, Noam and Yizhak, Keren and Weinstock, Adam and Budhu, Anuradha and Tang, Wei and Wang, Xin Wei and Ambs, Stefan and Ruppin, Eytan}
}

@inproceedings{49819,
  title = {Limitations of Current Approaches for Reference-Free, Graph-Based Variant Detection},
  booktitle = {Proceedings of the 7th ACM International Conference on Bioinformatics, Computational Biology, and Health Informatics - BCB {\textquoteright}16},
  year = {2016},
  publisher = {ACM Press},
  organization = {ACM Press},
  address = {New York, New York, USA},
  venue = {Seattle, WA, USA},
  isbn = {9781450342254},
  doi = {10.1145/2975167.2985653},
  url = {http://dl.acm.org/citation.cfm?doid=2975167.2985653},
  author = {Bateman, Amelia and Treangen, Todd and Pop, Mihai}
}

@article{49800,
  title = {Mash: fast genome and metagenome distance estimation using MinHash},
  journal = {Genome Biology},
  year = {2016},
  month = {Jan-12-2016},
  doi = {10.1186/s13059-016-0997-x},
  url = {http://genomebiology.biomedcentral.com/articles/10.1186/s13059-016-0997-x},
  author = {Ondov, Brian D. and Treangen, Todd and Melsted, P{\'a}ll and Mallonee, Adam B. and Bergman, Nicholas H. and Koren, Sergey and Phillippy, Adam M.}
}

@article{49826,
  title = {methylFlow: cell-specific methylation pattern reconstruction from high-throughput bisulfite-converted DNA sequencing},
  journal = {Bioinformatics},
  volume = {32},
  year = {2016},
  month = {Jan-06-2016},
  pages = {1618--1624},
  issn = {1367-4803},
  doi = {10.1093/bioinformatics/btw287},
  url = {https://academic.oup.com/bioinformatics/article-lookup/doi/10.1093/bioinformatics/btw287},
  author = {Dorri, Faezeh and Mendelowitz, Lee and Corrada Bravo, H{\'e}ctor}
}

@article{49755,
  title = {Privacy-Preserving Microbiome Analysis Using Secure Computation},
  journal = {Bioinformatics},
  year = {2016},
  month = {Nov-02-2016},
  pages = {btw073},
  issn = {1367-4803},
  doi = {10.1093/bioinformatics/btw073},
  url = {http://bioinformatics.oxfordjournals.org/lookup/doi/10.1093/bioinformatics/btw073},
  author = {Wagner, Justin and Paulson, Joseph N. and Wang, Xiao and Bhattacharjee, Bobby and Corrada Bravo, H{\'e}ctor}
}

@article{49812,
  title = {The Role of Temporal Trends in Growing Networks.},
  journal = {PLoS One},
  volume = {11},
  year = {2016},
  pages = {e0156505},
  abstract = {

The rich get richer principle, manifested by the Preferential attachment (PA) mechanism, is widely considered one of the major factors in the growth of real-world networks. PA stipulates that popular nodes are bound to be more attractive than less popular nodes; for example, highly cited papers are more likely to garner further citations. However, it overlooks the transient nature of popularity, which is often governed by trends. Here, we show that in a wide range of real-world networks the recent popularity of a node, i.e., the extent by which it accumulated links recently, significantly influences its attractiveness and ability to accumulate further links. We proceed to model this observation with a natural extension to PA, named Trending Preferential Attachment (TPA), in which edges become less influential as they age. TPA quantitatively parametrizes a fundamental network property, namely the network{\textquoteright}s tendency to trends. Through TPA, we find that real-world networks tend to be moderately to highly trendy. Networks are characterized by different susceptibilities to trends, which determine their structure to a large extent. Trendy networks display complex structural traits, such as modular community structure and degree-assortativity, occurring regularly in real-world networks. In summary, this work addresses an inherent trait of complex networks, which greatly affects their growth and structure, and develops a unified model to address its interaction with preferential attachment.

},
  issn = {1932-6203},
  doi = {10.1371/journal.pone.0156505},
  author = {Mokryn, Osnat and Wagner, Allon and Blattner, Marcel and Ruppin, Eytan and Shavitt, Yuval}
}

@article{49729,
  title = {Systems-Wide Prediction of Enzyme Promiscuity Reveals a New Underground Alternative Route for Pyridoxal 5{\textquoteright}-Phosphate Production in E. coli.},
  journal = {PLoS Comput Biol},
  volume = {12},
  year = {2016},
  month = jan,
  pages = {e1004705},
  abstract = {

Recent insights suggest that non-specific and/or promiscuous enzymes are common and active across life. Understanding the role of such enzymes is an important open question in biology. Here we develop a genome-wide method, PROPER, that uses a permissive PSI-BLAST approach to predict promiscuous activities of metabolic genes. Enzyme promiscuity is typically studied experimentally using multicopy suppression, in which over-expression of a promiscuous {\textquoteright}replacer{\textquoteright} gene rescues lethality caused by inactivation of a {\textquoteright}target{\textquoteright} gene. We use PROPER to predict multicopy suppression in Escherichia coli, achieving highly significant overlap with published cases (hypergeometric p = 4.4e-13). We then validate three novel predicted target-replacer gene pairs in new multicopy suppression experiments. We next go beyond PROPER and develop a network-based approach, GEM-PROPER, that integrates PROPER with genome-scale metabolic modeling to predict promiscuous replacements via alternative metabolic pathways. GEM-PROPER predicts a new indirect replacer (thiG) for an essential enzyme (pdxB) in production of pyridoxal 5{\textquoteright}-phosphate (the active form of Vitamin B6), which we validate experimentally via multicopy suppression. We perform a structural analysis of thiG to determine its potential promiscuous active site, which we validate experimentally by inactivating the pertaining residues and showing a loss of replacer activity. Thus, this study is a successful example where a computational investigation leads to a network-based identification of an indirect promiscuous replacement of a key metabolic enzyme, which would have been extremely difficult to identify directly.

},
  issn = {1553-7358},
  doi = {10.1371/journal.pcbi.1004705},
  author = {Oberhardt, Matthew A and Zarecki, Raphy and Reshef, Leah and Xia, Fangfang and Duran-Frigola, Miquel and Schreiber, Rachel and Henry, Christopher S and Ben-Tal, Nir and Dwyer, Daniel J and Gophna, Uri and Ruppin, Eytan}
}

@article{49799,
  title = {System-wide Clinical Proteomics of Breast Cancer Reveals Global Remodeling of Tissue Homeostasis.},
  journal = {Cell Syst},
  volume = {2},
  year = {2016},
  month = mar,
  pages = {172--184},
  abstract = {

The genomic and transcriptomic landscapes of breast cancer have been extensively studied, but the proteomes of breast tumors are far less characterized. Here, we use high-resolution, high-accuracy mass spectrometry to perform a deep analysis of luminal-type breast cancer progression using clinical breast samples from primary tumors, matched lymph node metastases, and healthy breast epithelia. We used a super-SILAC mix to quantify over 10,000 proteins with high accuracy, enabling us to identify key proteins and pathways associated with tumorigenesis and metastatic spread. We found high expression levels of proteins associated with protein synthesis and degradation in cancer tissues, accompanied by metabolic alterations that may facilitate energy production in cancer cells within their natural environment. In addition, we found proteomic differences between breast cancer stages and minor differences between primary tumors and their matched lymph node metastases. These results highlight the potential of proteomic technology in the elucidation of clinically relevant cancer signatures.

},
  issn = {2405-4712},
  doi = {10.1016/j.cels.2016.02.001},
  author = {Pozniak, Yair and Balint-Lahat, Nora and Rudolph, Jan Daniel and Lindskog, Cecilia and Katzir, Rotem and Avivi, Camilla and Pont{\'e}n, Fredrik and Ruppin, Eytan and Barshack, Iris and Geiger, Tamar}
}

@article{49817,
  title = {Therapeutic relevance of the protein phosphatase 2A in cancer},
  journal = {Oncotarget},
  year = {2016},
  month = {Jul-09-2017},
  doi = {10.18632/oncotarget.11399},
  url = {https://www.oncotarget.com/article/11399},
  author = {Cunningham, Chelsea E. and Li, Shuangshuang and Vizeacoumar, Frederick S. and Bhanumathy, Kalpana Kalyanasundaram and Lee, Joo Sang and Parameswaran, Sreejit and Furber, Levi and Abuhussein, Omar and Paul, James M. and McDonald, Megan and Templeton, Shaina D. and Shukla, Hersh and El Zawily, Amr M. and Boyd, Frederick and Alli, Nezeka and Mousseau, Darrell D. and Geyer, Ron and Bonham, Keith and Anderson, Deborah H. and Yan, Jiong and Yu-Lee, Li-Yuan and Weaver, Beth A. and Uppalapati, Maruti and Ruppin, Eytan and Sablina, Anna and Freywald, Andrew and Vizeacoumar, Franco J.}
}

@article{49794,
  title = {Transcriptome Remodeling in Trypanosoma cruzi and Human Cells during Intracellular Infection.},
  journal = {PLoS Pathog},
  volume = {12},
  year = {2016},
  month = apr,
  pages = {e1005511},
  abstract = {

Intracellular colonization and persistent infection by the kinetoplastid protozoan parasite, Trypanosoma cruzi, underlie the pathogenesis of human Chagas disease. To obtain global insights into the T. cruzi infective process, transcriptome dynamics were simultaneously captured in the parasite and host cells in an infection time course of human fibroblasts. Extensive remodeling of the T. cruzi transcriptome was observed during the early establishment of intracellular infection, coincident with a major developmental transition in the parasite. Contrasting this early response, few additional changes in steady state mRNA levels were detected once mature T. cruzi amastigotes were formed. Our findings suggest that transcriptome remodeling is required to establish a modified template to guide developmental transitions in the parasite, whereas homeostatic functions are regulated independently of transcriptomic changes, similar to that reported in related trypanosomatids. Despite complex mechanisms for regulation of phenotypic expression in T. cruzi, transcriptomic signatures derived from distinct developmental stages mirror known or projected characteristics of T. cruzi biology. Focusing on energy metabolism, we were able to validate predictions forecast in the mRNA expression profiles. We demonstrate measurable differences in the bioenergetic properties of the different mammalian-infective stages of T. cruzi and present additional findings that underscore the importance of mitochondrial electron transport in T. cruzi amastigote growth and survival. Consequences of T. cruzi colonization for the host include dynamic expression of immune response genes and cell cycle regulators with upregulation of host cholesterol and lipid synthesis pathways, which may serve to fuel intracellular T. cruzi growth. 
Thus, in addition to the biological inferences gained from gene ontology and functional enrichment analysis of differentially expressed genes in parasite and host, our comprehensive, high resolution transcriptomic dataset provides a substantially more detailed interpretation of T. cruzi infection biology and offers a basis for future drug and vaccine discovery efforts.

},
  issn = {1553-7374},
  doi = {10.1371/journal.ppat.1005511},
  author = {Li, Yuan and Shah-Simpson, Sheena and Okrah, Kwame and Belew, A Trey and Choi, Jungmin and Caradonna, Kacey L and Padmanabhan, Prasad and Ndegwa, David M and Temanni, M Ramzi and Corrada Bravo, H{\'e}ctor and El-Sayed, Najib M and Burleigh, Barbara A}
}

@article{49623,
  title = {Bayesian integration of genetics and epigenetics detects causal regulatory SNPs underlying expression variability},
  journal = {Nature Communications},
  volume = {6},
  year = {2015},
  month = {Dec-10-2015},
  pages = {8555},
  doi = {10.1038/ncomms9555},
  url = {http://www.nature.com/doifinder/10.1038/ncomms9555},
  author = {Das, Avinash and Morley, Michael and Moravec, Christine S. and Tang, W. H. W. and Hakonarson, Hakon and Ashley, Euan A. and Brandimarto, Jeffrey and Hu, Ray and Li, Mingyao and Li, Hongzhe and Liu, Yichuan and Qu, Liming and Sanchez, Pablo and Margulies, Kenneth B. and Cappola, Thomas P. and Jensen, Shane and Hannenhalli, Sridhar}
}

@article{49658,
  title = {Diversion of aspartate in ASS1-deficient tumours fosters de novo pyrimidine synthesis},
  journal = {Nature},
  volume = {527},
  year = {2015},
  month = {Nov-11-2015},
  pages = {379--383},
  issn = {0028-0836},
  doi = {10.1038/nature15529},
  url = {http://www.nature.com/doifinder/10.1038/nature15529},
  author = {Rabinovich, Shiran and Adler, Lital and Yizhak, Keren and Sarver, Alona and Silberman, Alon and Agron, Shani and Stettner, Noa and Sun, Qin and Brandis, Alexander and Helbling, Daniel and Korman, Stanley and Itzkovitz, Shalev and Dimmock, David and Ulitsky, Igor and Nagamani, Sandesh C. S. and Ruppin, Eytan and Erez, Ayelet}
}

@article{49578,
  title = {The effects of telomere shortening on cancer cells: a network model of proteomic and microRNA analysis.},
  journal = {Genomics},
  volume = {105},
  year = {2015},
  month = jan,
  pages = {5--16},
  abstract = {

Previously, we have shown that shortening of telomeres by telomerase inhibition sensitized cancer cells to cisplatinum, slowed their migration, increased DNA damage and impaired DNA repair. The mechanism behind these effects is not fully characterized. Its clarification could facilitate novel therapeutics development and may obviate the time consuming process of telomere shortening achieved by telomerase inhibition. Here we aimed to decipher the microRNA and proteomic profiling of cancer cells with shortened telomeres and identify the key mediators in telomere shortening-induced damage to those cells. Of 870 identified proteins, 98 were differentially expressed in shortened-telomere cells. 47 microRNAs were differentially expressed in these cells; some are implicated in growth arrest or act as oncogene repressors. The obtained data was used for a network construction, which provided us with nodal candidates that may mediate the shortened-telomere dependent features. These proteins{\textquoteright} expression was experimentally validated, supporting their potential central role in this system.

},
  keywords = {Gene Expression Regulation, Neoplastic, Gene Regulatory Networks, HUMANS, MicroRNAs, Neoplasms, Oligonucleotides, Proteome, proteomics, Telomere Shortening, Tumor Cells, Cultured},
  issn = {1089-8646},
  doi = {10.1016/j.ygeno.2014.10.013},
  author = {Uziel, O and Yosef, N and Sharan, R and Ruppin, E and Kupiec, M and Kushnir, M and Beery, E and Cohen-Diker, T and Nordenberg, J and Lahav, M}
}

@article{49601,
  title = {Epiviz: a view inside the design of an integrated visual analysis software for genomics},
  journal = {BMC Bioinformatics},
  volume = {16},
  year = {2015},
  month = {Jan-01-2015},
  pages = {S4},
  issn = {1471-2105},
  doi = {10.1186/1471-2105-16-S11-S4},
  url = {http://www.biomedcentral.com/1471-2105/16/S11/S4},
  author = {Chelaru, Florin and Corrada Bravo, H{\'e}ctor}
}

@article{49537,
  title = {Essential Genes in the Core Genome of the Human Pathogen Streptococcus pyogenes.},
  journal = {Sci Rep},
  volume = {5},
  year = {2015},
  pages = {9838},
  abstract = {

Streptococcus pyogenes (Group A Streptococcus, GAS) remains a major public health burden worldwide, infecting over 750 million people leading to over 500,000 deaths annually. GAS pathogenesis is complex, involving genetically distinct GAS strains and multiple infection sites. To overcome fastidious genetic manipulations and accelerate pathogenesis investigations in GAS, we developed a mariner-based system (Krmit) for en masse monitoring of complex mutant pools by transposon sequencing (Tn-seq). Highly saturated transposant libraries (Krmit insertions in ca. every 25 nucleotides) were generated in two distinct GAS clinical isolates, a serotype M1T1 invasive strain 5448 and a nephritogenic serotype M49 strain NZ131, and analyzed using a Bayesian statistical model to predict GAS essential genes, identifying sets of 227 and 241 of those genes in 5448 and NZ131, respectively. A large proportion of GAS essential genes corresponded to key cellular processes and metabolic pathways, and 177 were found conserved within the GAS core genome established from 20 available GAS genomes. Selected essential genes were validated using conditional-expression mutants. Finally, comparison to previous essentiality analyses in S. sanguinis and S. pneumoniae revealed significant overlaps, providing valuable insights for the development of new antimicrobials to treat infections by GAS and other pathogenic streptococci.

},
  issn = {2045-2322},
  doi = {10.1038/srep09838},
  author = {Le Breton, Yoann and Belew, Ashton T and Valdes, Kayla M and Islam, Emrul and Curry, Patrick and Tettelin, Herv{\'e} and Shirtliff, Mark E and El-Sayed, Najib M and McIver, Kevin S}
}

@article{49579,
  title = {Fumarate induces redox-dependent senescence by modifying glutathione metabolism.},
  journal = {Nature Communications},
  volume = {6},
  year = {2015},
  pages = {6001},
  abstract = {

Mutations in the tricarboxylic acid (TCA) cycle enzyme fumarate hydratase (FH) are associated with a highly malignant form of renal cancer. We combined analytical chemistry and metabolic computational modelling to investigate the metabolic implications of FH loss in immortalized and primary mouse kidney cells. Here, we show that the accumulation of fumarate caused by the inactivation of FH leads to oxidative stress that is mediated by the formation of succinicGSH, a covalent adduct between fumarate and glutathione. Chronic succination of GSH, caused by the loss of FH, or by exogenous fumarate, leads to persistent oxidative stress and cellular senescence in vitro and in vivo. Importantly, the ablation of p21, a key mediator of senescence, in Fh1-deficient mice resulted in the transformation of benign renal cysts into a hyperplastic lesion, suggesting that fumarate-induced senescence needs to be bypassed for the initiation of renal cancers.

},
  issn = {2041-1723},
  doi = {10.1038/ncomms7001},
  author = {Zheng, Liang and Cardaci, Simone and Jerby, Livnat and MacKenzie, Elaine D and Sciacovelli, Marco and Johnson, T Isaac and Gaude, Edoardo and King, Ayala and Leach, Joshua D G and Edrada-Ebel, RuAngelie and Hedley, Ann and Morrice, Nicholas A and Kalna, Gabriela and Blyth, Karen and Ruppin, Eytan and Frezza, Christian and Gottlieb, Eyal}
}

@article{49603,
  title = {Gene Expression Signatures Based on Variability can Robustly Predict Tumor Progression and Prognosis.},
  journal = {Cancer Informatics},
  volume = {14},
  year = {2015},
  pages = {71--81},
  abstract = {

Gene expression signatures are commonly used to create cancer prognosis and diagnosis methods, yet only a small number of them are successfully deployed in the clinic since many fail to replicate performance on subsequent validation. A primary reason for this lack of reproducibility is the fact that these signatures attempt to model the highly variable and unstable genomic behavior of cancer. Our group recently introduced gene expression anti-profiles as a robust methodology to derive gene expression signatures based on the observation that while gene expression measurements are highly heterogeneous across tumors of a specific cancer type relative to the normal tissue, their degree of deviation from normal tissue expression in specific genes involved in tissue differentiation is a stable tumor mark that is reproducible across experiments and cancer types. Here we show that constructing gene expression signatures based on variability and the anti-profile approach yields classifiers capable of successfully distinguishing benign growths from cancerous growths based on deviation from normal expression. We then show that this same approach generates stable and reproducible signatures that predict probability of relapse and survival based on tumor gene expression. These results suggest that using the anti-profile framework for the discovery of genomic signatures is an avenue leading to the development of reproducible signatures suitable for adoption in clinical settings.

},
  issn = {1176-9351},
  doi = {10.4137/CIN.S23862},
  author = {Dinalankara, Wikum and Corrada Bravo, H{\'e}ctor}
}

@article{49734,
  title = {Genomic variation. Impact of regulatory variation from RNA to protein.},
  journal = {Science},
  volume = {347},
  year = {2015},
  month = feb,
  pages = {664--667},
  abstract = {

The phenotypic consequences of expression quantitative trait loci (eQTLs) are presumably due to their effects on protein expression levels. Yet the impact of genetic variation, including eQTLs, on protein levels remains poorly understood. To address this, we mapped genetic variants that are associated with eQTLs, ribosome occupancy (rQTLs), or protein abundance (pQTLs). We found that most QTLs are associated with transcript expression levels, with consequent effects on ribosome and protein levels. However, eQTLs tend to have significantly reduced effect sizes on protein levels, which suggests that their potential impact on downstream phenotypes is often attenuated or buffered. Additionally, we identified a class of cis QTLs that affect protein abundance with little or no effect on messenger RNA or ribosome levels, which suggests that they may arise from differences in posttranslational regulation.

}, keywords = {3{\textquoteright} Flanking Region, 5{\textquoteright} Flanking Region, Cell Line, Exons, Gene Expression Regulation, Genetic Variation, HUMANS, PHENOTYPE, Protein Biosynthesis, Quantitative Trait Loci, Ribosomes, RNA, Messenger, Transcription, Genetic}, issn = {1095-9203}, doi = {10.1126/science.1260793}, author = {Battle, Alexis and Khan, Zia and Wang, Sidney H and Mitrano, Amy and Ford, Michael J and Pritchard, Jonathan K and Gilad, Yoav} } @article {49659, title = {Glutamine synthetase activity fuels nucleotide biosynthesis and supports growth of glutamine-restricted glioblastoma}, journal = {Nature Cell Biology}, volume = {17}, year = {2015}, month = dec, pages = {1556--1568}, issn = {1465-7392}, doi = {10.1038/ncb3272}, url = {http://www.nature.com/doifinder/10.1038/ncb3272}, author = {Tardito, Saverio and Oudin, Ana{\"\i}s and Ahmed, Shafiq U. and Fack, Fred and Keunen, Olivier and Zheng, Liang and Miletic, Hrvoje and Sakariassen, Per {\O}ystein and Weinstock, Adam and Wagner, Allon and Lindsay, Susan L. and Hock, Andreas K. and Barnett, Susan C. and Ruppin, Eytan and M{\o}rkve, Svein Harald and Lund-Johansen, Morten and Chalmers, Anthony J. and Bjerkvig, Rolf and Niclou, Simone P. and Gottlieb, Eyal} } @article {49797, title = {Heterogeneity of Transcription Factor binding specificity models within and across cell lines}, year = {2015}, doi = {10.1101/028787}, url = {http://biorxiv.org/lookup/doi/10.1101/028787}, author = {Sharmin, Mahfuza and Corrada Bravo, Hector and Hannenhalli, Sridhar S.} } @article {49540, title = {Impact of regulatory variation from RNA to protein}, journal = {Science}, volume = {347}, year = {2015}, month = feb, pages = {664--667}, issn = {0036-8075}, doi = {10.1126/science.1260793}, url = {http://www.sciencemag.org/cgi/doi/10.1126/science.1260793}, author = {Battle, A. and Khan, Z. and Wang, S. H. and Mitrano, A. and Ford, M. J. and Pritchard, J. K. 
and Gilad, Y.} } @article {49575, title = {Improved evidence-based genome-scale metabolic models for maize leaf, embryo, and endosperm.}, journal = {Frontiers in Plant Science}, volume = {6}, year = {2015}, month = {2015}, pages = {142}, abstract = {

There is a growing demand for genome-scale metabolic reconstructions for plants, fueled by the need to understand the metabolic basis of crop yield and by progress in genome and transcriptome sequencing. Methods are also required to enable the interpretation of plant transcriptome data to study how cellular metabolic activity varies under different growth conditions or even within different organs, tissues, and developmental stages. Such methods depend extensively on the accuracy with which genes have been mapped to the biochemical reactions in the plant metabolic pathways. Errors in these mappings lead to metabolic reconstructions with an inflated number of reactions and possible generation of unreliable metabolic phenotype predictions. Here we introduce a new evidence-based genome-scale metabolic reconstruction of maize, with significant improvements in the quality of the gene-reaction associations included within our model. We also present a new approach for applying our model to predict active metabolic genes based on transcriptome data. This method includes a minimal set of reactions associated with low expression genes to enable activity of a maximum number of reactions associated with high expression genes. We apply this method to construct an organ-specific model for the maize leaf, and tissue specific models for maize embryo and endosperm cells. We validate our models using fluxomics data for the endosperm and embryo, demonstrating an improved capacity of our models to fit the available fluxomics data. All models are publicly available via the DOE Systems Biology Knowledgebase and PlantSEED, and our new method is generally applicable for analysis transcript profiles from any plant, paving the way for further in silico studies with a wide variety of plant genomes.

}, issn = {1664-462X}, doi = {10.3389/fpls.2015.00142}, author = {Seaver, Samuel M D and Bradbury, Louis M T and Frelin, Oc{\'e}ane and Zarecki, Raphy and Ruppin, Eytan and Hanson, Andrew D and Henry, Christopher S} } @article {49512, title = {Independent Emergence of Artemisinin Resistance Mutations Among Plasmodium falciparum in Southeast Asia}, journal = {Journal of Infectious Diseases}, volume = {211}, year = {2015}, month = {03/2015}, pages = {670--679}, issn = {1537-6613}, doi = {10.1093/infdis/jiu491}, author = {Takala-Harrison, S. and Jacob, C. G. and Arze, C. and Michael P. Cummings and Silva, J. C. and Dondorp, A. M. and Fukuda, M. M. and Hien, T. T. and Mayxay, M. and Noedl, H. and Nosten, F. and Kyaw, M. P. and Nhien, N. T. T. and Imwong, M. and Bethell, D. and Se, Y. and Lon, C. and Tyner, S. D. and Saunders, D. L. and Ariey, F. and Mercereau-Puijalon, O. and Menard, D. and Newton, P. N. and Khanthavong, M. and Hongvanthong, B. and Starzengruber, P. and Fuehrer, H.-P. and Swoboda, P. and Khan, W. A. and Phyo, A. P. and Nyunt, M. M. and Nyunt, M. H. and Brown, T. S. and Adams, M. and Pepin, C. S. and Bailey, J. and Tan, J. C. and Ferdig, M. T. and Clark, T. G. and Miotto, O. and MacInnis, B. and Kwiatkowski, D. P. and White, N. J. and Ringwald, P. and Plowe, C. V.} } @article {49606, title = {Orchestrating high-throughput genomic analysis with Bioconductor.}, journal = {Nature Methods}, volume = {12}, year = {2015}, month = {2015 Feb}, pages = {115-21}, abstract = {

Bioconductor is an open-source, open-development software project for the analysis and comprehension of high-throughput data in genomics and molecular biology. The project aims to enable interdisciplinary research, collaboration and rapid development of scientific software. Based on the statistical programming language R, Bioconductor comprises 934 interoperable packages contributed by a large, diverse community of scientists. Packages cover a range of bioinformatic and statistical applications. They undergo formal initial review and continuous automated testing. We present an overview for prospective users and contributors.

}, keywords = {Computational Biology, Gene Expression Profiling, Genomics, High-Throughput Screening Assays, Programming Languages, software, User-Computer Interface}, issn = {1548-7105}, doi = {10.1038/nmeth.3252}, author = {Huber, Wolfgang and Carey, Vincent J and Gentleman, Robert and Anders, Simon and Carlson, Marc and Carvalho, Benilton S and Bravo, H{\'e}ctor Corrada and Davis, Sean and Gatto, Laurent and Girke, Thomas and Gottardo, Raphael and Hahne, Florian and Hansen, Kasper D and Irizarry, Rafael A and Lawrence, Michael and Love, Michael I and MacDonald, James and Obenchain, Valerie and Ole{\'s}, Andrzej K and Pag{\`e}s, Herv{\'e} and Reyes, Alejandro and Shannon, Paul and Smyth, Gordon K and Tenenbaum, Dan and Waldron, Levi and Morgan, Martin} } @article {49513, title = {Plasmodium falciparum field isolates from areas of repeated emergence of drug resistant malaria show no evidence of hypermutator phenotype}, journal = {Infection, Genetics and Evolution}, volume = {30}, year = {2015}, month = {03/2015}, pages = {318 - 322}, issn = {15671348}, doi = {10.1016/j.meegid.2014.12.010}, author = {Brown, Tyler S. and Jacob, Christopher G and Silva, Joana C and Takala-Harrison, Shannon and Djimd{\'e}, Abdoulaye and Dondorp, Arjen M and Fukuda, Mark and Noedl, Harald and Nyunt, Myaing Myaing and Kyaw, Myat Phone and Mayxay, Mayfong and Hien, Tran Tinh and Plowe, Christopher V and Michael P. Cummings} } @article {49754, title = {Privacy-Preserving Microbiome Analysis Using Secure Computation}, year = {2015}, doi = {10.1101/025999}, url = {http://biorxiv.org/lookup/doi/10.1101/025999}, author = {Wagner, Justin and Paulson, Joseph N. 
and Wang, Xiao-Shun and Bhattacharjee, Bobby and Corrada Bravo, Hector} } @article {49620, title = {Relationships within Cladobranchia (Gastropoda: Nudibranchia) based on RNA-Seq data: an initial investigation}, journal = {Royal Society Open Science}, volume = {2}, year = {2015}, month = sep, pages = {150196}, doi = {10.1098/rsos.150196}, url = {http://rsos.royalsocietypublishing.org/lookup/doi/10.1098/rsos.150196}, author = {Goodheart, Jessica and Bazinet, Adam L. and Collins, Allen G. and Cummings, Michael P.} } @article {49591, title = {RNA-Seq identifies novel myocardial gene expression signatures of heart failure.}, journal = {Genomics}, volume = {105}, year = {2015}, month = {2015 Feb}, pages = {83-9}, abstract = {

Heart failure is a complex clinical syndrome and has become the most common reason for adult hospitalization in developed countries. Two subtypes of heart failure, ischemic heart disease (ISCH) and dilated cardiomyopathy (DCM), have been studied using microarray platforms. However, microarray has limited resolution. Here we applied RNA sequencing (RNA-Seq) to identify gene signatures for heart failure from six individuals, including three controls, one ISCH and two DCM patients. Using genes identified from this small RNA-Seq dataset, we were able to accurately classify heart failure status in a much larger set of 313 individuals. The identified genes significantly overlapped with genes identified via genome-wide association studies for cardiometabolic traits and the promoters of those genes were enriched for binding sites for transcriptions factors. Our results indicate that it is possible to use RNA-Seq to classify disease status for complex diseases such as heart failure using an extremely small training dataset.

}, issn = {1089-8646}, doi = {10.1016/j.ygeno.2014.12.002}, author = {Liu, Yichuan and Morley, Michael and Brandimarto, Jeffrey and Hannenhalli, Sridhar and Hu, Yu and Ashley, Euan A and Tang, W H Wilson and Moravec, Christine S and Margulies, Kenneth B and Cappola, Thomas P and Li, Mingyao} } @article {49609, title = {Shape analysis of high-throughput transcriptomics experiment data.}, journal = {Biostatistics}, volume = {16}, year = {2015}, month = {2015 Oct}, pages = {627-40}, abstract = {

The recent growth of high-throughput transcriptome technology has been paralleled by the development of statistical methodologies to analyze the data they produce. Some of these newly developed methods are based on the assumption that the data observed or a transformation of the data are relatively symmetric with light tails, usually summarized by assuming a Gaussian random component. It is indeed very difficult to assess this assumption for small sample sizes. In this article, we utilize L-moments statistics as the basis of exploratory data analysis, the assessment of distributional assumptions, and the hypothesis testing of high-throughput transcriptomic data. In particular, we use L-moments ratios for assessing the shape (skewness and kurtosis) of high-throughput transcriptome data. Based on these statistics, we propose an algorithm for identifying genes with distributions that are markedly different from the majority in the data. In addition, we also illustrate the utility of this framework to characterize the robustness of distributional assumptions. We apply it to RNA-seq data and find that methods based on the simple $t$-test for differential expression analysis using L-moments as weights are robust.

}, issn = {1468-4357}, doi = {10.1093/biostatistics/kxv018}, author = {Okrah, Kwame and Corrada Bravo, Hector} } @article {49796, title = {Simultaneous transcriptional profiling of Leishmania major and its murine macrophage host cell reveals insights into host-pathogen interactions.}, journal = {BMC Genomics}, volume = {16}, year = {2015}, month = {2015}, pages = {1108}, abstract = {

BACKGROUND: Parasites of the genus Leishmania are the causative agents of leishmaniasis, a group of diseases that range in manifestations from skin lesions to fatal visceral disease. The life cycle of Leishmania parasites is split between its insect vector and its mammalian host, where it resides primarily inside of macrophages. Once intracellular, Leishmania parasites must evade or deactivate the host{\textquoteright}s innate and adaptive immune responses in order to survive and replicate.

RESULTS: We performed transcriptome profiling using RNA-seq to simultaneously identify global changes in murine macrophage and L. major gene expression as the parasite entered and persisted within murine macrophages during the first 72 h of an infection. Differential gene expression, pathway, and gene ontology analyses enabled us to identify modulations in host and parasite responses during an infection. The most substantial and dynamic gene expression responses by both macrophage and parasite were observed during early infection. Murine genes related to both pro- and anti-inflammatory immune responses and glycolysis were substantially upregulated and genes related to lipid metabolism, biogenesis, and Fc gamma receptor-mediated phagocytosis were downregulated. Upregulated parasite genes included those aimed at mitigating the effects of an oxidative response by the host immune system while downregulated genes were related to translation, cell signaling, fatty acid biosynthesis, and flagellum structure.

CONCLUSIONS: The gene expression patterns identified in this work yield signatures that characterize multiple developmental stages of L. major parasites and the coordinated response of Leishmania-infected macrophages in the real-time setting of a dual biological system. This comprehensive dataset offers a clearer and more sensitive picture of the interplay between host and parasite during intracellular infection, providing additional insights into how pathogens are able to evade host defenses and modulate the biological functions of the cell in order to survive in the mammalian environment.

}, issn = {1471-2164}, doi = {10.1186/s12864-015-2237-2}, author = {Dillon, Laura A L and Suresh, Rahul and Okrah, Kwame and Corrada Bravo, Hector and Mosser, David M and El-Sayed, Najib M} } @article {49539, title = {Transcriptomic profiling of gene expression and RNA processing during Leishmania major differentiation.}, volume = {43}, year = {2015}, month = {2015 Aug 18}, pages = {6799-813}, abstract = {

Protozoan parasites of the genus Leishmania are the etiological agents of leishmaniasis, a group of diseases with a worldwide incidence of 0.9-1.6 million cases per year. We used RNA-seq to conduct a high-resolution transcriptomic analysis of the global changes in gene expression and RNA processing events that occur as L. major transforms from non-infective procyclic promastigotes to infective metacyclic promastigotes. Careful statistical analysis across multiple biological replicates and the removal of batch effects provided a high quality framework for comprehensively analyzing differential gene expression and transcriptome remodeling in this pathogen as it acquires its infectivity. We also identified precise 5{\textquoteright} and 3{\textquoteright} UTR boundaries for a majority of Leishmania genes and detected widespread alternative trans-splicing and polyadenylation. An investigation of possible correlations between stage-specific preferential trans-splicing or polyadenylation sites and differentially expressed genes revealed a lack of systematic association, establishing that differences in expression levels cannot be attributed to stage-regulated alternative RNA processing. Our findings build on and improve existing expression datasets and provide a substantially more detailed view of L. major biology that will inform the field and potentially provide a stronger basis for drug discovery and vaccine development efforts.

}, journal = {Nucleic Acids Research}, issn = {1362-4962}, doi = {10.1093/nar/gkv656}, author = {Dillon, Laura A L and Okrah, Kwame and Hughitt, V Keith and Suresh, Rahul and Li, Yuan and Fernandes, Maria Cecilia and Belew, A Trey and Corrada Bravo, Hector and Mosser, David M and El-Sayed, Najib M} } @article {49599, title = {BlindCall: ultra-fast base-calling of high-throughput sequencing data by blind deconvolution.}, journal = {Bioinformatics}, volume = {30}, year = {2014}, month = {2014 May 1}, pages = {1214-9}, abstract = {

MOTIVATION: Base-calling of sequencing data produced by high-throughput sequencing platforms is a fundamental process in current bioinformatics analysis. However, existing third-party probabilistic or machine-learning methods that significantly improve the accuracy of base-calls on these platforms are impractical for production use due to their computational inefficiency.

RESULTS: We directly formulate base-calling as a blind deconvolution problem and implemented BlindCall as an efficient solver to this inverse problem. BlindCall produced base-calls at accuracy comparable to state-of-the-art probabilistic methods while processing data at rates 10 times faster in most cases. The computational complexity of BlindCall scales linearly with read length making it better suited for new long-read sequencing technologies.

}, keywords = {algorithms, High-Throughput Nucleotide Sequencing, HUMANS, Probability, Reproducibility of Results, Sequence Analysis, DNA, software, Time factors}, issn = {1367-4811}, doi = {10.1093/bioinformatics/btu010}, author = {Ye, Chengxi and Hsiao, Chiaowen and Corrada Bravo, Hector} } @article {49863, title = {Complete genome sequence of the quality control strain Staphylococcus aureus subsp. aureus ATCC 25923}, journal = {Genome announcements}, volume = {2}, year = {2014}, pages = {e01110{\textendash}14}, author = {Treangen, Todd J and Maybank, Rosslyn A and Enke, Sana and Friss, Mary Beth and Diviak, Lynn F and Karaolis, David KR and Koren, Sergey and Ondov, Brian and Phillippy, Adam M and Bergman, Nicholas H} } @article {49725, title = {A computational study of the Warburg effect identifies metabolic targets inhibiting cancer migration.}, journal = {Mol Syst Biol}, volume = {10}, year = {2014}, month = {2014}, pages = {744}, abstract = {

Over the last decade, the field of cancer metabolism has mainly focused on studying the role of tumorigenic metabolic rewiring in supporting cancer proliferation. Here, we perform the first genome-scale computational study of the metabolic underpinnings of cancer migration. We build genome-scale metabolic models of the NCI-60 cell lines that capture the Warburg effect (aerobic glycolysis) typically occurring in cancer cells. The extent of the Warburg effect in each of these cell line models is quantified by the ratio of glycolytic to oxidative ATP flux (AFR), which is found to be highly positively associated with cancer cell migration. We hence predicted that targeting genes that mitigate the Warburg effect by reducing the AFR may specifically inhibit cancer migration. By testing the anti-migratory effects of silencing such 17 top predicted genes in four breast and lung cancer cell lines, we find that up to 13 of these novel predictions significantly attenuate cell migration either in all or one cell line only, while having almost no effect on cell proliferation. Furthermore, in accordance with the predictions, a significant reduction is observed in the ratio between experimentally measured ECAR and OCR levels following these perturbations. Inhibiting anti-migratory targets is a promising future avenue in treating cancer since it may decrease cytotoxic-related side effects that plague current anti-proliferative treatments. Furthermore, it may reduce cytotoxic-related clonal selection of more aggressive cancer cells and the likelihood of emerging resistance.

}, issn = {1744-4292}, doi = {10.15252/msb.20145746}, author = {Yizhak, Keren and Le D{\'e}v{\'e}dec, Sylvia E and Rogkoti, Vasiliki Maria and Baenke, Franziska and de Boer, Vincent C and Frezza, Christian and Schulze, Almut and van de Water, Bob and Ruppin, Eytan} } @article {49611, title = {Construction of a dairy microbial genome catalog opens new perspectives for the metagenomic analysis of dairy fermented products}, journal = {BMC Genomics}, volume = {15}, number = {1}, year = {2014}, pages = {1101}, abstract = {BACKGROUND: Microbial communities of traditional cheeses are complex and insufficiently characterized. The origin, safety and functional role in cheese making of these microbial communities are still not well understood. Metagenomic analysis of these communities by high throughput shotgun sequencing is a promising approach to characterize their genomic and functional profiles. Such analyses, however, critically depend on the availability of appropriate reference genome databases against which the sequencing reads can be aligned. RESULTS: We built a reference genome catalog suitable for short read metagenomic analysis using a low-cost sequencing strategy. We selected 142 bacteria isolated from dairy products belonging to 137 different species and 67 genera, and succeeded to reconstruct the draft genome of 117 of them at a standard or high quality level, including isolates from the genera Kluyvera, Luteococcus and Marinilactibacillus, still missing from public database. To demonstrate the potential of this catalog, we analysed the microbial composition of the surface of two smear cheeses and one blue-veined cheese, and showed that a significant part of the microbiota of these traditional cheeses was composed of microorganisms newly sequenced in our study. CONCLUSIONS: Our study provides data, which combined with publicly available genome references, represents the most expansive catalog to date of cheese-associated bacteria. 
Using this extended dairy catalog, we revealed the presence in traditional cheese of dominant microorganisms not deliberately inoculated, mainly Gram-negative genera such as Pseudoalteromonas haloplanktis or Psychrobacter immobilis, that may contribute to the characteristics of cheese produced through traditional methods.}, issn = {1471-2164}, author = {Almeida, Mathieu and Hebert, Agnes and Abraham, Anne-Laure and Rasmussen, Simon and Monnet, Christophe and Pons, Nicolas and Delbes, Celine and Loux, Valentin and Batto, Jean-Michel and Leonard, Pierre and Kennedy, Sean and Ehrlich, Stanislas and Pop, Mihai and Montel, Marie-Christine and Irlinger, Francoise and Renault, Pierre} } @article {38584, title = {CTCF binding site sequence differences are associated with unique regulatory and functional trends during embryonic stem cell differentiation}, journal = {Nucleic Acids Research}, volume = {42}, number = {2}, year = {2014}, note = {Plasschaert, Robert N
Vigneau, Sebastien
Tempera, Italo
Gupta, Ravi
Maksimoska, Jasna
Everett, Logan
Davuluri, Ramana
Mamorstein, Ronen
Lieberman, Paul M
Schultz, David
Hannenhalli, Sridhar
Bartolomei, Marisa S
eng
K99AI099153/AI/NIAID NIH HHS/
P30 CA10815/CA/NCI NIH HHS/
R01 CA140652/CA/NCI NIH HHS/
R01-GM052880/GM/NIGMS NIH HHS/
R01CA140652/CA/NCI NIH HHS/
R01GM085226/GM/NIGMS NIH HHS/
R01HD042026/HD/NICHD NIH HHS/
T32GM008216/GM/NIGMS NIH HHS/
Research Support, N.I.H., Extramural
Research Support, Non-U.S. Gov{\textquoteright}t
England
2013/10/15 06:00
Nucleic Acids Res. 2014 Jan;42(2):774-89. doi: 10.1093/nar/gkt910. Epub 2013 Oct 10.}, month = {Jan}, pages = {774--789}, abstract = {CTCF (CCCTC-binding factor) is a highly conserved multifunctional DNA-binding protein with thousands of binding sites genome-wide. Our previous work suggested that differences in CTCF{\textquoteright}s binding site sequence may affect the regulation of CTCF recruitment and its function. To investigate this possibility, we characterized changes in genome-wide CTCF binding and gene expression during differentiation of mouse embryonic stem cells. After separating CTCF sites into three classes (LowOc, MedOc and HighOc) based on similarity to the consensus motif, we found that developmentally regulated CTCF binding occurs preferentially at LowOc sites, which have lower similarity to the consensus. By measuring the affinity of CTCF for selected sites, we show that sites lost during differentiation are enriched in motifs associated with weaker CTCF binding in vitro. Specifically, enrichment for T at the 18th position of the CTCF binding site is associated with regulated binding in the LowOc class and can predictably reduce CTCF affinity for binding sites. Finally, by comparing changes in CTCF binding with changes in gene expression during differentiation, we show that LowOc and HighOc sites are associated with distinct regulatory functions. Our results suggest that the regulatory control of CTCF is dependent in part on specific motifs within its binding site.}, keywords = {*Gene Expression Regulation, *Regulatory Elements, Transcriptional, Animals, Binding Sites, Cell Differentiation/*genetics, Cells, Cultured, Embryonic Stem Cells/cytology/*metabolism, Mice, Nucleotide Motifs, Protein Binding, Repressor Proteins/*metabolism}, issn = {1362-4962, 0305-1048}, doi = {10.1093/nar/gkt910},
author = {Plasschaert, R. N. and Vigneau, S. and Tempera, I. and Gupta, R. and Maksimoska, J. and Everett, L. and Davuluri, R. and Mamorstein, R. and Lieberman, P. M. and Schultz, D. and Sridhar Hannenhalli and Bartolomei, M. S.} } @article {49596, title = {Determinants of expression variability}, journal = {Nucleic Acids Research}, volume = {42}, year = {2014}, month = apr, pages = {3503--3514}, issn = {0305-1048}, doi = {10.1093/nar/gkt1364}, url = {http://nar.oxfordjournals.org/lookup/doi/10.1093/nar/gkt1364}, author = {Alemu, E. Y. and Carl, J. W. and Corrada Bravo, H. and Hannenhalli, S.} } @article {49600, title = {Diarrhea in young children from low-income countries leads to large-scale alterations in intestinal microbiota composition.}, journal = {Genome Biology}, volume = {15}, year = {2014}, month = {2014}, pages = {R76}, abstract = {

BACKGROUND: Diarrheal diseases continue to contribute significantly to morbidity and mortality in infants and young children in developing countries. There is an urgent need to better understand the contributions of novel, potentially uncultured, diarrheal pathogens to severe diarrheal disease, as well as distortions in normal gut microbiota composition that might facilitate severe disease.

RESULTS: We use high throughput 16S rRNA gene sequencing to compare fecal microbiota composition in children under five years of age who have been diagnosed with moderate to severe diarrhea (MSD) with the microbiota from diarrhea-free controls. Our study includes 992 children from four low-income countries in West and East Africa, and Southeast Asia. Known pathogens, as well as bacteria currently not considered as important diarrhea-causing pathogens, are positively associated with MSD, and these include Escherichia/Shigella, and Granulicatella species, and Streptococcus mitis/pneumoniae groups. In both cases and controls, there tend to be distinct negative correlations between facultative anaerobic lineages and obligate anaerobic lineages. Overall genus-level microbiota composition exhibit a shift in controls from low to high levels of Prevotella and in MSD cases from high to low levels of Escherichia/Shigella in younger versus older children; however, there was significant variation among many genera by both site and age.

CONCLUSIONS: Our findings expand the current understanding of microbiota-associated diarrhea pathogenicity in young children from developing countries. Our findings are necessarily based on correlative analyses and must be further validated through epidemiological and molecular techniques.

}, keywords = {Bangladesh, Base Sequence, Case-Control Studies, Child, Preschool, Diarrhea, Infantile, Dysentery, Feces, Female, Gambia, HUMANS, Infant, Infant, Newborn, Intestines, Kenya, Male, Mali, Microbiota, Molecular Typing, Poverty, RNA, Bacterial, RNA, Ribosomal, 16S}, issn = {1474-760X}, doi = {10.1186/gb-2014-15-6-r76}, author = {Pop, Mihai and Walker, Alan W and Paulson, Joseph and Lindsay, Brianna and Antonio, Martin and Hossain, M Anowar and Oundo, Joseph and Tamboura, Boubou and Mai, Volker and Astrovskaya, Irina and Corrada Bravo, Hector and Rance, Richard and Stares, Mark and Levine, Myron M and Panchalingam, Sandra and Kotloff, Karen and Ikumapayi, Usman N and Ebruke, Chinelo and Adeyemi, Mitchell and Ahmed, Dilruba and Ahmed, Firoz and Alam, Meer Taifur and Amin, Ruhul and Siddiqui, Sabbir and Ochieng, John B and Ouma, Emmanuel and Juma, Jane and Mailu, Euince and Omore, Richard and Morris, J Glenn and Breiman, Robert F and Saha, Debasish and Parkhill, Julian and Nataro, James P and Stine, O Colin} } @article {49602, title = {Epiviz: interactive visual analytics for functional genomics data.}, volume = {11}, year = {2014}, month = {2014 Sep}, pages = {938-40}, abstract = {

Visualization is an integral aspect of genomics data analysis. Algorithmic-statistical analysis and interactive visualization are most effective when used iteratively. Epiviz (http://epiviz.cbcb.umd.edu/), a web-based genome browser, and the Epivizr Bioconductor package allow interactive, extensible and reproducible visualization within a state-of-the-art data-analysis platform.

}, keywords = {algorithms, Chromosome mapping, Data Mining, database management systems, Databases, Genetic, Genomics, Internet, software, User-Computer Interface}, journal = {Nature Methods}, issn = {1548-7105}, doi = {10.1038/nmeth.3038}, author = {Chelaru, Florin and Smith, Llewellyn and Goldstein, Naomi and Corrada Bravo, H{\'e}ctor} } @article {38274, title = {A Gateway for Phylogenetic Analysis Powered by Grid Computing Featuring GARLI 2.0}, journal = {Syst Biol}, year = {2014}, doi = {10.1093/sysbio/syu031}, abstract = {

We introduce molecularevolution.org, a publicly available gateway for high-throughput, maximum likelihood phylogenetic analysis powered by grid computing. The gateway features a garli 2.0 web service that enables a user to quickly and easily submit thousands of maximum likelihood tree searches or bootstrap searches that are executed in parallel on distributed computing resources. The garli web service allows one to easily specify partitioned substitution models using a graphical interface, and it performs sophisticated post-processing of phylogenetic results. Although the garli web service has been used by the research community for over three years, here we formally announce the availability of the service, describe its capabilities, highlight new features and recent improvements, and provide details about how the grid system efficiently delivers high-quality phylogenetic results.

}, author = {Adam L. Bazinet and Zwickl, Derrick J. and Michael P. Cummings} } @article {49604, title = {Large hypomethylated blocks as a universal defining epigenetic alteration in human solid tumors.}, volume = {6}, year = {2014}, month = {2014}, pages = {61}, abstract = {

BACKGROUND: One of the most provocative recent observations in cancer epigenetics is the discovery of large hypomethylated blocks, including single copy genes, in colorectal cancer, that correspond in location to heterochromatic LOCKs (large organized chromatin lysine-modifications) and LADs (lamin-associated domains).

METHODS: Here we performed a comprehensive genome-scale analysis of 10 breast, 28 colon, nine lung, 38 thyroid, 18 pancreas cancers, and five pancreas neuroendocrine tumors as well as matched normal tissue from most of these cases, as well as 51 premalignant lesions. We used a new statistical approach that allows the identification of large hypomethylated blocks on the Illumina HumanMethylation450 BeadChip platform.

RESULTS: We find that hypomethylated blocks are a universal feature of common solid human cancer, and that they occur at the earliest stage of premalignant tumors and progress through clinical stages of thyroid and colon cancer development. We also find that the disrupted CpG islands widely reported previously, including hypermethylated island bodies and hypomethylated shores, are enriched in hypomethylated blocks, with flattening of the methylation signal within and flanking the islands. Finally, we found that genes showing higher between individual gene expression variability are enriched within these hypomethylated blocks.

CONCLUSION: Thus hypomethylated blocks appear to be a universal defining epigenetic alteration in human cancer, at least for common solid tumors.

},
  issn = {1756-994X},
  doi = {10.1186/s13073-014-0061-y},
  author = {Timp, Winston and Bravo, H{\'e}ctor Corrada and McDonald, Oliver G and Goggins, Michael and Umbricht, Chris and Zeiger, Martha and Feinberg, Andrew P and Irizarry, Rafael A}
}

% 49583: journal supplied from the record's ISSN/DOI (PNAS). The exported date
% "Dec-08-2014" is a day/month swap of 12 Aug 2014. The exported author list
% repeated "Bogos, B." in the ninth position; NOTE(review): ninth author restored
% as Csorgo, B. per the published author list -- confirm against the DOI record.
@article {49583,
  title = {Network-level architecture and the evolutionary potential of underground metabolism},
  journal = {Proceedings of the National Academy of Sciences of the United States of America},
  volume = {111},
  year = {2014},
  month = aug,
  pages = {11762--11767},
  issn = {0027-8424},
  doi = {10.1073/pnas.1406102111},
  url = {http://www.pnas.org/cgi/doi/10.1073/pnas.1406102111},
  author = {Notebaart, R. A. and Szappanos, B. and Kintses, B. and Pal, F. and Gyorkei, A. and Bogos, B. and Lazar, V. and Spohn, R. and Cs{\"o}rg{\H{o}}, B. and Wagner, A. and Ruppin, E. and Pal, C. and Papp, B.}
}

@article {49862,
  title = {A new rhesus macaque assembly and annotation for next-generation sequencing analyses},
  journal = {Biology Direct},
  volume = {9},
  year = {2014},
  pages = {20},
  author = {Zimin, Aleksey V and Cornish, Adam S and Maudhoo, Mnirnal D and Gibbs, Robert M and Zhang, Xiongfei and Pandey, Sanjit and Meehan, Daniel T and Wipfler, Kristin and Bosinger, Steven E and Johnson, Zachary P and Todd Treangen}
}

% 49607: journal supplied from the record's ISSN (2167-8359) and DOI (10.7717/peerj.561);
% the exported month value "2014" was not a month and has been dropped.
@article {49607,
  title = {Removing batch effects for prediction problems with frozen surrogate variable analysis},
  journal = {PeerJ},
  volume = {2},
  year = {2014},
  pages = {e561},
  abstract = {

Batch effects are responsible for the failure of promising genomic prognostic signatures, major ambiguities in published genomic results, and retractions of widely-publicized findings. Batch effect corrections have been developed to remove these artifacts, but they are designed to be used in population studies. But genomic technologies are beginning to be used in clinical applications where samples are analyzed one at a time for diagnostic, prognostic, and predictive applications. There are currently no batch correction methods that have been developed specifically for prediction. In this paper, we propose a new method called frozen surrogate variable analysis (fSVA) that borrows strength from a training set for individual sample batch correction. We show that fSVA improves prediction accuracy in simulations and in public genomic studies. fSVA is available as part of the sva Bioconductor package.

},
  issn = {2167-8359},
  doi = {10.7717/peerj.561},
  author = {Parker, Hilary S and Corrada Bravo, Hector and Leek, Jeffrey T}
}

% 49763: the exported date "Apr-03-2016" contradicts year = 2014 and has been
% dropped rather than guessed; the second author's given name had lost its
% leading "H" in export.
@article {49763,
  title = {Reply to: {``A fair comparison''}},
  journal = {Nature Methods},
  volume = {11},
  year = {2014},
  pages = {359--360},
  issn = {1548-7091},
  doi = {10.1038/nmeth.2898},
  url = {http://www.nature.com/doifinder/10.1038/nmeth.2898},
  author = {Paulson, Joseph N and Bravo, H{\'e}ctor Corrada and Pop, Mihai}
}

% 49608: journal supplied from the record's ISSN (1476-5578) and DOI (10.1038/mp...);
% record 38469 describes the same article.
@article {49608,
  title = {{RNA}-sequencing of the brain transcriptome implicates dysregulation of neuroplasticity, circadian rhythms and {GTPase} binding in bipolar disorder},
  journal = {Molecular Psychiatry},
  volume = {19},
  year = {2014},
  month = nov,
  pages = {1179--1185},
  abstract = {

RNA-sequencing (RNA-seq) is a powerful technique to investigate the complexity of gene expression in the human brain. We used RNA-seq to survey the brain transcriptome in high-quality postmortem dorsolateral prefrontal cortex from 11 individuals diagnosed with bipolar disorder (BD) and from 11 age- and gender-matched controls. Deep sequencing was performed, with over 350 million reads per specimen. At a false discovery rate of <5\%, we detected five differentially expressed (DE) genes and 12 DE transcripts, most of which have not been previously implicated in BD. Among these, Prominin 1/CD133 and ATP-binding cassette-sub-family G-member2 (ABCG2) have important roles in neuroplasticity. We also show for the first time differential expression of long noncoding RNAs (lncRNAs) in BD. DE transcripts include those of serine/arginine-rich splicing factor 5 (SRSF5) and regulatory factor X4 (RFX4), which along with lncRNAs have a role in mammalian circadian rhythms. The DE genes were significantly enriched for several Gene Ontology categories. Of these, genes involved with GTPase binding were also enriched for BD-associated SNPs from previous genome-wide association studies, suggesting that differential expression of these genes is not simply a consequence of BD or its treatment. Many of these findings were replicated by microarray in an independent sample of 60 cases and controls. These results highlight common pathways for inherited and non-inherited influences on disease risk that may constitute good targets for novel therapies.

},
  keywords = {Adult, Aged, Bipolar Disorder, Circadian Rhythm, Female, Genome-Wide Association Study, GTP Phosphohydrolases, HUMANS, Male, Meta-Analysis as Topic, Microarray Analysis, Middle Aged, Neuronal Plasticity, Polymerase Chain Reaction, Prefrontal Cortex, Principal Component Analysis, Sequence Analysis, RNA, Transcriptome, Young Adult},
  issn = {1476-5578},
  doi = {10.1038/mp.2013.170},
  author = {Akula, N and Barb, J and Jiang, X and Wendland, J R and Choi, K H and Sen, S K and Hou, L and Chen, D T W and Laje, G and Johnson, K and Lipska, B K and Kleinman, J E and Corrada-Bravo, H and Detera-Wadleigh, S and Munson, P J and McMahon, F J}
}

% 38469 describes the same article as 49608 (identical DOI). Both keys are kept so
% existing citations keep resolving. The export stored the DOI in "type" and the
% PubMed link in "note"; they now sit in "doi" and "url".
@article {38469,
  title = {{RNA}-sequencing of the brain transcriptome implicates dysregulation of neuroplasticity, circadian rhythms and {GTPase} binding in bipolar disorder},
  journal = {Molecular Psychiatry},
  year = {2014},
  url = {http://www.ncbi.nlm.nih.gov/pubmed/24393808?dopt=Abstract},
  doi = {10.1038/mp.2013.170},
  abstract = {RNA-sequencing (RNA-seq) is a powerful technique to investigate the complexity of gene expression in the human brain. We used RNA-seq to survey the brain transcriptome in high-quality postmortem dorsolateral prefrontal cortex from 11 individuals diagnosed with bipolar disorder (BD) and from 11 age- and gender-matched controls. Deep sequencing was performed, with over 350 million reads per specimen. At a false discovery rate of <5\%, we detected five differentially expressed (DE) genes and 12 DE transcripts, most of which have not been previously implicated in BD. Among these, Prominin 1/CD133 and ATP-binding cassette-sub-family G-member2 (ABCG2) have important roles in neuroplasticity. We also show for the first time differential expression of long noncoding RNAs (lncRNAs) in BD. DE transcripts include those of serine/arginine-rich splicing factor 5 (SRSF5) and regulatory factor X4 (RFX4), which along with lncRNAs have a role in mammalian circadian rhythms. 
The DE genes were significantly enriched for several Gene Ontology categories. Of these, genes involved with GTPase binding were also enriched for BD-associated SNPs from previous genome-wide association studies, suggesting that differential expression of these genes is not simply a consequence of BD or its treatment. Many of these findings were replicated by microarray in an independent sample of 60 cases and controls. These results highlight common pathways for inherited and non-inherited influences on disease risk that may constitute good targets for novel therapies. Molecular Psychiatry advance online publication, 7 January 2014; doi:10.1038/mp.2013.170.},
  author = {Akula, N. and Barb, J. and Jiang, X. and Wendland, J. R. and Choi, K. H. and Sen, S. K. and Hou, L. and Chen, D. T. W. and Laje, G. and Johnson, K. and Lipska, B. K. and Kleinman, J. E. and H{\'e}ctor Corrada Bravo and Detera-Wadleigh, S. and Munson, P. J. and McMahon, F. J.}
}

% 49736: month reduced to the standard macro (export had "2014 Aug 1") and the
% abbreviated page range "21326-38" expanded.
@article {49736,
  title = {Stoichiometry of site-specific lysine acetylation in an entire proteome},
  journal = {Journal of Biological Chemistry},
  volume = {289},
  year = {2014},
  month = aug,
  pages = {21326--21338},
  abstract = {

Acetylation of lysine $\epsilon$-amino groups influences many cellular processes and has been mapped to thousands of sites across many organisms. Stoichiometric information of acetylation is essential to accurately interpret biological significance. Here, we developed and employed a novel method for directly quantifying stoichiometry of site-specific acetylation in the entire proteome of Escherichia coli. By coupling isotopic labeling and a novel pairing algorithm, our approach performs an in silico enrichment of acetyl peptides, circumventing the need for immunoenrichment. We investigated the function of the sole NAD(+)-dependent protein deacetylase, CobB, on both site-specific and global acetylation. We quantified 2206 peptides from 899 proteins and observed a wide distribution of acetyl stoichiometry, ranging from less than 1\% up to 98\%. Bioinformatic analysis revealed that metabolic enzymes, which either utilize or generate acetyl-CoA, and proteins involved in transcriptional and translational processes displayed the highest degree of acetylation. Loss of CobB led to increased global acetylation at low stoichiometry sites and induced site-specific changes at high stoichiometry sites, and biochemical analysis revealed altered acetyl-CoA metabolism. Thus, this study demonstrates that sirtuin deacetylase deficiency leads to both site-specific and global changes in protein acetylation stoichiometry, affecting central metabolism.

},
  keywords = {Acetylation, Amino Acid Sequence, Bacterial Proteins, Chromatography, High Pressure Liquid, Computational Biology, Escherichia coli, Lysine, Molecular Sequence Data, Proteome, Tandem Mass Spectrometry},
  issn = {1083-351X},
  doi = {10.1074/jbc.M114.581843},
  author = {Baeza, Josue and Dowell, James A and Smallegan, Michael J and Fan, Jing and Amador-Noguez, Daniel and Khan, Zia and Denu, John M}
}

% 38142: the export stored the DOI in "type"; it now sits in "doi".
@article {38142,
  title = {Can {RNA-Seq} resolve the rapid radiation of advanced moths and butterflies ({Hexapoda}: {Lepidoptera}: {Apoditrysia})? An exploratory study},
  journal = {PLoS One},
  volume = {8},
  year = {2013},
  doi = {10.1371/journal.pone.0082615},
  abstract = {

Recent molecular phylogenetic studies of the insect order Lepidoptera have robustly resolved family-level divergences within most superfamilies, and most divergences among the relatively species-poor early-arising superfamilies. In sharp contrast, relationships among the superfamilies of more advanced moths and butterflies that comprise the mega-diverse clade Apoditrysia (ca. 145,000 spp.) remain mostly poorly supported. This uncertainty, in turn, limits our ability to discern the origins, ages and evolutionary consequences of traits hypothesized to promote the spectacular diversification of Apoditrysia. Low support along the apoditrysian "backbone" probably reflects rapid diversification. If so, it may be feasible to strengthen resolution by radically increasing the gene sample, but case studies have been few. We explored the potential of next-generation sequencing to conclusively resolve apoditrysian relationships. We used transcriptome RNA-Seq to generate 1579 putatively orthologous gene sequences across a broad sample of 40 apoditrysians plus four outgroups, to which we added two taxa from previously published data. Phylogenetic analysis of a 46-taxon, 741-gene matrix, resulting from a strict filter that eliminated ortholog groups containing any apparent paralogs, yielded dramatic overall increase in bootstrap support for deeper nodes within Apoditrysia as compared to results from previous and concurrent 19-gene analyses. High support was restricted mainly to the huge subclade Obtectomera broadly defined, in which 11 of 12 nodes subtending multiple superfamilies had bootstrap support of 100\%. The strongly supported nodes showed little conflict with groupings from previous studies, and were little affected by changes in taxon sampling, suggesting that they reflect true signal rather than artifacts of massive gene sampling. In contrast, strong support was seen at only 2 of 11 deeper nodes among the "lower", non-obtectomeran apoditrysians. 
These represent a much harder phylogenetic problem, for which one path to resolution might include further increase in gene sampling, together with improved orthology assignments.

},
  author = {Adam L. Bazinet and Michael P. Cummings and Mitter, Kim T. and Mitter, Charles W.}
}

% 49828: the exported date "Jan-01-2013" looks like a placeholder (1 January)
% rather than a publication month, so it has been dropped.
@article {49828,
  title = {Contribution of nucleosome binding preferences and co-occurring {DNA} sequences to transcription factor binding},
  journal = {BMC Genomics},
  volume = {14},
  year = {2013},
  pages = {428},
  issn = {1471-2164},
  doi = {10.1186/1471-2164-14-428},
  url = {http://bmcgenomics.biomedcentral.com/articles/10.1186/1471-2164-14-428},
  author = {He, Ximiao and Chatterjee, Raghunath and John, Sam and Bravo, Hector and Sathyanarayana, B K and Biddie, Simon C and FitzGerald, Peter C and Stamatoyannopoulos, John A and Hager, Gordon L and Vinson, Charles}
}

% 38194: journal name was doubled by the export ("BiometricsBiometrics"); the DOI
% was stored in "type" and the PubMed link in "note".
@article {38194,
  title = {A decision-theory approach to interpretable set analysis for high-dimensional data},
  journal = {Biometrics},
  volume = {69},
  year = {2013},
  url = {http://www.ncbi.nlm.nih.gov/pubmed/23909925?dopt=Abstract},
  doi = {10.1111/biom.12060},
  abstract = {A key problem in high-dimensional significance analysis is to find pre-defined sets that show enrichment for a statistical signal of interest; the classic example is the enrichment of gene sets for differentially expressed genes. Here, we propose a new decision-theory approach to the analysis of gene sets which focuses on estimating the fraction of non-null variables in a set. We introduce the idea of "atoms," non-overlapping sets based on the original pre-defined set annotations. Our approach focuses on finding the union of atoms that minimizes a weighted average of the number of false discoveries and missed discoveries. We introduce a new false discovery rate for sets, called the atomic false discovery rate (afdr), and prove that the optimal estimator in our decision-theory framework is to threshold the afdr. 
These results provide a coherent and interpretable framework for the analysis of sets that addresses the key issues of overlapping annotations and difficulty in interpreting p values in both competitive and self-contained tests. We illustrate our method and compare it to a popular existing method using simulated examples, as well as gene-set and brain ROI data analyses.},
  author = {Boca, Simina M. and H{\'e}ctor Corrada Bravo and Caffo, Brian and Leek, Jeffrey T. and Parmigiani, Giovanni}
}

% 38203: the export stored the first page in "chapter" and the journal ISSN in
% "isbn"; both moved to the fields an article entry actually uses.
% NOTE(review): end page 1202 taken from the published citation -- confirm.
@article {38203,
  title = {Differential abundance analysis for microbial marker-gene surveys},
  journal = {Nature Methods},
  volume = {10},
  year = {2013},
  pages = {1200--1202},
  publisher = {Nature Publishing Group},
  abstract = {We introduce a methodology to assess differential abundance in sparse high-throughput microbial marker-gene survey data. Our approach, implemented in the metagenomeSeq Bioconductor package, relies on a novel normalization technique and a statistical model that accounts for undersampling{\textemdash}a common feature of large-scale marker-gene studies. Using simulated data and several published microbiota data sets, we show that metagenomeSeq outperforms the tools currently used in this field.},
  issn = {1548-7091},
  doi = {10.1038/nmeth.2658},
  url = {http://www.nature.com/nmeth/journal/v10/n12/full/nmeth.2658.html},
  author = {Joseph N. Paulson and Stine, O. Colin and H{\'e}ctor Corrada Bravo and M. Pop}
}

% 38284: journal name was doubled by the export; the DOI was stored in "type".
@article {38284,
  title = {Genetic loci associated with delayed clearance of {Plasmodium} falciparum following artemisinin treatment in {Southeast Asia}},
  journal = {Proceedings of the National Academy of Sciences of the United States of America},
  volume = {110},
  year = {2013},
  doi = {10.1073/pnas.1211205110},
  abstract = {The recent emergence of artemisinin-resistant Plasmodium falciparum malaria in western Cambodia could threaten prospects for malaria elimination. 
Identification of the genetic basis of resistance would provide tools for molecular surveillance, aiding efforts to contain resistance. Clinical trials of artesunate efficacy were conducted in Bangladesh, in northwestern Thailand near the Myanmar border, and at two sites in western Cambodia. Parasites collected from trial participants were genotyped at 8,079 single nucleotide polymorphisms (SNPs) using a P. falciparum-specific SNP array. Parasite genotypes were examined for signatures of recent positive selection and association with parasite clearance phenotypes to identify regions of the genome associated with artemisinin resistance. Four SNPs on chromosomes 10 (one), 13 (two), and 14 (one) were significantly associated with delayed parasite clearance. The two SNPs on chromosome 13 are in a region of the genome that appears to be under strong recent positive selection in Cambodia. The SNPs on chromosomes 10 and 13 lie in or near genes involved in postreplication repair, a DNA damage-tolerance pathway. Replication and validation studies are needed to refine the location of loci responsible for artemisinin resistance and to understand the mechanism behind it; however, two SNPs on chromosomes 10 and 13 may be useful markers of delayed parasite clearance in surveillance for artemisinin resistance in Southeast Asia.}, author = {Takala-Harrison, Shannon and Clark, Taane G. and Jacob, Christopher G. and Michael P. Cummings and Miotto, Olivo and Dondorp, Arjen M. and Fukuda, Mark M. and Nosten, Francois and Noedl, Harald and Imwong, Mallika and Bethell, Delia and Se, Youry and Lon, Chanthap and Tyner, Stuart D. and Saunders, David L. and Socheat, Duong and Ariey, Frederic and Phyo, Aung Pyae and Starzengruber, Peter and Fuehrer, Hans-Peter and Swoboda, Paul and Stepniewska, Kasia and Flegg, Jennifer and Arze, Cesar and Cerqueira, Gustavo C. and Silva, Joana C. and Ricklefs, Stacy M. and Porcella, Stephen F. and Stephens, Robert M. and Adams, Matthew and Kenefic, Leo J. 
and Campino, Susana and Auburn, Sarah and Macinnis, Bronwyn and Kwiatkowski, Dominic P. and Su, Xin-Zhuan and White, Nicholas J. and Ringwald, Pascal and Plowe, Christopher V.} }

% 49858: "e00067-12" is a single article number, not a page range, so it is
% written with a plain hyphen instead of an en dash.
@article {49858,
  title = {Genome sequence of the attenuated {Carbosap} vaccine strain of {Bacillus} anthracis},
  journal = {Genome Announcements},
  volume = {1},
  year = {2013},
  pages = {e00067-12},
  author = {Harrington, Robin and Ondov, Brian D and Radune, Diana and Friss, Mary Beth and Klubnik, Joy and Diviak, Lynn and Hnath, Jonathan and Cendrowski, Stephen R and Blank, Thomas E and Karaolis, David and Todd Treangen}
}

% 38306: journal name was doubled by the export; 2169-8287 has the form of an
% ISSN (serial identifier), so it moves from "isbn" to "issn".
@article {38306,
  title = {Genome sequencing of four strains of {Rickettsia} prowazekii, the causative agent of epidemic typhus, including one flying squirrel isolate},
  journal = {Genome Announcements},
  volume = {1},
  year = {2013},
  publisher = {American Society for Microbiology},
  issn = {2169-8287},
  author = {Bishop-Lilly, Kimberly A. and Ge, Hong and Butani, Amy and Osborne, Brian and Verratti, Kathleen and Mokashi, Vishwesh and Nagarajan, Niranjan and M. Pop and Read, Timothy D. and Richards, Allen L.}
}

% 38358: journal name was doubled by the export; the DOI was stored in "type".
@article {38358,
  title = {A large-scale, higher-level, molecular phylogenetic study of the insect order {Lepidoptera} (moths and butterflies)},
  journal = {PLoS One},
  volume = {8},
  year = {2013},
  doi = {10.1371/journal.pone.0058568},
  abstract = {

BACKGROUND: Higher-level relationships within the Lepidoptera, and particularly within the species-rich subclade Ditrysia, are generally not well understood, although recent studies have yielded progress. We present the most comprehensive molecular analysis of lepidopteran phylogeny to date, focusing on relationships among superfamilies.

METHODOLOGY/PRINCIPAL FINDINGS: 483 taxa spanning 115 of 124 families were sampled for 19 protein-coding nuclear genes, from which maximum likelihood tree estimates and bootstrap percentages were obtained using GARLI. Assessment of heuristic search effectiveness showed that better trees and higher bootstrap percentages probably remain to be discovered even after 1000 or more search replicates, but further search proved impractical even with grid computing. Other analyses explored the effects of sampling nonsynonymous change only versus partitioned and unpartitioned total nucleotide change; deletion of rogue taxa; and compositional heterogeneity. Relationships among the non-ditrysian lineages previously inferred from morphology were largely confirmed, plus some new ones, with strong support. Robust support was also found for divergences among non-apoditrysian lineages of Ditrysia, but only rarely so within Apoditrysia. Paraphyly for Tineoidea is strongly supported by analysis of nonsynonymous-only signal; conflicting, strong support for tineoid monophyly when synonymous signal was added back is shown to result from compositional heterogeneity. CONCLUSIONS/SIGNIFICANCE: Support for among-superfamily relationships outside the Apoditrysia is now generally strong. Comparable support is mostly lacking within Apoditrysia, but dramatically increased bootstrap percentages for some nodes after rogue taxon removal, and concordance with other evidence, strongly suggest that our picture of apoditrysian phylogeny is approximately correct. This study highlights the challenge of finding optimal topologies when analyzing hundreds of taxa. It also shows that some nodes get strong support only when analysis is restricted to nonsynonymous change, while total change is necessary for strong support of others. Thus, multiple types of analyses will be necessary to fully resolve lepidopteran phylogeny.

},
  keywords = {Animals, Butterflies, Moths, Phylogeny},
  author = {Regier, Jerome C. and Mitter, Charles and Zwick, Andreas and Adam L. Bazinet and Michael P. Cummings and Kawahara, Akito Y. and Sohn, Jae-Cheon and Zwickl, Derrick J. and Cho, Soowon and Davis, Donald R. and Baixeras, Joaquin and Brown, John and Parr, Cynthia and Weller, Susan and Lees, David C. and Mitter, Kim T.}
}

% 38509: journal name was doubled by the export; the DOI was stored in "type"
% and the PubMed link in "note".
@article {38509,
  title = {Somatic alterations contributing to metastasis of a castration-resistant prostate cancer},
  journal = {Human Mutation},
  volume = {34},
  year = {2013},
  url = {http://www.ncbi.nlm.nih.gov/pubmed/23636849?dopt=Abstract},
  doi = {10.1002/humu.22346},
  abstract = {Metastatic castration-resistant prostate cancer (mCRPC) is a lethal disease, and molecular markers that differentiate indolent from aggressive subtypes are needed. We sequenced the exomes of five metastatic tumors and healthy kidney tissue from an index case with mCRPC to identify lesions associated with disease progression and metastasis. An Ashkenazi Jewish (AJ) germline founder mutation, del185AG in BRCA1, was observed and AJ ancestry was confirmed. Sixty-two somatic variants altered proteins in tumors, including cancer-associated genes, TMPRSS2-ERG, PBRM1, and TET2. The majority (n = 53) of somatic variants were present in all metastases and only a subset (n = 31) was observed in the primary tumor. Integrating tumor next-generation sequencing and DNA copy number showed somatic loss of BRCA1 and TMPRSS2-ERG. We sequenced 19 genes with deleterious mutations in the index case in additional mCRPC samples and detected a frameshift, two somatic missense alterations, tumor loss of heterozygosity, and combinations of germline missense SNPs in TET2. In summary, genetic analysis of metastases from an index case permitted us to infer a chronology for the clonal spread of disease based on sequential accrual of somatic lesions. 
The role of TET2 in mCRPC deserves additional analysis and may define a subset of metastatic disease.},
  author = {Nickerson, Michael L. and Im, Kate M. and Misner, Kevin J. and Tan, Wei and Lou, Hong and Gold, Bert and Wells, David W. and H{\'e}ctor Corrada Bravo and Fredrikson, Karin M. and Harkins, Timothy T. and Milos, Patrice and Zbar, Berton and Linehan, W. Marston and Yeager, Meredith and Andresson, Thorkell and Dean, Michael and Bova, G. Steven}
}

% 38529 describes the same article as 49764 (identical DOI); both keys are kept so
% existing citations keep resolving. Journal name was doubled by the export; the
% DOI was stored in "type" and the PubMed link in "note".
@article {38529,
  title = {{TIGRFAMs} and {Genome Properties} in 2013},
  journal = {Nucleic Acids Research},
  volume = {41},
  year = {2013},
  url = {http://www.ncbi.nlm.nih.gov/pubmed/23197656?dopt=Abstract},
  doi = {10.1093/nar/gks1234},
  abstract = {TIGRFAMs, available online at http://www.jcvi.org/tigrfams is a database of protein family definitions. Each entry features a seed alignment of trusted representative sequences, a hidden Markov model (HMM) built from that alignment, cutoff scores that let automated annotation pipelines decide which proteins are members, and annotations for transfer onto member proteins. Most TIGRFAMs models are designated equivalog, meaning they assign a specific name to proteins conserved in function from a common ancestral sequence. Models describing more functionally heterogeneous families are designated subfamily or domain, and assign less specific but more widely applicable annotations. The Genome Properties database, available at http://www.jcvi.org/genome-properties, specifies how computed evidence, including TIGRFAMs HMM results, should be used to judge whether an enzymatic pathway, a protein complex or another type of molecular subsystem is encoded in a genome. TIGRFAMs and Genome Properties content are developed in concert because subsystems reconstruction for large numbers of genomes guides selection of seed alignment sequences and cutoff values during protein family construction. Both databases specialize heavily in bacterial and archaeal subsystems. 
At present, 4284 models appear in TIGRFAMs, while 628 systems are described by Genome Properties. Content derives both from subsystem discovery work and from biocuration of the scientific literature.},
  keywords = {Databases, Protein, Genome, Archaeal, Genome, Bacterial, Genomics, Internet, Markov chains, Molecular Sequence Annotation, Proteins, sequence alignment},
  author = {Haft, Daniel H. and J. Selengut and Richter, Roland A. and Harkins, Derek and Basu, Malay K. and Beck, Erin}
}

% 49764: month reduced to the standard macro (export had "2013 Jan") and the
% abbreviated page range "D387-95" expanded.
@article {49764,
  title = {{TIGRFAMs} and {Genome Properties} in 2013},
  journal = {Nucleic Acids Research},
  volume = {41},
  year = {2013},
  month = jan,
  pages = {D387--D395},
  abstract = {

TIGRFAMs, available online at http://www.jcvi.org/tigrfams is a database of protein family definitions. Each entry features a seed alignment of trusted representative sequences, a hidden Markov model (HMM) built from that alignment, cutoff scores that let automated annotation pipelines decide which proteins are members, and annotations for transfer onto member proteins. Most TIGRFAMs models are designated equivalog, meaning they assign a specific name to proteins conserved in function from a common ancestral sequence. Models describing more functionally heterogeneous families are designated subfamily or domain, and assign less specific but more widely applicable annotations. The Genome Properties database, available at http://www.jcvi.org/genome-properties, specifies how computed evidence, including TIGRFAMs HMM results, should be used to judge whether an enzymatic pathway, a protein complex or another type of molecular subsystem is encoded in a genome. TIGRFAMs and Genome Properties content are developed in concert because subsystems reconstruction for large numbers of genomes guides selection of seed alignment sequences and cutoff values during protein family construction. Both databases specialize heavily in bacterial and archaeal subsystems. At present, 4284 models appear in TIGRFAMs, while 628 systems are described by Genome Properties. Content derives both from subsystem discovery work and from biocuration of the scientific literature.

},
  keywords = {Databases, Protein, Genome, Archaeal, Genome, Bacterial, Genomics, Internet, Markov chains, Molecular Sequence Annotation, Proteins, sequence alignment},
  issn = {1362-4962},
  doi = {10.1093/nar/gks1234},
  author = {Haft, Daniel H and Selengut, Jeremy D and Richter, Roland A and Harkins, Derek and Basu, Malay K and Beck, Erin}
}

% 38128: the export concatenated the full and abbreviated journal names twice
% ("Systematic BiologySyst Biol..."); the DOI was stored in "type".
@article {38128,
  title = {{BEAGLE}: An Application Programming Interface and High-Performance Computing Library for Statistical Phylogenetics},
  journal = {Systematic Biology},
  volume = {61},
  year = {2012},
  doi = {10.1093/sysbio/syr100},
  abstract = {Phylogenetic inference is fundamental to our understanding of most aspects of the origin and evolution of life, and in recent years, there has been a concentration of interest in statistical approaches such as Bayesian inference and maximum likelihood estimation. Yet, for large data sets and realistic or interesting models of evolution, these approaches remain computationally demanding. High-throughput sequencing can yield data for thousands of taxa, but scaling to such problems using serial computing often necessitates the use of nonstatistical or approximate approaches. The recent emergence of graphics processing units (GPUs) provides an opportunity to leverage their excellent floating-point computational performance to accelerate statistical phylogenetic inference. A specialized library for phylogenetic calculation would allow existing software packages to make more effective use of available computer hardware, including GPUs. Adoption of a common library would also make it easier for other emerging computing architectures, such as field programmable gate arrays, to be used in the future. We present BEAGLE, an application programming interface (API) and library for high-performance statistical phylogenetic inference. The API provides a uniform interface for performing phylogenetic likelihood calculations on a variety of compute hardware platforms. 
The library includes a set of efficient implementations and can currently exploit hardware including GPUs using NVIDIA CUDA, central processing units (CPUs) with Streaming SIMD Extensions and related processor supplementary instruction sets, and multicore CPUs via OpenMP. To demonstrate the advantages of a common API, we have incorporated the library into several popular phylogenetic software packages. The BEAGLE library is free open source software licensed under the Lesser GPL and available from http://beagle-lib.googlecode.com. An example client program is available as public domain software.},
  keywords = {Bayesian phylogenetics, gpu, maximum likelihood, parallel computing},
  issn = {1063-5157, 1076-836X},
  author = {Ayres, Daniel L. and Darling, Aaron and Zwickl, Derrick J. and Beerli, Peter and Holder, Mark T. and Lewis, Paul O. and Huelsenbeck, John P. and Ronquist, Fredrik and Swofford, David L. and Michael P. Cummings and Rambaut, Andrew and Suchard, Marc A.}
}

% 38155: journal name was doubled by the export.
@article {38155,
  title = {A comparative evaluation of sequence classification programs},
  journal = {BMC Bioinformatics},
  volume = {13},
  year = {2012},
  abstract = {Background A fundamental problem in modern genomics is to taxonomically or functionally classify DNA sequence fragments derived from environmental sampling (i.e., metagenomics). Several different methods have been proposed for doing this effectively and efficiently, and many have been implemented in software. In addition to varying their basic algorithmic approach to classification, some methods screen sequence reads for {\textquoteright}barcoding genes{\textquoteright} like 16S rRNA, or various types of protein-coding genes. Due to the sheer number and complexity of methods, it can be difficult for a researcher to choose one that is well-suited for a particular analysis. 
Results We divided the very large number of programs that have been released in recent years for solving the sequence classification problem into three main categories based on the general algorithm they use to compare a query sequence against a database of sequences. We also evaluated the performance of the leading programs in each category on data sets whose taxonomic and functional composition is known. Conclusions We found significant variability in classification accuracy, precision, and resource consumption of sequence classification programs when used to analyze various metagenomics data sets. However, we observe some general trends and patterns that will be useful to researchers who use sequence classification programs.},
  author = {Adam L. Bazinet and Michael P. Cummings}
}

% 49743: month reduced to the standard macro (export had "2012 May") and the
% abbreviated page range "518-25" expanded; record 49553 describes the same article.
@article {49743,
  title = {Drosophila {Src} regulates anisotropic apical surface growth to control epithelial tube size},
  journal = {Nature Cell Biology},
  volume = {14},
  year = {2012},
  month = may,
  pages = {518--525},
  abstract = {

Networks of epithelial and endothelial tubes are essential for the function of organs such as the lung, kidney and vascular system. The sizes and shapes of these tubes are highly regulated to match their individual functions. Defects in tube size can cause debilitating diseases such as polycystic kidney disease and ischaemia. It is therefore critical to understand how tube dimensions are regulated. Here we identify the tyrosine kinase Src as an instructive regulator of epithelial-tube length in the Drosophila tracheal system. Loss-of-function Src42 mutations shorten tracheal tubes, whereas Src42 overexpression elongates them. Surprisingly, Src42 acts distinctly from known tube-size pathways and regulates both the amount of apical surface growth and, with the conserved formin dDaam, the direction of growth. Quantitative three-dimensional image analysis reveals that Src42- and dDaam-mutant tracheal cells expand more in the circumferential than the axial dimension, resulting in tubes that are shorter in length-but larger in diameter-than wild-type tubes. Thus, Src42 and dDaam control tube dimensions by regulating the direction of anisotropic growth, a mechanism that has not previously been described.

}, keywords = {Animals, Drosophila, Epithelium, src-Family Kinases}, issn = {1476-4679}, doi = {10.1038/ncb2467}, author = {Nelson, Kevin S and Khan, Zia and Moln{\'a}r, Imre and Mih{\'a}ly, J{\'o}zsef and Kaschube, Matthias and Beitel, Greg J} } @article {49553, title = {Drosophila Src regulates anisotropic apical surface growth to control epithelial tube size}, volume = {14}, year = {2012}, month = {Jan-03-2014}, pages = {518 - 525}, issn = {1465-7392}, doi = {10.1038/ncb2467}, url = {http://www.nature.com/doifinder/10.1038/ncb2467}, author = {Nelson, Kevin S. and Khan, Zia and Moln{\'a}r, Imre and Mih{\'a}ly, J{\'o}zsef and Kaschube, Matthias and Beitel, Greg J.} } @article {38264, title = {A framework for human microbiome research}, journal = {Nature}, volume = {486}, year = {2012}, author = {Meth{\'e}, B. A. and Nelson, K. E. and M. Pop and Creasy, H. H. and Giglio, M. G. and Huttenhower, C. and Gevers, D. and Petrosino, J. F. and Abubucker, S. and Badger, J. H. and others} } @article {38276, title = {Gene expression anti-profiles as a basis for accurate universal cancer signatures}, journal = {BMC Bioinformatics}, volume = {13}, year = {2012}, note = {http://www.ncbi.nlm.nih.gov/pubmed/23088656?dopt=Abstract}, type = {10.1186/1471-2105-13-272}, abstract = {BACKGROUND: Early screening for cancer is arguably one of the greatest public health advances over the last fifty years. However, many cancer screening tests are invasive (digital rectal exams), expensive (mammograms, imaging) or both (colonoscopies). This has spurred growing interest in developing genomic signatures that can be used for cancer diagnosis and prognosis. However, progress has been slowed by heterogeneity in cancer profiles and the lack of effective computational prediction tools for this type of data. 
RESULTS: We developed anti-profiles as a first step towards translating experimental findings suggesting that stochastic across-sample hyper-variability in the expression of specific genes is a stable and general property of cancer into predictive and diagnostic signatures. Using single-chip microarray normalization and quality assessment methods, we developed an anti-profile for colon cancer in tissue biopsy samples. To demonstrate the translational potential of our findings, we applied the signature developed in the tissue samples, without any further retraining or normalization, to screen patients for colon cancer based on genomic measurements from peripheral blood in an independent study (AUC of 0.89). This method achieved higher accuracy than the signature underlying commercially available peripheral blood screening tests for colon cancer (AUC of 0.81). We also confirmed the existence of hyper-variable genes across a range of cancer types and found that a significant proportion of tissue-specific genes are hyper-variable in cancer. Based on these observations, we developed a universal cancer anti-profile that accurately distinguishes cancer from normal regardless of tissue type (ten-fold cross-validation AUC > 0.92). CONCLUSIONS: We have introduced anti-profiles as a new approach for developing cancer genomic signatures that specifically takes advantage of gene expression heterogeneity. We have demonstrated that anti-profiles can be successfully applied to develop peripheral-blood based diagnostics for cancer and used anti-profiles to develop a highly accurate universal cancer signature. By using single-chip normalization and quality assessment methods, no further retraining of signatures developed by the anti-profile approach would be required before their application in clinical settings. 
Our results suggest that anti-profiles may be used to develop inexpensive and non-invasive universal cancer screening tests.}, keywords = {Area Under Curve, Colonic Neoplasms, Gene Expression Profiling, Genetic Variation, Genomics, HUMANS, Oligonucleotide Array Sequence Analysis, Prognosis, Transcriptome, Tumor Markers, Biological}, author = {H{\'e}ctor Corrada Bravo and Pihur, Vasyl and McCall, Matthew and Irizarry, Rafael A. and Leek, Jeffrey T.} } @article {49740, title = {Global secretome analysis identifies novel mediators of bone metastasis.}, journal = {Cell Res}, volume = {22}, year = {2012}, month = {2012 Sep}, pages = {1339-55}, abstract = {

Bone is the one of the most common sites of distant metastasis of solid tumors. Secreted proteins are known to influence pathological interactions between metastatic cancer cells and the bone stroma. To comprehensively profile secreted proteins associated with bone metastasis, we used quantitative and non-quantitative mass spectrometry to globally analyze the secretomes of nine cell lines of varying bone metastatic ability from multiple species and cancer types. By comparing the secretomes of parental cells and their bone metastatic derivatives, we identified the secreted proteins that were uniquely associated with bone metastasis in these cell lines. We then incorporated bioinformatic analyses of large clinical metastasis datasets to obtain a list of candidate novel bone metastasis proteins of several functional classes that were strongly associated with both clinical and experimental bone metastasis. Functional validation of selected proteins indicated that in vivo bone metastasis can be promoted by high expression of (1) the salivary cystatins CST1, CST2, and CST4; (2) the plasminogen activators PLAT and PLAU; or (3) the collagen functionality proteins PLOD2 and COL6A1. Overall, our study has uncovered several new secreted mediators of bone metastasis and therefore demonstrated that secretome analysis is a powerful method for identification of novel biomarkers and candidate therapeutic targets.

}, keywords = {Animals, Biomarkers, Tumor, Bone Neoplasms, Cell Line, Tumor, Collagen Type VI, Computational Biology, HUMANS, Mass Spectrometry, Mice, Neoplasms, Plasminogen Activators, Procollagen-Lysine, 2-Oxoglutarate 5-Dioxygenase, Proteome, proteomics, Salivary Cystatins}, issn = {1748-7838}, doi = {10.1038/cr.2012.89}, author = {Blanco, Mario Andres and LeRoy, Gary and Khan, Zia and Ale{\v c}kovi{\'c}, Ma{\v s}a and Zee, Barry M and Garcia, Benjamin A and Kang, Yibin} } @article {49549, title = {Global secretome analysis identifies novel mediators of bone metastasis}, volume = {22}, year = {2012}, month = {Dec-09-2012}, pages = {1339 - 1355}, issn = {1001-0602}, doi = {10.1038/cr.2012.89}, url = {http://www.nature.com/doifinder/10.1038/cr.2012.89}, author = {Blanco, Mario Andres and LeRoy, Gary and Khan, Zia and Ale{\v c}kovi{\'c}, Ma{\v s}a and Zee, Barry M and Garcia, Benjamin A and Kang, Yibin} } @article {38333, title = {Identification of Coli Surface Antigen 23, a Novel Adhesin of Enterotoxigenic Escherichia coli}, journal = {Infection and Immunity}, volume = {80}, year = {2012}, publisher = {American Society for Microbiology}, author = {Del Canto, F. and Botkin, D. J. and Valenzuela, P. and Popov, V. and Ruiz-Perez, F. and Nataro, J. P. and Levine, M. M. and Stine, O. C. and M. Pop and Torres, A. G. and others} } @article {38352, title = {InterPro in 2011: new developments in the family and domain prediction database}, journal = {Nucleic Acids Research}, volume = {40}, year = {2012}, note = {http://www.ncbi.nlm.nih.gov/pubmed/22096229?dopt=Abstract}, type = {10.1093/nar/gkr948}, abstract = {InterPro (http://www.ebi.ac.uk/interpro/) is a database that integrates diverse information about protein families, domains and functional sites, and makes it freely available to the public via Web-based interfaces and services. 
Central to the database are diagnostic models, known as signatures, against which protein sequences can be searched to determine their potential function. InterPro has utility in the large-scale analysis of whole genomes and meta-genomes, as well as in characterizing individual protein sequences. Herein we give an overview of new developments in the database and its associated software since 2009, including updates to database content, curation processes and Web and programmatic interfaces.}, keywords = {Databases, Protein, Protein Structure, Tertiary, Proteins, Sequence Analysis, Protein, software, Terminology as Topic, User-Computer Interface}, author = {Hunter, Sarah and Jones, Philip and Mitchell, Alex and Apweiler, Rolf and Attwood, Teresa K. and Bateman, Alex and Bernard, Thomas and Binns, David and Bork, Peer and Burge, Sarah and de Castro, Edouard and Coggill, Penny and Corbett, Matthew and Das, Ujjwal and Daugherty, Louise and Duquenne, Lauranne and Finn, Robert D. and Fraser, Matthew and Gough, Julian and Haft, Daniel and Hulo, Nicolas and Kahn, Daniel and Kelly, Elizabeth and Letunic, Ivica and Lonsdale, David and Lopez, Rodrigo and Madera, Martin and Maslen, John and McAnulla, Craig and McDowall, Jennifer and McMenamin, Conor and Mi, Huaiyu and Mutowo-Muellenet, Prudence and Mulder, Nicola and Natale, Darren and Orengo, Christine and Pesseat, Sebastien and Punta, Marco and Quinn, Antony F. and Rivoire, Catherine and Sangrador-Vegas, Amaia and J. Selengut and Sigrist, Christian J. A. and Scheremetjew, Maxim and Tate, John and Thimmajanarthanan, Manjulapramila and Thomas, Paul D. and Wu, Cathy H. and Yeats, Corin and Yong, Siew-Yit} } @article {49765, title = {InterPro in 2011: new developments in the family and domain prediction database.}, journal = {Nucleic Acids Res}, volume = {40}, year = {2012}, month = {2012 Jan}, pages = {D306-12}, abstract = {

InterPro (http://www.ebi.ac.uk/interpro/) is a database that integrates diverse information about protein families, domains and functional sites, and makes it freely available to the public via Web-based interfaces and services. Central to the database are diagnostic models, known as signatures, against which protein sequences can be searched to determine their potential function. InterPro has utility in the large-scale analysis of whole genomes and meta-genomes, as well as in characterizing individual protein sequences. Herein we give an overview of new developments in the database and its associated software since 2009, including updates to database content, curation processes and Web and programmatic interfaces.

}, keywords = {Databases, Protein, Protein Structure, Tertiary, Proteins, Sequence Analysis, Protein, software, Terminology as Topic, User-Computer Interface}, issn = {1362-4962}, doi = {10.1093/nar/gkr948}, author = {Hunter, Sarah and Jones, Philip and Mitchell, Alex and Apweiler, Rolf and Attwood, Teresa K and Bateman, Alex and Bernard, Thomas and Binns, David and Bork, Peer and Burge, Sarah and de Castro, Edouard and Coggill, Penny and Corbett, Matthew and Das, Ujjwal and Daugherty, Louise and Duquenne, Lauranne and Finn, Robert D and Fraser, Matthew and Gough, Julian and Haft, Daniel and Hulo, Nicolas and Kahn, Daniel and Kelly, Elizabeth and Letunic, Ivica and Lonsdale, David and Lopez, Rodrigo and Madera, Martin and Maslen, John and McAnulla, Craig and McDowall, Jennifer and McMenamin, Conor and Mi, Huaiyu and Mutowo-Muellenet, Prudence and Mulder, Nicola and Natale, Darren and Orengo, Christine and Pesseat, Sebastien and Punta, Marco and Quinn, Antony F and Rivoire, Catherine and Sangrador-Vegas, Amaia and Selengut, Jeremy D and Sigrist, Christian J A and Scheremetjew, Maxim and Tate, John and Thimmajanarthanan, Manjulapramila and Thomas, Paul D and Wu, Cathy H and Yeats, Corin and Yong, Siew-Yit} } @article {49517, title = {A molecular phylogeny for the leaf-roller moths (Lepidoptera: Tortricidae) and its implications for classification and life history evolution.}, journal = {PloS one}, volume = {7}, year = {2012}, month = {2012}, pages = {e35574}, abstract = {Tortricidae, one of the largest families of microlepidopterans, comprise about 10,000 described species worldwide, including important pests, biological control agents and experimental models. Understanding of tortricid phylogeny, the basis for a predictive classification, is currently provisional. 
We present the first detailed molecular estimate of relationships across the tribes and subfamilies of Tortricidae, assess its concordance with previous morphological evidence, and re-examine postulated evolutionary trends in host plant use and biogeography.}, author = {Regier, Jerome C and Brown, John W and Mitter, Charles and Baixeras, Joaquin and Cho, Soowon and Michael P. Cummings and Zwick, Andreas} } @article {38412, title = {Occurrence of protozoans \& their limnological relationships in some ponds of Mathbaria, Bangladesh}, journal = {University Journal of Zoology, Rajshahi University}, volume = {29}, year = {2012}, isbn = {1023-6104}, author = {Mozumder, P. K. and Banu, M. A. and Naser, M. N. and Ali, M. S. and Alam, M. and Sack, R. B. and Rita R. Colwell and Huq, A.} } @article {38421, title = {The partitioned LASSO-patternsearch algorithm with application to gene expression data}, journal = {BMC Bioinformatics}, volume = {13}, year = {2012}, note = {http://www.ncbi.nlm.nih.gov/pubmed/22587526?dopt=Abstract}, type = {10.1186/1471-2105-13-98}, abstract = {BACKGROUND: In systems biology, the task of reverse engineering gene pathways from data has been limited not just by the curse of dimensionality (the interaction space is huge) but also by systematic error in the data. The gene expression barcode reduces spurious association driven by batch effects and probe effects. The binary nature of the resulting expression calls lends itself perfectly to modern regularization approaches that thrive in high-dimensional settings. RESULTS: The Partitioned LASSO-Patternsearch algorithm is proposed to identify patterns of multiple dichotomous risk factors for outcomes of interest in genomic studies. A partitioning scheme is used to identify promising patterns by solving many LASSO-Patternsearch subproblems in parallel. 
All variables that survive this stage proceed to an aggregation stage where the most significant patterns are identified by solving a reduced LASSO-Patternsearch problem in just these variables. This approach was applied to genetic data sets with expression levels dichotomized by gene expression bar code. Most of the genes and second-order interactions thus selected and are known to be related to the outcomes. CONCLUSIONS: We demonstrate with simulations and data analyses that the proposed method not only selects variables and patterns more accurately, but also provides smaller models with better prediction accuracy, in comparison to several alternative methodologies.}, keywords = {algorithms, Breast Neoplasms, Computer simulation, Female, Gene expression, Gene Expression Profiling, Genomics, HUMANS, Models, Genetic}, author = {Shi, Weiliang and Wahba, Grace and Irizarry, Rafael A. and H{\'e}ctor Corrada Bravo and Wright, Stephen J.} } @article {49531, title = {Plasmodium falciparum merozoite surface protein 1 blocks the proinflammatory protein S100P.}, volume = {109}, year = {2012}, month = {2012 Apr 3}, pages = {5429-34}, abstract = {

The malaria parasite, Plasmodium falciparum, and the human immune system have coevolved to ensure that the parasite is not eliminated and reinfection is not resisted. This relationship is likely mediated through a myriad of host-parasite interactions, although surprisingly few such interactions have been identified. Here we show that the 33-kDa fragment of P. falciparum merozoite surface protein 1 (MSP1(33)), an abundant protein that is shed during red blood cell invasion, binds to the proinflammatory protein, S100P. MSP1(33) blocks S100P-induced NFκB activation in monocytes and chemotaxis in neutrophils. Remarkably, S100P binds to both dimorphic alleles of MSP1, estimated to have diverged >27 Mya, suggesting an ancient, conserved relationship between these parasite and host proteins that may serve to attenuate potentially damaging inflammatory responses.

}, keywords = {Amino Acid Sequence, Animals, Calcium-Binding Proteins, Chromatography, Gel, Electrophoresis, Polyacrylamide Gel, Enzyme-Linked Immunosorbent Assay, HUMANS, Merozoite Surface Protein 1, Microscopy, Confocal, Molecular Sequence Data, Neoplasm Proteins, Plasmodium falciparum, Sequence Homology, Amino Acid, Surface Plasmon Resonance}, issn = {1091-6490}, doi = {10.1073/pnas.1202689109}, author = {Waisberg, Michael and Cerqueira, Gustavo C and Yager, Stephanie B and Francischetti, Ivo M B and Lu, Jinghua and Gera, Nidhi and Srinivasan, Prakash and Miura, Kazutoyo and Rada, Balazs and Lukszo, Jan and Barbian, Kent D and Leto, Thomas L and Porcella, Stephen F and Narum, David L and El-Sayed, Najib and Miller, Louis H and Pierce, Susan K} } @article {49547, title = {Quantitative measurement of allele-specific protein expression in a diploid yeast hybrid by LC-MS}, journal = {Molecular Systems Biology}, volume = {8}, year = {2012}, month = {Feb-08-2013}, doi = {10.1038/msb.2012.34}, url = {http://msb.embopress.org/cgi/doi/10.1038/msb.2012.34}, author = {Khan, Zia and Bloom, Joshua S and Amini, Sasan and Singh, Mona and Perlman, David H and Caudy, Amy A and Kruglyak, Leonid} } @article {49548, title = {Quantitative measurement of allele-specific protein expression in a diploid yeast hybrid by LC-MS.}, volume = {8}, year = {2012}, month = {2012}, pages = {602}, abstract = {

Understanding the genetic basis of gene regulatory variation is a key goal of evolutionary and medical genetics. Regulatory variation can act in an allele-specific manner (cis-acting) or it can affect both alleles of a gene (trans-acting). Differential allele-specific expression (ASE), in which the expression of one allele differs from another in a diploid, implies the presence of cis-acting regulatory variation. While microarrays and high-throughput sequencing have enabled genome-wide measurements of transcriptional ASE, methods for measurement of protein ASE (pASE) have lagged far behind. We describe a flexible, accurate, and scalable strategy for measurement of pASE by liquid chromatography-coupled mass spectrometry (LC-MS). We apply this approach to a hybrid between the yeast species Saccharomyces cerevisiae and Saccharomyces bayanus. Our results provide the first analysis of the relative contribution of cis-acting and trans-acting regulatory differences to protein expression divergence between yeast species.

}, keywords = {Alleles, Chromatography, Liquid, Fungal Proteins, Gene Expression Profiling, Gene Expression Regulation, Fungal, HUMANS, Mass Spectrometry, proteomics, Regression Analysis, Saccharomyces, Saccharomyces cerevisiae, Saccharomyces cerevisiae Proteins, Species Specificity}, issn = {1744-4292}, doi = {10.1038/msb.2012.34}, author = {Khan, Zia and Bloom, Joshua S and Amini, Sasan and Singh, Mona and Perlman, David H and Caudy, Amy A and Kruglyak, Leonid} } @article {38510, title = {Speeding Up Particle Trajectory Simulations under Moving Force Fields using GPUs}, journal = {Journal of Computing and Information Science in Engineering}, year = {2012}, abstract = {In this paper, we introduce a GPU-based framework for simulating particle trajectories under both static and dynamic force fields. By exploiting the highly parallel nature of the problem and making efficient use of the available hardware, our simulator exhibits a significant speedup over its CPU-based analog. We apply our framework to a specific experimental simulation: the computation of trapping probabilities associated with micron-sized silica beads in optical trapping workbenches. When evaluating large numbers of trajectories (4096), we see approximately a 356 times speedup of the GPU-based simulator over its CPU-based counterpart.}, author = {Patro, R. and Dickerson, J. P. and Bista, S. and Gupta, S. K. and Varshney, Amitabh} } @article {38516, title = {Structure, function and diversity of the healthy human microbiome}, journal = {Nature}, volume = {486}, year = {2012}, author = {Huttenhower, C. and Gevers, D. and Knight, R. and Abubucker, S. and Badger, J. H. and Chinwalla, A. T. and Creasy, H. H. and Earl, A. M. and Fitzgerald, M. G. and Fulton, R. S. 
and others} } @article {38526, title = {Temporal and Spatial Variability in the Distribution of Vibrio vulnificus in the Chesapeake Bay: A Hindcast Study}, journal = {EcoHealth}, year = {2012}, type = {10.1007/s10393-011-0736-4}, abstract = {Vibrio vulnificus, an estuarine bacterium, is the causative agent of seafood-related gastroenteritis, primary septicemia, and wound infections worldwide. It occurs as part of the normal microflora of coastal marine environments and can be isolated from water, sediment, and oysters. Hindcast prediction was undertaken to determine spatial and temporal variability in the likelihood of occurrence of V. vulnificus in surface waters of the Chesapeake Bay. Hindcast predictions were achieved by forcing a multivariate habitat suitability model with simulated sea surface temperature and salinity in the Bay for the period between 1991 and 2005 and the potential hotspots of occurrence of V. vulnificus in the Chesapeake Bay were identified. The likelihood of occurrence of V. vulnificus during high and low rainfall years was analyzed. From results of the study, it is concluded that hindcast prediction yields an improved understanding of environmental conditions associated with occurrence of V. vulnificus in the Chesapeake Bay.}, author = {Banakar, V. and Constantin de Magny, G. and Jacobs, J. and Murtugudde, R. and Huq, A. and Wood, R. J. and Rita R. Colwell} } @article {38566, title = {Vibrio Cholerae Classical Biotype Strains Reveal Distinct Signatures in Mexico}, journal = {Journal of Clinical Microbiology}, year = {2012}, type = {10.1128/JCM.00189-12}, abstract = {Vibrio cholerae O1 Classical (CL) biotype caused the 5th and 6th, and probably the earlier cholera pandemics, before the El Tor (ET) biotype initiated the 7th pandemic in Asia in the 1970{\textquoteright}s by completely displacing the CL biotype. 
Although the CL biotype was thought to be extinct in Asia, and it had never been reported from Latin America, V. cholerae CL and ET biotypes, including hybrid ET were found associated with endemic cholera in Mexico between 1991 and 1997. In this study, CL biotype strains isolated from endemic cholera in Mexico, between 1983 and 1997 were characterized in terms of major phenotypic and genetic traits, and compared with CL biotype strains isolated in Bangladesh between 1962 and 1989. According to sero- and bio-typing data, all V. cholerae strains tested had the major phenotypic and genotypic characteristics specific for the CL biotype. Antibiograms revealed the majority of the Bangladeshi strains to be resistant to trimethoprim/sulfamethoxazole, furazolidone, ampicillin, and gentamycin, while the Mexican strains were sensitive to all of these drugs, as well as to ciprofloxacin, erythromycin, and tetracycline. Pulsed-field gel electrophoresis (PFGE) of NotI-digested genomic DNA revealed characteristic banding patterns for all the CL biotype strains, although the Mexican strains differed with the Bangladeshi strains in 1-2 DNA bands. The difference may be subtle, but consistent, as confirmed by the sub-clustering patterns in the PFGE-based dendrogram, and can serve as regional signature, suggesting pre-1991 existence and evolution of the CL biotype strains in the Americas, independent from that of Asia.}, isbn = {0095-1137, 1098-660X}, author = {Alam, Munirul and Islam, M. Tarequl and Rashed, Shah Manzur and Johura, Fatema-Tuz and Bhuiyan, Nurul A. and Delgado, Gabriela and Morales, Rosario and Mendez, Jose Luis and Navarro, Armando and Watanabe, Haruo and Hasan, Nur- A. and Rita R. 
Colwell and Cravioto, Alejandro} } @article {49554, title = {Accurate proteome-wide protein quantification from high-resolution 15N mass spectra}, volume = {12}, year = {2011}, month = {Jan-01-2011}, pages = {R122}, issn = {1465-6906}, doi = {10.1186/gb-2011-12-12-r122}, url = {http://genomebiology.com/2012/12/12/R122}, author = {Khan, Zia and Amini, Sasan and Bloom, Joshua S and Ruse, Cristian and Caudy, Amy A and Kruglyak, Leonid and Singh, Mona and Perlman, David H and Tavazoie, Saeed} } @article {49744, title = {Accurate proteome-wide protein quantification from high-resolution 15N mass spectra.}, journal = {Genome Biol}, volume = {12}, year = {2011}, month = {2011}, pages = {R122}, abstract = {

In quantitative mass spectrometry-based proteomics, the metabolic incorporation of a single source of 15N-labeled nitrogen has many advantages over using stable isotope-labeled amino acids. However, the lack of a robust computational framework for analyzing the resulting spectra has impeded wide use of this approach. We have addressed this challenge by introducing a new computational methodology for analyzing 15N spectra in which quantification is integrated with identification. Application of this method to an Escherichia coli growth transition reveals significant improvement in quantification accuracy over previous methods.

}, keywords = {algorithms, Amino Acid Sequence, Bacterial Proteins, Escherichia coli, Isotope Labeling, Mass Spectrometry, Molecular Sequence Data, Nitrogen Isotopes, Proteome, proteomics, Sensitivity and Specificity, software}, issn = {1474-760X}, doi = {10.1186/gb-2011-12-12-r122}, author = {Khan, Zia and Amini, Sasan and Bloom, Joshua S and Ruse, Cristian and Caudy, Amy A and Kruglyak, Leonid and Singh, Mona and Perlman, David H and Tavazoie, Saeed} } @article {38125, title = {Bacillus anthracis comparative genome analysis in support of the Amerithrax investigation}, journal = {Proceedings of the National Academy of Sciences}, volume = {108}, year = {2011}, publisher = {National Acad Sciences}, author = {Rasko, D. A. and Worsham, P. L. and Abshire, T. G. and Stanley, S. T. and Bannan, J. D. and Wilson, M. R. and Langham, R. J. and Decker, R. S. and Jiang, L. and Read, T. D. and others} } @article {38141, title = {Can Deliberately Incomplete Gene Sample Augmentation Improve a Phylogeny Estimate for the Advanced Moths and Butterflies (Hexapoda: Lepidoptera)?}, journal = {Systematic Biology}, volume = {60}, year = {2011}, type = {10.1093/sysbio/syr079}, abstract = {This paper addresses the question of whether one can economically improve the robustness of a molecular phylogeny estimate by increasing gene sampling in only a subset of taxa, without having the analysis invalidated by artifacts arising from large blocks of missing data. Our case study stems from an ongoing effort to resolve poorly understood deeper relationships in the large clade Ditrysia ( > 150,000 species) of the insect order Lepidoptera (butterflies and moths). Seeking to remedy the overall weak support for deeper divergences in an initial study based on five nuclear genes (6.6 kb) in 123 exemplars, we nearly tripled the total gene sample (to 26 genes, 18.4 kb) but only in a third (41) of the taxa. 
The resulting partially augmented data matrix (45\% intentionally missing data) consistently increased bootstrap support for groupings previously identified in the five-gene (nearly) complete matrix, while introducing no contradictory groupings of the kind that missing data have been predicted to produce. Our results add to growing evidence that data sets differing substantially in gene and taxon sampling can often be safely and profitably combined. The strongest overall support for nodes above the family level came from including all nucleotide changes, while partitioning sites into sets undergoing mostly nonsynonymous versus mostly synonymous change. In contrast, support for the deepest node for which any persuasive molecular evidence has yet emerged (78{\textendash}85\% bootstrap) was weak or nonexistent unless synonymous change was entirely excluded, a result plausibly attributed to compositional heterogeneity. This node (Gelechioidea + Apoditrysia), tentatively proposed by previous authors on the basis of four morphological synapomorphies, is the first major subset of ditrysian superfamilies to receive strong statistical support in any phylogenetic study. A {\textquotedblleft}more-genes-only{\textquotedblright} data set (41 taxa{\texttimes}26 genes) also gave strong signal for a second deep grouping (Macrolepidoptera) that was obscured, but not strongly contradicted, in more taxon-rich analyses.}, keywords = {Ditrysia, gene sampling, Hexapoda, Lepidoptera, missing data, molecular phylogenetics, nuclear genes, taxon sampling}, isbn = {1063-5157, 1076-836X}, author = {Cho, Soowon and Zwick, Andreas and Regier, Jerome C. and Mitter, Charles and Michael P. Cummings and Yao, Jianxiu and Du, Zaile and Zhao, Hong and Kawahara, Akito Y. and Weller, Susan and Davis, Donald R. and Baixeras, Joaquin and Brown, John W. 
and Parr, Cynthia} } @article {38151, title = {Clonal transmission, dual peak, and off-season cholera in Bangladesh}, journal = {Infection Ecology \& Epidemiology}, volume = {1}, year = {2011}, type = {10.3402/iee.v1i0.7273}, author = {Alam, M. and Islam, A. and Bhuiyan, N. A. and Rahim, N. and Hossain, A. and Khan, G. Y. and Ahmed, D. and Watanabe, H. and Izumiya, H. and Faruque, A. S. G. and Rita R. Colwell} } @proceedings {38176, title = {Computing the Tree of Life: Leveraging the Power of Desktop and Service Grids}, year = {2011}, month = {2011}, type = {10.1109/IPDPS.2011.344}, abstract = {The trend in life sciences research, particularly in molecular evolutionary systematics, is toward larger data sets and ever-more detailed evolutionary models, which can generate substantial computational loads. Over the past several years we have developed a grid computing system aimed at providing researchers the computational power needed to complete such analyses in a timely manner. Our grid system, known as The Lattice Project, was the first to combine two models of grid computing - the service model, which mainly federates large institutional HPC resources, and the desktop model, which harnesses the power of PCs volunteered by the general public. Recently we have developed a "science portal" style web interface that makes it easier than ever for phylogenetic analyses to be completed using GARLI, a popular program that uses a maximum likelihood method to infer the evolutionary history of organisms on the basis of genetic sequence data. This paper describes our approach to scheduling thousands of GARLI jobs with diverse requirements to heterogeneous grid resources, which include volunteer computers running BOINC software. 
A key component of this system provides a priori GARLI runtime estimates using machine learning with random forests.}, keywords = {(artificial, (mathematics), analysis, BOINC, COMPUTATION, computational, computing, data, Estimation, evolutionary, GARLI, genetic, Grid, GRIDS, handling, heterogeneous, History, HPC, information, intelligence), interface, interfaces, Internet, jobs, lattice, learning, life, likelihood, load, machine, maximum, method, model, molecular, phylogenetic, portal, Portals, power, project, resource, Science, sequence, service, services, sets, software, substantial, system, systematics, tree, TREES, user, Web}, author = {Adam L. Bazinet and Michael P. Cummings} } @article {49556, title = {Direct targeting of Sec23a by miR-200s influences cancer cell secretome and promotes metastatic colonization}, journal = {Nat Med}, volume = {17}, year = {2011}, month = {Jul-08-2011}, pages = {1101--1108}, issn = {1078-8956}, doi = {10.1038/nm.2401}, url = {http://www.nature.com/doifinder/10.1038/nm.2401}, author = {Korpal, Manav and Ell, Brian J and Buffa, Francesca M and Ibrahim, Toni and Blanco, Mario A and Celi{\`a}-Terrassa, Toni and Mercatali, Laura and Khan, Zia and Goodarzi, Hani and Hua, Yuling and Wei, Yong and Hu, Guohong and Garcia, Benjamin A and Ragoussis, Jiannis and Amadori, Dino and Harris, Adrian L and Kang, Yibin} } @article {49746, title = {Direct targeting of Sec23a by miR-200s influences cancer cell secretome and promotes metastatic colonization.}, journal = {Nat Med}, volume = {17}, year = {2011}, month = {2011 Sep}, pages = {1101--1108}, abstract = {

Although the role of miR-200s in regulating E-cadherin expression and epithelial-to-mesenchymal transition is well established, their influence on metastatic colonization remains controversial. Here we have used clinical and experimental models of breast cancer metastasis to discover a pro-metastatic role of miR-200s that goes beyond their regulation of E-cadherin and epithelial phenotype. Overexpression of miR-200s is associated with increased risk of metastasis in breast cancer and promotes metastatic colonization in mouse models, phenotypes that cannot be recapitulated by E-cadherin expression alone. Genomic and proteomic analyses revealed global shifts in gene expression upon miR-200 overexpression toward that of highly metastatic cells. miR-200s promote metastatic colonization partly through direct targeting of Sec23a, which mediates secretion of metastasis-suppressive proteins, including Igfbp4 and Tinagl1, as validated by functional and clinical correlation studies. Overall, these findings suggest a pleiotropic role of miR-200s in promoting metastatic colonization by influencing E-cadherin-dependent epithelial traits and Sec23a-mediated tumor cell secretome.

}, keywords = {Animals, Cadherins, Cell Line, Tumor, Female, Gene Expression Profiling, Gene Expression Regulation, Neoplastic, HUMANS, Mass Spectrometry, Mice, Mice, Inbred BALB C, Microarray Analysis, MicroRNAs, Neoplasm Metastasis, Statistics, Nonparametric, Vesicular Transport Proteins}, issn = {1546-170X}, doi = {10.1038/nm.2401}, author = {Korpal, Manav and Ell, Brian J and Buffa, Francesca M and Ibrahim, Toni and Blanco, Mario A and Celi{\`a}-Terrassa, Toni and Mercatali, Laura and Khan, Zia and Goodarzi, Hani and Hua, Yuling and Wei, Yong and Hu, Guohong and Garcia, Benjamin A and Ragoussis, Jiannis and Amadori, Dino and Harris, Adrian L and Kang, Yibin} } @article {49829, title = {Effective detection of rare variants in pooled DNA samples using Cross-pool tailcurve analysis}, journal = {Genome Biology}, volume = {12}, year = {2011}, month = {Jan-01-2011}, pages = {R93}, issn = {1465-6906}, doi = {10.1186/gb-2011-12-9-r93}, url = {http://genomebiology.biomedcentral.com/articles/10.1186/gb-2011-12-9-r93}, author = {Niranjan, Tejasvi S and Adamczyk, Abby and Bravo, Hector and Taub, Margaret A and Wheelan, Sarah J and Irizarry, Rafael and Wang, Tao} } @article {49831, title = {Increased methylation variation in epigenetic domains across cancer types}, journal = {Nature Genetics}, volume = {43}, year = {2011}, month = {Feb-06-2013}, pages = {768--775}, issn = {1061-4036}, doi = {10.1038/ng.865}, url = {http://www.nature.com/doifinder/10.1038/ng.865}, author = {Hansen, Kasper Daniel and Timp, Winston and Bravo, H{\'e}ctor Corrada and Sabunciyan, Sarven and Langmead, Benjamin and McDonald, Oliver G and Wen, Bo and Wu, Hao and Liu, Yun and Diep, Dinh and Briem, Eirikur and Zhang, Kun and Irizarry, Rafael A and Feinberg, Andrew P} } @article {38347, title = {Influence of host gene transcription level and orientation on HIV-1 latency in a primary-cell model}, journal = {Journal of Virology}, volume = {85}, year = {2011}, note = 
{http://www.ncbi.nlm.nih.gov/pubmed/21430059?dopt=Abstract}, doi = {10.1128/JVI.02536-10}, abstract = {Human immunodeficiency virus type 1 (HIV-1) establishes a latent reservoir in resting memory CD4(+) T cells. This latent reservoir is a major barrier to the eradication of HIV-1 in infected individuals and is not affected by highly active antiretroviral therapy (HAART). Reactivation of latent HIV-1 is a possible strategy for elimination of this reservoir. The mechanisms with which latency is maintained are unclear. In the analysis of the regulation of HIV-1 gene expression, it is important to consider the nature of HIV-1 integration sites. In this study, we analyzed the integration and transcription of latent HIV-1 in a primary CD4(+) T cell model of latency. The majority of integration sites in latently infected cells were in introns of transcription units. Serial analysis of gene expression (SAGE) demonstrated that more than 90\% of those host genes harboring a latent integrated provirus were transcriptionally active, mostly at high levels. For latently infected cells, we observed a modest preference for integration in the same transcriptional orientation as the host gene (63.8\% versus 36.2\%). In contrast, this orientation preference was not observed in acutely infected or persistently infected cells. These results suggest that transcriptional interference may be one of the important factors in the establishment and maintenance of HIV-1 latency. Our findings suggest that disrupting the negative control of HIV-1 transcription by upstream host promoters could facilitate the reactivation of latent HIV-1 in some resting CD4(+) T cells.}, keywords = {CD4-Positive T-Lymphocytes, Cells, Cultured, Gene Expression Profiling, Gene Expression Regulation, Viral, HIV-1, HUMANS, Transcription, Genetic, Virus Integration, Virus Latency}, author = {Shan, Liang and Yang, Hung-Chih and Rabi, S. Alireza and H{\'e}ctor Corrada Bravo and Shroff, Neeta S. and Irizarry, Rafael A. 
and Zhang, Hao and Margolick, Joseph B. and Siliciano, Janet D. and Siliciano, Robert F.} } @article {49832, title = {Influence of Host Gene Transcription Level and Orientation on HIV-1 Latency in a Primary-Cell Model}, journal = {Journal of Virology}, volume = {85}, year = {2011}, month = {Jan-06-2011}, pages = {5384--5393}, issn = {0022-538X}, doi = {10.1128/JVI.02536-10}, url = {http://jvi.asm.org/cgi/doi/10.1128/JVI.02536-10}, author = {Shan, L. and Yang, H.-C. and Rabi, S. A. and Bravo, H. C. and Shroff, N. S. and Irizarry, R. A. and Zhang, H. and Margolick, J. B. and Siliciano, J. D. and Siliciano, R. F.} } @article {38362, title = {Long-term effects of ocean warming on the prokaryotic community: evidence from the vibrios}, journal = {The ISME Journal}, volume = {6}, year = {2011}, doi = {10.1038/ismej.2011.89}, abstract = {The long-term effects of ocean warming on prokaryotic communities are unknown because of lack of historical data. We overcame this gap by applying a retrospective molecular analysis to the bacterial community on formalin-fixed samples from the historical Continuous Plankton Recorder archive, which is one of the longest and most geographically extensive collections of marine biological samples in the world. We showed that during the last half century, ubiquitous marine bacteria of the Vibrio genus, including Vibrio cholerae, increased in dominance within the plankton-associated bacterial community of the North Sea, where an unprecedented increase in bathing infections related to these bacteria was recently reported. 
Among environmental variables, increased sea surface temperature explained 45\% of the variance in Vibrio data, supporting the view that ocean warming is favouring the spread of vibrios and may be the cause of the globally increasing trend in their associated diseases.}, keywords = {ecophysiology, ecosystems, environmental biotechnology, geomicrobiology, ISME J, microbe interactions, microbial communities, microbial ecology, microbial engineering, microbial epidemiology, microbial genomics, microorganisms}, issn = {1751-7362}, author = {Vezzulli, Luigi and Brettar, Ingrid and Pezzati, Elisabetta and Reid, Philip C. and Rita R. Colwell and H{\"o}fle, Manfred G. and Pruzzo, Carla} } @proceedings {38367, title = {MDMap: A system for data-driven layout and exploration of molecular dynamics simulations}, year = {2011}, month = {2011}, doi = {10.1109/BioVis.2011.6094055}, abstract = {Contemporary molecular dynamics simulations result in a glut of simulation data, making analysis and discovery a difficult and burdensome task. We present MDMap, a system designed to summarize long-running molecular dynamics (MD) simulations. We represent a molecular dynamics simulation as a state transition graph over a set of intermediate (stable and semi-stable) states. The transitions amongst the states together with their frequencies represent the flow of a biomolecule through the trajectory space. MDMap automatically determines potential intermediate conformations and the transitions amongst them by analyzing the conformational space explored by the MD simulation. MDMap is an automated system to visualize MD simulations as state-transition diagrams, and can replace the current tedious manual layouts of biomolecular folding landscapes with an automated tool. The layout of the representative states and the corresponding transitions among them is presented to the user as a visual synopsis of the long-running MD simulation. 
We compare and contrast multiple presentations of the state transition diagrams, such as conformational embedding, and spectral, hierarchical, and force-directed graph layouts. We believe this system could provide a road-map for the visualization of other stochastic time-varying simulations in a variety of different domains.}, keywords = {Biology, biomolecular, computing, data, digital, driven, DYNAMICS, exploration, folding, graph, landscapes, Layout, MDMap, method, molecular, processes, simulation, Simulations, space, state, Stochastic, THEORY, time-varying, Trajectory, transition}, author = {Patro, R. and Ip, Cheuk Yiu and Bista, S. and Cho, S. S. and Thirumalai, D. and Varshney, Amitabh} } @article {49830, title = {A Model for Early Prediction of Facial Nerve Recovery After Vestibular Schwannoma Surgery}, journal = {Otology \& Neurotology}, volume = {32}, year = {2011}, month = {Jan-01-2011}, pages = {826--833}, issn = {1531-7129}, doi = {10.1097/MAO.0b013e31821b0afd}, url = {http://content.wkhealth.com/linkback/openurl?sid=WKPTLP:landingpage\&an=00129492-201107000-00019}, author = {Rivas, Alejandro and Boahene, Kofi D. and Bravo, H{\'e}ctor Corrada and Tan, Marietta and Tamargo, Rafael J. and Francis, Howard W.} } @article {38452, title = {ProPhylo: partial phylogenetic profiling to guide protein family construction and assignment of biological process}, journal = {BMC Bioinformatics}, volume = {12}, year = {2011}, note = {http://www.ncbi.nlm.nih.gov/pubmed/22070167?dopt=Abstract}, doi = {10.1186/1471-2105-12-434}, abstract = {BACKGROUND: Phylogenetic profiling is a technique of scoring co-occurrence between a protein family and some other trait, usually another protein family, across a set of taxonomic groups. In spite of several refinements in recent years, the technique still invites significant improvement. 
To be its most effective, a phylogenetic profiling algorithm must be able to examine co-occurrences among protein families whose boundaries are uncertain within large homologous protein superfamilies. RESULTS: Partial Phylogenetic Profiling (PPP) is an iterative algorithm that scores a given taxonomic profile against the taxonomic distribution of families for all proteins in a genome. The method works through optimizing the boundary of each protein family, rather than by relying on prebuilt protein families or fixed sequence similarity thresholds. Double Partial Phylogenetic Profiling (DPPP) is a related procedure that begins with a single sequence and searches for optimal granularities for its surrounding protein family in order to generate the best query profiles for PPP. We present ProPhylo, a high-performance software package for phylogenetic profiling studies through creating individually optimized protein family boundaries. ProPhylo provides precomputed databases for immediate use and tools for manipulating the taxonomic profiles used as queries. CONCLUSION: ProPhylo results show universal markers of methanogenesis, a new DNA phosphorothioation-dependent restriction enzyme, and efficacy in guiding protein family construction. The software and the associated databases are freely available under the open source Perl Artistic License from ftp://ftp.jcvi.org/pub/data/ppp/.}, keywords = {algorithms, Archaea, Archaeal Proteins, DNA, Methane, Phylogeny, software}, author = {Basu, Malay K. and J. Selengut and Haft, Daniel H.} } @article {49777, title = {ProPhylo: partial phylogenetic profiling to guide protein family construction and assignment of biological process.}, journal = {BMC Bioinformatics}, volume = {12}, year = {2011}, month = {2011}, pages = {434}, abstract = {

BACKGROUND: Phylogenetic profiling is a technique of scoring co-occurrence between a protein family and some other trait, usually another protein family, across a set of taxonomic groups. In spite of several refinements in recent years, the technique still invites significant improvement. To be its most effective, a phylogenetic profiling algorithm must be able to examine co-occurrences among protein families whose boundaries are uncertain within large homologous protein superfamilies.

RESULTS: Partial Phylogenetic Profiling (PPP) is an iterative algorithm that scores a given taxonomic profile against the taxonomic distribution of families for all proteins in a genome. The method works through optimizing the boundary of each protein family, rather than by relying on prebuilt protein families or fixed sequence similarity thresholds. Double Partial Phylogenetic Profiling (DPPP) is a related procedure that begins with a single sequence and searches for optimal granularities for its surrounding protein family in order to generate the best query profiles for PPP. We present ProPhylo, a high-performance software package for phylogenetic profiling studies through creating individually optimized protein family boundaries. ProPhylo provides precomputed databases for immediate use and tools for manipulating the taxonomic profiles used as queries.

CONCLUSION: ProPhylo results show universal markers of methanogenesis, a new DNA phosphorothioation-dependent restriction enzyme, and efficacy in guiding protein family construction. The software and the associated databases are freely available under the open source Perl Artistic License from ftp://ftp.jcvi.org/pub/data/ppp/.

}, keywords = {algorithms, Archaea, Archaeal Proteins, DNA, Methane, Phylogeny, software}, issn = {1471-2105}, doi = {10.1186/1471-2105-12-434}, author = {Basu, Malay K and Selengut, Jeremy D and Haft, Daniel H} } @article {38508, title = {Social Snapshot: A System for Temporally Coupled Social Photography}, journal = {Computer Graphics and Applications, IEEE}, volume = {31}, year = {2011}, doi = {10.1109/MCG.2010.107}, abstract = {Social Snapshot actively acquires and reconstructs temporally dynamic data. The system enables spatiotemporal 3D photography using commodity devices, assisted by their auxiliary sensors and network functionality. It engages users, making them active rather than passive participants in data acquisition.}, keywords = {3D, ACQUISITION, computing, coupled, data, Photography, reconstruction, sciences, snapshot, social, spatiotemporal, temporally}, issn = {0272-1716}, author = {Patro, R. and Ip, Cheuk Yiu and Bista, S. and Varshney, Amitabh} } @article {38524, title = {Temperature regulation of virulence factors in the pathogen Vibrio coralliilyticus}, journal = {The ISME Journal}, volume = {6}, year = {2011}, doi = {10.1038/ismej.2011.154}, abstract = {Sea surface temperatures (SST) are rising because of global climate change. As a result, pathogenic Vibrio species that infect humans and marine organisms during warmer summer months are of growing concern. Coral reefs, in particular, are already experiencing unprecedented degradation worldwide due in part to infectious disease outbreaks and bleaching episodes that are exacerbated by increasing SST. For example, Vibrio coralliilyticus, a globally distributed bacterium associated with multiple coral diseases, infects corals at temperatures above 27 {\textdegree}C. The mechanisms underlying this temperature-dependent pathogenicity, however, are unknown. 
In this study, we identify potential virulence mechanisms using whole genome sequencing of V. coralliilyticus ATCC (American Type Culture Collection) BAA-450. Furthermore, we demonstrate direct temperature regulation of numerous virulence factors using proteomic analysis and bioassays. Virulence factors involved in motility, host degradation, secretion, antimicrobial resistance and transcriptional regulation are upregulated at the higher virulent temperature of 27 {\textdegree}C, concurrent with phenotypic changes in motility, antibiotic resistance, hemolysis, cytotoxicity and bioluminescence. These results provide evidence that temperature regulates multiple virulence mechanisms in V. coralliilyticus, independent of abundance. The ecological and biological significance of this temperature-dependent virulence response is reinforced by climate change models that predict tropical SST to consistently exceed 27 {\textdegree}C during the spring, summer and fall seasons. We propose V. coralliilyticus as a model Gram-negative bacterium to study temperature-dependent pathogenicity in Vibrio-related diseases.}, keywords = {ecophysiology, ecosystems, environmental biotechnology, geomicrobiology, ISME J, microbe interactions, microbial communities, microbial ecology, microbial engineering, microbial epidemiology, microbial genomics, microorganisms}, issn = {1751-7362}, author = {Kimes, Nikole E. and Grim, Christopher J. and Johnson, Wesley R. and Hasan, Nur A. and Tall, Ben D. and Kothary, Mahendra H. and Kiss, Hajnalka and Munk, A. Christine and Tapia, Roxanne and Green, Lance and Detter, Chris and Bruce, David C. and Brettin, Thomas S. and Rita R. Colwell and Morris, Pamela J.} } @article {38158, title = {Comparative genomic analysis reveals evidence of two novel Vibrio species closely related to V. 
cholerae}, journal = {BMC Microbiology}, volume = {10}, year = {2010}, abstract = {In recent years genome sequencing has been used to characterize new bacterial species, a method of analysis available as a result of improved methodology and reduced cost. Included in a constantly expanding list of Vibrio species are several that have been reclassified as novel members of the Vibrionaceae. The description of two putative new Vibrio species, Vibrio sp. RC341 and Vibrio sp. RC586 for which we propose the names V. metecus and V. parilis, respectively, previously characterized as non-toxigenic environmental variants of V. cholerae is presented in this study. Results Based on results of whole-genome average nucleotide identity (ANI), average amino acid identity (AAI), rpoB similarity, MLSA, and phylogenetic analysis, the new species are concluded to be phylogenetically closely related to V. cholerae and V. mimicus. Vibrio sp. RC341 and Vibrio sp. RC586 demonstrate features characteristic of V. cholerae and V. mimicus, respectively, on differential and selective media, but their genomes show a 12 to 15\% divergence (88 to 85\% ANI and 92 to 91\% AAI) compared to the sequences of V. cholerae and V. mimicus genomes (ANI <95\% and AAI <96\% indicative of separate species). Vibrio sp. RC341 and Vibrio sp. RC586 share 2104 ORFs (59\%) and 2058 ORFs (56\%) with the published core genome of V. cholerae and 2956 (82\%) and 3048 ORFs (84\%) with V. mimicus MB-451, respectively. The novel species share 2926 ORFs with each other (81\% Vibrio sp. RC341 and 81\% Vibrio sp. RC586). Virulence-associated factors and genomic islands of V. cholerae and V. mimicus, including VSP-I and II, were found in these environmental Vibrio spp. Conclusions Results of this analysis demonstrate these two environmental vibrios, previously characterized as variant V. cholerae strains, are new species which have evolved from ancestral lineages of the V. cholerae and V. mimicus clade. 
The presence of conserved integration loci for genomic islands as well as evidence of horizontal gene transfer between these two new species, V. cholerae, and V. mimicus suggests genomic islands and virulence factors are transferred between these species.}, author = {Bradd, H. and Christopher, G. and Nur, H. and Seon-Young, C. and Jongsik, C. and Thomas, B. and David, B. and Jean, C. and Chris, D. J. and Cliff, H. and Rita R. Colwell} } @article {38210, title = {Diversity and distribution of cholix toxin, a novel ADP-ribosylating factor from Vibrio cholerae}, journal = {Environmental Microbiology Reports}, volume = {2}, year = {2010}, doi = {10.1111/j.1758-2229.2010.00139.x}, abstract = {Non-toxigenic non-O1, non-O139 Vibrio cholerae strains isolated from both environmental and clinical settings carry a suite of virulence factors aside from cholera toxin. Among V. cholerae strains isolated from coastal waters of southern California, this includes cholix toxin, an ADP-ribosylating factor that is capable of halting protein synthesis in eukaryotic cells. The prevalence of the gene encoding cholix toxin, chxA, was assessed among a collection of 155 diverse V. cholerae strains originating from both clinical and environmental settings in Bangladesh and Mexico and other countries around the globe. The chxA gene was present in 47\% of 83 non-O1, non-O139 strains and 16\% of 72 O1/O139 strains screened as part of this study. A total of 86 chxA gene sequences were obtained, and phylogenetic analysis revealed that they fall into two distinct clades. These two clades were also observed in the phylogenies of several housekeeping genes, suggesting that the divergence observed in chxA extends to other regions of the V. cholerae genome, and most likely has arisen from vertical descent rather than horizontal transfer. Our results clearly indicate that ChxA is a major toxin of V. 
cholerae with a worldwide distribution that is preferentially associated with non-pandemic strains.}, issn = {1758-2229}, author = {Purdy, Alexandra E. and Balch, Deborah and Liz{\'a}rraga-Partida, Marcial Leonardo and Islam, Mohammad Sirajul and Martinez-Urtaza, Jaime and Huq, Anwar and Rita R. Colwell and Bartlett, Douglas H.} } @inbook {38246, title = {Evolutionary framework for Lepidoptera model systems}, booktitle = {Genetics and Molecular Biology of Lepidoptera}, year = {2010}, publisher = {Taylor \& Francis}, organization = {Taylor \& Francis}, address = {Boca Raton}, abstract = {{\textquotedblleft}Model systems{\textquotedblright} are specific organisms upon which detailed studies have been conducted examining a fundamental biological question. If the studies are robust, their results can be extrapolated among an array of organisms that possess features in common with the subject organism. The true power of model systems lies in the ability to extrapolate these details across larger groups of organisms. In order to generalize these results, comparative studies are essential and require that model systems be placed into their evolutionary or phylogenetic context. This chapter examines model systems in the insect order Lepidoptera from the perspective of several different superfamilies. Historically, many species of Lepidoptera have been essential in the development of invaluable model systems in the fields of development biology, genetics, molecular biology, physiology, co-evolution, population dynamics, and ecology.}, author = {Roe, A. and Weller, S. and Baixeras, J. and Brown, J. W. and Michael P. Cummings and Davis, D. R. and Horak, M. and Kawahara, A. Y. and Mitter, C. and Parr, C. S. and Regier, J. C. and Rubinoff, D. and Simonsen, T. J. and Wahlberg, N. and Zwick, A.}, editor = {Goldsmith, M. 
and Marec, F.} } @article {38263, title = {Finishing genomes with limited resources: lessons from an ensemble of microbial genomes}, journal = {BMC Genomics}, volume = {11}, year = {2010}, doi = {10.1186/1471-2164-11-242}, abstract = {While new sequencing technologies have ushered in an era where microbial genomes can be easily sequenced, the goal of routinely producing high-quality draft and finished genomes in a cost-effective fashion has still remained elusive. Due to shorter read lengths and limitations in library construction protocols, shotgun sequencing and assembly based on these technologies often results in fragmented assemblies. Correspondingly, while draft assemblies can be obtained in days, finishing can take many months and hence the time and effort can only be justified for high-priority genomes and in large sequencing centers. In this work, we revisit this issue in light of our own experience in producing finished and nearly-finished genomes for a range of microbial species in a small-lab setting. These genomes were finished with surprisingly little investments in terms of time, computational effort and lab work, suggesting that the increased access to sequencing might also eventually lead to a greater proportion of finished genomes from small labs and genomics cores.}, issn = {1471-2164}, author = {Nagarajan, Niranjan and Cook, Christopher and Di Bonaventura, Maria Pia and Ge, Hong and Richards, Allen and Bishop-Lilly, Kimberly A. and DeSalle, Robert and Read, Timothy D. and M. Pop} } @inbook {49666, title = {Genetics of Trypanosoma cruzi in American Trypanosomiasis: Chagas Disease One hundred Years of Research}, year = {2010}, publisher = {Elsevier Press}, organization = {Elsevier Press}, address = {Burlington}, author = {Bartholomeu, D. and Buck, G. and Teixeira, S. and El-Sayed, N.M.} } @article {38301, title = {Genome Sequence of Hybrid Vibrio Cholerae O1 MJ-1236, B-33, and CIRS101 and Comparative Genomics with V. 
Cholerae}, journal = {Journal of Bacteriology}, volume = {192}, year = {2010}, doi = {10.1128/JB.00040-10}, abstract = {The genomes of Vibrio cholerae O1 Matlab variant MJ-1236, Mozambique O1 El Tor variant B33, and altered O1 El Tor CIRS101 were sequenced. All three strains were found to belong to the phylocore group 1 clade of V. cholerae, which includes the 7th-pandemic O1 El Tor and serogroup O139 isolates, despite displaying certain characteristics of the classical biotype. All three strains were found to harbor a hybrid variant of CTXΦ and an integrative conjugative element (ICE), leading to their establishment as successful clinical clones and the displacement of prototypical O1 El Tor. The absence of strain- and group-specific genomic islands, some of which appear to be prophages and phage-like elements, seems to be the most likely factor in the recent establishment of dominance of V. cholerae CIRS101 over the other two hybrid strains.}, issn = {0021-9193, 1098-5530}, author = {Grim, Christopher J. and Hasan, Nur A. and Taviani, Elisa and Haley, Bradd and Jongsik, Chun and Brettin, Thomas S. and Bruce, David C. and Detter, J. Chris and Han, Cliff S. and Chertkov, Olga and Challacombe, Jean and Huq, Anwar and Nair, G. Balakrish and Rita R. Colwell} } @article {38335, title = {Identification of Pathogenic Vibrio Species by Multilocus PCR-Electrospray Ionization Mass Spectrometry and Its Application to Aquatic Environments of the Former Soviet Republic of Georgia}, journal = {Applied and Environmental Microbiology}, volume = {76}, year = {2010}, doi = {10.1128/AEM.01919-09}, abstract = {The Ibis T5000 is a novel diagnostic platform that couples PCR and mass spectrometry. 
In this study, we developed an assay that can identify all known pathogenic Vibrio species and field-tested it using natural water samples from both freshwater lakes and the Georgian coastal zone of the Black Sea. Of the 278 total water samples screened, 9 different Vibrio species were detected, 114 (41\%) samples were positive for V. cholerae, and 5 (0.8\%) samples were positive for the cholera toxin A gene (ctxA). All ctxA-positive samples were from two freshwater lakes, and no ctxA-positive samples from any of the Black Sea sites were detected.}, issn = {0099-2240, 1098-5336}, author = {Whitehouse, Chris A. and Baldwin, Carson and Sampath, Rangarajan and Blyn, Lawrence B. and Melton, Rachael and Li, Feng and Hall, Thomas A. and Harpin, Vanessa and Matthews, Heather and Tediashvili, Marina and Jaiani, Ekaterina and Kokashvili, Tamar and Janelidze, Nino and Grim, Christopher and Rita R. Colwell and Huq, Anwar} } @inbook {38338, title = {Identifying Differentially Abundant Metabolic Pathways in Metagenomic Datasets}, booktitle = {Bioinformatics Research and Applications}, series = {Lecture Notes in Computer Science}, volume = {6053}, year = {2010}, publisher = {Springer Berlin / Heidelberg}, organization = {Springer Berlin / Heidelberg}, abstract = {Enabled by rapid advances in sequencing technology, metagenomic studies aim to characterize entire communities of microbes bypassing the need for culturing individual bacterial members. One major goal of such studies is to identify specific functional adaptations of microbial communities to their habitats. Here we describe a powerful analytical method (MetaPath) that can identify differentially abundant pathways in metagenomic data-sets, relying on a combination of metagenomic sequence data and prior metabolic pathway knowledge. We show that MetaPath outperforms other common approaches when evaluated on simulated datasets. 
We also demonstrate the power of our methods in analyzing two, publicly available, metagenomic datasets: a comparison of the gut microbiome of obese and lean twins; and a comparison of the gut microbiome of infant and adult subjects. We demonstrate that the subpathways identified by our method provide valuable insights into the biological activities of the microbiome.}, isbn = {978-3-642-13077-9}, author = {Liu, Bo and M. Pop}, editor = {Borodovsky, Mark and Gogarten, Johann and Przytycka, Teresa and Rajasekaran, Sanguthevar} } @article {38349, title = {Intensity normalization improves color calling in SOLiD sequencing}, journal = {Nat Meth}, volume = {7}, year = {2010}, doi = {10.1038/nmeth0510-336}, issn = {1548-7091}, author = {Wu, Hao and Irizarry, Rafael A. and H{\'e}ctor Corrada Bravo} } @article {49650, title = {A model for using a concept inventory as a tool for students{\textquoteright} assessment and faculty professional development.}, journal = {CBE Life Sci Educ}, volume = {9}, year = {2010}, month = {2010 Winter}, pages = {408--416}, abstract = {

This essay describes how the use of a concept inventory has enhanced professional development and curriculum reform efforts of a faculty teaching community. The Host Pathogen Interactions (HPI) teaching team is composed of research and teaching faculty with expertise in HPI who share the goal of improving the learning experience of students in nine linked undergraduate microbiology courses. To support evidence-based curriculum reform, we administered our HPI Concept Inventory as a pre- and postsurvey to approximately 400 students each year since 2006. The resulting data include student scores as well as their open-ended explanations for distractor choices. The data have enabled us to address curriculum reform goals of 1) reconciling student learning with our expectations, 2) correlating student learning with background variables, 3) understanding student learning across institutions, 4) measuring the effect of teaching techniques on student learning, and 5) demonstrating how our courses collectively form a learning progression. The analysis of the concept inventory data has anchored and deepened the team{\textquoteright}s discussions of student learning. Reading and discussing students{\textquoteright} responses revealed the gap between our understanding and the students{\textquoteright} understanding. We provide evidence to support the concept inventory as a tool for assessing student understanding of HPI concepts and faculty development.

}, keywords = {Curriculum, Faculty, Models, Theoretical, Research, Students, Teaching}, issn = {1931-7913}, doi = {10.1187/cbe.10-05-0069}, author = {Marbach-Ad, Gili and McAdams, Katherine C and Benson, Spencer and Briken, Volker and Cathcart, Laura and Chase, Michael and El-Sayed, Najib M and Frauwirth, Kenneth and Fredericksen, Brenda and Joseph, Sam W and Lee, Vincent and McIver, Kevin S and Mosser, David and Quimby, B Booth and Shields, Patricia and Song, Wenxia and Stein, Daniel C and Stewart, Richard and Thompson, Katerina V and Smith, Ann C} } @article {49833, title = {Model-Based Quality Assessment and Base-Calling for Second-Generation Sequencing Data}, journal = {Biometrics}, volume = {66}, year = {2010}, month = {Jan-09-2010}, pages = {665--674}, doi = {10.1111/j.1541-0420.2009.01353.x}, url = {http://doi.wiley.com/10.1111/j.1541-0420.2009.01353.x}, author = {Corrada Bravo, H{\'e}ctor and Irizarry, Rafael A.} } @article {38417, title = {Overcoming bias and systematic errors in next generation sequencing data}, journal = {Genome Medicine}, volume = {2}, year = {2010}, note = {http://www.ncbi.nlm.nih.gov/pubmed/21144010?dopt=Abstract}, doi = {10.1186/gm208}, abstract = {Considerable time and effort has been spent in developing analysis and quality assessment methods to allow the use of microarrays in a clinical setting. As is the case for microarrays and other high-throughput technologies, data from new high-throughput sequencing technologies are subject to technological and biological biases and systematic errors that can impact downstream analyses. Only when these issues can be readily identified and reliably adjusted for will clinical applications of these new technologies be feasible. 
Although much work remains to be done in this area, we describe consistently observed biases that should be taken into account when analyzing high-throughput sequencing data. In this article, we review current knowledge about these biases, discuss their impact on analysis results, and propose solutions.}, author = {Taub, Margaret A. and Corrada Bravo, H{\'e}ctor and Irizarry, Rafael A.} } @article {38448, title = {The pre-seventh pandemic Vibrio cholerae BX 330286 El Tor genome: evidence for the environment as a genome reservoir}, journal = {Environmental Microbiology Reports}, volume = {2}, year = {2010}, doi = {10.1111/j.1758-2229.2010.00141.x}, abstract = {Vibrio cholerae O1 El Tor BX 330286 was isolated from a water sample in Australia in 1986, 9 years after an indigenous outbreak of cholera occurred in that region. This environmental strain encodes virulence factors highly similar to those of clinical strains, suggesting an ability to cause disease in humans. We demonstrate its high similarity in gene content and genome-wide nucleotide sequence to clinical V. cholerae strains, notably to pre-seventh pandemic O1 El Tor strains isolated in 1910 (V. cholerae NCTC 8457) and 1937 (V. cholerae MAK 757), as well as seventh pandemic strains isolated after 1960 globally. Here we demonstrate that this strain represents a transitory clone with shared characteristics between pre-seventh and seventh pandemic strains of V. cholerae. Interestingly, this strain was isolated 25 years after the beginning of the seventh pandemic, suggesting the environment as a genome reservoir in areas where cholera does not occur in sporadic, endemic or epidemic form.}, issn = {1758-2229}, author = {Haley, Bradd J. and Grim, Christopher J. and Hasan, Nur A. and Taviani, Elisa and Chun, Jongsik and Brettin, Thomas S. and Bruce, David C. and Challacombe, Jean F. and Detter, J. Chris and Han, Cliff S. and Huq, Anwar and Nair, G. Balakrish and Rita R. 
Colwell} } @article {38522, title = {Tackling the widespread and critical impact of batch effects in high-throughput data}, journal = {Nature Reviews Genetics}, volume = {11}, year = {2010}, note = {http://www.ncbi.nlm.nih.gov/pubmed/20838408?dopt=Abstract}, doi = {10.1038/nrg2825}, abstract = {High-throughput technologies are widely used, for example to assay genetic variants, gene and protein expression, and epigenetic modifications. One often overlooked complication with such studies is batch effects, which occur because measurements are affected by laboratory conditions, reagent lots and personnel differences. This becomes a major problem when batch effects are correlated with an outcome of interest and lead to incorrect conclusions. Using both published studies and our own analyses, we argue that batch effects (as well as other technical and biological artefacts) are widespread and critical to address. We review experimental and computational approaches for doing so.}, keywords = {biotechnology, Computational Biology, Genomics, Oligonucleotide Array Sequence Analysis, Periodicals as Topic, Research Design, Sequence Analysis, DNA}, author = {Leek, Jeffrey T. and Scharpf, Robert B. and Corrada Bravo, H{\'e}ctor and Simcha, David and Langmead, Benjamin and Johnson, W. Evan and Geman, Donald and Baggerly, Keith and Irizarry, Rafael A.} } @article {38579, title = {Young Proteins Experience More Variable Selection Pressures Than Old Proteins}, journal = {Genome Research}, volume = {20}, year = {2010}, doi = {10.1101/gr.109595.110}, abstract = {It is well known that young proteins tend to experience weaker purifying selection and evolve more quickly than old proteins. Here, we show that, in addition, young proteins tend to experience more variable selection pressures over time than old proteins. We demonstrate this pattern in three independent taxonomic groups: yeast, Drosophila, and mammals. 
The increased variability of selection pressures on young proteins is highly significant even after controlling for the fact that young proteins are typically shorter and experience weaker purifying selection than old proteins. The majority of our results are consistent with the hypothesis that the function of a young gene tends to change over time more readily than that of an old gene. At the same time, our results may be caused in part by young genes that serve constant functions over time, but nevertheless appear to evolve under changing selection pressures due to depletion of adaptive mutations. In either case, our results imply that the evolution of a protein-coding sequence is partly determined by its age and origin, and not only by the phenotypic properties of the encoded protein. We discuss, via specific examples, the consequences of these findings for understanding of the sources of evolutionary novelty.}, issn = {1088-9051, 1549-5469}, author = {Vishnoi, Anchal and Kryazhimskiy, Sergey and Bazykin, Georgii A. and Sridhar Hannenhalli and Plotkin, Joshua B.} } @article {49645, title = {Assessing Student Understanding of Host Pathogen Interactions Using a Concept Inventory}, journal = {J. Microbiol. Biol. Ed.}, volume = {10}, year = {2009}, pages = {43--50}, author = {Marbach-Ad, G. and Briken, V. and El-Sayed, N. M. and Frauwirth, K. and Fredericksen, B. and Hutcheson, S. and Gao, L.-Y. and Joseph, S. and Lee, V. and McIver, K. S. and Mosser, D. and Quimby, B. B. and Shields, P. and Song, W. and Stein, D. C. and Yuan, R. T. 
and Smith, A. C.} } @article {38190, title = {CTCF binding site classes exhibit distinct evolutionary, genomic, epigenomic and transcriptomic features}, journal = {Genome Biology}, volume = {10}, year = {2009}, doi = {10.1186/gb-2009-10-11-r131}, abstract = {CTCF (CCCTC-binding factor) is an evolutionarily conserved zinc finger protein involved in diverse functions ranging from negative regulation of MYC, to chromatin insulation of the beta-globin gene cluster, to imprinting of the Igf2 locus. The 11 zinc fingers of CTCF are known to differentially contribute to the CTCF-DNA interaction at different binding sites. It is possible that the differences in CTCF-DNA conformation at different binding sites underlie CTCF{\textquoteright}s functional diversity. If so, the CTCF binding sites may belong to distinct classes, each compatible with a specific functional role.}, issn = {1465-6906}, author = {Essien, Kobby and Vigneau, Sebastien and Apreleva, Sofia and Singh, Larry N. and Bartolomei, Marisa S. and Sridhar Hannenhalli} } @article {49836, title = {Estimating Tree-Structured Covariance Matrices via Mixed-Integer Programming}, journal = {J Mach Learn Res}, volume = {5}, year = {2009}, pages = {41--48}, chapter = {41}, author = {Corrada Bravo, Hector and Wright, Stephen and Eng, Kevin H. and Keles, S{\"u}nd{\"u}z and Wahba, Grace} } @article {49834, title = {Examining the relative influence of familial, genetic, and environmental covariate information in flexible risk models}, journal = {Proceedings of the National Academy of Sciences}, volume = {106}, year = {2009}, month = {Jul-05-2010}, pages = {8128--8133}, issn = {0027-8424}, doi = {10.1073/pnas.0902906106}, url = {http://www.pnas.org/cgi/doi/10.1073/pnas.0902906106}, author = {Corrada Bravo, H. and Lee, K. E. and Klein, B. E. K. and Klein, R. and Iyengar, S. K. 
and Wahba, G.} } @article {38256, title = {Extreme polymorphism in a vaccine antigen and risk of clinical malaria: implications for vaccine development}, journal = {Sci Transl Med}, volume = {1}, year = {2009}, doi = {10.1126/scitranslmed.3000257}, abstract = {Vaccines directed against the blood stages of Plasmodium falciparum malaria are intended to prevent the parasite from invading and replicating within host cells. No blood-stage malaria vaccine has shown clinical efficacy in humans. Most malaria vaccine antigens are parasite surface proteins that have evolved extensive genetic diversity, and this diversity could allow malaria parasites to escape vaccine-induced immunity. We examined the extent and within-host dynamics of genetic diversity in the blood-stage malaria vaccine antigen apical membrane antigen-1 in a longitudinal study in Mali. Two hundred and fourteen unique apical membrane antigen-1 haplotypes were identified among 506 human infections, and amino acid changes near a putative invasion machinery binding site were strongly associated with the development of clinical symptoms, suggesting that these residues may be important to consider in designing polyvalent apical membrane antigen-1 vaccines and in assessing vaccine efficacy in field trials. This extreme diversity may pose a serious obstacle to an effective polyvalent recombinant subunit apical membrane antigen-1 vaccine.}, author = {Takala, S. L. and Coulibaly, D. and Thera, M. A. and Batchelor, A. H. and Michael P. Cummings and Escalante, A. A. and Ouattara, A. and Traor{\'e}, K. and Niangaly, A. and Djimd{\'e}, A. A. and Doumbo, O. K. and Plowe, C. V.} } @article {38290, title = {Genome assortment, not serogroup, defines Vibrio cholerae pandemic strains}, journal = {Nature}, year = {2009}, abstract = {Vibrio cholerae, the causative agent of cholera, is a bacterium autochthonous to the aquatic environment, and a serious public health threat. V. 
cholerae serogroup O1 is responsible for the previous two cholera pandemics, in which classical and El Tor biotypes were dominant in the 6th and the current 7th pandemics, respectively. Cholera researchers continually face newly emerging and re-emerging pathogenic clones carrying combinations of new serogroups as well as of phenotypic and genotypic properties. These genotype and phenotype changes have hampered control of the disease. Here we compare the complete genome sequences of 23 strains of V. cholerae isolated from a variety of sources and geographical locations over the past 98 years in an effort to elucidate the evolutionary mechanisms governing genetic diversity and genesis of new pathogenic clones. The genome-based phylogeny revealed 12 distinct V. cholerae phyletic lineages, of which one, designated the V. cholerae core genome (CG), comprises both O1 classical and EI Tor biotypes. All 7th pandemic clones share nearly identical gene content, i.e., the same genome backbone. The transition from 6th to 7th pandemic strains is defined here as a {\textquoteright}shift{\textquoteright} between pathogenic clones belonging to the same O1 serogroup, but from significantly different phyletic lineages within the CG clade. In contrast, transition among clones during the present 7th pandemic period can be characterized as a {\textquoteright}drift{\textquoteright} between clones, differentiated mainly by varying composition of laterally transferred genomic islands, resulting in emergence of variants, exemplified by V.cholerae serogroup O139 and V.cholerae O1 El Tor hybrid clones that produce cholera toxin of classical biotype. Based on the comprehensive comparative genomics presented in this study it is concluded that V. cholerae undergoes extensive genetic recombination via lateral gene transfer, and, therefore, genome assortment, not serogroup, should be used to define pathogenic V. 
cholerae clones.}, keywords = {59, CHOLERA, genes, Genetics, GENOTYPE, ISLANDS, ORIGIN, PHENOTYPE, PUBLIC HEALTH, recombination, STRAINS, Toxins}, author = {Brettin, Thomas S. and Bruce, David C. and Challacombe, Jean F. and Detter, John C. and Han, Cliff S. and Munik, A. C. and Chertkov, Olga and Meincke, Linda and Saunders, Elizabeth and Choi, Seon Y. and Haley, Bradd J. and Taviani, Elisa and Jeon, Yoon-Seong and Kim, Dong Wook and Lee, Jae-Hak and Walters, Ronald A. and Huq, Anwar and Rita R. Colwell} } @article {49646, title = {The genome of the blood fluke Schistosoma mansoni.}, journal = {Nature}, volume = {460}, year = {2009}, month = {2009 Jul 16}, pages = {352--358}, abstract = {

Schistosoma mansoni is responsible for the neglected tropical disease schistosomiasis that affects 210 million people in 76 countries. Here we present analysis of the 363 megabase nuclear genome of the blood fluke. It encodes at least 11,809 genes, with an unusual intron size distribution, and new families of micro-exon genes that undergo frequent alternative splicing. As the first sequenced flatworm, and a representative of the Lophotrochozoa, it offers insights into early events in the evolution of the animals, including the development of a body pattern with bilateral symmetry, and the development of tissues into organs. Our analysis has been informed by the need to find new drug targets. The deficits in lipid metabolism that make schistosomes dependent on the host are revealed, and the identification of membrane receptors, ion channels and more than 300 proteases provide new insights into the biology of the life cycle and new targets. Bioinformatics approaches have identified metabolic chokepoints, and a chemogenomic screen has pinpointed schistosome proteins for which existing drugs may be active. The information generated provides an invaluable resource for the research community to develop much needed new control tools for the treatment and eradication of this important and neglected disease.

}, keywords = {Animals, Biological Evolution, Exons, Genes, Helminth, Genome, Helminth, Host-Parasite Interactions, Introns, Molecular Sequence Data, Physical Chromosome Mapping, Schistosoma mansoni, Schistosomiasis mansoni}, issn = {1476-4687}, doi = {10.1038/nature08160}, author = {Berriman, Matthew and Haas, Brian J and LoVerde, Philip T and Wilson, R Alan and Dillon, Gary P and Cerqueira, Gustavo C and Mashiyama, Susan T and Al-Lazikani, Bissan and Andrade, Luiza F and Ashton, Peter D and Aslett, Martin A and Bartholomeu, Daniella C and Blandin, Ga{\"e}lle and Caffrey, Conor R and Coghlan, Avril and Coulson, Richard and Day, Tim A and Delcher, Art and DeMarco, Ricardo and Djikeng, Appolinaire and Eyre, Tina and Gamble, John A and Ghedin, Elodie and Gu, Yong and Hertz-Fowler, Christiane and Hirai, Hirohisha and Hirai, Yuriko and Houston, Robin and Ivens, Alasdair and Johnston, David A and Lacerda, Daniela and Macedo, Camila D and McVeigh, Paul and Ning, Zemin and Oliveira, Guilherme and Overington, John P and Parkhill, Julian and Pertea, Mihaela and Pierce, Raymond J and Protasio, Anna V and Quail, Michael A and Rajandream, Marie-Ad{\`e}le and Rogers, Jane and Sajid, Mohammed and Salzberg, Steven L and Stanke, Mario and Tivey, Adrian R and White, Owen and Williams, David L and Wortman, Jennifer and Wu, Wenjie and Zamanian, Mostafa and Zerlotini, Adhemar and Fraser-Liggett, Claire M and Barrell, Barclay G and El-Sayed, Najib M} } @article {49644, title = {Genomic organization and expression profile of the mucin-associated surface protein (masp) family of the human pathogen Trypanosoma cruzi.}, journal = {Nucleic Acids Res}, volume = {37}, year = {2009}, month = {2009 Jun}, pages = {3407-17}, abstract = {

A novel large multigene family was recently identified in the human pathogen Trypanosoma cruzi, causative agent of Chagas disease, and corresponds to approximately 6\% of the parasite diploid genome. The predicted gene products, mucin-associated surface proteins (MASPs), are characterized by highly conserved N- and C-terminal domains and a strikingly variable and repetitive central region. We report here an analysis of the genomic organization and expression profile of masp genes. Masps are not randomly distributed throughout the genome but instead are clustered with genes encoding mucin and other surface protein families. Masp transcripts vary in size, are preferentially expressed during the trypomastigote stage and contain highly conserved 5{\textquoteright} and 3{\textquoteright} untranslated regions. A sequence analysis of a trypomastigote cDNA library reveals the expression of multiple masp variants with a bias towards a particular masp subgroup. Immunofluorescence assays using antibodies generated against a MASP peptide reveals that the expression of particular MASPs at the cell membrane is limited to subsets of the parasite population. Western blots of phosphatidylinositol-specific phospholipase C (PI-PLC)-treated parasites suggest that MASP may be GPI-anchored and shed into the medium culture, thus contributing to the large repertoire of parasite polypeptides that are exposed to the host immune system.

}, keywords = {3{\textquoteright} Flanking Region, 5{\textquoteright} Flanking Region, Amino Acid Sequence, Animals, Base Sequence, Conserved Sequence, Gene Expression Profiling, Genes, Protozoan, Genome, Protozoan, Membrane Proteins, Molecular Sequence Data, Mucins, Multigene Family, Protozoan Proteins, RNA, Messenger, Trypanosoma cruzi}, issn = {1362-4962}, doi = {10.1093/nar/gkp172}, author = {Bartholomeu, Daniella C and Cerqueira, Gustavo C and Le{\~a}o, Ana Carolina A and daRocha, Wanderson D and Pais, Fabiano S and Macedo, Camila and Djikeng, Appolinaire and Teixeira, Santuza M R and El-Sayed, Najib M} } @article {49781, title = {InterPro: the integrative protein signature database.}, journal = {Nucleic Acids Res}, volume = {37}, year = {2009}, month = {2009 Jan}, pages = {D211-5}, abstract = {

The InterPro database (http://www.ebi.ac.uk/interpro/) integrates together predictive models or {\textquoteright}signatures{\textquoteright} representing protein domains, families and functional sites from multiple, diverse source databases: Gene3D, PANTHER, Pfam, PIRSF, PRINTS, ProDom, PROSITE, SMART, SUPERFAMILY and TIGRFAMs. Integration is performed manually and approximately half of the total approximately 58,000 signatures available in the source databases belong to an InterPro entry. Recently, we have started to also display the remaining un-integrated signatures via our web interface. Other developments include the provision of non-signature data, such as structural data, in new XML files on our FTP site, as well as the inclusion of matchless UniProtKB proteins in the existing match XML files. The web interface has been extended and now links out to the ADAN predicted protein-protein interaction database and the SPICE and Dasty viewers. The latest public release (v18.0) covers 79.8\% of UniProtKB (v14.1) and consists of 16 549 entries. InterPro data may be accessed either via the web address above, via web services, by downloading files by anonymous FTP or by using the InterProScan search software (http://www.ebi.ac.uk/Tools/InterProScan/).

}, keywords = {Databases, Protein, Proteins, Sequence Analysis, Protein, Systems Integration}, issn = {1362-4962}, doi = {10.1093/nar/gkn785}, author = {Hunter, Sarah and Apweiler, Rolf and Attwood, Teresa K and Bairoch, Amos and Bateman, Alex and Binns, David and Bork, Peer and Das, Ujjwal and Daugherty, Louise and Duquenne, Lauranne and Finn, Robert D and Gough, Julian and Haft, Daniel and Hulo, Nicolas and Kahn, Daniel and Kelly, Elizabeth and Laugraud, Aur{\'e}lie and Letunic, Ivica and Lonsdale, David and Lopez, Rodrigo and Madera, Martin and Maslen, John and McAnulla, Craig and McDowall, Jennifer and Mistry, Jaina and Mitchell, Alex and Mulder, Nicola and Natale, Darren and Orengo, Christine and Quinn, Antony F and Selengut, Jeremy D and Sigrist, Christian J A and Thimma, Manjula and Thomas, Paul D and Valentin, Franck and Wilson, Derek and Wu, Cathy H and Yeats, Corin} } @article {38353, title = {InterPro: the integrative protein signature database}, journal = {Nucleic Acids Research}, volume = {37}, year = {2009}, note = {http://www.ncbi.nlm.nih.gov/pubmed/18940856?dopt=Abstract}, doi = {10.1093/nar/gkn785}, abstract = {The InterPro database (http://www.ebi.ac.uk/interpro/) integrates together predictive models or {\textquoteright}signatures{\textquoteright} representing protein domains, families and functional sites from multiple, diverse source databases: Gene3D, PANTHER, Pfam, PIRSF, PRINTS, ProDom, PROSITE, SMART, SUPERFAMILY and TIGRFAMs. Integration is performed manually and approximately half of the total approximately 58,000 signatures available in the source databases belong to an InterPro entry. Recently, we have started to also display the remaining un-integrated signatures via our web interface. Other developments include the provision of non-signature data, such as structural data, in new XML files on our FTP site, as well as the inclusion of matchless UniProtKB proteins in the existing match XML files. 
The web interface has been extended and now links out to the ADAN predicted protein-protein interaction database and the SPICE and Dasty viewers. The latest public release (v18.0) covers 79.8\% of UniProtKB (v14.1) and consists of 16 549 entries. InterPro data may be accessed either via the web address above, via web services, by downloading files by anonymous FTP or by using the InterProScan search software (http://www.ebi.ac.uk/Tools/InterProScan/).}, keywords = {Databases, Protein, Proteins, Sequence Analysis, Protein, Systems Integration}, author = {Hunter, Sarah and Apweiler, Rolf and Attwood, Teresa K. and Bairoch, Amos and Bateman, Alex and Binns, David and Bork, Peer and Das, Ujjwal and Daugherty, Louise and Duquenne, Lauranne and Finn, Robert D. and Gough, Julian and Haft, Daniel and Hulo, Nicolas and Kahn, Daniel and Kelly, Elizabeth and Laugraud, Aur{\'e}lie and Letunic, Ivica and Lonsdale, David and Lopez, Rodrigo and Madera, Martin and Maslen, John and McAnulla, Craig and McDowall, Jennifer and Mistry, Jaina and Mitchell, Alex and Mulder, Nicola and Natale, Darren and Orengo, Christine and Quinn, Antony F. and J. Selengut and Sigrist, Christian J. A. and Thimma, Manjula and Thomas, Paul D. and Valentin, Franck and Wilson, Derek and Wu, Cathy H. and Yeats, Corin} } @article {49559, title = {Measuring differential gene expression by short read sequencing: quantitative comparison to 2-channel gene expression microarrays}, journal = {BMC Genomics}, volume = {10}, year = {2009}, month = {Jan-01-2009}, pages = {221}, issn = {1471-2164}, doi = {10.1186/1471-2164-10-221}, url = {http://www.biomedcentral.com/1471-2164/10/221}, author = {Bloom, Joshua S and Khan, Zia and Kruglyak, Leonid and Singh, Mona and Caudy, Amy A} } @article {49749, title = {Measuring differential gene expression by short read sequencing: quantitative comparison to 2-channel gene expression microarrays.}, journal = {BMC Genomics}, volume = {10}, year = {2009}, month = {2009}, pages = {221}, abstract = {

BACKGROUND: High-throughput cDNA synthesis and sequencing of poly(A)-enriched RNA is rapidly emerging as a technology competing to replace microarrays as a quantitative platform for measuring gene expression.

RESULTS: Consequently, we compared full length cDNA sequencing to 2-channel gene expression microarrays in the context of measuring differential gene expression. Because of its comparable cost to a gene expression microarray, our study focused on the data obtainable from a single lane of an Illumina 1 G sequencer. We compared sequencing data to a highly replicated microarray experiment profiling two divergent strains of S. cerevisiae.

CONCLUSION: Using a large number of quantitative PCR (qPCR) assays, more than previous studies, we found that neither technology is decisively better at measuring differential gene expression. Further, we report sequencing results from a diploid hybrid of two strains of S. cerevisiae that indicate full length cDNA sequencing can discover heterozygosity and measure quantitative allele-specific expression simultaneously.

}, keywords = {algorithms, DNA, Complementary, DNA, Fungal, Gene Expression Profiling, Oligonucleotide Array Sequence Analysis, Saccharomyces cerevisiae, sequence alignment, Sequence Analysis, DNA}, issn = {1471-2164}, doi = {10.1186/1471-2164-10-221}, author = {Bloom, Joshua S and Khan, Zia and Kruglyak, Leonid and Singh, Mona and Caudy, Amy A} } @article {38379, title = {Microbial oceanography in a sea of opportunity}, journal = {Nature}, volume = {459}, year = {2009}, doi = {10.1038/nature08056}, abstract = {Plankton use solar energy to drive the nutrient cycles that make the planet habitable for larger organisms. We can now explore the diversity and functions of plankton using genomics, revealing the gene repertoires associated with survival in the oceans. Such studies will help us to appreciate the sensitivity of ocean systems and of the ocean{\textquoteright}s response to climate change, improving the predictive power of climate models.}, keywords = {Astronomy, astrophysics, Biochemistry, Bioinformatics, Biology, biotechnology, cancer, cell cycle, cell signalling, climate change, Computational Biology, development, developmental biology, DNA, drug discovery, earth science, ecology, environmental science, Evolution, evolutionary biology, functional genomics, Genetics, Genomics, geophysics, immunology, interdisciplinary science, life, marine biology, materials science, medical research, medicine, metabolomics, molecular biology, molecular interactions, nanotechnology, Nature, neurobiology, neuroscience, palaeobiology, pharmacology, Physics, proteomics, quantum physics, RNA, Science, science news, science policy, signal transduction, structural biology, systems biology, transcriptomics}, issn = {0028-0836}, author = {Bowler, Chris and Karl, David M. and Rita R. Colwell} } @article {38385, title = {Model-based quality assessment and base-calling for second-generation sequencing data}, journal = {Johns Hopkins University, Dept. 
of Biostatistics Working Papers}, year = {2009}, author = {Irizarry, R. A. and Corrada Bravo, H{\'e}ctor} } @article {38391, title = {Motifs and cis-regulatory modules mediating the expression of genes co-expressed in presynaptic neurons}, journal = {Genome Biology}, volume = {10}, year = {2009}, doi = {10.1186/gb-2009-10-7-r72}, abstract = {Hundreds of proteins modulate neurotransmitter release and synaptic plasticity during neuronal development and in response to synaptic activity. The expression of genes in the pre- and post-synaptic neurons is under stringent spatio-temporal control, but the mechanism underlying the neuronal expression of these genes remains largely unknown.}, issn = {1465-6906}, author = {Liu, Rui and Sridhar Hannenhalli and Bucan, Maja} } @article {38404, title = {New records of phytoplankton for Bangladesh. 9. Some rare and a new species}, journal = {Bangladesh Journal of Plant Taxonomy}, volume = {16}, year = {2009}, doi = {10.3329/bjpt.v16i1.2734}, abstract = {Ten taxa belonging to Chlorophyceae, Cyanophyceae, Bacillariophyceae and Euglenophyceae, and one with an uncertain taxonomic position have been described in this paper. Of these, 10 taxa have been found to be globally rare and new records for Bangladesh, whereas Strombomonas islamii Khondker sp. nov. has been described as new to science.}, issn = {1028-2092}, author = {Khondker, Moniruzzaman and Bhuiyan, Rauf Ahmed and Yeasmin, Jenat and Alam, Munirul and Sack, R. Bradley and Huq, Anwar and Rita R. Colwell} } @article {38432, title = {A phylogenetic mixture model for the evolution of gene expression}, journal = {Molecular Biology and Evolution}, volume = {26}, year = {2009}, author = {Eng, K. H. 
and Corrada Bravo, H{\'e}ctor and Keles, S.} } @article {49558, title = {A practical algorithm for finding maximal exact matches in large sequence datasets using sparse suffix arrays}, journal = {Bioinformatics}, volume = {25}, year = {2009}, month = {Jan-07-2009}, pages = {1609--1616}, issn = {1367-4803}, doi = {10.1093/bioinformatics/btp275}, url = {http://bioinformatics.oxfordjournals.org/cgi/doi/10.1093/bioinformatics/btp275}, author = {Khan, Z. and Bloom, J. S. and Kruglyak, L. and Singh, M.} } @article {49748, title = {A practical algorithm for finding maximal exact matches in large sequence datasets using sparse suffix arrays.}, journal = {Bioinformatics}, volume = {25}, year = {2009}, month = {2009 Jul 1}, pages = {1609--1616}, abstract = {

MOTIVATION: High-throughput sequencing technologies place ever increasing demands on existing algorithms for sequence analysis. Algorithms for computing maximal exact matches (MEMs) between sequences appear in two contexts where high-throughput sequencing will vastly increase the volume of sequence data: (i) seeding alignments of high-throughput reads for genome assembly and (ii) designating anchor points for genome-genome comparisons.

RESULTS: We introduce a new algorithm for finding MEMs. The algorithm leverages a sparse suffix array (SA), a text index that stores every K-th position of the text. In contrast to a full text index that stores every position of the text, a sparse SA occupies much less memory. Even though we use a sparse index, the output of our algorithm is the same as a full text index algorithm as long as the space between the indexed suffixes is not greater than a minimum length of a MEM. By relying on partial matches and additional text scanning between indexed positions, the algorithm trades memory for extra computation. The reduced memory usage makes it possible to determine MEMs between significantly longer sequences.

AVAILABILITY: Source code for the algorithm is available under a BSD open source license at http://compbio.cs.princeton.edu/mems. The implementation can serve as a drop-in replacement for the MEMs algorithm in MUMmer 3.

}, keywords = {algorithms, Base Sequence, Genomics, sequence alignment, Sequence Analysis, DNA}, issn = {1367-4811}, doi = {10.1093/bioinformatics/btp275}, author = {Khan, Zia and Bloom, Joshua S and Kruglyak, Leonid and Singh, Mona} }
@article {38446, title = {Predicting the distribution of Vibrio spp. in the Chesapeake Bay: a Vibrio cholerae case study}, journal = {EcoHealth}, volume = {6}, year = {2009}, doi = {10.1007/s10393-009-0273-6}, abstract = {Vibrio cholerae, the causative agent of cholera, is a naturally occurring inhabitant of the Chesapeake Bay and serves as a predictor for other clinically important vibrios, including Vibrio parahaemolyticus and Vibrio vulnificus. A system was constructed to predict the likelihood of the presence of V. cholerae in surface waters of the Chesapeake Bay, with the goal to provide forecasts of the occurrence of this and related pathogenic Vibrio spp. Prediction was achieved by driving an available multivariate empirical habitat model estimating the probability of V. cholerae within a range of temperatures and salinities in the Bay, with hydrodynamically generated predictions of ambient temperature and salinity. The experimental predictions provided both an improved understanding of the in situ variability of V. cholerae, including identification of potential hotspots of occurrence, and usefulness as an early warning system. With further development of the system, prediction of the probability of the occurrence of related pathogenic vibrios in the Chesapeake Bay, notably V. parahaemolyticus and V. vulnificus, will be possible, as well as its transport to any geographical location where sufficient relevant data are available.}, author = {Constantin de Magny, G. and Long, W. and Brown, C. W. and Hood, R. R. and Huq, A. and Murtugudde, R. and Colwell, Rita R.} }
@article {49557, title = {Protein quantification across hundreds of experimental conditions}, journal = {Proceedings of the National Academy of Sciences}, volume = {106}, year = {2009}, month = sep, pages = {15544--15548}, issn = {0027-8424}, doi = {10.1073/pnas.0904100106}, url = {http://www.pnas.org/cgi/doi/10.1073/pnas.0904100106}, author = {Khan, Z. and Bloom, J. S. and Garcia, B. A. and Singh, M. and Kruglyak, L.} }
@article {49747, title = {Protein quantification across hundreds of experimental conditions.}, journal = {Proc Natl Acad Sci U S A}, volume = {106}, year = {2009}, month = sep, pages = {15544--15548}, abstract = {

Quantitative studies of protein abundance rarely span more than a small number of experimental conditions and replicates. In contrast, quantitative studies of transcript abundance often span hundreds of experimental conditions and replicates. This situation exists, in part, because extracting quantitative data from large proteomics datasets is significantly more difficult than reading quantitative data from a gene expression microarray. To address this problem, we introduce two algorithmic advances in the processing of quantitative proteomics data. First, we use space-partitioning data structures to handle the large size of these datasets. Second, we introduce techniques that combine graph-theoretic algorithms with space-partitioning data structures to collect relative protein abundance data across hundreds of experimental conditions and replicates. We validate these algorithmic techniques by analyzing several datasets and computing both internal and external measures of quantification accuracy. We demonstrate the scalability of these techniques by applying them to a large dataset that comprises a total of 472 experimental conditions and replicates.

}, keywords = {algorithms, Animals, Automatic Data Processing, Chromatography, Liquid, Databases, Factual, Fungal Proteins, HUMANS, Isotopes, Mice, Proteins, proteomics, Tandem Mass Spectrometry}, issn = {1091-6490}, doi = {10.1073/pnas.0904100106}, author = {Khan, Zia and Bloom, Joshua S and Garcia, Benjamin A and Singh, Mona and Kruglyak, Leonid} } @article {38498, title = {Serogroup, Virulence, and Genetic Traits of Vibrio Parahaemolyticus in the Estuarine Ecosystem of Bangladesh}, journal = {Applied and Environmental MicrobiologyAppl. Environ. Microbiol.Applied and Environmental MicrobiologyAppl. Environ. Microbiol.}, volume = {75}, year = {2009}, type = {10.1128/AEM.00266-09}, abstract = {Forty-two strains of Vibrio parahaemolyticus were isolated from Bay of Bengal estuaries and, with two clinical strains, analyzed for virulence, phenotypic, and molecular traits. Serological analysis indicated O8, O3, O1, and K21 to be the major O and K serogroups, respectively, and O8:K21, O1:KUT, and O3:KUT to be predominant. The K antigen(s) was untypeable, and pandemic serogroup O3:K6 was not detected. The presence of genes toxR and tlh were confirmed by PCR in all but two strains, which also lacked toxR. A total of 18 (41\%) strains possessed the virulence gene encoding thermostable direct hemolysin (TDH), and one had the TDH-related hemolysin (trh) gene, but not tdh. Ten (23\%) strains exhibited Kanagawa phenomenon that surrogates virulence, of which six, including the two clinical strains, possessed tdh. Of the 18 tdh-positive strains, 17 (94\%), including the two clinical strains, had the seromarker O8:K21, one was O9:KUT, and the single trh-positive strain was O1:KUT. None had the group-specific or ORF8 pandemic marker gene. DNA fingerprinting employing pulsed-field gel electrophoresis (PFGE) of SfiI-digested DNA and cluster analysis showed divergence among the strains. 
Dendrograms constructed using PFGE (SfiI) images from a soft database, including those of pandemic and nonpandemic strains of diverse geographic origin, however, showed that local strains formed a cluster, i.e., {\textquotedblleft}clonal cluster,{\textquotedblright} as did pandemic strains of diverse origin. The demonstrated prevalence of tdh-positive and diarrheagenic serogroup O8:K21 strains in coastal villages of Bangladesh indicates a significant human health risk for inhabitants.}, isbn = {0099-2240, 1098-5336}, author = {Alam, Munirul and Chowdhury, Wasimul B. and Bhuiyan, N. A. and Islam, Atiqul and Hasan, Nur A. and Nair, G. Balakrish and Watanabe, H. and Siddique, A. K. and Huq, Anwar and Sack, R. Bradley and Akhter, M. Z. and Grim, Christopher J. and Kam, K. M. and Luey, C. K. Y. and Endtz, Hubert P. and Cravioto, Alejandro and Rita R. Colwell} } @article {38528, title = {Three genomes from the phylum Acidobacteria provide insight into the lifestyles of these microorganisms in soils}, journal = {Applied and environmental microbiologyApplied and environmental microbiology}, volume = {75}, year = {2009}, note = {http://www.ncbi.nlm.nih.gov/pubmed/19201974?dopt=Abstract}, type = {10.1128/AEM.02294-08}, abstract = {The complete genomes of three strains from the phylum Acidobacteria were compared. Phylogenetic analysis placed them as a unique phylum. They share genomic traits with members of the Proteobacteria, the Cyanobacteria, and the Fungi. The three strains appear to be versatile heterotrophs. Genomic and culture traits indicate the use of carbon sources that span simple sugars to more complex substrates such as hemicellulose, cellulose, and chitin. The genomes encode low-specificity major facilitator superfamily transporters and high-affinity ABC transporters for sugars, suggesting that they are best suited to low-nutrient conditions. They appear capable of nitrate and nitrite reduction but not N(2) fixation or denitrification. 
The genomes contained numerous genes that encode siderophore receptors, but no evidence of siderophore production was found, suggesting that they may obtain iron via interaction with other microorganisms. The presence of cellulose synthesis genes and a large class of novel high-molecular-weight excreted proteins suggests potential traits for desiccation resistance, biofilm formation, and/or contribution to soil structure. Polyketide synthase and macrolide glycosylation genes suggest the production of novel antimicrobial compounds. Genes that encode a variety of novel proteins were also identified. The abundance of acidobacteria in soils worldwide and the breadth of potential carbon use by the sequenced strains suggest significant and previously unrecognized contributions to the terrestrial carbon cycle. Combining our genomic evidence with available culture traits, we postulate that cells of these isolates are long-lived, divide slowly, exhibit slow metabolic rates under low-nutrient conditions, and are well equipped to tolerate fluctuations in soil hydration.}, keywords = {Anti-Bacterial Agents, bacteria, Biological Transport, Carbohydrate Metabolism, Cyanobacteria, DNA, Bacterial, Fungi, Genome, Bacterial, Macrolides, Molecular Sequence Data, Nitrogen, Phylogeny, Proteobacteria, Sequence Analysis, DNA, Sequence Homology, Soil Microbiology}, author = {Ward, Naomi L. and Challacombe, Jean F. and Janssen, Peter H. and Henrissat, Bernard and Coutinho, Pedro M. and Wu, Martin and Xie, Gary and Haft, Daniel H. and Sait, Michelle and Badger, Jonathan and Barabote, Ravi D. and Bradley, Brent and Brettin, Thomas S. and Brinkac, Lauren M. and Bruce, David and Creasy, Todd and Daugherty, Sean C. and Davidsen, Tanja M. and DeBoy, Robert T. and Detter, J. Chris and Dodson, Robert J. and Durkin, A. Scott and Ganapathy, Anuradha and Gwinn-Giglio, Michelle and Han, Cliff S. and Khouri, Hoda and Kiss, Hajnalka and Kothari, Sagar P. and Madupu, Ramana and Nelson, Karen E. 
and Nelson, William C. and Paulsen, Ian and Penn, Kevin and Ren, Qinghu and Rosovitz, M. J. and Selengut, Jeremy D. and Shrivastava, Susmita and Sullivan, Steven A. and Tapia, Roxanne and Thompson, L. Sue and Watkins, Kisha L. and Yang, Qi and Yu, Chunhui and Zafar, Nikhat and Zhou, Liwei and Kuske, Cheryl R.} }
@article {49780, title = {Three genomes from the phylum Acidobacteria provide insight into the lifestyles of these microorganisms in soils.}, journal = {Appl Environ Microbiol}, volume = {75}, year = {2009}, month = apr, pages = {2046--2056}, abstract = {

The complete genomes of three strains from the phylum Acidobacteria were compared. Phylogenetic analysis placed them as a unique phylum. They share genomic traits with members of the Proteobacteria, the Cyanobacteria, and the Fungi. The three strains appear to be versatile heterotrophs. Genomic and culture traits indicate the use of carbon sources that span simple sugars to more complex substrates such as hemicellulose, cellulose, and chitin. The genomes encode low-specificity major facilitator superfamily transporters and high-affinity ABC transporters for sugars, suggesting that they are best suited to low-nutrient conditions. They appear capable of nitrate and nitrite reduction but not N(2) fixation or denitrification. The genomes contained numerous genes that encode siderophore receptors, but no evidence of siderophore production was found, suggesting that they may obtain iron via interaction with other microorganisms. The presence of cellulose synthesis genes and a large class of novel high-molecular-weight excreted proteins suggests potential traits for desiccation resistance, biofilm formation, and/or contribution to soil structure. Polyketide synthase and macrolide glycosylation genes suggest the production of novel antimicrobial compounds. Genes that encode a variety of novel proteins were also identified. The abundance of acidobacteria in soils worldwide and the breadth of potential carbon use by the sequenced strains suggest significant and previously unrecognized contributions to the terrestrial carbon cycle. Combining our genomic evidence with available culture traits, we postulate that cells of these isolates are long-lived, divide slowly, exhibit slow metabolic rates under low-nutrient conditions, and are well equipped to tolerate fluctuations in soil hydration.

}, keywords = {Anti-Bacterial Agents, bacteria, Biological Transport, Carbohydrate Metabolism, Cyanobacteria, DNA, Bacterial, Fungi, Genome, Bacterial, Macrolides, Molecular Sequence Data, Nitrogen, Phylogeny, Proteobacteria, Sequence Analysis, DNA, Sequence Homology, Soil Microbiology}, issn = {1098-5336}, doi = {10.1128/AEM.02294-08}, author = {Ward, Naomi L and Challacombe, Jean F and Janssen, Peter H and Henrissat, Bernard and Coutinho, Pedro M and Wu, Martin and Xie, Gary and Haft, Daniel H and Sait, Michelle and Badger, Jonathan and Barabote, Ravi D and Bradley, Brent and Brettin, Thomas S and Brinkac, Lauren M and Bruce, David and Creasy, Todd and Daugherty, Sean C and Davidsen, Tanja M and DeBoy, Robert T and Detter, J Chris and Dodson, Robert J and Durkin, A Scott and Ganapathy, Anuradha and Gwinn-Giglio, Michelle and Han, Cliff S and Khouri, Hoda and Kiss, Hajnalka and Kothari, Sagar P and Madupu, Ramana and Nelson, Karen E and Nelson, William C and Paulsen, Ian and Penn, Kevin and Ren, Qinghu and Rosovitz, M J and Selengut, Jeremy D and Shrivastava, Susmita and Sullivan, Steven A and Tapia, Roxanne and Thompson, L Sue and Watkins, Kisha L and Yang, Qi and Yu, Chunhui and Zafar, Nikhat and Zhou, Liwei and Kuske, Cheryl R} } @article {38533, title = {Toward reconstructing the evolution of advanced moths and butterflies (Lepidoptera: Ditrysia): an initial molecular study}, journal = {BMC Evol BiolBMC Evol Biol}, volume = {9}, year = {2009}, type = {10.1186/1471-2148-9-280}, abstract = {BACKGROUND: In the mega-diverse insect order Lepidoptera (butterflies and moths; 165,000 described species), deeper relationships are little understood within the clade Ditrysia, to which 98\% of the species belong. 
To begin addressing this problem, we tested the ability of five protein-coding nuclear genes (6.7 kb total), and character subsets therein, to resolve relationships among 123 species representing 27 (of 33) superfamilies and 55 (of 100) families of Ditrysia under maximum likelihood analysis. RESULTS: Our trees show broad concordance with previous morphological hypotheses of ditrysian phylogeny, although most relationships among superfamilies are weakly supported. There are also notable surprises, such as a consistently closer relationship of Pyraloidea than of butterflies to most Macrolepidoptera. Monophyly is significantly rejected by one or more character sets for the putative clades Macrolepidoptera as currently defined (P < 0.05) and Macrolepidoptera excluding Noctuoidea and Bombycoidea sensu lato (P < or = 0.005), and nearly so for the superfamily Drepanoidea as currently defined (P < 0.08). Superfamilies are typically recovered or nearly so, but usually without strong support. Relationships within superfamilies and families, however, are often robustly resolved. We provide some of the first strong molecular evidence on deeper splits within Pyraloidea, Tortricoidea, Geometroidea, Noctuoidea and others.Separate analyses of mostly synonymous versus non-synonymous character sets revealed notable differences (though not strong conflict), including a marked influence of compositional heterogeneity on apparent signal in the third codon position (nt3). As available model partitioning methods cannot correct for this variation, we assessed overall phylogeny resolution through separate examination of trees from each character set. Exploration of "tree space" with GARLI, using grid computing, showed that hundreds of searches are typically needed to find the best-feasible phylogeny estimate for these data. 
CONCLUSION: Our results (a) corroborate the broad outlines of the current working phylogenetic hypothesis for Ditrysia, (b) demonstrate that some prominent features of that hypothesis, including the position of the butterflies, need revision, and (c) resolve the majority of family and subfamily relationships within superfamilies as thus far sampled. Much further gene and taxon sampling will be needed, however, to strongly resolve individual deeper nodes.}, author = {Regier, J. C. and Zwick, A. and Cummings, Michael P. and Kawahara, A. Y. and Cho, S. and Weller, S. and Roe, A. and Baixeras, J. and Brown, J. W. and Parr, C. and Davis, D. R. and Epstein, M. and Hallwachs, W. and Hausmann, A. and Janzen, D. H. and Kitching, I. J. and Solis, M. A. and Yen, S. H. and Bazinet, Adam L. and Mitter, C.} }
@article {38169, title = {The Complete Genome Sequence of Thermococcus Onnurineus NA1 Reveals a Mixed Heterotrophic and Carboxydotrophic Metabolism}, journal = {Journal of Bacteriology}, volume = {190}, year = {2008}, doi = {10.1128/JB.00746-08}, abstract = {Members of the genus Thermococcus, sulfur-reducing hyperthermophilic archaea, are ubiquitously present in various deep-sea hydrothermal vent systems and are considered to play a significant role in the microbial consortia. We present the complete genome sequence and feature analysis of Thermococcus onnurineus NA1 isolated from a deep-sea hydrothermal vent area, which reveal clues to its physiology. Based on results of genomic analysis, T. onnurineus NA1 possesses the metabolic pathways for organotrophic growth on peptides, amino acids, or sugars. More interesting was the discovery that the genome encoded unique proteins that are involved in carboxydotrophy to generate energy by oxidation of CO to CO2, thereby providing a mechanistic basis for growth with CO as a substrate. 
This lithotrophic feature in combination with carbon fixation via RuBisCO (ribulose 1,5-bisphosphate carboxylase/oxygenase) introduces a new strategy with a complementing energy supply for T. onnurineus NA1 potentially allowing it to cope with nutrient stress in the surrounding of hydrothermal vents, providing the first genomic evidence for the carboxydotrophy in Thermococcus.}, issn = {0021-9193, 1098-5530}, author = {Lee, Hyun Sook and Kang, Sung Gyun and Bae, Seung Seob and Lim, Jae Kyu and Cho, Yona and Kim, Yun Jae and Jeon, Jeong Ho and Cha, Sun-Shin and Kwon, Kae Kyoung and Kim, Hyung-Tae and Park, Cheol-Joo and Lee, Hee-Wook and Kim, Seung Il and Chun, Jongsik and Colwell, Rita R. and Kim, Sang-Jin and Lee, Jung-Hyun} }
@article {49676, title = {The draft genome of the transgenic tropical fruit tree papaya (Carica papaya Linnaeus).}, journal = {Nature}, volume = {452}, year = {2008}, month = apr, pages = {991--996}, abstract = {

Papaya, a fruit crop cultivated in tropical and subtropical regions, is known for its nutritional benefits and medicinal applications. Here we report a 3x draft genome sequence of {\textquoteright}SunUp{\textquoteright} papaya, the first commercial virus-resistant transgenic fruit tree to be sequenced. The papaya genome is three times the size of the Arabidopsis genome, but contains fewer genes, including significantly fewer disease-resistance gene analogues. Comparison of the five sequenced genomes suggests a minimal angiosperm gene set of 13,311. A lack of recent genome duplication, atypical of other angiosperm genomes sequenced so far, may account for the smaller papaya gene number in most functional groups. Nonetheless, striking amplifications in gene number within particular functional groups suggest roles in the evolution of tree-like habit, deposition and remobilization of starch reserves, attraction of seed dispersal agents, and adaptation to tropical daylengths. Transgenesis at three locations is closely associated with chloroplast insertions into the nuclear genome, and with topoisomerase I recognition sites. Papaya offers numerous advantages as a system for fruit-tree functional genomics, and this draft genome sequence provides the foundation for revealing the basis of Carica{\textquoteright}s distinguishing morpho-physiological, medicinal and nutritional properties.

}, keywords = {Arabidopsis, Carica, Contig Mapping, Databases, Genetic, Genes, Plant, Genome, Plant, Molecular Sequence Data, Plants, Genetically Modified, sequence alignment, Sequence Analysis, DNA, Transcription Factors, Tropical Climate}, issn = {1476-4687}, doi = {10.1038/nature06856}, author = {Ming, Ray and Hou, Shaobin and Feng, Yun and Yu, Qingyi and Dionne-Laporte, Alexandre and Saw, Jimmy H and Senin, Pavel and Wang, Wei and Ly, Benjamin V and Lewis, Kanako L T and Salzberg, Steven L and Feng, Lu and Jones, Meghan R and Skelton, Rachel L and Murray, Jan E and Chen, Cuixia and Qian, Wubin and Shen, Junguo and Du, Peng and Eustice, Moriah and Tong, Eric and Tang, Haibao and Lyons, Eric and Paull, Robert E and Michael, Todd P and Wall, Kerr and Rice, Danny W and Albert, Henrik and Wang, Ming-Li and Zhu, Yun J and Schatz, Michael and Nagarajan, Niranjan and Acob, Ricelle A and Guan, Peizhu and Blas, Andrea and Wai, Ching Man and Ackerman, Christine M and Ren, Yan and Liu, Chao and Wang, Jianmei and Wang, Jianping and Na, Jong-Kuk and Shakirov, Eugene V and Haas, Brian and Thimmapuram, Jyothi and Nelson, David and Wang, Xiyin and Bowers, John E and Gschwend, Andrea R and Delcher, Arthur L and Singh, Ratnesh and Suzuki, Jon Y and Tripathi, Savarni and Neupane, Kabi and Wei, Hairong and Irikura, Beth and Paidi, Maya and Jiang, Ning and Zhang, Wenli and Presting, Gernot and Windsor, Aaron and Navajas-P{\'e}rez, Rafael and Torres, Manuel J and Feltus, F Alex and Porter, Brad and Li, Yingjun and Burroughs, A Max and Luo, Ming-Cheng and Liu, Lei and Christopher, David A and Mount, Stephen M and Moore, Paul H and Sugimura, Tak and Jiang, Jiming and Schuler, Mary A and Friedman, Vikki and Mitchell-Olds, Thomas and Shippen, Dorothy E and dePamphilis, Claude W and Palmer, Jeffrey D and Freeling, Michael and Paterson, Andrew H and Gonsalves, Dennis and Wang, Lei and Alam, Maqsudul} } @article {38232, title = {Environmental signatures associated with cholera epidemics}, 
journal = {Proceedings of the National Academy of SciencesProceedings of the National Academy of Sciences}, volume = {105}, year = {2008}, type = {10.1073/pnas.0809654105}, abstract = {The causative agent of cholera, Vibrio cholerae, has been shown to be autochthonous to riverine, estuarine, and coastal waters along with its host, the copepod, a significant member of the zooplankton community. Temperature, salinity, rainfall and plankton have proven to be important factors in the ecology of V. cholerae, influencing the transmission of the disease in those regions of the world where the human population relies on untreated water as a source of drinking water. In this study, the pattern of cholera outbreaks during 1998{\textendash}2006 in Kolkata, India, and Matlab, Bangladesh, and the earth observation data were analyzed with the objective of developing a prediction model for cholera. Satellite sensors were used to measure chlorophyll a concentration (CHL) and sea surface temperature (SST). In addition, rainfall data were obtained from both satellite and in situ gauge measurements. From the analyses, a statistically significant relationship between the time series for cholera in Kolkata, India, and CHL and rainfall anomalies was determined. A statistically significant one month lag was observed between CHL anomaly and number of cholera cases in Matlab, Bangladesh. From the results of the study, it is concluded that ocean and climate patterns are useful predictors of cholera epidemics, with the dynamics of endemic cholera being related to climate and/or changes in the aquatic ecosystem. When the ecology of V. cholerae is considered in predictive models, a robust early warning system for cholera in endemic regions of the world can be developed for public health planning and decision making.ecology epidemiology microbiology remote sensing}, isbn = {0027-8424, 1091-6490}, author = {Constantin de Magny, G. and Murtugudde, R. and Sapiano, M. R. P. and Nizam, A. 
and Brown, C. W. and Busalacchi, A. J. and Yunus, M. and Nair, G. B. and Gil, A. I. and Lanata, C. F. and Rita R. Colwell} } @article {38233, title = {Environmental Vibrio spp., isolated in Mozambique, contain a polymorphic group of integrative conjugative elements and class 1 integrons}, journal = {FEMS Microbiology EcologyFEMS Microbiology Ecology}, volume = {64}, year = {2008}, type = {10.1111/j.1574-6941.2008.00455.x}, abstract = {Circulation of mobile genetic elements linked to drug resistance spread was studied in Vibrio strains isolated from surface urban water (river and sea) and shellfish samples in 2002{\textendash}2003 in Maputo, Mozambique. Class 1 integrons and integrating conjugative elements (ICE) were investigated by PCR and mating experiments in strains of major health interest: 10 Vibrio cholerae, six Vibrio parahaemolyticus, two Vibrio alginolyticus and one Vibrio fluvialis. Resistance to at least two antibiotics (predominantly β-lactams) was detected in all the strains, with additional resistances to sulfamethoxazole, spectinomycin, streptomycin and/or trimethoprim. Class 1 integrons contributed partially to the expression of drug resistance and were found in five isolates: four V. cholerae (blaP1 cassette, one strain also contained the dfrA15 cassette) and one V. alginolyticus (aadA2 cassette). ICEs, apparently devoid of resistance genes, were found in eight V. cholerae, three V. parahaemolyticus and one V. fluvialis isolates. A wide variability was observed by molecular characterization of ICEs. Five ICEs were included in the SXT/R391 family and seven ICEs were not classified. 
Our results indicate that the SXT/R391 family and related ICEs comprise a large class of polymorphic genetic elements widely circulating in environmental Vibrio strains in Africa, beside those evidently linked to drug resistance in clinical isolates.}, keywords = {ICE, integron, Mozambique, Vibrio}, issn = {1574-6941}, author = {Taviani, Elisa and Ceccarelli, Daniela and Lazaro, Nivalda and Bani, Stefania and Cappuccinelli, Piero and Colwell, Rita R. and Colombo, Mauro M.} }
@techreport {38236, title = {Estimating Tree-Structured Covariance Matrices via Mixed-Integer Programming with an Application to Phylogenetic Analysis of Gene Expression}, number = {1142}, year = {2008}, institution = {Department of Statistics, University of Wisconsin}, abstract = {We present a novel method for estimating tree-structured covariance matrices directly from observed continuous data. A representation of these classes of matrices as linear combinations of rank-one matrices indicating object partitions is used to formulate estimation as instances of well-studied numerical optimization problems. In particular, we present estimation based on projection where the covariance estimate is the nearest tree-structured covariance matrix to an observed sample covariance matrix. The problem is posed as a linear or quadratic mixed-integer program (MIP) where a setting of the integer variables in the MIP specifies a set of tree topologies of the structured covariance matrix. We solve these problems to optimality using efficient and robust existing MIP solvers. We also show that the least squares distance method of Fitch and Margoliash (1967) can be formulated as a quadratic MIP and thus solved exactly using existing, robust branch-and-bound MIP solvers. Our motivation for this method is the discovery of phylogenetic structure directly from gene expression data. Recent studies have adapted traditional phylogenetic comparative analysis methods to expression data. 
Typically, these methods first estimate a phylogenetic tree from genomic sequence data and subsequently analyze expression data. A covariance matrix constructed from the sequence-derived tree is used to correct for the lack of independence in phylogenetically related taxa. However, recent results have shown that the hierarchical structure of sequence-derived tree estimates are highly sensitive to the genomic region chosen to build them. To circumvent this difficulty, we propose a stable method for deriving tree-structured covariance matrices directly from gene expression as an exploratory step that can guide investigators in their modelling choices for these types of comparative analysis. We present a case study in phylogenetic analysis of expression in yeast gene families. Our method is able to corroborate the presence of phylogenetic structure in the response of expression in a subset of the gene families under particular experimental conditions. Additionally, when used in conjunction with transcription factor occupancy data, our methods show that alternative modelling choices should be considered when creating sequence-derived trees for this comparative analysis.}, author = {Corrada Bravo, H{\'e}ctor and Eng, K. H. and Keles, S. and Wahba, G. and Wright, S.} }
@incollection {38248, title = {Expanding the reach of Grid computing: combining Globus- and BOINC-based systems}, booktitle = {Grids for Bioinformatics and Computational Biology}, series = {Wiley Book Series on Bioinformatics: Computational Techniques and Engineering}, year = {2008}, publisher = {Wiley-Interscience}, organization = {Wiley-Interscience}, address = {Hoboken}, author = {Myers, D. S. and Bazinet, Adam L. and Cummings, Michael P.}, editor = {Talbi, E. G. and Zomaya, A. Y.} }
@incollection {38359, title = {The Lattice Project: a Grid research and production environment combining multiple Grid computing models}, booktitle = {Distributed \& Grid Computing {\textemdash} Science Made Transparent for Everyone. Principles, Applications and Supporting Communities}, year = {2008}, publisher = {Rechenkraft.net}, organization = {Rechenkraft.net}, address = {Marburg}, author = {Bazinet, Adam L. and Cummings, Michael P.}, editor = {Weber, M. H. W.} }
@article {38366, title = {Maternal depletion of CTCF reveals multiple functions during oocyte and preimplantation embryo development}, journal = {Development}, volume = {135}, year = {2008}, publisher = {The Company of Biologists Limited}, author = {Wan, L. B. and Pan, H. and Hannenhalli, Sridhar and Cheng, Y. and Ma, J. and Fedoriw, A. and Lobanenkov, V. and Latham, K. E. and Schultz, R. M. and Bartolomei, M. S.} }
@article {38383, title = {The minimum information about a genome sequence (MIGS) specification}, journal = {Nature Biotechnology}, volume = {26}, year = {2008}, url = {http://www.ncbi.nlm.nih.gov/pubmed/18464787?dopt=Abstract}, doi = {10.1038/nbt1360}, abstract = {With the quantity of genomic data increasing at an exponential rate, it is imperative that these data be captured electronically, in a standard format. Standardization activities must proceed within the auspices of open-access and international working bodies. To tackle the issues surrounding the development of better descriptions of genomic investigations, we have formed the Genomic Standards Consortium (GSC). 
Here, we introduce the minimum information about a genome sequence (MIGS) specification with the intent of promoting participation in its development and discussing the resources that will be required to develop improved mechanisms of metadata capture and exchange. As part of its wider goals, the GSC also supports improving the {\textquoteright}transparency{\textquoteright} of the information contained in existing genomic databases.}, keywords = {Chromosome mapping, Databases, Factual, information dissemination, Information Storage and Retrieval, Information Theory, Internationality}, author = {Field, Dawn and Garrity, George and Gray, Tanya and Morrison, Norman and J. Selengut and Sterk, Peter and Tatusova, Tatiana and Thomson, Nicholas and Allen, Michael J. and Angiuoli, Samuel V. and Ashburner, Michael and Axelrod, Nelson and Baldauf, Sandra and Ballard, Stuart and Boore, Jeffrey and Cochrane, Guy and Cole, James and Dawyndt, Peter and De Vos, Paul and DePamphilis, Claude and Edwards, Robert and Faruque, Nadeem and Feldman, Robert and Gilbert, Jack and Gilna, Paul and Gl{\"o}ckner, Frank Oliver and Goldstein, Philip and Guralnick, Robert and Haft, Dan and Hancock, David and Hermjakob, Henning and Hertz-Fowler, Christiane and Hugenholtz, Phil and Joint, Ian and Kagan, Leonid and Kane, Matthew and Kennedy, Jessie and Kowalchuk, George and Kottmann, Renzo and Kolker, Eugene and Kravitz, Saul and Kyrpides, Nikos and Leebens-Mack, Jim and Lewis, Suzanna E. and Li, Kelvin and Lister, Allyson L. 
and Lord, Phillip and Maltsev, Natalia and Markowitz, Victor and Martiny, Jennifer and Methe, Barbara and Mizrachi, Ilene and Moxon, Richard and Nelson, Karen and Parkhill, Julian and Proctor, Lita and White, Owen and Sansone, Susanna-Assunta and Spiers, Andrew and Stevens, Robert and Swift, Paul and Taylor, Chris and Tateno, Yoshio and Tett, Adrian and Turner, Sarah and Ussery, David and Vaughan, Bob and Ward, Naomi and Whetzel, Trish and San Gil, Ingio and Wilson, Gareth and Wipat, Anil} } @article {38398, title = {New records of phytoplankton for Bangladesh. 2. Cryptophyceae and Synurophyceae}, journal = {Bangladesh Journal of BotanyBangladesh Journal of Botany}, volume = {36}, year = {2008}, type = {10.3329/bjb.v36i1.1549}, abstract = {This study presents two species of Rhodomonas, four species of Chroomonas, six species of Cryptomonas and Cryptochrysis minor, Cyanomonas coeruleus, Chrysodidymus synuroideus and Mallomonas akrokomos. These species have been reported from some ponds of Mathbaria in Pirojpur and Bakerganj of Barisal district in Bangladesh.}, isbn = {0253-5416}, author = {Khondker, Moniruzzaman and Bhuiyan, Rauf Ahmed and Yeasmin, Jenat and Alam, Munirul and Sack, R. Bradley and Huq, Anwar and Rita R. Colwell} } @article {38401, title = {New records of phytoplankton for Bangladesh. 5. Euglena, Euglenocapsa}, journal = {Bangladesh Journal of Plant TaxonomyBangladesh Journal of Plant Taxonomy}, volume = {15}, year = {2008}, type = {10.3329/bjpt.v15i1.910}, abstract = {This study presents 20 taxa of the genus Euglena and one species of the rare euglenoid genus Euglenocapsa. All these taxa are reported for the first time from some pond ecosystems of Mathbaria in Pirojpur and Bakerganj of Barisal districts of Bangladesh.}, isbn = {1028-2092}, author = {Khondker, Moniruzzaman and Bhuiyan, Rauf Ahmed and Yeasmin, Jenat and Alam, Munirul and Sack, R. Bradley and Huq, Anwar and Rita R. 
Colwell} } @article {38402, title = {New records of phytoplankton for Bangladesh. 7. Phacus spp}, journal = {Bangladesh Journal of BotanyBangladesh Journal of Botany}, volume = {37}, year = {2008}, type = {10.3329/bjb.v37i1.1564}, abstract = {Thirteen species of Phacus hitherto not reported from Bangladesh have been described and illustrated. Freshwater ponds at southern districts of Pirojpur and Barisal revealed these presence of the species.}, isbn = {0253-5416}, author = {Khondker, Moniruzzaman and Bhuiyan, Rauf Ahmed and Yeasmin, Jenat and Alam, Munirul and Sack, R. Bradley and Huq, Anwar and Rita R. Colwell} } @article {38403, title = {New records of phytoplankton for Bangladesh. 8. Trachelomonas Ehr. (Euglenophyceae)}, journal = {Bangladesh Journal of BotanyBangladesh Journal of Botany}, volume = {37}, year = {2008}, type = {10.3329/bjb.v37i2.1719}, abstract = {Investigation of pelagic plankton communities from some freshwater ponds of Pirojpur and Barisal districts revealed the presence of 17 species under the genus Trachelomonas Ehr. for the first time in Bangladesh.}, isbn = {0253-5416}, author = {Khondker, Moniruzzaman and Bhuiyan, Rauf Ahmed and Yeasmin, Jenat and Alam, Munirul and Sack, R. Bradley and Huq, Anwar and Rita R. Colwell} } @article {38463, title = {Resolving arthropod phylogeny: exploring phylogenetic signal within 41 kb of protein-coding nuclear gene sequence}, journal = {Syst BiolSyst Biol}, volume = {57}, year = {2008}, type = {10.1080/10635150802570791}, abstract = {This study attempts to resolve relationships among and within the four basal arthropod lineages (Pancrustacea, Myriapoda, Euchelicerata, Pycnogonida) and to assess the widespread expectation that remaining phylogenetic problems will yield to increasing amounts of sequence data. Sixty-eight regions of 62 protein-coding nuclear genes (approximately 41 kilobases (kb)/taxon) were sequenced for 12 taxonomically diverse arthropod taxa and a tardigrade outgroup. 
Parsimony, likelihood, and Bayesian analyses of total nucleotide data generally strongly supported the monophyly of each of the basal lineages represented by more than one species. Other relationships within the Arthropoda were also supported, with support levels depending on method of analysis and inclusion/exclusion of synonymous changes. Removing third codon positions, where the assumption of base compositional homogeneity was rejected, altered the results. Removing the final class of synonymous mutations{\textendash}first codon positions encoding leucine and arginine, which were also compositionally heterogeneous{\textendash}yielded a data set that was consistent with a hypothesis of base compositional homogeneity. Furthermore, under such a data-exclusion regime, all 68 gene regions individually were consistent with base compositional homogeneity. Restricting likelihood analyses to nonsynonymous change recovered trees with strong support for the basal lineages but not for other groups that were variably supported with more inclusive data sets. In a further effort to increase phylogenetic signal, three types of data exploration were undertaken. (1) Individual genes were ranked by their average rate of nonsynonymous change, and three rate categories were assigned{\textendash}fast, intermediate, and slow. Then, bootstrap analysis of each gene was performed separately to see which taxonomic groups received strong support. Five taxonomic groups were strongly supported independently by two or more genes, and these genes mostly belonged to the slow or intermediate categories, whereas groups supported only by a single gene region tended to be from genes of the fast category, arguing that fast genes provide a less consistent signal. (2) A sensitivity analysis was performed in which increasing numbers of genes were excluded, beginning with the fastest. The number of strongly supported nodes increased up to a point and then decreased slightly. 
Recovery of Hexapoda required removal of fast genes. Support for Mandibulata (Pancrustacea + Myriapoda) also increased, at times to "strong" levels, with removal of the fastest genes. (3) Concordance selection was evaluated by clustering genes according to their ability to recover Pancrustacea, Euchelicerata, or Myriapoda and analyzing the three clusters separately. All clusters of genes recovered the three concordance clades but were at times inconsistent in the relationships recovered among and within these clades, a result that indicates that the a priori concordance criteria may bias phylogenetic signal in unexpected ways. In a further attempt to increase support of taxonomic relationships, sequence data from 49 additional taxa for three slow genes (i.e., EF-1 alpha, EF-2, and Pol II) were combined with the various 13-taxon data sets. The 62-taxon analyses supported the results of the 13-taxon analyses and provided increased support for additional pancrustacean clades found in an earlier analysis including only EF-1 alpha, EF-2, and Pol II.},
  author = {Regier, J. C. and Shultz, J. W. and Ganley, A. R. D. and Hussey, A. and Shi, D. and Ball, B. and Zwick, A. and Stajich, J. E. and Michael P. Cummings and Martin, J. W. and Cunningham, C. W.}
}

% 38472: `type` held a truncated DOI ("16/j.micinf..."); rebuilt with the 10.1016 prefix and moved to `doi`.
% NOTE(review): the 10.1016 prefix is inferred from the "16/j." pattern - confirm that the DOI resolves.
@article {38472,
  title = {Role of transposable elements in trypanosomatids},
  journal = {Microbes and Infection},
  volume = {10},
  year = {2008},
  doi = {10.1016/j.micinf.2008.02.009},
  abstract = {Transposable elements constitute 2-5\% of the genome content in trypanosomatid parasites. Some of them are involved in critical cellular functions, such as the regulation of gene expression in Leishmania spp. In this review, we highlight the remarkable role extinct transposable elements can play as the source of potential new functions.},
  keywords = {Cellular function, Domestication, Evolution, Gene expression, Leishmania, Regulation of mRNA stability, Retroposon, Transposable element, Trypanosoma},
  issn = {1286-4579},
  author = {Bringaud, Frederic and Ghedin, Elodie and Najib M. El-Sayed and Papadopoulou, Barbara}
}

% 38491: same truncated-DOI repair as 38472.
% NOTE(review): confirm that the DOI resolves.
@article {38491,
  title = {Sequence diversity and evolution of multigene families in {Trypanosoma} cruzi},
  journal = {Molecular and Biochemical Parasitology},
  volume = {157},
  year = {2008},
  doi = {10.1016/j.molbiopara.2007.10.002},
  abstract = {Several copies of genes belonging to three multigene families present in the genome of Trypanosoma cruzi were sequenced and comparatively analyzed across six different strains of the parasite belonging to the T. cruzi I lineage (Colombiana, Silvio X10 and Dm28c), the T. cruzi II lineage (Esmeraldo and JG) and a hybrid strain (CL Brener). For all three gene families analyzed, our results support the division in T. cruzi I and II lineages. Furthermore, in agreement with its hybrid nature, sequences derived from the CL Brener clone clustered together with T. cruzi II sequences as well as with a third group of sequences. Paralogous sequences encoding Amastin, an amastigote surface glycoprotein and TcAG48, an antigenic RNA binding protein, which are clustered in the parasite genome, present higher intragenomic variability in T. cruzi II and CL Brener strains, when compared to T. cruzi I strains. Paralogous sequences derived from the TcADC gene family, which encode various isoforms of adenylyl cyclases and are dispersed throughout the T. cruzi genome, exhibit similar degree of variability in all strains, except in the CL Brener strain, in which the sequences were more divergent. Several factors including mutation rates and gene conversion mechanisms, acting differently within the T. cruzi population, may contribute to create such distinct levels of sequence diversity in multigene families that are clustered in the T. cruzi genome.},
  keywords = {Amastin, Gene conversion, Genetic diversity, Multigene families, Trypanosoma cruzi},
  issn = {0166-6851},
  author = {Cerqueira, Gustavo C. and Bartholomeu, Daniella C. and DaRocha, Wanderson D. and Hou, Lihua and Freitas-Silva, Danielle M. and Machado, Carlos Renato and Najib M. El-Sayed and Teixeira, Santuza M. R.}
}

% 38499: the author list ended in "and others," - the stray comma stops BibTeX from treating it as the et-al token.
@article {38499,
  title = {Sex and age dimorphism of myocardial gene expression in nonischemic human heart failure},
  journal = {Circulation: Cardiovascular Genetics},
  volume = {1},
  year = {2008},
  publisher = {Am Heart Assoc},
  author = {Fermin, D. R. and Barac, A. and Lee, S. and Polster, S. P. and Sridhar Hannenhalli and Bergemann, T. L. and Grindle, S. and Dyke, D. B. and Pagani, F. and Miller, L. W. and others}
}

@article {38152,
  title = {Cofactor-independent phosphoglycerate mutase is an essential gene in procyclic form {Trypanosoma} brucei},
  journal = {Parasitology Research},
  volume = {100},
  year = {2007},
  author = {Djikeng, A. and Raverdy, S. and Foster, Jeffrey S. and Bartholomeu, D. and Zhang, Y. and Najib M. El-Sayed and Carlow, C.}
}

% 38242: removed `note = {[szlig]}`, which looks like residue of a mangled sharp-s character; DOI moved out of `type`.
@article {38242,
  title = {Evolution of genes and genomes on the {Drosophila} phylogeny},
  journal = {Nature},
  volume = {450},
  year = {2007},
  doi = {10.1038/nature06341},
  abstract = {Comparative analysis of multiple genomes in a phylogenetic framework dramatically improves the precision and sensitivity of evolutionary inference, producing more robust results than single-genome analyses can provide. 
The genomes of 12 Drosophila species, ten of which are presented here for the first time (sechellia, simulans, yakuba, erecta, ananassae, persimilis, willistoni, mojavensis, virilis and grimshawi), illustrate how rates and patterns of sequence divergence across taxa can illuminate evolutionary processes on a genomic scale. These genome sequences augment the formidable genetic tools that have made Drosophila melanogaster a pre-eminent model for animal genetics, and will further catalyse fundamental research on mechanisms of development, cell biology, genetics, disease, neurobiology, behaviour, physiology and evolution. Despite remarkable similarities among these Drosophila species, we identified many putatively non-neutral changes in protein-coding genes, non-coding RNA genes, and cis-regulatory regions. These may prove to underlie differences in the ecology and behaviour of these diverse species.},
  issn = {0028-0836},
  author = {Clark, Andrew G. and Eisen, Michael B. and Smith, Douglas R. and Bergman, Casey M. and Oliver, Brian and Markow, Therese A. and Kaufman, Thomas C. and Kellis, Manolis and Gelbart, William and Iyer, Venky N. and Pollard, Daniel A. and Sackton, Timothy B. and Larracuente, Amanda M. and Singh, Nadia D. and Abad, Jose P. and Abt, Dawn N. and Adryan, Boris and Aguade, Montserrat and Akashi, Hiroshi and Anderson, Wyatt W. and Aquadro, Charles F. and Ardell, David H. and Arguello, Roman and Artieri, Carlo G. and Barbash, Daniel A. and Barker, Daniel and Barsanti, Paolo and Batterham, Phil and Batzoglou, Serafim and Begun, Dave and Bhutkar, Arjun and Blanco, Enrico and Bosak, Stephanie A. and Bradley, Robert K. and Brand, Adrianne D. and Brent, Michael R. and Brooks, Angela N. and Brown, Randall H. and Butlin, Roger K. and Caggese, Corrado and Calvi, Brian R. and Carvalho, A. Bernardo de and Caspi, Anat and Castrezana, Sergio and Celniker, Susan E. and Chang, Jean L. 
and Chapple, Charles and Chatterji, Sourav and Chinwalla, Asif and Civetta, Alberto and Clifton, Sandra W. and Comeron, Josep M. and Costello, James C. and Coyne, Jerry A. and Daub, Jennifer and David, Robert G. and Delcher, Arthur L. and Delehaunty, Kim and Do, Chuong B. and Ebling, Heather and Edwards, Kevin and Eickbush, Thomas and Evans, Jay D. and Filipski, Alan and Findei{\ss}, Sven and Freyhult, Eva and Fulton, Lucinda and Fulton, Robert and Garcia, Ana C. L. and Gardiner, Anastasia and Garfield, David A. and Garvin, Barry E. and Gibson, Greg and Gilbert, Don and Gnerre, Sante and Godfrey, Jennifer and Good, Robert and Gotea, Valer and Gravely, Brenton and Greenberg, Anthony J. and Griffiths-Jones, Sam and Gross, Samuel and Guigo, Roderic and Gustafson, Erik A. and Haerty, Wilfried and Hahn, Matthew W. and Halligan, Daniel L. and Halpern, Aaron L. and Halter, Gillian M. and Han, Mira V. and Heger, Andreas and Hillier, LaDeana and Hinrichs, Angie S. and Holmes, Ian and Hoskins, Roger A. and Hubisz, Melissa J. and Hultmark, Dan and Huntley, Melanie A. and Jaffe, David B. and Jagadeeshan, Santosh and Jeck, William R. and Johnson, Justin and Jones, Corbin D. and Jordan, William C. and Karpen, Gary H. and Kataoka, Eiko and Keightley, Peter D. and Kheradpour, Pouya and Kirkness, Ewen F. and Koerich, Leonardo B. and Kristiansen, Karsten and Kudrna, Dave and Kulathinal, Rob J. and Kumar, Sudhir and Kwok, Roberta and Lander, Eric and Langley, Charles H. and Lapoint, Richard and Lazzaro, Brian P. and Lee, So-Jeong and Levesque, Lisa and Li, Ruiqiang and Lin, Chiao-Feng and Lin, Michael F. and Lindblad-Toh, Kerstin and Llopart, Ana and Long, Manyuan and Low, Lloyd and Lozovsky, Elena and Lu, Jian and Luo, Meizhong and Machado, Carlos A. and Makalowski, Wojciech and Marzo, Mar and Matsuda, Muneo and Matzkin, Luciano and McAllister, Bryant and McBride, Carolyn S. and McKernan, Brendan and McKernan, Kevin and Mendez-Lago, Maria and Minx, Patrick and Mollenhauer, Michael U. 
and Montooth, Kristi and Stephen M. Mount and Mu, Xu and Myers, Eugene and Negre, Barbara and Newfeld, Stuart and Nielsen, Rasmus and Noor, Mohamed A. F. and O{\textquoteright}Grady, Patrick and Pachter, Lior and Papaceit, Montserrat and Parisi, Matthew J. and Parisi, Michael and Parts, Leopold and Pedersen, Jakob S. and Pesole, Graziano and Phillippy, Adam M. and Ponting, Chris P. and M. Pop and Porcelli, Damiano and Powell, Jeffrey R. and Prohaska, Sonja and Pruitt, Kim and Puig, Marta and Quesneville, Hadi and Ram, Kristipati Ravi and Rand, David and Rasmussen, Matthew D. and Reed, Laura K. and Reenan, Robert and Reily, Amy and Remington, Karin A. and Rieger, Tania T. and Ritchie, Michael G. and Robin, Charles and Rogers, Yu-Hui and Rohde, Claudia and Rozas, Julio and Rubenfield, Marc J. and Ruiz, Alfredo and Russo, Susan and Salzberg, Steven L. and Sanchez-Gracia, Alejandro and Saranga, David J. and Sato, Hajime and Schaeffer, Stephen W. and Schatz, Michael C. and Schlenke, Todd and Schwartz, Russell and Segarra, Carmen and Singh, Rama S. and Sirot, Laura and Sirota, Marina and Sisneros, Nicholas B. and Smith, Chris D. and Smith, Temple F. and Spieth, John and Stage, Deborah E. and Stark, Alexander and Stephan, Wolfgang and Strausberg, Robert L. and Strempel, Sebastian and Sturgill, David and Sutton, Granger and Sutton, Granger G. and Tao, Wei and Teichmann, Sarah and Tobari, Yoshiko N. and Tomimura, Yoshihiko and Tsolas, Jason M. and Valente, Vera L. S. and Venter, Eli and Venter, J. Craig and Vicario, Saverio and Vieira, Filipe G. and Vilella, Albert J. and Villasante, Alfredo and Walenz, Brian and Wang, Jun and Wasserman, Marvin and Watts, Thomas and Wilson, Derek and Wilson, Richard K. and Wing, Rod A. and Wolfner, Mariana F. and Wong, Alex and Wong, Gane Ka-Shu and Wu, Chung-I and Wu, Gabriel and Yamamoto, Daisuke and Yang, Hsiao-Pei and Yang, Shiaw-Pyng and Yorke, James A. 
and Yoshida, Kiyohito and Zdobnov, Evgeny and Zhang, Peili and Zhang, Yu and Zimin, Aleksey V. and Baldwin, Jennifer and Abdouelleil, Amr and Abdulkadir, Jamal and Abebe, Adal and Abera, Brikti and Abreu, Justin and Acer, St Christophe and Aftuck, Lynne and Alexander, Allen and An, Peter and Anderson, Erica and Anderson, Scott and Arachi, Harindra and Azer, Marc and Bachantsang, Pasang and Barry, Andrew and Bayul, Tashi and Berlin, Aaron and Bessette, Daniel and Bloom, Toby and Blye, Jason and Boguslavskiy, Leonid and Bonnet, Claude and Boukhgalter, Boris and Bourzgui, Imane and Brown, Adam and Cahill, Patrick and Channer, Sheridon and Cheshatsang, Yama and Chuda, Lisa and Citroen, Mieke and Collymore, Alville and Cooke, Patrick and Costello, Maura and D{\textquoteright}Aco, Katie and Daza, Riza and Haan, Georgius De and DeGray, Stuart and DeMaso, Christina and Dhargay, Norbu and Dooley, Kimberly and Dooley, Erin and Doricent, Missole and Dorje, Passang and Dorjee, Kunsang and Dupes, Alan and Elong, Richard and Falk, Jill and Farina, Abderrahim and Faro, Susan and Ferguson, Diallo and Fisher, Sheila and Foley, Chelsea D. and Franke, Alicia and Friedrich, Dennis and Gadbois, Loryn and Gearin, Gary and Gearin, Christina R. and Giannoukos, Georgia and Goode, Tina and Graham, Joseph and Grandbois, Edward and Grewal, Sharleen and Gyaltsen, Kunsang and Hafez, Nabil and Hagos, Birhane and Hall, Jennifer and Henson, Charlotte and Hollinger, Andrew and Honan, Tracey and Huard, Monika D. and Hughes, Leanne and Hurhula, Brian and Husby, M. 
Erii and Kamat, Asha and Kanga, Ben and Kashin, Seva and Khazanovich, Dmitry and Kisner, Peter and Lance, Krista and Lara, Marcia and Lee, William and Lennon, Niall and Letendre, Frances and LeVine, Rosie and Lipovsky, Alex and Liu, Xiaohong and Liu, Jinlei and Liu, Shangtao and Lokyitsang, Tashi and Lokyitsang, Yeshi and Lubonja, Rakela and Lui, Annie and MacDonald, Pen and Magnisalis, Vasilia and Maru, Kebede and Matthews, Charles and McCusker, William and McDonough, Susan and Mehta, Teena and Meldrim, James and Meneus, Louis and Mihai, Oana and Mihalev, Atanas and Mihova, Tanya and Mittelman, Rachel and Mlenga, Valentine and Montmayeur, Anna and Mulrain, Leonidas and Navidi, Adam and Naylor, Jerome and Negash, Tamrat and Nguyen, Thu and Nguyen, Nga and Nicol, Robert and Norbu, Choe and Norbu, Nyima and Novod, Nathaniel and O{\textquoteright}Neill, Barry and Osman, Sahal and Markiewicz, Eva and Oyono, Otero L. and Patti, Christopher and Phunkhang, Pema and Pierre, Fritz and Priest, Margaret and Raghuraman, Sujaa and Rege, Filip and Reyes, Rebecca and Rise, Cecil and Rogov, Peter and Ross, Keenan and Ryan, Elizabeth and Settipalli, Sampath and Shea, Terry and Sherpa, Ngawang and Shi, Lu and Shih, Diana and Sparrow, Todd and Spaulding, Jessica and Stalker, John and Stange-Thomann, Nicole and Stavropoulos, Sharon and Stone, Catherine and Strader, Christopher and Tesfaye, Senait and Thomson, Talene and Thoulutsang, Yama and Thoulutsang, Dawa and Topham, Kerri and Topping, Ira and Tsamla, Tsamla and Vassiliev, Helen and Vo, Andy and Wangchuk, Tsering and Wangdi, Tsering and Weiand, Michael and Wilkinson, Jane and Wilson, Adam and Yadav, Shailendra and Young, Geneva and Yu, Qing and Zembek, Lisa and Zhong, Danni and Zimmer, Andrew and Zwirko, Zac and Jaffe, David B. 
and Alvarez, Pablo and Brockman, Will and Butler, Jonathan and Chin, CheeWhye and Gnerre, Sante and Grabherr, Manfred and Kleber, Michael and Mauceli, Evan and MacCallum, Iain}
}

% 49677: month now uses the standard macro, the page range is written out with a double hyphen,
% and the trailing period is dropped from the title (styles supply their own punctuation).
% NOTE(review): same title, journal, volume and year as entry 38242 directly above - likely a
% duplicate record under a second key; confirm before consolidating.
@article {49677,
  title = {Evolution of genes and genomes on the {Drosophila} phylogeny},
  journal = {Nature},
  volume = {450},
  year = {2007},
  month = nov,
  pages = {203--218},
  abstract = {

Comparative analysis of multiple genomes in a phylogenetic framework dramatically improves the precision and sensitivity of evolutionary inference, producing more robust results than single-genome analyses can provide. The genomes of 12 Drosophila species, ten of which are presented here for the first time (sechellia, simulans, yakuba, erecta, ananassae, persimilis, willistoni, mojavensis, virilis and grimshawi), illustrate how rates and patterns of sequence divergence across taxa can illuminate evolutionary processes on a genomic scale. These genome sequences augment the formidable genetic tools that have made Drosophila melanogaster a pre-eminent model for animal genetics, and will further catalyse fundamental research on mechanisms of development, cell biology, genetics, disease, neurobiology, behaviour, physiology and evolution. Despite remarkable similarities among these Drosophila species, we identified many putatively non-neutral changes in protein-coding genes, non-coding RNA genes, and cis-regulatory regions. These may prove to underlie differences in the ecology and behaviour of these diverse species.

}, keywords = {Animals, Codon, DNA Transposable Elements, Drosophila, Drosophila Proteins, Evolution, Molecular, Gene Order, Genes, Insect, Genome, Insect, Genome, Mitochondrial, Genomics, Immunity, Multigene Family, Phylogeny, Reproduction, RNA, Untranslated, sequence alignment, Sequence Analysis, DNA, Synteny}, issn = {1476-4687}, doi = {10.1038/nature06341}, author = {Clark, Andrew G and Eisen, Michael B and Smith, Douglas R and Bergman, Casey M and Oliver, Brian and Markow, Therese A and Kaufman, Thomas C and Kellis, Manolis and Gelbart, William and Iyer, Venky N and Pollard, Daniel A and Sackton, Timothy B and Larracuente, Amanda M and Singh, Nadia D and Abad, Jose P and Abt, Dawn N and Adryan, Boris and Aguade, Montserrat and Akashi, Hiroshi and Anderson, Wyatt W and Aquadro, Charles F and Ardell, David H and Arguello, Roman and Artieri, Carlo G and Barbash, Daniel A and Barker, Daniel and Barsanti, Paolo and Batterham, Phil and Batzoglou, Serafim and Begun, Dave and Bhutkar, Arjun and Blanco, Enrico and Bosak, Stephanie A and Bradley, Robert K and Brand, Adrianne D and Brent, Michael R and Brooks, Angela N and Brown, Randall H and Butlin, Roger K and Caggese, Corrado and Calvi, Brian R and Bernardo de Carvalho, A and Caspi, Anat and Castrezana, Sergio and Celniker, Susan E and Chang, Jean L and Chapple, Charles and Chatterji, Sourav and Chinwalla, Asif and Civetta, Alberto and Clifton, Sandra W and Comeron, Josep M and Costello, James C and Coyne, Jerry A and Daub, Jennifer and David, Robert G and Delcher, Arthur L and Delehaunty, Kim and Do, Chuong B and Ebling, Heather and Edwards, Kevin and Eickbush, Thomas and Evans, Jay D and Filipski, Alan and Findeiss, Sven and Freyhult, Eva and Fulton, Lucinda and Fulton, Robert and Garcia, Ana C L and Gardiner, Anastasia and Garfield, David A and Garvin, Barry E and Gibson, Greg and Gilbert, Don and Gnerre, Sante and Godfrey, Jennifer and Good, Robert and Gotea, Valer and Gravely, Brenton and Greenberg, Anthony J 
and Griffiths-Jones, Sam and Gross, Samuel and Guigo, Roderic and Gustafson, Erik A and Haerty, Wilfried and Hahn, Matthew W and Halligan, Daniel L and Halpern, Aaron L and Halter, Gillian M and Han, Mira V and Heger, Andreas and Hillier, LaDeana and Hinrichs, Angie S and Holmes, Ian and Hoskins, Roger A and Hubisz, Melissa J and Hultmark, Dan and Huntley, Melanie A and Jaffe, David B and Jagadeeshan, Santosh and Jeck, William R and Johnson, Justin and Jones, Corbin D and Jordan, William C and Karpen, Gary H and Kataoka, Eiko and Keightley, Peter D and Kheradpour, Pouya and Kirkness, Ewen F and Koerich, Leonardo B and Kristiansen, Karsten and Kudrna, Dave and Kulathinal, Rob J and Kumar, Sudhir and Kwok, Roberta and Lander, Eric and Langley, Charles H and Lapoint, Richard and Lazzaro, Brian P and Lee, So-Jeong and Levesque, Lisa and Li, Ruiqiang and Lin, Chiao-Feng and Lin, Michael F and Lindblad-Toh, Kerstin and Llopart, Ana and Long, Manyuan and Low, Lloyd and Lozovsky, Elena and Lu, Jian and Luo, Meizhong and Machado, Carlos A and Makalowski, Wojciech and Marzo, Mar and Matsuda, Muneo and Matzkin, Luciano and McAllister, Bryant and McBride, Carolyn S and McKernan, Brendan and McKernan, Kevin and Mendez-Lago, Maria and Minx, Patrick and Mollenhauer, Michael U and Montooth, Kristi and Mount, Stephen M and Mu, Xu and Myers, Eugene and Negre, Barbara and Newfeld, Stuart and Nielsen, Rasmus and Noor, Mohamed A F and O{\textquoteright}Grady, Patrick and Pachter, Lior and Papaceit, Montserrat and Parisi, Matthew J and Parisi, Michael and Parts, Leopold and Pedersen, Jakob S and Pesole, Graziano and Phillippy, Adam M and Ponting, Chris P and Pop, Mihai and Porcelli, Damiano and Powell, Jeffrey R and Prohaska, Sonja and Pruitt, Kim and Puig, Marta and Quesneville, Hadi and Ram, Kristipati Ravi and Rand, David and Rasmussen, Matthew D and Reed, Laura K and Reenan, Robert and Reily, Amy and Remington, Karin A and Rieger, Tania T and Ritchie, Michael G and Robin, Charles 
and Rogers, Yu-Hui and Rohde, Claudia and Rozas, Julio and Rubenfield, Marc J and Ruiz, Alfredo and Russo, Susan and Salzberg, Steven L and Sanchez-Gracia, Alejandro and Saranga, David J and Sato, Hajime and Schaeffer, Stephen W and Schatz, Michael C and Schlenke, Todd and Schwartz, Russell and Segarra, Carmen and Singh, Rama S and Sirot, Laura and Sirota, Marina and Sisneros, Nicholas B and Smith, Chris D and Smith, Temple F and Spieth, John and Stage, Deborah E and Stark, Alexander and Stephan, Wolfgang and Strausberg, Robert L and Strempel, Sebastian and Sturgill, David and Sutton, Granger and Sutton, Granger G and Tao, Wei and Teichmann, Sarah and Tobari, Yoshiko N and Tomimura, Yoshihiko and Tsolas, Jason M and Valente, Vera L S and Venter, Eli and Venter, J Craig and Vicario, Saverio and Vieira, Filipe G and Vilella, Albert J and Villasante, Alfredo and Walenz, Brian and Wang, Jun and Wasserman, Marvin and Watts, Thomas and Wilson, Derek and Wilson, Richard K and Wing, Rod A and Wolfner, Mariana F and Wong, Alex and Wong, Gane Ka-Shu and Wu, Chung-I and Wu, Gabriel and Yamamoto, Daisuke and Yang, Hsiao-Pei and Yang, Shiaw-Pyng and Yorke, James A and Yoshida, Kiyohito and Zdobnov, Evgeny and Zhang, Peili and Zhang, Yu and Zimin, Aleksey V and Baldwin, Jennifer and Abdouelleil, Amr and Abdulkadir, Jamal and Abebe, Adal and Abera, Brikti and Abreu, Justin and Acer, St Christophe and Aftuck, Lynne and Alexander, Allen and An, Peter and Anderson, Erica and Anderson, Scott and Arachi, Harindra and Azer, Marc and Bachantsang, Pasang and Barry, Andrew and Bayul, Tashi and Berlin, Aaron and Bessette, Daniel and Bloom, Toby and Blye, Jason and Boguslavskiy, Leonid and Bonnet, Claude and Boukhgalter, Boris and Bourzgui, Imane and Brown, Adam and Cahill, Patrick and Channer, Sheridon and Cheshatsang, Yama and Chuda, Lisa and Citroen, Mieke and Collymore, Alville and Cooke, Patrick and Costello, Maura and D{\textquoteright}Aco, Katie and Daza, Riza and De Haan, Georgius 
and DeGray, Stuart and DeMaso, Christina and Dhargay, Norbu and Dooley, Kimberly and Dooley, Erin and Doricent, Missole and Dorje, Passang and Dorjee, Kunsang and Dupes, Alan and Elong, Richard and Falk, Jill and Farina, Abderrahim and Faro, Susan and Ferguson, Diallo and Fisher, Sheila and Foley, Chelsea D and Franke, Alicia and Friedrich, Dennis and Gadbois, Loryn and Gearin, Gary and Gearin, Christina R and Giannoukos, Georgia and Goode, Tina and Graham, Joseph and Grandbois, Edward and Grewal, Sharleen and Gyaltsen, Kunsang and Hafez, Nabil and Hagos, Birhane and Hall, Jennifer and Henson, Charlotte and Hollinger, Andrew and Honan, Tracey and Huard, Monika D and Hughes, Leanne and Hurhula, Brian and Husby, M Erii and Kamat, Asha and Kanga, Ben and Kashin, Seva and Khazanovich, Dmitry and Kisner, Peter and Lance, Krista and Lara, Marcia and Lee, William and Lennon, Niall and Letendre, Frances and LeVine, Rosie and Lipovsky, Alex and Liu, Xiaohong and Liu, Jinlei and Liu, Shangtao and Lokyitsang, Tashi and Lokyitsang, Yeshi and Lubonja, Rakela and Lui, Annie and MacDonald, Pen and Magnisalis, Vasilia and Maru, Kebede and Matthews, Charles and McCusker, William and McDonough, Susan and Mehta, Teena and Meldrim, James and Meneus, Louis and Mihai, Oana and Mihalev, Atanas and Mihova, Tanya and Mittelman, Rachel and Mlenga, Valentine and Montmayeur, Anna and Mulrain, Leonidas and Navidi, Adam and Naylor, Jerome and Negash, Tamrat and Nguyen, Thu and Nguyen, Nga and Nicol, Robert and Norbu, Choe and Norbu, Nyima and Novod, Nathaniel and O{\textquoteright}Neill, Barry and Osman, Sahal and Markiewicz, Eva and Oyono, Otero L and Patti, Christopher and Phunkhang, Pema and Pierre, Fritz and Priest, Margaret and Raghuraman, Sujaa and Rege, Filip and Reyes, Rebecca and Rise, Cecil and Rogov, Peter and Ross, Keenan and Ryan, Elizabeth and Settipalli, Sampath and Shea, Terry and Sherpa, Ngawang and Shi, Lu and Shih, Diana and Sparrow, Todd and Spaulding, Jessica and Stalker, 
John and Stange-Thomann, Nicole and Stavropoulos, Sharon and Stone, Catherine and Strader, Christopher and Tesfaye, Senait and Thomson, Talene and Thoulutsang, Yama and Thoulutsang, Dawa and Topham, Kerri and Topping, Ira and Tsamla, Tsamla and Vassiliev, Helen and Vo, Andy and Wangchuk, Tsering and Wangdi, Tsering and Weiand, Michael and Wilkinson, Jane and Wilson, Adam and Yadav, Shailendra and Young, Geneva and Yu, Qing and Zembek, Lisa and Zhong, Danni and Zimmer, Andrew and Zwirko, Zac and Jaffe, David B and Alvarez, Pablo and Brockman, Will and Butler, Jonathan and Chin, CheeWhye and Gnerre, Sante and Grabherr, Manfred and Kleber, Michael and Mauceli, Evan and MacCallum, Iain} } @article {38273, title = {GATA and Nkx factors synergistically regulate tissue-specific gene expression and development in vivo}, journal = {DevelopmentDevelopment}, volume = {134}, year = {2007}, type = {10.1242/dev.02720}, abstract = {In vitro studies have suggested that members of the GATA and Nkx transcription factor families physically interact, and synergistically activate pulmonary epithelial- and cardiac-gene promoters. However, the relevance of this synergy has not been demonstrated in vivo. We show that Gata6-Titf1 (Gata6-Nkx2.1) double heterozygous (G6-Nkx DH) embryos and mice have severe defects in pulmonary epithelial differentiation and distal airway development, as well as reduced phospholipid production. The defects in G6-Nkx DH embryos and mice are similar to those observed in human neonates with respiratory distress syndromes, including bronchopulmonary dysplasia, and differential gene expression analysis reveals essential developmental pathways requiring synergistic regulation by both Gata6 and Titf1 (Nkx2.1). 
These studies indicate that Gata6 and Nkx2.1 act in a synergistic manner to direct pulmonary epithelial differentiation and development in vivo, providing direct evidence that interactions between these two transcription factor families are crucial for the development of the tissues in which they are co-expressed.}, author = {Zhang, Yuzhen and Rath, Nibedita and Sridhar Hannenhalli and Wang, Zhishan and Cappola, Thomas and Kimura, Shioko and Atochina-Vasserman, Elena and Lu, Min Min and Beers, Michael F. and Morrisey, Edward E.} } @article {38286, title = {Genome Analysis Linking Recent European and African Influenza (H5N1) Viruses}, journal = {Emerging Infectious DiseasesEmerg Infect DisEmerging Infectious DiseasesEmerg Infect Dis}, volume = {13}, year = {2007}, type = {10.3201/eid1305.070013}, abstract = {Although linked, these viruses are distinct from earlier outbreak strains., To better understand the ecology and epidemiology of the highly pathogenic avian influenza virus in its transcontinental spread, we sequenced and analyzed the complete genomes of 36 recent influenza A (H5N1) viruses collected from birds in Europe, northern Africa, and southeastern Asia. These sequences, among the first complete genomes of influenza (H5N1) viruses outside Asia, clearly depict the lineages now infecting wild and domestic birds in Europe and Africa and show the relationships among these isolates and other strains affecting both birds and humans. The isolates fall into 3 distinct lineages, 1 of which contains all known non-Asian isolates. This new Euro-African lineage, which was the cause of several recent (2006) fatal human infections in Egypt and Iraq, has been introduced at least 3 times into the European-African region and has split into 3 distinct, independently evolving sublineages. One isolate provides evidence that 2 of these sublineages have recently reassorted.}, isbn = {1080-6040}, author = {Salzberg, Steven L. 
and Kingsford, Carl and Cattoli, Giovanni and Spiro, David J. and Janies, Daniel A. and Aly, Mona Mehrez and Brown, Ian H. and Couacy-Hymann, Emmanuel and De Mia, Gian Mario and Dung, Do Huu and Guercio, Annalisa and Joannis, Tony and Ali, Ali Safar Maken and Osmani, Azizullah and Padalino, Iolanda and Saad, Magdi D. and Savi{\'c}, Vladimir and Sengamalay, Naomi A. and Yingst, Samuel and Zaborsky, Jennifer and Zorman-Rojs, Olga and Ghedin, Elodie and Capua, Ilaria}
}
@article{49782,
  title = {Genome sequence and identification of candidate vaccine antigens from the animal pathogen Dichelobacter nodosus.},
  journal = {Nat Biotechnol},
  volume = {25},
  year = {2007},
  month = may,
  pages = {569--575},
  abstract = {

Dichelobacter nodosus causes ovine footrot, a disease that leads to severe economic losses in the wool and meat industries. We sequenced its 1.4-Mb genome, the smallest known genome of an anaerobe. It differs markedly from small genomes of intracellular bacteria, retaining greater biosynthetic capabilities and lacking any evidence of extensive ongoing genome reduction. Comparative genomic microarray studies and bioinformatic analysis suggested that, despite its small size, almost 20\% of the genome is derived from lateral gene transfer. Most of these regions seem to be associated with virulence. Metabolic reconstruction indicated unsuspected capabilities, including carbohydrate utilization, electron transfer and several aerobic pathways. Global transcriptional profiling and bioinformatic analysis enabled the prediction of virulence factors and cell surface proteins. Screening of these proteins against ovine antisera identified eight immunogenic proteins that are candidate antigens for a cross-protective vaccine.

},
  keywords = {Animals, Antigens, Chromosome mapping, Dichelobacter nodosus, Foot Rot, Genome, Bacterial, Sequence Analysis, DNA},
  issn = {1087-0156},
  doi = {10.1038/nbt1302},
  author = {Myers, Garry S A and Parker, Dane and Al-Hasani, Keith and Kennan, Ruth M and Seemann, Torsten and Ren, Qinghu and Badger, Jonathan H and Selengut, Jeremy D and DeBoy, Robert T and Tettelin, Herv{\'e} and Boyce, John D and McCarl, Victoria P and Han, Xiaoyan and Nelson, William C and Madupu, Ramana and Mohamoud, Yasmin and Holley, Tara and Fedorova, Nadia and Khouri, Hoda and Bottomley, Steven P and Whittington, Richard J and Adler, Ben and Songer, J Glenn and Rood, Julian I and Paulsen, Ian T},
}
@comment{Entry 38296 below describes the same article as entry 49782 (same title, volume, year and DOI 10.1038/nbt1302); consider merging the two records.}
@article{38296,
  title = {Genome sequence and identification of candidate vaccine antigens from the animal pathogen Dichelobacter nodosus},
  journal = {Nature Biotechnology},
  volume = {25},
  year = {2007},
  url = {http://www.ncbi.nlm.nih.gov/pubmed/17468768?dopt=Abstract},
  doi = {10.1038/nbt1302},
  abstract = {Dichelobacter nodosus causes ovine footrot, a disease that leads to severe economic losses in the wool and meat industries. We sequenced its 1.4-Mb genome, the smallest known genome of an anaerobe. It differs markedly from small genomes of intracellular bacteria, retaining greater biosynthetic capabilities and lacking any evidence of extensive ongoing genome reduction. Comparative genomic microarray studies and bioinformatic analysis suggested that, despite its small size, almost 20\% of the genome is derived from lateral gene transfer. Most of these regions seem to be associated with virulence. Metabolic reconstruction indicated unsuspected capabilities, including carbohydrate utilization, electron transfer and several aerobic pathways. Global transcriptional profiling and bioinformatic analysis enabled the prediction of virulence factors and cell surface proteins.
Screening of these proteins against ovine antisera identified eight immunogenic proteins that are candidate antigens for a cross-protective vaccine.},
  keywords = {Animals, Antigens, Chromosome mapping, Dichelobacter nodosus, Foot Rot, Genome, Bacterial, Sequence Analysis, DNA},
  author = {Myers, Garry S. A. and Parker, Dane and Al-Hasani, Keith and Kennan, Ruth M. and Seemann, Torsten and Ren, Qinghu and Badger, Jonathan H. and Selengut, J. and DeBoy, Robert T. and Tettelin, Herv{\'e} and Boyce, John D. and McCarl, Victoria P. and Han, Xiaoyan and Nelson, William C. and Madupu, Ramana and Mohamoud, Yasmin and Holley, Tara and Fedorova, Nadia and Khouri, Hoda and Bottomley, Steven P. and Whittington, Richard J. and Adler, Ben and Songer, J. Glenn and Rood, Julian I. and Paulsen, Ian T.},
}
@article{38311,
  title = {Genome-wide expression profiling and bioinformatics analysis of diurnally regulated genes in the mouse prefrontal cortex},
  journal = {Genome Biol},
  volume = {8},
  year = {2007},
  author = {Yang, S. and Wang, K. and Valladares, O. and Hannenhalli, Sridhar and Bucan, M. and others},
}
@article{38323,
  title = {Grid Services Base Library: A high-level, procedural application programming interface for writing Globus-based Grid services},
  journal = {Future Generation Comp Syst},
  volume = {23},
  year = {2007},
  abstract = {The Grid Services Base Library (GSBL) is a procedural application programming interface (API) that abstracts many of the high-level functions performed by Globus Grid services, thus dramatically lowering the barriers to writing Grid services. The library has been extensively tested and used for computational biology research in a Globus Toolkit-based Grid system, in which no fewer than twenty Grid services written with this API are deployed.},
  author = {Bazinet, Adam L. and Myers, D. S. and Fuetsch, J. and Cummings, Michael P.},
}
@article{49642,
  title = {Members of a large retroposon family are determinants of post-transcriptional gene expression in Leishmania.},
  journal = {PLoS Pathog},
  volume = {3},
  year = {2007},
  month = sep,
  pages = {1291--1307},
  abstract = {

Trypanosomatids are unicellular protists that include the human pathogens Leishmania spp. (leishmaniasis), Trypanosoma brucei (sleeping sickness), and Trypanosoma cruzi (Chagas disease). Analysis of their recently completed genomes confirmed the presence of non-long-terminal repeat retrotransposons, also called retroposons. Using the 79-bp signature sequence common to all trypanosomatid retroposons as bait, we identified in the Leishmania major genome two new large families of small elements--LmSIDER1 (785 copies) and LmSIDER2 (1,073 copies)--that fulfill all the characteristics of extinct trypanosomatid retroposons. LmSIDERs are approximately 70 times more abundant in L. major compared to T. brucei and are found almost exclusively within the 3{\textquoteright}-untranslated regions (3{\textquoteright}UTRs) of L. major mRNAs. We provide experimental evidence that LmSIDER2 act as mRNA instability elements and that LmSIDER2-containing mRNAs are generally expressed at lower levels compared to the non-LmSIDER2 mRNAs. The considerable expansion of LmSIDERs within 3{\textquoteright}UTRs in an organism lacking transcriptional control and their role in regulating mRNA stability indicate that Leishmania have probably recycled these short retroposons to globally modulate the expression of a number of genes. To our knowledge, this is the first example in eukaryotes of the domestication and expansion of a family of mobile elements that have evolved to fulfill a critical cellular function.

},
  keywords = {3{\textquoteright} Untranslated Regions, Animals, Base Sequence, Biological Evolution, Down-Regulation, Gene Expression Regulation, Genome, Protozoan, Leishmania, Leishmania major, Molecular Sequence Data, Retroelements, RNA, Messenger, sequence alignment, Trypanosoma brucei brucei, Trypanosoma cruzi},
  issn = {1553-7374},
  doi = {10.1371/journal.ppat.0030136},
  author = {Bringaud, Frederic and M{\"u}ller, Michaela and Cerqueira, Gustavo Coutinho and Smith, Martin and Rochette, Annie and el-Sayed, Najib M A and Papadopoulou, Barbara and Ghedin, Elodie},
}
@incollection{49866,
  title = {Analyzing Patterns of Microbial Evolution Using the {Mauve} Genome Alignment System},
  booktitle = {Comparative Genomics},
  series = {Methods in Molecular Biology},
  volume = {396},
  year = {2007},
  pages = {135--152},
  publisher = {Humana Press},
  organization = {Humana Press},
  address = {Totowa, NJ},
  isbn = {978-1-934115-37-4},
  issn = {1064-3745},
  doi = {10.1007/978-1-59745-515-2_10},
  url = {http://link.springer.com/10.1007/978-1-59745-515-2_10},
  author = {Darling, Aaron E and Treangen, Todd and Messeguer, Xavier and Perna, Nicole T},
  editor = {Walker, John M. and Bergman, Nicholas H.},
}
@article{49783,
  title = {New developments in the InterPro database.},
  journal = {Nucleic Acids Res},
  volume = {35},
  year = {2007},
  month = jan,
  pages = {D224--D228},
  abstract = {

InterPro is an integrated resource for protein families, domains and functional sites, which integrates the following protein signature databases: PROSITE, PRINTS, ProDom, Pfam, SMART, TIGRFAMs, PIRSF, SUPERFAMILY, Gene3D and PANTHER. The latter two new member databases have been integrated since the last publication in this journal. There have been several new developments in InterPro, including an additional reading field, new database links, extensions to the web interface and additional match XML files. InterPro has always provided matches to UniProtKB proteins on the website and in the match XML file on the FTP site. Additional matches to proteins in UniParc (UniProt archive) are now available for download in the new match XML files only. The latest InterPro release (13.0) contains more than 13 000 entries, covering over 78\% of all proteins in UniProtKB. The database is available for text- and sequence-based searches via a webserver (http://www.ebi.ac.uk/interpro), and for download by anonymous FTP (ftp://ftp.ebi.ac.uk/pub/databases/interpro). The InterProScan search tool is now also available via a web service at http://www.ebi.ac.uk/Tools/webservices/WSInterProScan.html.

},
  keywords = {Databases, Protein, Internet, Protein Structure, Tertiary, Proteins, Sequence Analysis, Protein, Systems Integration, User-Computer Interface},
  issn = {1362-4962},
  doi = {10.1093/nar/gkl841},
  author = {Mulder, Nicola J and Apweiler, Rolf and Attwood, Teresa K and Bairoch, Amos and Bateman, Alex and Binns, David and Bork, Peer and Buillard, Virginie and Cerutti, Lorenzo and Copley, Richard and Courcelle, Emmanuel and Das, Ujjwal and Daugherty, Louise and Dibley, Mark and Finn, Robert and Fleischmann, Wolfgang and Gough, Julian and Haft, Daniel and Hulo, Nicolas and Hunter, Sarah and Kahn, Daniel and Kanapin, Alexander and Kejariwal, Anish and Labarga, Alberto and Langendijk-Genevaux, Petra S and Lonsdale, David and Lopez, Rodrigo and Letunic, Ivica and Madera, Martin and Maslen, John and McAnulla, Craig and McDowall, Jennifer and Mistry, Jaina and Mitchell, Alex and Nikolskaya, Anastasia N and Orchard, Sandra and Orengo, Christine and Petryszak, Robert and Selengut, Jeremy D and Sigrist, Christian J A and Thomas, Paul D and Valentin, Franck and Wilson, Derek and Wu, Cathy H and Yeats, Corin},
}
@article{38399,
  title = {New records of phytoplankton for Bangladesh. 3. Volvocales},
  journal = {Bangladesh Journal of Plant Taxonomy},
  volume = {14},
  year = {2007},
  doi = {10.3329/bjpt.v14i1.518},
  abstract = {This study presents 21 species of Chlamydomonas, four species of Carteria, two species of each of Nephroselmis, Pyramidomonas and Scherffelia, and Collodictyon triciliatum, Polytoma minus, Tetrachloridium ? allorgei and Tetraselmis cordiformis. These species have been reported from some ponds of Mathbaria of Pirojpur and Bakerganj of Barisal districts in Bangladesh.},
  issn = {1028-2092},
  author = {Khondker, Moniruzzaman and Bhuiyan, Rauf Ahmed and Yeasmin, Jenat and Alam, Munirul and Sack, R. Bradley and Huq, Anwar and Colwell, Rita R.},
}
@article{38400,
  title = {New records of phytoplankton for Bangladesh. 4. Chlorococcales},
  journal = {Bangladesh Journal of Plant Taxonomy},
  volume = {14},
  year = {2007},
  doi = {10.3329/bjpt.v14i2.528},
  abstract = {This study presents three species from each of Schroederia, Monoraphidium and Ankistrodesmus, two species and one variety of Dictyosphaerium, two varieties of Pediastrum, and Tetraedron arthrodesmiforme var. contorta, Chlorotetraedron polymorphum, Myrmecia aquatica, Oocystis tainoensis, Nephrocytium spirale, Kirchneriella irregularis, Coelastrum indicum and Scenedesmus similagineus. These taxa have been reported from some ponds of Mathbaria of Pirojpur and Bakerganj of Barisal Districts in Bangladesh.},
  issn = {1028-2092},
  author = {Khondker, Moniruzzaman and Bhuiyan, Rauf Ahmed and Yeasmin, Jenat and Alam, Munirul and Sack, R. Bradley and Huq, Anwar and Colwell, Rita R.},
}
@inproceedings{38416,
  title = {Optimizing {MPF} Queries},
  booktitle = {Proceedings of the 2007 {ACM} {SIGMOD} International Conference on Management of Data},
  year = {2007},
  doi = {10.1145/1247480.1247558},
  address = {Beijing, China},
  author = {Corrada Bravo, H{\'e}ctor and Ramakrishnan, Raghu},
}
@article{38481,
  title = {Schistosoma mansoni genome: Closing in on a final gene set},
  journal = {Experimental Parasitology},
  volume = {117},
  year = {2007},
  doi = {10.1016/j.exppara.2007.06.005},
  abstract = {The Schistosoma mansoni genome sequencing consortium has recently released the latest versions of the genome assembly as well as an automated preliminary gene structure annotation. The combined datasets constitute a vast resource for researchers to exploit in a variety of post-genomic studies with an emphasis of transcriptomic and proteomic tools. Here we present an innovative method used for combining diverse sources of evidence including ab initio gene predictions, protein and transcript sequence homologies, and cross-genome sequence homologies between S.
mansoni and Schistosoma japonicum to define a comprehensive list of protein-coding genes.},
  keywords = {Annotation, Gene finding, Genome, Schistosoma mansoni},
  issn = {0014-4894},
  author = {Haas, Brian J. and Berriman, Matthew and Hirai, Hirohisa and Cerqueira, Gustavo G. and LoVerde, Philip T. and El-Sayed, Najib M.},
}
@article{38563,
  title = {Variola virus topoisomerase: DNA cleavage specificity and distribution of sites in Poxvirus genomes},
  journal = {Virology},
  volume = {365},
  year = {2007},
  doi = {10.1016/j.virol.2007.02.037},
  abstract = {Topoisomerase enzymes regulate superhelical tension in DNA resulting from transcription, replication, repair, and other molecular transactions. Poxviruses encode an unusual type IB topoisomerase that acts only at conserved DNA sequences containing the core pentanucleotide 5{\textquoteright}-(T/C)CCTT-3{\textquoteright}. In X-ray structures of the variola virus topoisomerase bound to DNA, protein-DNA contacts were found to extend beyond the core pentanucleotide, indicating that the full recognition site has not yet been fully defined in functional studies. Here we report quantitation of DNA cleavage rates for an optimized 13~bp site and for all possible single base substitutions (40 total sites), with the goals of understanding the molecular mechanism of recognition and mapping topoisomerase sites in poxvirus genome sequences. The data allow a precise definition of enzyme-DNA interactions and the energetic contributions of each. We then used the resulting "action matrix" to show that favorable topoisomerase sites are distributed all along the length of poxvirus DNA sequences, consistent with a requirement for local release of superhelical tension in constrained topological domains. In orthopox genomes, an additional central cluster of sites was also evident. A negative correlation of predicted topoisomerase sites was seen relative to early terminators, but no correlation was seen with early or late promoters.
These data define the full variola virus topoisomerase recognition site and provide a new window on topoisomerase function in vivo.},
  keywords = {Annotation of topoisomerase sites, Sequence specific recognition, Topoisomerase IB, Variola virus},
  issn = {0042-6822},
  author = {Minkah, Nana and Hwang, Young and Perry, Kay and Van Duyne, Gregory D. and Hendrickson, Robert and Lefkowitz, Elliot J. and Hannenhalli, Sridhar and Bushman, Frederic D.},
}
@article{49641,
  title = {Analysis of fat body transcriptome from the adult tsetse fly, Glossina morsitans morsitans.},
  journal = {Insect Mol Biol},
  volume = {15},
  year = {2006},
  month = aug,
  pages = {411--424},
  abstract = {

Tsetse flies (Diptera: Glossinidia) are vectors of pathogenic African trypanosomes. To develop a foundation for tsetse physiology, a normalized expressed sequence tag (EST) library was constructed from fat body tissue of immune-stimulated Glossina morsitans morsitans. Analysis of 20,257 high-quality ESTs yielded 6372 unique genes comprised of 3059 tentative consensus (TC) sequences and 3313 singletons (available at http://aksoylab.yale.edu). We analysed the putative fat body transcriptome based on homology to other gene products with known functions available in the public domain. In particular, we describe the immune-related products, reproductive function related yolk proteins and milk-gland protein, iron metabolism regulating ferritins and transferrin, and tsetse{\textquoteright}s major energy source proline biosynthesis. Expression analysis of the three yolk proteins indicates that all are detected in females, while only the yolk protein with similarity to lipases, is expressed in males. Milk gland protein, apparently important for larval nutrition, however, is primarily synthesized by accessory milk gland tissue.

},
  keywords = {Adipose Tissue, Animals, Base Sequence, Computational Biology, DNA Primers, Egg Proteins, Expressed Sequence Tags, Female, Gene Expression Profiling, Insect Vectors, Male, Molecular Sequence Data, Reverse Transcriptase Polymerase Chain Reaction, Sequence Analysis, DNA, Sex Factors, Tsetse Flies},
  issn = {0962-1075},
  doi = {10.1111/j.1365-2583.2006.00649.x},
  author = {Attardo, G M and Strickler-Dinglasan, P and Perkin, S A H and Caler, E and Bonaldo, M F and Soares, M B and El-Sayeed, N and Aksoy, S},
}
@article{38159,
  title = {Comparative genomic evidence for a close relationship between the dimorphic prosthecate bacteria Hyphomonas neptunium and Caulobacter crescentus},
  journal = {Journal of Bacteriology},
  volume = {188},
  year = {2006},
  url = {http://www.ncbi.nlm.nih.gov/pubmed/16980487?dopt=Abstract},
  doi = {10.1128/JB.00111-06},
  abstract = {The dimorphic prosthecate bacteria (DPB) are alpha-proteobacteria that reproduce in an asymmetric manner rather than by binary fission and are of interest as simple models of development. Prior to this work, the only member of this group for which genome sequence was available was the model freshwater organism Caulobacter crescentus. Here we describe the genome sequence of Hyphomonas neptunium, a marine member of the DPB that differs from C. crescentus in that H. neptunium uses its stalk as a reproductive structure. Genome analysis indicates that this organism shares more genes with C. crescentus than it does with Silicibacter pomeroyi (a closer relative according to 16S rRNA phylogeny), that it relies upon a heterotrophic strategy utilizing a wide range of substrates, that its cell cycle is likely to be regulated in a similar manner to that of C. crescentus, and that the outer membrane complements of H. neptunium and C. crescentus are remarkably similar. H. neptunium swarmer cells are highly motile via a single polar flagellum.
With the exception of cheY and cheR, genes required for chemotaxis were absent in the H. neptunium genome. Consistent with this observation, H. neptunium swarmer cells did not respond to any chemotactic stimuli that were tested, which suggests that H. neptunium motility is a random dispersal mechanism for swarmer cells rather than a stimulus-controlled navigation system for locating specific environments. In addition to providing insights into bacterial development, the H. neptunium genome will provide an important resource for the study of other interesting biological processes including chromosome segregation, polar growth, and cell aging.},
  keywords = {Alphaproteobacteria, Bacterial Outer Membrane Proteins, Caulobacter crescentus, cell cycle, Chemotaxis, DNA, Bacterial, Flagella, Genome, Bacterial, Microbial Viability, Molecular Sequence Data, Movement, Sequence Analysis, DNA, Sequence Homology, signal transduction},
  author = {Badger, Jonathan H. and Hoover, Timothy R. and Brun, Yves V. and Weiner, Ronald M. and Laub, Michael T. and Alexandre, Gladys and Mr{\'a}zek, Jan and Ren, Qinghu and Paulsen, Ian T. and Nelson, Karen E. and Khouri, Hoda M. and Radune, Diana and Sosa, Julia and Dodson, Robert J. and Sullivan, Steven A. and Rosovitz, M. J. and Madupu, Ramana and Brinkac, Lauren M. and Durkin, A. Scott and Daugherty, Sean C. and Kothari, Sagar P. and Giglio, Michelle Gwinn and Zhou, Liwei and Haft, Daniel H. and Selengut, J. and Davidsen, Tanja M.
and Yang, Qi and Zafar, Nikhat and Ward, Naomi L.}
}
@article{38161,
  title = {Comparative genomics of emerging human ehrlichiosis agents},
  journal = {PLoS Genetics},
  volume = {2},
  year = {2006},
  url = {http://www.ncbi.nlm.nih.gov/pubmed/16482227?dopt=Abstract},
  doi = {10.1371/journal.pgen.0020021},
  abstract = {Anaplasma (formerly Ehrlichia) phagocytophilum, Ehrlichia chaffeensis, and Neorickettsia (formerly Ehrlichia) sennetsu are intracellular vector-borne pathogens that cause human ehrlichiosis, an emerging infectious disease. We present the complete genome sequences of these organisms along with comparisons to other organisms in the Rickettsiales order. Ehrlichia spp. and Anaplasma spp. display a unique large expansion of immunodominant outer membrane proteins facilitating antigenic variation. All Rickettsiales have a diminished ability to synthesize amino acids compared to their closest free-living relatives. Unlike members of the Rickettsiaceae family, these pathogenic Anaplasmataceae are capable of making all major vitamins, cofactors, and nucleotides, which could confer a beneficial role in the invertebrate vector or the vertebrate host. Further analysis identified proteins potentially involved in vacuole confinement of the Anaplasmataceae, a life cycle involving a hematophagous vector, vertebrate pathogenesis, human pathogenesis, and lack of transovarial transmission. These discoveries provide significant insights into the biology of these obligate intracellular pathogens.},
  keywords = {Animals, Biotin, DNA Repair, Ehrlichia, Ehrlichiosis, Genome, Genomics, HUMANS, Models, Biological, Phylogeny, Rickettsia, Ticks},
  author = {Dunning Hotopp, Julie C. and Lin, Mingqun and Madupu, Ramana and Crabtree, Jonathan and Angiuoli, Samuel V. and Eisen, Jonathan A. and Eisen, Jonathan and Seshadri, Rekha and Ren, Qinghu and Wu, Martin and Utterback, Teresa R.
and Smith, Shannon and Lewis, Matthew and Khouri, Hoda and Zhang, Chunbin and Niu, Hua and Lin, Quan and Ohashi, Norio and Zhi, Ning and Nelson, William and Brinkac, Lauren M. and Dodson, Robert J. and Rosovitz, M. J. and Sundaram, Jaideep and Daugherty, Sean C. and Davidsen, Tanja and Durkin, Anthony S. and Gwinn, Michelle and Haft, Daniel H. and Selengut, J. and Sullivan, Steven A. and Zafar, Nikhat and Zhou, Liwei and Benahmed, Faiza and Forberger, Heather and Halpin, Rebecca and Mulligan, Stephanie and Robinson, Jeffrey and White, Owen and Rikihisa, Yasuko and Tettelin, Herv{\'e}}
}
@article{49682,
  title = {Comprehensive analysis of alternative splicing in rice and comparative analyses with Arabidopsis.},
  journal = {BMC Genomics},
  volume = {7},
  year = {2006},
  pages = {327},
  abstract = {

BACKGROUND: Recently, genomic sequencing efforts were finished for Oryza sativa (cultivated rice) and Arabidopsis thaliana (Arabidopsis). Additionally, these two plant species have extensive cDNA and expressed sequence tag (EST) libraries. We employed the Program to Assemble Spliced Alignments (PASA) to identify and analyze alternatively spliced isoforms in both species.

RESULTS: A comprehensive analysis of alternative splicing was performed in rice that started with >1.1 million publicly available spliced ESTs and over 30,000 full length cDNAs in conjunction with the newly enhanced PASA software. A parallel analysis was performed with Arabidopsis to compare and ascertain potential differences between monocots and dicots. Alternative splicing is a widespread phenomenon (observed in greater than 30\% of the loci with transcript support) and we have described nine alternative splicing variations. While alternative splicing has the potential to create many RNA isoforms from a single locus, the majority of loci generate only two or three isoforms and transcript support indicates that these isoforms are generally not rare events. For the alternate donor (AD) and acceptor (AA) classes, the distance between the splice sites for the majority of events was found to be less than 50 basepairs (bp). In both species, the most frequent distance between AA is 3 bp, consistent with reports in mammalian systems. Conversely, the most frequent distance between AD is 4 bp in both plant species, as previously observed in mouse. Most alternative splicing variations are localized to the protein coding sequence and are predicted to significantly alter the coding sequence.

CONCLUSION: Alternative splicing is widespread in both rice and Arabidopsis and these species share many common features. Interestingly, alternative splicing may play a role beyond creating novel combinations of transcripts that expand the proteome. Many isoforms will presumably have negative consequences for protein structure and function, suggesting that their biological role involves post-transcriptional regulation of gene expression.

},
  keywords = {Alternative Splicing, Arabidopsis, DNA, Complementary, Expressed Sequence Tags, Oryza},
  issn = {1471-2164},
  doi = {10.1186/1471-2164-7-327},
  author = {Campbell, Matthew A and Haas, Brian J and Hamilton, John P and Mount, Stephen M and Buell, C Robin},
}
@incollection{38177,
  title = {Conservation Patterns in cis-Elements Reveal Compensatory Mutations},
  booktitle = {Comparative Genomics},
  series = {Lecture Notes in Computer Science},
  volume = {4205},
  year = {2006},
  publisher = {Springer Berlin / Heidelberg},
  organization = {Springer Berlin / Heidelberg},
  abstract = {Transcriptional regulation critically depends on proper interactions between transcription factors (TF) and their cognate DNA binding sites or cis elements. A better understanding and modelling of the TF-DNA interaction is an important area of research. The Positional Weight Matrix (PWM) is the most common model of TF-DNA binding and it presumes that the nucleotide preferences at individual positions within the binding site are independent. However, studies have shown that this independence assumption does not always hold. If the nucleotide preference at one position depends on the nucleotide at another position, a chance mutation at one position should exert selection pressures at the other position. By comparing the patterns of evolutionary conservation at individual positions within cis elements, here we show that positional dependence within binding sites is highly prevalent. We also show that dependent positions are more likely to be functional, as evidenced by a higher information content and higher conservation.
We discuss two examples{\textemdash}Elk-1 and SAP-1 where the inferred compensatory mutation is consistent with known TF-DNA crystal structure.},
  isbn = {978-3-540-44529-6},
  author = {Evans, Perry and Donahue, Greg and Hannenhalli, Sridhar},
  editor = {Bourque, Guillaume and El-Mabrouk, Nadia},
}
@article{38221,
  title = {Effect of transport at ambient temperature on detection and isolation of Vibrio cholerae from environmental samples},
  journal = {Applied and Environmental Microbiology},
  volume = {72},
  year = {2006},
  abstract = {It has long been assumed that prolonged holding of environmental samples at the ambient air temperature prior to bacteriological analysis is detrimental to isolation and detection of Vibrio cholerae, the causative agent of pandemic cholera. The present study was aimed at understanding the effect of transporting environmental samples at the ambient air temperature on isolation and enumeration of V. cholerae. For water and plankton samples held at ambient temperatures ranging from 31{\textdegree}C to 35{\textdegree}C for 20 h, the total counts did not increase significantly but the number of culturable V. cholerae increased significantly compared to samples processed within 1 h of collection, as measured by culture, acridine orange direct count, direct fluorescent-antibody-direct viable count (DFA-DVC), and multiplex PCR analyses. For total coliform counts, total bacterial counts, and DFA-DVC counts, the numbers did not increase significantly, but the culturable plate counts for V. cholerae increased significantly after samples were held at the ambient temperature during transport to the laboratory for analysis. An increase in the recovery of V. cholerae O1 and improved detection of V. cholerae O1 rfb and ctxA also occurred when samples were enriched after they were kept for 20 h at the ambient temperature during transport. Improved detection and isolation of toxigenic V.
cholerae from freshwater ecosystems can be achieved by holding samples at the ambient temperature, an observation that has significant implications for tracking this pathogen in diverse aquatic environments.},
  author = {Alam, M. and Sadique, A. and Bhuiyan, N. A. and Nair, G. B. and Siddique, A. K. and Sack, D. A. and Ahsan, S. and Huq, A. and Sack, R. B. and Colwell, Rita R. and others},
}
@article{38243,
  title = {Evolution of non-LTR retrotransposons in the trypanosomatid genomes: Leishmania major has lost the active elements},
  journal = {Molecular and Biochemical Parasitology},
  volume = {145},
  year = {2006},
  doi = {10.1016/j.molbiopara.2005.09.017},
  abstract = {The ingi and L1Tc non-LTR retrotransposons - which constitute the ingi clade - are abundant in the genome of the trypanosomatid species Trypanosoma brucei and Trypanosoma cruzi, respectively. The corresponding retroelements, however, are not present in the genome of a closely related trypanosomatid, Leishmania major. To study the evolution of non-LTR retrotransposons in trypanosomatids, we have analyzed all ingi/L1Tc elements and highly degenerate ingi/L1Tc-related sequences identified in the recently completed T. brucei, T. cruzi and L. major genomes. The coding sequences of 242 degenerate ingi/L1Tc-related elements (DIREs) in all three genomes were reconstituted by removing the numerous frame shifts. Three independent phylogenetic analyses conducted on the conserved domains encoded by these elements show that all DIREs, including the 52 L. major DIREs, form a monophyletic group belonging to the ingi clade. This indicates that the trypanosomatid ancestor contained active mobile elements that have been retained in the Trypanosoma species, but were lost from L. major genome, where only remnants (DIRE) are detectable. All 242 DIREs analyzed group together according to their species origin with the exception of 11 T. cruzi DIREs which are close to the T.
brucei ingi/DIRE families. Considering the absence of known horizontal transfer between the African T. brucei and the South-American T. cruzi, this suggests that this group of elements evolved at a lower rate when compared to the other trypanosomatid elements. Interestingly, the only nucleotide sequence conserved between ingi and L1Tc (the first 79 residues) is also present at the 5{\textquoteright}-extremity of all the full length DIREs and suggests a possible role for this conserved motif, as well as for DIREs.},
  keywords = {Degenerate retroelement, Evolution, Ingi, L1Tc, Leishmania major, Non-LTR retrotransposon, Retroposon, Trypanosoma brucei, Trypanosoma cruzi},
  issn = {0166-6851},
  author = {Bringaud, Frederic and Ghedin, Elodie and Blandin, Ga{\"e}lle and Bartholomeu, Daniella C. and Caler, Elisabet and Levin, Mariano J. and Baltz, Th{\'e}o and El-Sayed, Najib M.},
}
@comment{Entries 49751 and 49561 below share volume 94, year 2006 and pages 1445-1463 and appear to be the same article; 49751 omits the author Pratt. Consider merging the two records.}
@article{49751,
  title = {How A.I. and multi-robot systems research will accelerate our understanding of social animal behavior},
  journal = {Proceedings of the IEEE},
  volume = {94},
  year = {2006},
  pages = {1445--1463},
  author = {Balch, Tucker and Dellaert, Frank and Feldman, Adam and Guillory, Andrew and Isbell, Charles and Khan, Zia and Stein, Andrew and Wilde, Hank},
}
@article{49561,
  title = {How Multirobot Systems Research will Accelerate our Understanding of Social Animal Behavior},
  journal = {Proceedings of the IEEE},
  volume = {94},
  year = {2006},
  month = jul,
  pages = {1445--1463},
  issn = {0018-9219},
  doi = {10.1109/JPROC.2006.876969},
  url = {http://ieeexplore.ieee.org/lpdocs/epic03/wrapper.htm?arnumber=1677955},
  author = {Balch, T. and Dellaert, F. and Feldman, A. and Guillory, A. and Isbell, C. L. and Khan, Z. and Pratt, S. C. and Stein, A. N.
and Wilde, H.}
}

@article{49750,
  title = {{MCMC} Data Association and Sparse Factorization Updating for Real Time Multitarget Tracking with Merged and Multiple Measurements},
  journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
  volume = {28},
  year = {2006},
  month = dec,
  pages = {1960--1972},
  author = {Khan, Zia and Balch, Tucker and Dellaert, Frank}
}

@article{49560,
  title = {{MCMC} data association and sparse factorization updating for real time multitarget tracking with merged and multiple measurements},
  journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
  volume = {28},
  year = {2006},
  month = dec,
  pages = {1960--1972},
  abstract = {

In several multitarget tracking applications, a target may return more than one measurement per target and interacting targets may return multiple merged measurements between targets. Existing algorithms for tracking and data association, initially applied to radar tracking, do not adequately address these types of measurements. Here, we introduce a probabilistic model for interacting targets that addresses both types of measurements simultaneously. We provide an algorithm for approximate inference in this model using a Markov chain Monte Carlo (MCMC)-based auxiliary variable particle filter. We Rao-Blackwellize the Markov chain to eliminate sampling over the continuous state space of the targets. A major contribution of this work is the use of sparse least squares updating and downdating techniques, which significantly reduce the computational cost per iteration of the Markov chain. Also, when combined with a simple heuristic, they enable the algorithm to correctly focus computation on interacting targets. We include experimental results on a challenging simulation sequence. We test the accuracy of the algorithm using two sensor modalities, video, and laser range data. We also show the algorithm exhibits real time performance on a conventional PC.

},
  keywords = {algorithms, Artificial Intelligence, Image Enhancement, Image Interpretation, Computer-Assisted, Information Storage and Retrieval, Movement, Pattern Recognition, Automated, Reproducibility of Results, Sensitivity and Specificity, Subtraction Technique},
  issn = {0162-8828},
  doi = {10.1109/TPAMI.2006.247},
  author = {Khan, Zia and Balch, Tucker and Dellaert, Frank}
}

@article{38387,
  title = {Molecular Characterization of Serine-, Alanine-, and Proline-Rich Proteins of {Trypanosoma} cruzi and Their Possible Role in Host Cell Infection},
  journal = {Infection and Immunity},
  volume = {74},
  year = {2006},
  doi = {10.1128/IAI.74.3.1537-1546.2006},
  abstract = {We previously reported the isolation of a novel protein gene family, termed SAP (serine-, alanine-, and proline-rich protein), from Trypanosoma cruzi. Aided by the availability of the completed genome sequence of T. cruzi, we have now identified 39 full-length sequences of SAP, six pseudogenes and four partial genes. SAPs share a central domain of about 55 amino acids and can be divided into four groups based on their amino (N)- and carboxy (C)-terminal sequences. Some SAPs have conserved N- and C-terminal domains encoding a signal peptide and a glycosylphosphatidylinositol anchor addition site, respectively. Analysis of the expression of SAPs in metacyclic trypomastigotes by two-dimensional electrophoresis and immunoblotting revealed that they are likely to be posttranslationally modified in vivo. We have also demonstrated that some SAPs are shed into the extracellular medium. The recombinant SAP exhibited an adhesive capacity toward mammalian cells, where binding was dose dependent and saturable, indicating a possible ligand-receptor interaction. SAP triggered the host cell Ca2+ response required for parasite internalization. A cell invasion assay performed in the presence of SAP showed inhibition of internalization of the metacyclic forms of the CL strain. Taken together, these results show that SAP is involved in the invasion of mammalian cells by metacyclic trypomastigotes, and they confirm the hypothesis that infective trypomastigotes exploit an arsenal of surface glycoproteins and shed proteins to induce signaling events required for their internalization.},
  author = {Baida, Renata C. P. and Santos, Marcia R. M. and Carmo, Mirian S. and Yoshida, Nobuko and Ferreira, Danielle and Ferreira, Alice Teixeira and El-Sayed, Najib M.
and Andersson, Bj{\"o}rn and da Silveira, Jose Franco}
}

@inproceedings{49564,
  title = {Multitarget Tracking with Split and Merged Measurements},
  booktitle = {2005 {IEEE} Computer Society Conference on Computer Vision and Pattern Recognition ({CVPR}{\textquoteright}05)},
  year = {2006},
  publisher = {IEEE},
  organization = {IEEE},
  address = {San Diego, CA, USA},
  doi = {10.1109/CVPR.2005.245},
  url = {http://ieeexplore.ieee.org/lpdocs/epic03/wrapper.htm?arnumber=1467323},
  author = {Khan, Z. and Balch, T. and Dellaert, F.}
}

@book{38414,
  title = {Oceans And Health: Pathogens In The Marine Environment},
  year = {2006},
  publisher = {Springer},
  organization = {Springer},
  abstract = {The release of non-disinfected wastewaters into the marine environment is a common worldwide practice, in under-developed as well as in highly developed countries. Consequently, the seas are constantly infused with wastewater bacteria, among them highly pathogenic ones. In view of the public health significance of this phenomenon, it is surprising how little is actually known concerning the fate of such bacteria once they enter the sea. While numerous studies have addressed the effects of various environmental parameters on colony formation, many of them actually ignore the fact that bacteria can retain viability and infectivity while losing colony-forming ability. Only in recent years have efforts also been directed at unraveling the mechanisms determining bacterial sensitivity or survival under these conditions. This, therefore, is one subject of Oceans and Health: Pathogens in the Marine Environment: the survival, infectivity, pathogenicity and viability of enteric bacteria in the sea.
Chapters also detail the public health aspects of wastewater release, civil engineering and economic considerations, other sources of pathogens, and much more.},
  keywords = {Electronic books, Marine microbiology, Medical / Epidemiology, Medical / Microbiology, Nature / Animals / Marine Life, Pathogenic microorganisms, Science / Environmental Science, Science / Life Sciences / Biology, Science / Life Sciences / Marine Biology, Science / Life Sciences / Microbiology, Seawater/ microbiology},
  isbn = {9780387237084},
  author = {Belkin, Shimshon and Colwell, Rita R.}
}

@article{38415,
  title = {An optimized system for expression and purification of secreted bacterial proteins},
  journal = {Protein Expression and Purification},
  volume = {46},
  year = {2006},
  doi = {10.1016/j.pep.2005.09.003},
  abstract = {In this report, we describe an optimized system for the efficient overexpression, purification, and refolding of secreted bacterial proteins. Candidate secreted proteins were produced recombinantly in Escherichia coli as Tobacco Etch Virus protease-cleavable hexahistidine-c-myc epitope fusion proteins. Without regard to their initial solubility, recombinant fusion proteins were extracted from whole cells with guanidium chloride, purified under denaturing conditions by immobilized metal affinity chromatography, and refolded by rapid dilution into a solution containing only Tris buffer and sodium chloride. Following concentration on the same resin under native conditions, each protein was eluted for further purification and/or characterization. Preliminary studies on a test set of 12 secreted proteins ranging in size from 13 to 130~kDa yielded between 10 and 50~mg of fusion protein per liter of induced culture at greater than 90\% purity, as judged by Coomassie-stained SDS{\textendash}PAGE.
Of the nine proteins further purified, analytical gel filtration chromatography indicated that each was a monomer in solution and circular dichroism spectroscopy revealed that each had adopted a well-defined secondary structure. While there are many potential applications for this system, the results presented here suggest that it will be particularly useful for investigators employing structural approaches to understand protein function, as attested to by the crystal structures of three proteins purified using this methodology (B.V. Geisbrecht, B.Y. Hamaoka, B. Perman, A. Zemla, D.J. Leahy, J. Biol. Chem. 280 (2005) 17243{\textendash}17250).},
  keywords = {Pathogens, Secreted proteins, Toxins, Virulence factors},
  issn = {1046-5928},
  author = {Geisbrecht, Brian V. and Bouyain, Samuel and Pop, M.}
}

@article{38427,
  title = {Patterns of sequence conservation in presynaptic neural genes},
  journal = {Genome Biology},
  volume = {7},
  year = {2006},
  author = {Hadley, D. and Murphy, T. and Valladares, O. and Hannenhalli, Sridhar and Ungar, L. and Kim, J. and Bucan, M. and others}
}

@article{38479,
  title = {{Schistosoma} mansoni ({Platyhelminthes}, {Trematoda}) nuclear receptors: Sixteen new members and a novel subfamily},
  journal = {Gene},
  volume = {366},
  year = {2006},
  doi = {10.1016/j.gene.2005.09.013},
  abstract = {Nuclear receptors (NRs) are important transcriptional modulators in metazoans. Sixteen new NRs were identified in the Platyhelminth trematode, Schistosoma mansoni. Three were found to possess novel tandem DNA-binding domains that identify a new subfamily of NR. Two NRs are homologues of the thyroid hormone receptor that previously were thought to be restricted to chordates. This study brings the total number of identified NR in S. mansoni to 21. Phylogenetic and comparative genomic analyses demonstrate that S. mansoni NRs share an evolutionary lineage with that of arthropods and vertebrates. Phylogenic analysis shows that more than half of the S.
mansoni nuclear receptors evolved from a second gene duplication. As the second gene duplication of NRs was thought to be specific to vertebrates, our data challenge the current theory of NR evolution.},
  keywords = {Nuclear receptors, Schistosoma mansoni},
  issn = {0378-1119},
  author = {Wu, Wenjie and Niles, Edward G. and El-Sayed, Najib M. and Berriman, Matthew and LoVerde, Philip T.}
}

@article{38483,
  title = {Seasonal Cholera Caused by {Vibrio} cholerae Serogroups {O1} and {O139} in the Coastal Aquatic Environment of {Bangladesh}},
  journal = {Applied and Environmental Microbiology},
  volume = {72},
  year = {2006},
  doi = {10.1128/AEM.00066-06},
  abstract = {Since Vibrio cholerae O139 first appeared in 1992, both O1 El Tor and O139 have been recognized as the epidemic serogroups, although their geographic distribution, endemicity, and reservoir are not fully understood. To address this lack of information, a study of the epidemiology and ecology of V. cholerae O1 and O139 was carried out in two coastal areas, Bakerganj and Mathbaria, Bangladesh, where cholera occurs seasonally. The results of a biweekly clinical study (January 2004 to May 2005), employing culture methods, and of an ecological study (monthly in Bakerganj and biweekly in Mathbaria from March 2004 to May 2005), employing direct and enrichment culture, colony blot hybridization, and direct fluorescent-antibody methods, showed that cholera is endemic in both Bakerganj and Mathbaria and that V. cholerae O1, O139, and non-O1/non-O139 are autochthonous to the aquatic environment. Although V. cholerae O1 and O139 were isolated from both areas, most noteworthy was the isolation of V. cholerae O139 in March, July, and September 2004 in Mathbaria, where seasonal cholera was clinically linked only to V. cholerae O1. In Mathbaria, V. cholerae O139 emerged as the sole cause of a significant outbreak of cholera in March 2005. V.
cholerae O1 reemerged clinically in April 2005 and established dominance over V. cholerae O139, continuing to cause cholera in Mathbaria. In conclusion, the epidemic potential and coastal aquatic reservoir for V. cholerae O139 have been demonstrated. Based on the results of this study, the coastal ecosystem of the Bay of Bengal is concluded to be a significant reservoir for the epidemic serogroups of V. cholerae.},
  issn = {0099-2240, 1098-5336},
  author = {Alam, Munirul and Hasan, Nur A. and Sadique, Abdus and Bhuiyan, N. A. and Ahmed, Kabir U. and Nusrin, Suraia and Nair, G. Balakrish and Siddique, A. K. and Sack, R. Bradley and Sack, David A. and Huq, Anwar and Colwell, Rita R.}
}

@article{38486,
  title = {Selection of Target Sites for Mobile {DNA} Integration in the Human Genome},
  journal = {PLoS Computational Biology},
  volume = {2},
  year = {2006},
  doi = {10.1371/journal.pcbi.0020157},
  abstract = {DNA sequences from retroviruses, retrotransposons, DNA transposons, and parvoviruses can all become integrated into the human genome. Accumulation of such sequences accounts for at least 40\% of our genome today. These integrating elements are also of interest as gene-delivery vectors for human gene therapy. Here we present a comprehensive bioinformatic analysis of integration targeting by HIV, MLV, ASLV, SFV, L1, SB, and AAV. We used a mathematical method which allowed annotation of each base pair in the human genome for its likelihood of hosting an integration event by each type of element, taking advantage of more than 200 types of genomic annotation. This bioinformatic resource documents a wealth of new associations between genomic features and integration targeting.
The study also revealed that the length of genomic intervals analyzed strongly affected the conclusions drawn{\textemdash}thus, answering the question {\textquotedblleft}What genomic features affect integration?{\textquotedblright} requires carefully specifying the length scale of interest.},
  author = {Berry, Charles and Hannenhalli, Sridhar and Leipzig, Jeremy and Bushman, Frederic D.}
}

@article{38549,
  title = {The {Trypanosoma} cruzi {L1Tc} and {NARTc} non-{LTR} retrotransposons show relative site specificity for insertion},
  journal = {Molecular Biology and Evolution},
  volume = {23},
  year = {2006},
  author = {Bringaud, F. and Bartholomeu, D. C. and Blandin, G. and Delcher, A. and Baltz, T. and El-Sayed, Najib M. and Ghedin, E.}
}

@article{49639,
  title = {The {Trypanosoma} cruzi {L1Tc} and {NARTc} non-{LTR} retrotransposons show relative site specificity for insertion},
  journal = {Molecular Biology and Evolution},
  volume = {23},
  year = {2006},
  month = feb,
  pages = {411--420},
  abstract = {

The trypanosomatid protozoan Trypanosoma cruzi contains long autonomous (L1Tc) and short nonautonomous (NARTc) non-long terminal repeat retrotransposons. NARTc (0.25 kb) probably derived from L1Tc (4.9 kb) by 3{\textquoteright}-deletion. It has been proposed that their apparent random distribution in the genome is related to the L1Tc-encoded apurinic/apyrimidinic endonuclease (APE) activity, which repairs modified residues. To address this question we used the T. cruzi (CL-Brener strain) genome data to analyze the distribution of all the L1Tc/NARTc elements present in contigs larger than 10 kb. This data set, which represents 0.91x sequence coverage of the haploid nuclear genome ( approximately 55 Mb), contains 419 elements, including 112 full-length L1Tc elements (14 of which are potentially functional) and 84 full-length NARTc. Approximately half of the full-length elements are flanked by a target site duplication, most of them (87\%) are 12 bp long. Statistical analyses of sequences flanking the full-length elements show the same highly conserved pattern upstream of both the L1Tc and NARTc retrotransposons. The two most conserved residues are a guanine and an adenine, which flank the site where first-strand cleavage is performed by the element-encoded endonuclease activity. This analysis clearly indicates that the L1Tc and NARTc elements display relative site specificity for insertion, which suggests that the APE activity is not responsible for first-strand cleavage of the target site.

},
  keywords = {Animals, DNA, Protozoan, DNA-(Apurinic or Apyrimidinic Site) Lyase, Mutagenesis, Insertional, Retroelements, Sequence Deletion, Trypanosoma cruzi},
  issn = {0737-4038},
  doi = {10.1093/molbev/msj046},
  author = {Bringaud, Frederic and Bartholomeu, Daniella C. and Blandin, Ga{\"e}lle and Delcher, Arthur and Baltz, Th{\'e}o and El-Sayed, Najib M. A. and Ghedin, Elodie}
}

@article{38131,
  title = {Bioinformatic Prediction of {mRNA} Targets of the {Fragile X} Mental Retardation Protein},
  year = {2005},
  author = {Simola, D. F. and Bucan, M. and Dalva, M. and Hannenhalli, Sridhar and Liebhaber, S. and Ungar, L.}
}

@article{38162,
  title = {Comparative Genomics of Trypanosomatid Parasitic Protozoa},
  journal = {Science},
  volume = {309},
  year = {2005},
  doi = {10.1126/science.1112181},
  abstract = {A comparison of gene content and genome architecture of Trypanosoma brucei, Trypanosoma cruzi, and Leishmania major, three related pathogens with different life cycles and disease pathology, revealed a conserved core proteome of about 6200 genes in large syntenic polycistronic gene clusters. Many species-specific genes, especially large surface antigen families, occur at nonsyntenic chromosome-internal and subtelomeric regions. Retroelements, structural RNAs, and gene family expansion are often associated with syntenic discontinuities that{\textemdash}along with gene divergence, acquisition and loss, and rearrangement within the syntenic regions{\textemdash}have shaped the genomes of each parasite. Contrary to recent reports, our analyses reveal no evidence that these species are descended from an ancestor that contained a photosynthetic endosymbiont.},
  author = {El-Sayed, Najib M. and Myler, Peter J. and Blandin, Ga{\"e}lle and Berriman, Matthew and Crabtree, Jonathan and Aggarwal, Gautam and Caler, Elisabet and Renauld, Hubert and Worthey, Elizabeth A. and Hertz-Fowler, Christiane and Ghedin, Elodie and Peacock, Christopher and Bartholomeu, Daniella C.
and Haas, Brian J. and Tran, Anh-Nhi and Wortman, Jennifer R. and Alsmark, U. Cecilia M. and Angiuoli, Samuel and Anupama, Atashi and Badger, Jonathan and Bringaud, Frederic and Cadag, Eithon and Carlton, Jane M. and Cerqueira, Gustavo C. and Creasy, Todd and Delcher, Arthur L. and Djikeng, Appolinaire and Embley, T. Martin and Hauser, Christopher and Ivens, Alasdair C. and Kummerfeld, Sarah K. and Pereira-Leal, Jose B. and Nilsson, Daniel and Peterson, Jeremy and Salzberg, Steven L. and Shallom, Joshua and Silva, Joana C. and Sundaram, Jaideep and Westenberger, Scott and White, Owen and Melville, Sara E. and Donelson, John E. and Andersson, Bj{\"o}rn and Stuart, Kenneth D. and Hall, Neil}
}

@article{38219,
  title = {Dynamic Querying for Pattern Identification in Microarray and Genomic Data (2003)},
  journal = {Institute for Systems Research Technical Reports},
  year = {2005},
  abstract = {Data sets involving linear ordered sequences are a recurring theme in bioinformatics. Dynamic query tools that support exploration of these data sets can be useful for identifying patterns of interest. This paper describes the use of one such tool TimeSearcher - to interactively explore linear sequence data sets taken from two bioinformatics problems. Microarray time course data sets involve expression levels for large numbers of genes over multiple time points. TimeSearcher can be used to interactively search these data sets for genes with expression profiles of interest. The occurrence frequencies of short sequences of DNA in aligned exons can be used to identify sequences that play a role in the pre-mRNA splicing. TimeSearcher can be used to search these data sets for candidate splicing signals.},
  keywords = {Technical Report},
  author = {Hochheiser, Harry and Baehrecke, Eric H. and Stephen M.
Mount and Shneiderman, Ben}
}

@article{38265,
  title = {A framework for set-oriented computation in inductive logic programming and its application in generalizing inverse entailment},
  journal = {Inductive Logic Programming},
  year = {2005},
  author = {Corrada Bravo, H{\'e}ctor and Page, D. and Ramakrishnan, R. and Shavlik, J. and Costa, V. S.}
}

@article{38285,
  title = {The genetic map and comparative analysis with the physical map of {Trypanosoma} brucei},
  journal = {Nucleic Acids Research},
  volume = {33},
  year = {2005},
  author = {MacLeod, A. and Tweedie, A. and McLellan, S. and Taylor, S. and Hall, N. and Berriman, M. and El-Sayed, Najib M. and Hope, M. and Turner, C. M. R. and Tait, A.}
}

@article{38287,
  title = {Genome analysis of multiple pathogenic isolates of {Streptococcus} agalactiae: implications for the microbial ``pan-genome''},
  journal = {Proceedings of the National Academy of Sciences of the United States of America},
  volume = {102},
  year = {2005},
  url = {http://www.ncbi.nlm.nih.gov/pubmed/16172379?dopt=Abstract},
  doi = {10.1073/pnas.0506758102},
  abstract = {The development of efficient and inexpensive genome sequencing methods has revolutionized the study of human bacterial pathogens and improved vaccine design. Unfortunately, the sequence of a single genome does not reflect how genetic variability drives pathogenesis within a bacterial species and also limits genome-wide screens for vaccine candidates or for antimicrobial targets. We have generated the genomic sequence of six strains representing the five major disease-causing serotypes of Streptococcus agalactiae, the main cause of neonatal infection in humans. Analysis of these genomes and those available in databases showed that the S.
agalactiae species can be described by a pan-genome consisting of a core genome shared by all isolates, accounting for approximately 80\% of any single genome, plus a dispensable genome consisting of partially shared and strain-specific genes. Mathematical extrapolation of the data suggests that the gene reservoir available for inclusion in the S. agalactiae pan-genome is vast and that unique genes will continue to be identified even after sequencing hundreds of genomes.},
  keywords = {Amino Acid Sequence, Bacterial Capsules, Base Sequence, Gene expression, Genes, Bacterial, Genetic Variation, Genome, Bacterial, Molecular Sequence Data, Phylogeny, sequence alignment, Sequence Analysis, DNA, Streptococcus agalactiae, virulence},
  author = {Tettelin, Herv{\'e} and Masignani, Vega and Cieslewicz, Michael J. and Donati, Claudio and Medini, Duccio and Ward, Naomi L. and Angiuoli, Samuel V. and Crabtree, Jonathan and Jones, Amanda L. and Durkin, A. Scott and DeBoy, Robert T. and Davidsen, Tanja M. and Mora, Marirosa and Scarselli, Maria and Margarit y Ros, Immaculada and Peterson, Jeremy D. and Hauser, Christopher R. and Sundaram, Jaideep P. and Nelson, William C. and Madupu, Ramana and Brinkac, Lauren M. and Dodson, Robert J. and Rosovitz, Mary J. and Sullivan, Steven A. and Daugherty, Sean C. and Haft, Daniel H. and Selengut, J. and Gwinn, Michelle L. and Zhou, Liwei and Zafar, Nikhat and Khouri, Hoda and Radune, Diana and Dimitrov, George and Watkins, Kisha and O{\textquoteright}Connor, Kevin J. B. and Smith, Shannon and Utterback, Teresa R. and White, Owen and Rubens, Craig E. and Grandi, Guido and Madoff, Lawrence C. and Kasper, Dennis L. and Telford, John L. and Wessels, Michael R. and Rappuoli, Rino and Fraser, Claire M.}
}

@article{38292,
  title = {The genome of the {African} trypanosome {Trypanosoma} brucei},
  journal = {Science},
  volume = {309},
  year = {2005},
  author = {Berriman, M. and Ghedin, E. and Hertz-Fowler, C. and Blandin, G. and Renauld, H.
and Bartholomeu, D. C. and Lennard, N. J. and Caler, E. and Hamlin, N. E. and Haas, B. and others}
}

@article{38293,
  title = {The genome of the protist parasite {Entamoeba} histolytica},
  journal = {Nature},
  volume = {433},
  year = {2005},
  publisher = {Nature Publishing Group},
  author = {Loftus, B. and Anderson, I. and Davies, R. and Alsmark, U. C. M. and Samuelson, J. and Amedeo, P. and Roncaglia, P. and Berriman, M. and Hirt, R. P. and Mann, B. J. and others}
}

@article{38294,
  title = {{Genome Properties}: a system for the investigation of prokaryotic genetic content for microbiology, genome annotation and comparative genomics},
  journal = {Bioinformatics},
  volume = {21},
  year = {2005},
  url = {http://www.ncbi.nlm.nih.gov/pubmed/15347579?dopt=Abstract},
  doi = {10.1093/bioinformatics/bti015},
  abstract = {MOTIVATION: The presence or absence of metabolic pathways and structures provide a context that makes protein annotation far more reliable. Compiling such information across microbial genomes improves the functional classification of proteins and provides a valuable resource for comparative genomics. RESULTS: We have created a Genome Properties system to present key aspects of prokaryotic biology using standardized computational methods and controlled vocabularies. Properties reflect gene content, phenotype, phylogeny and computational analyses. The results of searches using hidden Markov models allow many properties to be deduced automatically, especially for families of proteins (equivalogs) conserved in function since their last common ancestor. Additional properties are derived from curation, published reports and other forms of evidence. Genome Properties system was applied to 156 complete prokaryotic genomes, and is easily mined to find differences between species, correlations between metabolic features and families of uncharacterized proteins, or relationships among properties.
AVAILABILITY: Genome Properties can be found at http://www.tigr.org/Genome_Properties SUPPLEMENTARY INFORMATION: http://www.tigr.org/tigr-scripts/CMR2/genome_properties_references.spl.},
  keywords = {Chromosome mapping, database management systems, Databases, Genetic, documentation, Gene Expression Profiling, Gene Expression Regulation, Genomics, Information Storage and Retrieval, Microbiological Techniques, natural language processing, Prokaryotic Cells, Proteome, signal transduction, software, User-Computer Interface, Vocabulary, Controlled},
  author = {Haft, Daniel H. and Selengut, J. and Brinkac, Lauren M. and Zafar, Nikhat and White, Owen}
}

@article{38305,
  title = {The genome sequence of {Trypanosoma} cruzi, etiologic agent of {Chagas} disease},
  journal = {Science},
  volume = {309},
  year = {2005},
  publisher = {American Association for the Advancement of Science},
  author = {El-Sayed, Najib M. and Myler, P. J. and Bartholomeu, D. C. and Nilsson, D. and Aggarwal, G. and Tran, A. N. and Ghedin, E. and Worthey, E. A. and Delcher, A. L. and Blandin, G. and others}
}

@article{38307,
  title = {Genome-Wide Analysis of Chromosomal Features Repressing Human Immunodeficiency Virus Transcription},
  journal = {Journal of Virology},
  volume = {79},
  year = {2005},
  doi = {10.1128/JVI.79.11.6610-6619.2005},
  abstract = {We have investigated regulatory sequences in noncoding human DNA that are associated with repression of an integrated human immunodeficiency virus type 1 (HIV-1) promoter. HIV-1 integration results in the formation of precise and homogeneous junctions between viral and host DNA, but integration takes place at many locations. Thus, the variation in HIV-1 gene expression at different integration sites reports the activity of regulatory sequences at nearby chromosomal positions.
Negative regulation of HIV transcription is of particular interest because of its association with maintaining HIV in a latent state in cells from infected patients. To identify chromosomal regulators of HIV transcription, we infected Jurkat T cells with an HIV-based vector transducing green fluorescent protein (GFP) and separated cells into populations containing well-expressed (GFP-positive) or poorly expressed (GFP-negative) proviruses. We then determined the chromosomal locations of the two classes by sequencing 971 junctions between viral and cellular DNA. Possible effects of endogenous cellular transcription were characterized by transcriptional profiling. Low-level GFP expression correlated with integration in (i) gene deserts, (ii) centromeric heterochromatin, and (iii) very highly expressed cellular genes. These data provide a genome-wide picture of chromosomal features that repress transcription and suggest models for transcriptional latency in cells from HIV-infected patients.},
  issn = {0022-538X, 1098-5514},
  author = {Lewinski, M. K. and Bisgrove, D. and Shinn, P. and Chen, H. and Hoffmann, C. and Hannenhalli, Sridhar and Verdin, E. and Berry, C. C. and Ecker, J. R. and Bushman, F.
D.}
}

@article{38310,
  title = {Genome-wide analysis of retroviral {DNA} integration},
  journal = {Nature Reviews Microbiology},
  volume = {3},
  year = {2005},
  doi = {10.1038/nrmicro1263},
  issn = {1740-1526},
  author = {Bushman, Frederic and Lewinski, Mary and Ciuffi, Angela and Barr, Stephen and Leipzig, Jeremy and Hannenhalli, Sridhar and Hoffmann, Christian}
}

@article{49752,
  title = {{MCMC}-Based Particle Filtering for Tracking a Variable Number of Interacting Targets},
  journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
  volume = {27},
  year = {2005},
  pages = {1805--1819},
  author = {Khan, Zia and Balch, Tucker and Dellaert, Frank}
}

@article{49562,
  title = {{MCMC}-based particle filtering for tracking a variable number of interacting targets},
  journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
  volume = {27},
  year = {2005},
  month = nov,
  pages = {1805--1819},
  abstract = {

We describe a particle filter that effectively deals with interacting targets--targets that are influenced by the proximity and/or behavior of other targets. The particle filter includes a Markov random field (MRF) motion prior that helps maintain the identity of targets throughout an interaction, significantly reducing tracker failures. We show that this MRF prior can be easily implemented by including an additional interaction factor in the importance weights of the particle filter. However, the computational requirements of the resulting multitarget filter render it unusable for large numbers of targets. Consequently, we replace the traditional importance sampling step in the particle filter with a novel Markov chain Monte Carlo (MCMC) sampling step to obtain a more efficient MCMC-based multitarget filter. We also show how to extend this MCMC-based filter to address a variable number of interacting targets. Finally, we present both qualitative and quantitative experimental results, demonstrating that the resulting particle filters deal efficiently and effectively with complicated target interactions.

},
  keywords = {algorithms, Animals, Artificial Intelligence, Computer simulation, HUMANS, Image Enhancement, Image Interpretation, Computer-Assisted, Information Storage and Retrieval, Markov chains, Models, Biological, Models, Statistical, Monte Carlo Method, Motion, Movement, Pattern Recognition, Automated, Subtraction Technique, Video Recording},
  issn = {0162-8828},
  doi = {10.1109/TPAMI.2005.223},
  author = {Khan, Zia and Balch, Tucker and Dellaert, Frank}
}

@article{49753,
  title = {An Outdoor {3-D} Visual Tracking System for the Study of Spatial Navigation and Memory in Rhesus Monkeys},
  journal = {Behavior Research Methods, Instruments \& Computers},
  volume = {37},
  year = {2005},
  month = aug,
  pages = {453--463},
  author = {Khan, Zia and Herman, Rebecca A. and Wallen, Kim and Balch, Tucker}
}

@article{49563,
  title = {An outdoor {3-D} visual tracking system for the study of spatial navigation and memory in rhesus monkeys},
  journal = {Behavior Research Methods, Instruments \& Computers},
  volume = {37},
  year = {2005},
  month = aug,
  pages = {453--463},
  abstract = {

Previous studies of the navigational abilities of nonhuman primates have largely been limited to what could be described by a human observer with a pen and paper. Consequently, we have developed a system that uses a pair of cameras to automatically obtain the three-dimensional trajectory of rhesus monkeys performing an outdoor spatial navigation and memory task. The system provides trajectories, path length, speed, and other variables that would be impossible for an unaided observer to note. From trajectory data, we computed and validated a path-length measurement. We use this measurement to compare the navigation abilities of several animals. In addition, we provide quantitative data on the accuracy of a method for automatic behavior detection. Currently, the system is being used to examine the sex differences in spatial navigation of rhesus monkeys. We expect that measures derived from the trajectory data will reveal strategies used by animals to solve spatial problems.

}, keywords = {Animals, Behavior, Animal, Macaca mulatta, Memory, Models, Biological, Space Perception, Visual Perception}, issn = {1554-351X}, author = {Khan, Zia and Herman, Rebecca A and Wallen, Kim and Balch, Tucker} } @article {38443, title = {Post-transcriptional Control in Mammalian Dendrites}, year = {2005}, author = {Simola, D. F. and Dalva, M. and Hannenhalli, Sridhar and Liebhaber, S. and Bucan, M. and Ungar, L.} } @article {49637, title = {Transcriptional profiling of the hyperthermophilic methanarchaeon Methanococcus jannaschii in response to lethal heat and non-lethal cold shock}, journal = {Environ Microbiol}, volume = {7}, year = {2005}, month = jun, pages = {789--797}, abstract = {

Temperature shock of the hyperthermophilic methanarchaeon Methanococcus jannaschii from its optimal growth temperature of 85 degrees C to 65 degrees C and 95 degrees C resulted in different transcriptional responses characteristic of both the direction of shock (heat or cold shock) and whether the shock was lethal. Specific outcomes of lethal heat shock to 95 degrees C included upregulation of genes encoding chaperones, and downregulation of genes encoding subunits of the H+ transporting ATP synthase. A gene encoding an alpha subunit of a putative prefoldin was also upregulated, which may comprise a novel element in the protein processing pathway in M. jannaschii. Very different responses were observed upon cold shock to 65 degrees C. These included upregulation of a gene encoding an RNA helicase and other genes involved in transcription and translation, and upregulation of genes coding for proteases and transport proteins. Also upregulated was a gene that codes for an 18 kDa FKBP-type PPIase, which may facilitate protein folding at low temperatures. Transcriptional profiling also revealed several hypothetical proteins that respond to temperature stress conditions.

}, keywords = {Adaptation, Physiological, Archaeal Proteins, Cold Temperature, Gene Expression Profiling, Gene Expression Regulation, Archaeal, Heat-Shock Proteins, Hot Temperature, Methanococcus, Temperature, Transcription, Genetic}, issn = {1462-2912}, doi = {10.1111/j.1462-2920.2005.00751.x}, author = {Boonyaratanakornkit, Boonchai B and Simpson, Anjana J and Whitehead, Timothy A and Fraser, Claire M and el-Sayed, Najib M A and Clark, Douglas S} } @article {38538, title = {Transcriptional profiling of the hyperthermophilic methanarchaeon Methanococcus jannaschii in response to lethal heat and non-lethal cold shock}, journal = {Environmental Microbiology}, volume = {7}, year = {2005}, pages = {789--797}, doi = {10.1111/j.1462-2920.2005.00751.x}, abstract = {Temperature shock of the hyperthermophilic methanarchaeon Methanococcus jannaschii from its optimal growth temperature of 85{\textdegree}C to 65{\textdegree}C and 95{\textdegree}C resulted in different transcriptional responses characteristic of both the direction of shock (heat or cold shock) and whether the shock was lethal. Specific outcomes of lethal heat shock to 95{\textdegree}C included upregulation of genes encoding chaperones, and downregulation of genes encoding subunits of the H+ transporting ATP synthase. A gene encoding an $\alpha$ subunit of a putative prefoldin was also upregulated, which may comprise a novel element in the protein processing pathway in M. jannaschii. Very different responses were observed upon cold shock to 65{\textdegree}C. These included upregulation of a gene encoding an RNA helicase and other genes involved in transcription and translation, and upregulation of genes coding for proteases and transport proteins. Also upregulated was a gene that codes for an 18~kDa FKBP-type PPIase, which may facilitate protein folding at low temperatures. 
Transcriptional profiling also revealed several hypothetical proteins that respond to temperature stress conditions.}, issn = {1462-2920}, author = {Boonyaratanakornkit, Boonchai B. and Simpson, Anjana J. and Whitehead, Timothy A. and Fraser, Claire M. and El-Sayed, Najib M. and Clark, Douglas S.} } @conference {49565, title = {What Are the Ants Doing? Vision-Based Tracking and Reconstruction of Control Programs}, booktitle = {Proceedings of the 2005 IEEE International Conference on Robotics and Automation}, year = {2005}, publisher = {IEEE}, organization = {IEEE}, address = {Barcelona, Spain}, doi = {10.1109/ROBOT.2005.1570762}, url = {http://ieeexplore.ieee.org/lpdocs/epic03/wrapper.htm?arnumber=1570762}, author = {Egerstedt, M. and Balch, T. and Dellaert, F. and Delmotte, F. and Khan, Z.} } @article {49638, title = {What the genome sequence is revealing about trypanosome antigenic variation}, journal = {Biochem Soc Trans}, volume = {33}, year = {2005}, month = nov, pages = {986--989}, abstract = {

African trypanosomes evade humoral immunity through antigenic variation, whereby they switch expression of the gene encoding their VSG (variant surface glycoprotein) coat. Switching proceeds by duplication of silent VSG genes into a transcriptionally active locus. The genome project has revealed that most of the silent archive consists of hundreds of subtelomeric VSG tandem arrays, and that most of these are not functional genes. Precedent suggests that they can contribute combinatorially to the formation of expressed, functional genes through segmental gene conversion. These findings from the genome project have major implications for evolution of the VSG archive and for transmission of the parasite in the field.

}, keywords = {Animals, Antigens, Protozoan, Evolution, Molecular, Genetic Variation, Genome, Trypanosomatina, Variant Surface Glycoproteins, Trypanosoma}, issn = {0300-5127}, doi = {10.1042/BST20050986}, author = {Barry, J D and Marcello, L and Morrison, L J and Read, A F and Lythgoe, K and Jones, N and Carrington, M and Blandin, G and B{\"o}hme, U and Caler, E and Hertz-Fowler, C and Renauld, H and El-Sayed, N and Berriman, M} } @article {38575, title = {Whole-genome sequence analysis of Pseudomonas syringae pv. phaseolicola 1448A reveals divergence among pathovars in genes involved in virulence and transposition}, journal = {Journal of Bacteriology}, volume = {187}, number = {18}, year = {2005}, pages = {6488--6498}, note = {http://www.ncbi.nlm.nih.gov/pubmed/16159782?dopt=Abstract}, doi = {10.1128/JB.187.18.6488-6498.2005}, abstract = {Pseudomonas syringae pv. phaseolicola, a gram-negative bacterial plant pathogen, is the causal agent of halo blight of bean. In this study, we report on the genome sequence of P. syringae pv. phaseolicola isolate 1448A, which encodes 5,353 open reading frames (ORFs) on one circular chromosome (5,928,787 bp) and two plasmids (131,950 bp and 51,711 bp). Comparative analyses with a phylogenetically divergent pathovar, P. syringae pv. tomato DC3000, revealed a strong degree of conservation at the gene and genome levels. In total, 4,133 ORFs were identified as putative orthologs in these two pathovars using a reciprocal best-hit method, with 3,941 ORFs present in conserved, syntenic blocks. Although these two pathovars are highly similar at the physiological level, they have distinct host ranges; 1448A causes disease in beans, and DC3000 is pathogenic on tomato and Arabidopsis. Examination of the complement of ORFs encoding virulence, fitness, and survival factors revealed a substantial, but not complete, overlap between these two pathovars. 
Another distinguishing feature between the two pathovars is their distinctive sets of transposable elements. With access to a fifth complete pseudomonad genome sequence, we were able to identify 3,567 ORFs that likely comprise the core Pseudomonas genome and 365 ORFs that are P. syringae specific.}, keywords = {Bacterial Proteins, DNA, Bacterial, Genes, Bacterial, Genome, Bacterial, Molecular Sequence Data, Pseudomonas syringae, Species Specificity, virulence}, author = {Joardar, Vinita and Lindeberg, Magdalen and Jackson, Robert W. and Selengut, J. and Dodson, Robert and Brinkac, Lauren M. and Daugherty, Sean C. and Deboy, Robert and Durkin, A. Scott and Giglio, Michelle Gwinn and Madupu, Ramana and Nelson, William C. and Rosovitz, M. J. and Sullivan, Steven and Crabtree, Jonathan and Creasy, Todd and Davidsen, Tanja and Haft, Dan H. and Zafar, Nikhat and Zhou, Liwei and Halpin, Rebecca and Holley, Tara and Khouri, Hoda and Feldblyum, Tamara and White, Owen and Fraser, Claire M. and Chatterjee, Arun K. and Cartinhour, Sam and Schneider, David J. and Mansfield, John and Collmer, Alan and Buell, C. Robin} } @article {38104, title = {Advances in schistosome genomics}, journal = {Trends in Parasitology}, volume = {20}, year = {2004}, doi = {10.1016/j.pt.2004.02.002}, abstract = {In Spring 2004, the first draft of the 270~Mb genome of Schistosoma mansoni will be released. This sequence is based on the assembly and annotation of a >7.5-fold coverage, shotgun sequencing project. The key stages involved in the international collaborative efforts that have led to the generation of these sequencing data for the parasite S. mansoni are discussed here.}, issn = {1471-4922}, author = {El-Sayed, Najib M. and Bartholomeu, Daniella and Ivens, Alasdair and Johnston, David A. 
and LoVerde, Philip T.} } @article {38165, title = {Comparison of the genome of the oral pathogen Treponema denticola with other spirochete genomes}, journal = {Proceedings of the National Academy of Sciences of the United States of America}, volume = {101}, year = {2004}, note = {http://www.ncbi.nlm.nih.gov/pubmed/15064399?dopt=Abstract}, doi = {10.1073/pnas.0307639101}, abstract = {We present the complete 2,843,201-bp genome sequence of Treponema denticola (ATCC 35405) an oral spirochete associated with periodontal disease. Analysis of the T. denticola genome reveals factors mediating coaggregation, cell signaling, stress protection, and other competitive and cooperative measures, consistent with its pathogenic nature and lifestyle within the mixed-species environment of subgingival dental plaque. Comparisons with previously sequenced spirochete genomes revealed specific factors contributing to differences and similarities in spirochete physiology as well as pathogenic potential. The T. denticola genome is considerably larger in size than the genome of the related syphilis-causing spirochete Treponema pallidum. The differences in gene content appear to be attributable to a combination of three phenomena: genome reduction, lineage-specific expansions, and horizontal gene transfer. 
Genes lost due to reductive evolution appear to be largely involved in metabolism and transport, whereas some of the genes that have arisen due to lineage-specific expansions are implicated in various pathogenic interactions, and genes acquired via horizontal gene transfer are largely phage-related or of unknown function.}, keywords = {ATP-Binding Cassette Transporters, Bacterial Proteins, Base Sequence, Borrelia burgdorferi, Genes, Bacterial, Genome, Bacterial, Leptospira interrogans, Models, Genetic, Molecular Sequence Data, Mouth, Sequence Homology, Amino Acid, Treponema, Treponema pallidum}, author = {Seshadri, Rekha and Myers, Garry S. A. and Tettelin, Herv{\'e} and Eisen, Jonathan A. and Heidelberg, John F. and Dodson, Robert J. and Davidsen, Tanja M. and DeBoy, Robert T. and Fouts, Derrick E. and Haft, Dan H. and Selengut, J. and Ren, Qinghu and Brinkac, Lauren M. and Madupu, Ramana and Kolonay, Jamie and Durkin, A. Scott and Daugherty, Sean C. and Shetty, Jyoti and Shvartsbeyn, Alla and Gebregeorgis, Elizabeth and Geer, Keita and Tsegaye, Getahun and Malek, Joel and Ayodeji, Bola and Shatsman, Sofiya and McLeod, Michael P. and Smajs, David and Howell, Jerrilyn K. and Pal, Sangita and Amin, Anita and Vashisth, Pankaj and McNeill, Thomas Z. and Xiang, Qin and Sodergren, Erica and Baca, Ernesto and Weinstock, George M. and Norris, Steven J. and Fraser, Claire M. and Paulsen, Ian T.} } @article {49635, title = {Gene synteny and evolution of genome architecture in trypanosomatids}, journal = {Mol Biochem Parasitol}, volume = {134}, year = {2004}, month = apr, pages = {183--191}, abstract = {

The trypanosomatid protozoa Trypanosoma brucei, Trypanosoma cruzi and Leishmania major are related human pathogens that cause markedly distinct diseases. Using information from genome sequencing projects currently underway, we have compared the sequences of large chromosomal fragments from each species. Despite high levels of divergence at the sequence level, these three species exhibit a striking conservation of gene order, suggesting that selection has maintained gene order among the trypanosomatids over hundreds of millions of years of evolution. The few sites of genome rearrangement between these species are marked by the presence of retrotransposon-like elements, suggesting that retrotransposons may have played an important role in shaping trypanosomatid genome organization. A degenerate retroelement was identified in L. major by examining the regions near breakage points of the synteny. This is the first such element found in L. major suggesting that retroelements were found in the common ancestor of all three species.

}, keywords = {Animals, Computational Biology, Evolution, Molecular, Gene Order, Genome, Protozoan, Genomics, Leishmania major, Multigene Family, Recombination, Genetic, Retroelements, Selection, Genetic, Synteny, Trypanosoma brucei brucei, Trypanosoma cruzi, Trypanosomatina}, issn = {0166-6851}, doi = {10.1016/j.molbiopara.2003.11.012}, author = {Ghedin, Elodie and Bringaud, Frederic and Peterson, Jeremy and Myler, Peter and Berriman, Matthew and Ivens, Alasdair and Andersson, Bj{\"o}rn and Bontempi, Esteban and Eisen, Jonathan and Angiuoli, Sam and Wanless, David and Von Arx, Anna and Murphy, Lee and Lennard, Nicola and Salzberg, Steven and Adams, Mark D and White, Owen and Hall, Neil and Stuart, Kenneth and Fraser, Claire M and el-Sayed, Najib M A} } @article {38302, title = {Genome sequence of Silicibacter pomeroyi reveals adaptations to the marine environment}, journal = {Nature}, volume = {432}, year = {2004}, note = {http://www.ncbi.nlm.nih.gov/pubmed/15602564?dopt=Abstract}, doi = {10.1038/nature03170}, abstract = {Since the recognition of prokaryotes as essential components of the oceanic food web, bacterioplankton have been acknowledged as catalysts of most major biogeochemical processes in the sea. Studying heterotrophic bacterioplankton has been challenging, however, as most major clades have never been cultured or have only been grown to low densities in sea water. Here we describe the genome sequence of Silicibacter pomeroyi, a member of the marine Roseobacter clade (Fig. 1), the relatives of which comprise approximately 10-20\% of coastal and oceanic mixed-layer bacterioplankton. This first genome sequence from any major heterotrophic clade consists of a chromosome (4,109,442 base pairs) and megaplasmid (491,611 base pairs). Genome analysis indicates that this organism relies upon a lithoheterotrophic strategy that uses inorganic compounds (carbon monoxide and sulphide) to supplement heterotrophy. 
Silicibacter pomeroyi also has genes advantageous for associations with plankton and suspended particles, including genes for uptake of algal-derived compounds, use of metabolites from reducing microzones, rapid growth and cell-density-dependent regulation. This bacterium has a physiology distinct from that of marine oligotrophs, adding a new strategy to the recognized repertoire for coping with a nutrient-poor ocean.}, keywords = {Adaptation, Physiological, Carrier Proteins, Genes, Bacterial, Genome, Bacterial, marine biology, Molecular Sequence Data, Oceans and Seas, Phylogeny, plankton, RNA, Ribosomal, 16S, Roseobacter, Seawater}, author = {Moran, Mary Ann and Buchan, Alison and Gonz{\'a}lez, Jos{\'e} M. and Heidelberg, John F. and Whitman, William B. and Kiene, Ronald P. and Henriksen, James R. and King, Gary M. and Belas, Robert and Fuqua, Clay and Brinkac, Lauren and Lewis, Matt and Johri, Shivani and Weaver, Bruce and Pai, Grace and Eisen, Jonathan A. and Rahe, Elisha and Sheldon, Wade M. and Ye, Wenying and Miller, Todd R. and Carlton, Jane and Rasko, David A. and Paulsen, Ian T. and Ren, Qinghu and Daugherty, Sean C. and DeBoy, Robert T. and Dodson, Robert J. and Durkin, A. Scott and Madupu, Ramana and Nelson, William C. and Sullivan, Steven A. and Rosovitz, M. J. and Haft, Daniel H. and Selengut, J. and Ward, Naomi} } @article {38303, title = {The genome sequence of the anaerobic, sulfate-reducing bacterium Desulfovibrio vulgaris Hildenborough}, journal = {Nature Biotechnology}, volume = {22}, year = {2004}, note = {http://www.ncbi.nlm.nih.gov/pubmed/15077118?dopt=Abstract}, doi = {10.1038/nbt959}, abstract = {Desulfovibrio vulgaris Hildenborough is a model organism for studying the energy metabolism of sulfate-reducing bacteria (SRB) and for understanding the economic impacts of SRB, including biocorrosion of metal infrastructure and bioremediation of toxic metal ions. 
The 3,570,858 base pair (bp) genome sequence reveals a network of novel c-type cytochromes, connecting multiple periplasmic hydrogenases and formate dehydrogenases, as a key feature of its energy metabolism. The relative arrangement of genes encoding enzymes for energy transduction, together with inferred cellular location of the enzymes, provides a basis for proposing an expansion to the {\textquoteleft}hydrogen-cycling{\textquoteright} model for increasing energy efficiency in this bacterium. Plasmid-encoded functions include modification of cell surface components, nitrogen fixation and a type-III protein secretion system. This genome sequence represents a substantial step toward the elucidation of pathways for reduction (and bioremediation) of pollutants such as uranium and chromium and offers a new starting point for defining this organism{\textquoteright}s complex anaerobic respiration.}, keywords = {Desulfovibrio vulgaris, Energy Metabolism, Genome, Bacterial, Molecular Sequence Data}, author = {Heidelberg, John F. and Seshadri, Rekha and Haveman, Shelley A. and Hemme, Christopher L. and Paulsen, Ian T. and Kolonay, James F. and Eisen, Jonathan A. and Ward, Naomi and Methe, Barbara and Brinkac, Lauren M. and Daugherty, Sean C. and DeBoy, Robert T. and Dodson, Robert J. and Durkin, A. Scott and Madupu, Ramana and Nelson, William C. and Sullivan, Steven A. and Fouts, Derrick and Haft, Daniel H. and Selengut, J. and Peterson, Jeremy D. and Davidsen, Tanja M. and Zafar, Nikhat and Zhou, Liwei and Radune, Diana and Dimitrov, George and Hance, Mark and Tran, Kevin and Khouri, Hoda and Gill, John and Utterback, Terry R. and Feldblyum, Tamara V. and Wall, Judy D. 
and Voordouw, Gerrit and Fraser, Claire M.} } @article {38348, title = {The ingi and RIME non-LTR retrotransposons are not randomly distributed in the genome of Trypanosoma brucei}, journal = {Molecular Biology and Evolution}, volume = {21}, year = {2004}, pages = {520--528}, doi = {10.1093/molbev/msh045}, author = {Bringaud, F. and Biteau, N. and Zuiderwijk, E. and Berriman, M. and El-Sayed, Najib M. and Ghedin, E. and Melville, S. E. and Hall, N. and Baltz, T.} } @article {49634, title = {The ingi and RIME non-LTR retrotransposons are not randomly distributed in the genome of Trypanosoma brucei}, journal = {Mol Biol Evol}, volume = {21}, year = {2004}, month = mar, pages = {520--528}, abstract = {

The ingi (long and autonomous) and RIME (short and nonautonomous) non--long-terminal repeat retrotransposons are the most abundant mobile elements characterized to date in the genome of the African trypanosome Trypanosoma brucei. These retrotransposons were thought to be randomly distributed, but a detailed and comprehensive analysis of their genomic distribution had not been performed until now. To address this question, we analyzed the ingi/RIME sequences and flanking sequences from the ongoing T. brucei genome sequencing project (TREU927/4 strain). Among the 81 ingi/RIME elements analyzed, 60\% are complete, and 7\% of the ingi elements (approximately 15 copies per haploid genome) appear to encode for their own transposition. The size of the direct repeat flanking the ingi/RIME retrotransposons is conserved (i.e., 12-bp), and a strong 11-bp consensus pattern precedes the 5{\textquoteright}-direct repeat. The presence of a consensus pattern upstream of the retroelements was confirmed by the analysis of the base occurrence in 294 GSS containing 5{\textquoteright}-adjacent ingi/RIME sequences. The conserved sequence is present upstream of ingis and RIMEs, suggesting that ingi-encoded enzymatic activities are used for retrotransposition of RIMEs, which are short nonautonomous retroelements. In conclusion, the ingi and RIME retroelements are not randomly distributed in the genome of T. brucei and are preceded by a conserved sequence, which may be the recognition site of the ingi-encoded endonuclease.

}, keywords = {Amino Acid Sequence, Animals, Base Sequence, Consensus Sequence, Genome, Protozoan, Molecular Sequence Data, Retroelements, Sequence Analysis, Trypanosoma brucei brucei}, issn = {0737-4038}, doi = {10.1093/molbev/msh045}, author = {Bringaud, Frederic and Biteau, Nicolas and Zuiderwijk, Eduard and Berriman, Matthew and El-Sayed, Najib M and Ghedin, Elodie and Melville, Sara E and Hall, Neil and Baltz, Th{\'e}o} } @incollection {49567, title = {An {MCMC}-Based Particle Filter for Tracking Multiple Interacting Targets}, booktitle = {Computer Vision -- {ECCV} 2004}, series = {Lecture Notes in Computer Science}, volume = {3024}, year = {2004}, pages = {279--290}, publisher = {Springer Berlin Heidelberg}, organization = {Springer Berlin Heidelberg}, address = {Berlin, Heidelberg}, isbn = {978-3-540-21981-1}, issn = {0302-9743}, doi = {10.1007/978-3-540-24673-2_23}, url = {http://link.springer.com/10.1007/978-3-540-24673-2_23}, author = {Khan, Zia and Balch, Tucker and Dellaert, Frank}, editor = {Kanade, Takeo and Kittler, Josef and Kleinberg, Jon M. and Mattern, Friedemann and Mitchell, John C. and Nierstrasz, Oscar and Pandu Rangan, C. and Steffen, Bernhard and Sudan, Madhu and Terzopoulos, Demetri and Tygar, Dough and Vardi, Moshe Y. and Weikum, Gerhard and Pajdla, Tom{\'a}{\v s} and Matas, Ji{\v r}{\'\i}} } @article {38408, title = {A Note on Efficient Computation of Haplotypes via Perfect Phylogeny}, journal = {Journal of Computational Biology}, volume = {11}, year = {2004}, doi = {10.1089/cmb.2004.11.858}, abstract = {The problem of inferring haplotype phase from a population of genotypes has received a lot of attention recently. 
This is partly due to the observation that there are many regions on human genomic DNA where genetic recombination is rare (Helmuth, 2001; Daly et al., 2001; Stephens et al., 2001; Friss et al., 2001). A Haplotype Map project has been announced by NIH to identify and characterize populations in terms of these haplotypes. Recently, Gusfield introduced the perfect phylogeny haplotyping problem, as an algorithmic implication of the no-recombination in long blocks observation, together with the standard population-genetic assumption of infinite sites. Gusfield{\textquoteright}s solution based on matroid theory was followed by direct $\theta(nm^2)$ solutions that use simpler techniques (Bafna et al., 2003; Eskin et al., 2003), and also bound the number of solutions to the PPH problem. In this short note, we address two questions that were left open. First, can the algorithms of Bafna et al. (2003) and Eskin et al. (2003) be sped-up to $O(nm + m^2)$ time, which would imply an $O(nm)$ time-bound for the PPH problem? Second, if there are multiple solutions, can we find one that is most parsimonious in terms of the number of distinct haplotypes? We give reductions that suggest that the answer to both questions is ``no.'' For the first problem, we show that computing the output of the first step (in either method) is equivalent to Boolean matrix multiplication. Therefore, the best bound we can presently achieve is $O(nm^{\omega-1})$, where $\omega \leq 2.52$ is the exponent of matrix multiplication. Thus, any linear time solution to the PPH problem likely requires a different approach. 
For the second problem of computing a PPH solution that minimizes the number of distinct haplotypes, we show that the problem is NP-hard using a reduction from Vertex Cover (Garey and Johnson, 1979).}, issn = {1066-5277, 1557-8666}, author = {Bafna, Vineet and Gusfield, Dan and Hannenhalli, Sridhar and Yooseph, Shibu} } @article {38418, title = {Pandemic strains of O3:K6 Vibrio parahaemolyticus in the aquatic environment of Bangladesh}, journal = {Canadian Journal of Microbiology}, volume = {50}, year = {2004}, abstract = {A total of 1500 environmental strains of Vibrio parahaemolyticus, isolated from the aquatic environment of Bangladesh, were screened for the presence of a major V. parahaemolyticus virulence factor, the thermostable direct haemolysin (tdh) gene, by the colony blot hybridization method using a digoxigenin-labeled tdh gene probe. Of 1500 strains, 5 carried the tdh sequence, which was further confirmed by PCR using primers specific for the tdh gene. Examination by PCR confirmed that the 5 strains were V. parahaemolyticus and lacked the thermostable direct haemolysin-related haemolysin (trh) gene, the alternative major virulence gene known to be absent in pandemic strains. All 5 strains gave positive Kanagawa phenomenon reaction with characteristic beta-haemolysis on Wagatsuma agar medium. Southern blot analysis of the HindIII-digested chromosomal DNA demonstrated, in all 5 strains, the presence of 2 tdh genes common to strains positive for Kanagawa phenomenon. However, the 5 strains were found to belong to 3 different serotypes (O3:K29, O4:K37, and O3:K6). The 2 with pandemic serotype O3:K6 gave positive results in group-specific PCR and ORF8 PCR assays, characteristics unique to the pandemic clone. Clonal variations among the 5 isolates were analyzed by comparing RAPD and ribotyping patterns. Results showed different patterns for the 3 serotypes, but the pattern was identical among the O3:K6 strains. 
This is the first report on the isolation of pandemic O3:K6 strains of V. parahaemolyticus from the aquatic environment of Bangladesh.}, author = {Islam, M. S. and Tasmin, Rizwana and Khan, Sirajul Islam and Bakht, Habibul B. M. and Mahmood, Zahid Hayat and Rahman, M. Ziaur and Bhuiyan, Nurul Amin and Nishibuchi, Mitsuaki and Nair, G. Balakrish and Sack, R. Bradley and Huq, Anwar and Colwell, Rita R. and Sack, David A.} } @conference {49566, title = {A Rao-Blackwellized particle filter for eigentracking}, booktitle = {Proceedings of the 2004 IEEE Computer Society Conference on Computer Vision and Pattern Recognition, 2004. CVPR 2004.}, year = {2004}, publisher = {IEEE}, organization = {IEEE}, address = {Washington, DC, USA}, doi = {10.1109/CVPR.2004.1315271}, url = {http://ieeexplore.ieee.org/lpdocs/epic03/wrapper.htm?arnumber=1315271}, author = {Khan, Z. and Balch, T. and Dellaert, F.} } @article {49664, title = {Sequencing strategies for parasite genomes}, journal = {Methods Mol Biol}, volume = {270}, year = {2004}, pages = {1--16}, abstract = {

Recent advances in the field of sequencing have enabled the determination of the complete nucleotide sequence of a large number of complex genomes. The complete genome sequence of the parasite Plasmodium falciparum has been published recently, and many other parasite genome initiatives are underway. Parasite genomes vary in size, nucleotide composition, polymorphism level, content, and distribution of repetitive elements. These genomic features affect the performance of sequencing strategies. As a consequence, each of the ongoing parasite genome projects has adopted distinct sequencing approaches. The degree of completeness and accuracy desired as well as available funds should be considered carefully when choosing the most appropriate sequencing strategy.

}, keywords = {Animals, Chromosome Walking, Chromosomes, Artificial, Bacterial, Genetic Markers, Genome, Protozoan, Plasmodium falciparum}, issn = {1064-3745}, doi = {10.1385/1-59259-793-9:001}, author = {Bartholomeu, Daniella and El-Sayed, Najib M} } @article {38494, title = {Sequencing Strategies for Parasite Genomes}, journal = {Methods in Molecular Biology}, volume = {270}, year = {2004}, author = {Bartholomeu, D. and El-Sayed, Najib M. and Melville, S. E.} } @article {38514, title = {Structural flexibility in the Burkholderia mallei genome}, journal = {Proceedings of the National Academy of Sciences of the United States of America}, volume = {101}, year = {2004}, note = {http://www.ncbi.nlm.nih.gov/pubmed/15377793?dopt=Abstract}, doi = {10.1073/pnas.0403306101}, abstract = {The complete genome sequence of Burkholderia mallei ATCC 23344 provides insight into this highly infectious bacterium{\textquoteright}s pathogenicity and evolutionary history. B. mallei, the etiologic agent of glanders, has come under renewed scientific investigation as a result of recent concerns about its past and potential future use as a biological weapon. Genome analysis identified a number of putative virulence factors whose function was supported by comparative genome hybridization and expression profiling of the bacterium in hamster liver in vivo. The genome contains numerous insertion sequence elements that have mediated extensive deletions and rearrangements of the genome relative to Burkholderia pseudomallei. The genome also contains a vast number (>12,000) of simple sequence repeats. Variation in simple sequence repeats in key genes can provide a mechanism for generating antigenic variation that may account for the mammalian host{\textquoteright}s inability to mount a durable adaptive immune response to a B. 
mallei infection.}, keywords = {Animals, Base Composition, Base Sequence, Burkholderia mallei, Chromosomes, Bacterial, Cricetinae, Genome, Bacterial, Glanders, Liver, Mesocricetus, Molecular Sequence Data, Multigene Family, Oligonucleotide Array Sequence Analysis, Open Reading Frames, virulence}, author = {Nierman, William C. and DeShazer, David and Kim, H. Stanley and Tettelin, Herv{\'e} and Nelson, Karen E. and Feldblyum, Tamara and Ulrich, Ricky L. and Ronning, Catherine M. and Brinkac, Lauren M. and Daugherty, Sean C. and Davidsen, Tanja D. and DeBoy, Robert T. and Dimitrov, George and Dodson, Robert J. and Durkin, A. Scott and Gwinn, Michelle L. and Haft, Daniel H. and Khouri, Hoda and Kolonay, James F. and Madupu, Ramana and Mohammoud, Yasmin and Nelson, William C. and Radune, Diana and Romero, Claudia M. and Sarria, Saul and Selengut, J. and Shamblin, Christine and Sullivan, Steven A. and White, Owen and Yu, Yan and Zafar, Nikhat and Zhou, Liwei and Fraser, Claire M.} } @article {38096, title = {Viable but Nonculturable Vibrio Cholerae O1 in the Aquatic Environment of Argentina}, journal = {Applied and Environmental Microbiology}, volume = {70}, number = {12}, year = {2004}, pages = {7481--7486}, doi = {10.1128/AEM.70.12.7481-7486.2004}, abstract = {In Argentina, as in other countries of Latin America, cholera has occurred in an epidemic pattern. Vibrio cholerae O1 is native to the aquatic environment, and it occurs in both culturable and viable but nonculturable (VNC) forms, the latter during interepidemic periods. This is the first report of the presence of VNC V. cholerae O1 in the estuarine and marine waters of the R{\'\i}o de la Plata and the Argentine shelf of the Atlantic Ocean, respectively. Employing immunofluorescence and PCR methods, we were able to detect reservoirs of V. cholerae O1 carrying the virulence-associated genes ctxA and tcpA. The VNC forms of V. 
cholerae O1 were identified in samples of water, phytoplankton, and zooplankton; the latter organisms were mainly the copepods Acartia tonsa, Diaptomus sp., Paracalanus crassirostris, and Paracalanus parvus. We found that under favorable conditions, the VNC form of V. cholerae can revert to the pathogenic, transmissible state. We concluded that V. cholerae O1 is a resident of Argentinean waters, as has been shown to be the case in other geographic regions of the world.}, issn = {0099-2240, 1098-5336}, author = {Binsztein, Norma and Costagliola, Marcela C. and Pichel, Mariana and Jurquiza, Ver{\'o}nica and Ram{\'\i}rez, Fernando C. and Akselman, Rut and Vacchino, Marta and Huq, Anwarul and Colwell, Rita R.} } @article {38574, title = {Whole genome comparisons of serotype 4b and 1/2a strains of the food-borne pathogen {Listeria} monocytogenes reveal new insights into the core genome components of this species}, journal = {Nucleic Acids Research}, volume = {32}, year = {2004}, url = {http://www.ncbi.nlm.nih.gov/pubmed/15115801?dopt=Abstract}, doi = {10.1093/nar/gkh562}, abstract = {The genomes of three strains of Listeria monocytogenes that have been associated with food-borne illness in the USA were subjected to whole genome comparative analysis. A total of 51, 97 and 69 strain-specific genes were identified in L.monocytogenes strains F2365 (serotype 4b, cheese isolate), F6854 (serotype 1/2a, frankfurter isolate) and H7858 (serotype 4b, meat isolate), respectively. Eighty-three genes were restricted to serotype 1/2a and 51 to serotype 4b strains. These strain- and serotype-specific genes probably contribute to observed differences in pathogenicity, and the ability of the organisms to survive and grow in their respective environmental niches. 
The serotype 1/2a-specific genes include an operon that encodes the rhamnose biosynthetic pathway that is associated with teichoic acid biosynthesis, as well as operons for five glycosyl transferases and an adenine-specific DNA methyltransferase. A total of 8603 and 105 050 high quality single nucleotide polymorphisms (SNPs) were found on the draft genome sequences of strain H7858 and strain F6854, respectively, when compared with strain F2365. Whole genome comparative analyses revealed that the L.monocytogenes genomes are essentially syntenic, with the majority of genomic differences consisting of phage insertions, transposable elements and SNPs.}, keywords = {Base Composition, Chromosomes, Bacterial, DNA Transposable Elements, Food Microbiology, Genes, Bacterial, Genome, Bacterial, Genomics, Listeria monocytogenes, Meat, Open Reading Frames, Physical Chromosome Mapping, Polymorphism, Single Nucleotide, Prophages, Serotyping, Species Specificity, Synteny, virulence}, author = {Nelson, Karen E. and Fouts, Derrick E. and Mongodin, Emmanuel F. and Ravel, Jacques and DeBoy, Robert T. and Kolonay, James F. and Rasko, David A. and Angiuoli, Samuel V. and Gill, Steven R. and Paulsen, Ian T. and Peterson, Jeremy and White, Owen and Nelson, William C. and Nierman, William and Beanan, Maureen J. and Brinkac, Lauren M. and Daugherty, Sean C. and Dodson, Robert J. and Durkin, A. Scott and Madupu, Ramana and Haft, Daniel H. and Selengut, J. and Van Aken, Susan and Khouri, Hoda and Fedorova, Nadia and Forberger, Heather and Tran, Bao and Kathariou, Sophia and Wonderling, Laura D. and Uhlich, Gaylen A. and Bayles, Darrell O. and Luchansky, John B. and Fraser, Claire M.} } @article {38168, title = {The complete genome sequence of the {Arabidopsis} and tomato pathogen {Pseudomonas} syringae pv. 
tomato {DC3000}}, journal = {Proceedings of the National Academy of Sciences of the United States of America}, volume = {100}, year = {2003}, url = {http://www.ncbi.nlm.nih.gov/pubmed/12928499?dopt=Abstract}, doi = {10.1073/pnas.1731982100}, abstract = {We report the complete genome sequence of the model bacterial pathogen Pseudomonas syringae pathovar tomato DC3000 (DC3000), which is pathogenic on tomato and Arabidopsis thaliana. The DC3000 genome (6.5 megabases) contains a circular chromosome and two plasmids, which collectively encode 5,763 ORFs. We identified 298 established and putative virulence genes, including several clusters of genes encoding 31 confirmed and 19 predicted type III secretion system effector proteins. Many of the virulence genes were members of paralogous families and also were proximal to mobile elements, which collectively comprise 7\% of the DC3000 genome. The bacterium possesses a large repertoire of transporters for the acquisition of nutrients, particularly sugars, as well as genes implicated in attachment to plant surfaces. Over 12\% of the genes are dedicated to regulation, which may reflect the need for rapid adaptation to the diverse environments encountered during epiphytic growth and pathogenesis. Comparative analyses confirmed a high degree of similarity with two sequenced pseudomonads, Pseudomonas putida and Pseudomonas aeruginosa, yet revealed 1,159 genes unique to DC3000, of which 811 lack a known function.}, keywords = {Arabidopsis, Base Sequence, Biological Transport, Genome, Bacterial, Lycopersicon esculentum, Molecular Sequence Data, Plant Growth Regulators, Plasmids, Pseudomonas, Reactive Oxygen Species, Siderophores, virulence}, author = {Buell, C. Robin and Joardar, Vinita and Lindeberg, Magdalen and Selengut, J. and Paulsen, Ian T. and Gwinn, Michelle L. and Dodson, Robert J. and DeBoy, Robert T. and Durkin, A. Scott and Kolonay, James F. 
and Madupu, Ramana and Daugherty, Sean and Brinkac, Lauren and Beanan, Maureen J. and Haft, Daniel H. and Nelson, William C. and Davidsen, Tanja and Zafar, Nikhat and Zhou, Liwei and Liu, Jia and Yuan, Qiaoping and Khouri, Hoda and Fedorova, Nadia and Tran, Bao and Russell, Daniel and Berry, Kristi and Utterback, Teresa and van Aken, Susan E. and Feldblyum, Tamara V. and D{\textquoteright}Ascenzo, Mark and Deng, Wen-Ling and Ramos, Adela R. and Alfano, James R. and Cartinhour, Samuel and Chatterjee, Arun K. and Delaney, Terrence P. and Lazarowitz, Sondra G. and Martin, Gregory B. and Schneider, David J. and Tang, Xiaoyan and Bender, Carol L. and White, Owen and Fraser, Claire M. and Collmer, Alan} } @article {38214, title = {The dog genome: survey sequencing and comparative analysis}, journal = {Science}, volume = {301}, year = {2003}, publisher = {American Association for the Advancement of Science}, author = {Kirkness, E. F. and Bafna, V. and Halpern, A. L. and Levy, S. and Remington, K. and Rusch, D. B. and Delcher, A. L. and Pop, M. and Wang, W. and Fraser, C. M. and others} } @proceedings {38218, title = {Dynamic querying for pattern identification in microarray and genomic data}, volume = {3}, year = {2003}, publisher = {IEEE}, doi = {10.1109/ICME.2003.1221346}, abstract = {Data sets involving linear ordered sequences are a recurring theme in bioinformatics. Dynamic query tools that support exploration of these data sets can be useful for identifying patterns of interest. This paper describes the use of one such tool - timesearcher - to interactively explore linear sequence data sets taken from two bioinformatics problems. Microarray time course data sets involve expression levels for large numbers of genes over multiple time points. Timesearcher can be used to interactively search these data sets for genes with expression profiles of interest. 
The occurrence frequencies of short sequences of DNA in aligned exons can be used to identify sequences that play a role in the pre-mRNA splicing. Timesearcher can be used to search these data sets for candidate splicing signals.}, keywords = {Bioinformatics, data sets, Displays, dynamic querying, expression profiles, Frequency, Gene expression, genes, Genetics, genomic data, Genomics, linear ordered sequences, macromolecules, medical signal processing, Mice, Microarray, pattern identification, pattern recognition, premRNA splicing, Query processing, sequences, Signal processing, splicing, TimeSearcher}, isbn = {0-7803-7965-9}, author = {Hochheiser, H. and Baehrecke, E. H. and Mount, Stephen M. and Shneiderman, Ben} } @conference {49568, title = {Efficient particle filter-based tracking of multiple interacting targets using an {MRF}-based motion model}, booktitle = {Proceedings 2003 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS 2003) (Cat. No.03CH37453)}, year = {2003}, publisher = {IEEE}, organization = {IEEE}, address = {Las Vegas, Nevada, USA}, doi = {10.1109/IROS.2003.1250637}, url = {http://ieeexplore.ieee.org/lpdocs/epic03/wrapper.htm?arnumber=1250637}, author = {Khan, Z. and Balch, T. and Dellaert, F.} } @article {38291, title = {Genome of {Geobacter} sulfurreducens: metal reduction in subsurface environments}, journal = {Science}, volume = {302}, year = {2003}, url = {http://www.ncbi.nlm.nih.gov/pubmed/14671304?dopt=Abstract}, doi = {10.1126/science.1088727}, abstract = {The complete genome sequence of Geobacter sulfurreducens, a delta-proteobacterium, reveals unsuspected capabilities, including evidence of aerobic metabolism, one-carbon and complex carbon metabolism, motility, and chemotactic behavior. 
These characteristics, coupled with the possession of many two-component sensors and many c-type cytochromes, reveal an ability to create alternative, redundant, electron transport networks and offer insights into the process of metal ion reduction in subsurface environments. As well as playing roles in the global cycling of metals and carbon, this organism clearly has the potential for use in bioremediation of radioactive metals and in the generation of electricity.}, keywords = {Acetates, Acetyl Coenzyme A, Aerobiosis, Anaerobiosis, Bacterial Proteins, Carbon, Chemotaxis, Chromosomes, Bacterial, Cytochromes c, Electron Transport, Energy Metabolism, Genes, Bacterial, Genes, Regulator, Genome, Bacterial, Geobacter, Hydrogen, Metals, Movement, Open Reading Frames, Oxidation-Reduction, Phylogeny}, author = {Meth{\'e}, B. A. and Nelson, K. E. and Eisen, J. A. and Paulsen, I. T. and Nelson, W. and Heidelberg, J. F. and Wu, D. and Wu, M. and Ward, N. and Beanan, M. J. and Dodson, R. J. and Madupu, R. and Brinkac, L. M. and Daugherty, S. C. and DeBoy, R. T. and Durkin, A. S. and Gwinn, M. and Kolonay, J. F. and Sullivan, S. A. and Haft, D. H. and Selengut, J. and Davidsen, T. M. and Zafar, N. and White, O. and Tran, B. and Romero, C. and Forberger, H. A. and Weidman, J. and Khouri, H. and Feldblyum, T. V. and Utterback, T. R. and Van Aken, S. E. and Lovley, D. R. and Fraser, C. M.} } @article {38300, title = {The genome sequence of {Bacillus} anthracis {Ames} and comparison to closely related bacteria}, journal = {Nature}, volume = {423}, year = {2003}, 
doi = {10.1038/nature01586}, abstract = {Bacillus anthracis is an endospore-forming bacterium that causes inhalational anthrax1. Key virulence genes are found on plasmids (extra-chromosomal, circular, double-stranded DNA molecules) pXO1 (ref. 2) and pXO2 (ref. 3). To identify additional genes that might contribute to virulence, we analysed the complete sequence of the chromosome of B. anthracis Ames (about 5.23 megabases). We found several chromosomally encoded proteins that may contribute to pathogenicity{\textemdash}including haemolysins, phospholipases and iron acquisition functions{\textemdash}and identified numerous surface proteins that might be important targets for vaccines and drugs. Almost all these putative chromosomal virulence and surface proteins have homologues in Bacillus cereus, highlighting the similarity of B. anthracis to near-neighbours that are not associated with anthrax4. By performing a comparative genome hybridization of 19 B. cereus and Bacillus thuringiensis strains against a B. anthracis DNA microarray, we confirmed the general similarity of chromosomal genes among this group of close relatives. However, we found that the gene sequences of pXO1 and pXO2 were more variable between strains, suggesting plasmid mobility in the group. The complete sequence of B. anthracis is a step towards a better understanding of anthrax pathogenesis.}, issn = {0028-0836}, author = {Read, Timothy D. and Peterson, Scott N. and Tourasse, Nicolas and Baillie, Les W. and Paulsen, Ian T. and Nelson, Karen E. and Tettelin, Herv{\'e} and Fouts, Derrick E. and Eisen, Jonathan A. and Gill, Steven R. and Holtzapple, Erik K. and {\O}kstad, Ole Andreas and Helgason, Erlendur and Rilstone, Jennifer and Wu, Martin and Kolonay, James F. and Beanan, Maureen J. and Dodson, Robert J. and Brinkac, Lauren M. and Gwinn, Michelle and DeBoy, Robert T. and Madpu, Ramana and Daugherty, Sean C. and Durkin, A. Scott and Haft, Daniel H. and Nelson, William C. 
and Peterson, Jeremy D. and Pop, M. and Khouri, Hoda M. and Radune, Diana and Benton, Jonathan L. and Mahamoud, Yasmin and Jiang, Lingxia and Hance, Ioana R. and Weidman, Janice F. and Berry, Kristi J. and Plaut, Roger D. and Wolf, Alex M. and Watkins, Kisha L. and Nierman, William C. and Hazen, Alyson and Cline, Robin and Redmond, Caroline and Thwaite, Joanne E. and White, Owen and Salzberg, Steven L. and Thomason, Brendan and Friedlander, Arthur M. and Koehler, Theresa M. and Hanna, Philip C. and Kolst{\o}, Anne-Brit and Fraser, Claire M.} } @article {38489, title = {The sequence and analysis of {Trypanosoma} brucei chromosome {II}}, journal = {Nucleic Acids Research}, volume = {31}, year = {2003}, author = {El-Sayed, Najib M. and Ghedin, E. and Song, J. and MacLeod, A. and Bringaud, F. and Larkin, C. and Wanless, D. and Peterson, J. and Hou, L. and Taylor, S. and others} } @article {49633, title = {The sequence and analysis of {Trypanosoma} brucei chromosome {II}.}, journal = {Nucleic Acids Research}, volume = {31}, year = {2003}, month = aug, pages = {4856--4863}, abstract = {

We report here the sequence of chromosome II from Trypanosoma brucei, the causative agent of African sleeping sickness. The 1.2-Mb pairs encode about 470 predicted genes organised in 17 directional clusters on either strand, the largest cluster of which has 92 genes lined up over a 284-kb region. An analysis of the GC skew reveals strand compositional asymmetries that coincide with the distribution of protein-coding genes, suggesting these asymmetries may be the result of transcription-coupled repair on coding versus non-coding strand. A 5-cM genetic map of the chromosome reveals recombinational {\textquoteright}hot{\textquoteright} and {\textquoteright}cold{\textquoteright} regions, the latter of which is predicted to include the putative centromere. One end of the chromosome consists of a 250-kb region almost exclusively composed of RHS (pseudo)genes that belong to a newly characterised multigene family containing a hot spot of insertion for retroelements. Interspersed with the RHS genes are a few copies of truncated RNA polymerase pseudogenes as well as expression site associated (pseudo)genes (ESAGs) 3 and 4, and 76 bp repeats. These features are reminiscent of a vestigial variant surface glycoprotein (VSG) gene expression site. The other end of the chromosome contains a 30-kb array of VSG genes, the majority of which are pseudogenes, suggesting that this region may be a site for modular de novo construction of VSG gene diversity during transposition/gene conversion events.

}, keywords = {Animals, Antigens, Protozoan, Chromosome mapping, Chromosomes, DNA, Protozoan, Gene Duplication, Genes, Protozoan, Molecular Sequence Data, Pseudogenes, Recombination, Genetic, Sequence Analysis, DNA, Trypanosoma brucei brucei}, issn = {1362-4962}, author = {el-Sayed, Najib M A and Ghedin, Elodie and Song, Jinming and MacLeod, Annette and Bringaud, Frederic and Larkin, Christopher and Wanless, David and Peterson, Jeremy and Hou, Lihua and Taylor, Sonya and Tweedie, Alison and Biteau, Nicolas and Khalak, Hanif G and Lin, Xiaoying and Mason, Tanya and Hannick, Linda and Caler, Elisabet and Blandin, Ga{\"e}lle and Bartholomeu, Daniella and Simpson, Anjana J and Kaul, Samir and Zhao, Hong and Pai, Grace and Van Aken, Susan and Utterback, Teresa and Haas, Brian and Koo, Hean L and Umayam, Lowell and Suh, Bernard and Gerrard, Caroline and Leech, Vanessa and Qi, Rong and Zhou, Shiguo and Schwartz, David and Feldblyum, Tamara and Salzberg, Steven and Tait, Andrew and Turner, C Michael R and Ullu, Elisabetta and White, Owen and Melville, Sara and Adams, Mark D and Fraser, Claire M and Donelson, John E} } @article {38097, title = {1.375-approximation algorithm for sorting by reversals}, journal = {Algorithms{\textemdash}ESA 2002}, year = {2002}, publisher = {Springer}, author = {Berman, P. and Hannenhalli, Sridhar and Karpinski, M.} } @inbook {38153, title = {Combinatorial Algorithms for Design of {DNA} Arrays}, booktitle = {Chip Technology}, series = {Advances in Biochemical Engineering/Biotechnology}, volume = {77}, year = {2002}, publisher = {Springer Berlin / Heidelberg}, organization = {Springer Berlin / Heidelberg}, abstract = {Optimal design of DNA arrays requires the development of algorithms with two-fold goals: reducing the effects caused by unintended illumination ( border length minimization problem ) and reducing the complexity of masks ( mask decomposition problem ). 
We describe algorithms that reduce the number of rectangles in mask decomposition by 20{\textendash}30\% as compared to a standard array design under the assumption that the arrangement of oligonucleotides on the array is fixed. This algorithm produces provably optimal solution for all studied real instances of array design. We also address the difficult problem of finding an arrangement which minimizes the border length and come up with a new idea of threading that significantly reduces the border length as compared to standard designs.}, isbn = {978-3-540-43215-9}, author = {Hannenhalli, Sridhar and Hubbell, Earl and Lipshutz, Robert and Pevzner, Pavel}, editor = {Hoheisel, J{\"o}rg and Brazma, A. and B{\"u}ssow, K. and Cantor, C. and Christians, F. and Chui, G. and Diaz, R. and Drmanac, R. and Drmanac, S. and Eickhoff, H. and Fellenberg, K. and Hannenhalli, Sridhar and Hoheisel, J. and Hou, A. and Hubbell, E. and Jin, H. and Jin, P. and Jurinke, C. and Konthur, Z. and K{\"o}ster, H. and Kwon, S. and Lacy, S. and Lehrach, H. and Lipshutz, R. and Little, D. and Lueking, A. and McGall, G. and Moeur, B. and Nordhoff, E. and Nyarsik, L. and Pevzner, P. and Robinson, A. and Sarkans, U. and Shafto, J. and Sohail, M. and Southern, E. and Swanson, D. and Ukrainczyk, T. and van den Boom, D. and Vilo, J. and Vingron, M. and Walter, G. and Xu, C.} } @article {38157, title = {Comparative Genome Sequencing for Discovery of Novel Polymorphisms in {Bacillus} Anthracis}, journal = {Science}, volume = {296}, year = {2002}, doi = {10.1126/science.1071837}, abstract = {Comparison of the whole-genome sequence of Bacillus anthracis isolated from a victim of a recent bioterrorist anthrax attack with a reference reveals 60 new markers that include single nucleotide polymorphisms (SNPs), inserted or deleted sequences, and tandem repeats. Genome comparison detected four high-quality SNPs between the two sequenced B. 
anthracis chromosomes and seven differences among different preparations of the reference genome. These markers have been tested on a collection of anthrax isolates and were found to divide these samples into distinct families. These results demonstrate that genome-based analysis of microbial pathogens will provide a powerful new tool for investigation of infectious disease outbreaks.}, issn = {0036-8075, 1095-9203}, author = {Read, Timothy D. and Salzberg, Steven L. and Pop, M. and Shumway, Martin and Umayam, Lowell and Jiang, Lingxia and Holtzapple, Erik and Busch, Joseph D. and Smith, Kimothy L. and Schupp, James M. and Solomon, Daniel and Keim, Paul and Fraser, Claire M.} } @article {49687, title = {The draft genome of {Ciona} intestinalis: insights into chordate and vertebrate origins.}, journal = {Science}, volume = {298}, year = {2002}, month = dec, pages = {2157--2167}, abstract = {

The first chordates appear in the fossil record at the time of the Cambrian explosion, nearly 550 million years ago. The modern ascidian tadpole represents a plausible approximation to these ancestral chordates. To illuminate the origins of chordate and vertebrates, we generated a draft of the protein-coding portion of the genome of the most studied ascidian, Ciona intestinalis. The Ciona genome contains approximately 16,000 protein-coding genes, similar to the number in other invertebrates, but only half that found in vertebrates. Vertebrate gene families are typically found in simplified form in Ciona, suggesting that ascidians contain the basic ancestral complement of genes involved in cell signaling and development. The ascidian genome has also acquired a number of lineage-specific innovations, including a group of genes engaged in cellulose metabolism that are related to those in bacteria and fungi.

}, keywords = {Alleles, Animals, Apoptosis, Base Sequence, Cellulose, Central Nervous System, Ciona intestinalis, Computational Biology, Endocrine System, Gene Dosage, Gene Duplication, genes, Genes, Homeobox, Genome, Heart, Immunity, Molecular Sequence Data, Multigene Family, Muscle Proteins, Organizers, Embryonic, Phylogeny, Polymorphism, Genetic, Proteins, Sequence Analysis, DNA, Sequence Homology, Nucleic Acid, Species Specificity, Thyroid Gland, Urochordata, Vertebrates}, issn = {1095-9203}, doi = {10.1126/science.1080049}, author = {Dehal, Paramvir and Satou, Yutaka and Campbell, Robert K and Chapman, Jarrod and Degnan, Bernard and De Tomaso, Anthony and Davidson, Brad and Di Gregorio, Anna and Gelpke, Maarten and Goodstein, David M and Harafuji, Naoe and Hastings, Kenneth E M and Ho, Isaac and Hotta, Kohji and Huang, Wayne and Kawashima, Takeshi and Lemaire, Patrick and Martinez, Diego and Meinertzhagen, Ian A and Necula, Simona and Nonaka, Masaru and Putnam, Nik and Rash, Sam and Saiga, Hidetoshi and Satake, Masanobu and Terry, Astrid and Yamada, Lixy and Wang, Hong-Gang and Awazu, Satoko and Azumi, Kaoru and Boore, Jeffrey and Branno, Margherita and Chin-Bow, Stephen and DeSantis, Rosaria and Doyle, Sharon and Francino, Pilar and Keys, David N and Haga, Shinobu and Hayashi, Hiroko and Hino, Kyosuke and Imai, Kaoru S and Inaba, Kazuo and Kano, Shungo and Kobayashi, Kenji and Kobayashi, Mari and Lee, Byung-In and Makabe, Kazuhiro W and Manohar, Chitra and Matassi, Giorgio and Medina, Monica and Mochizuki, Yasuaki and Mount, Steve and Morishita, Tomomi and Miura, Sachiko and Nakayama, Akie and Nishizaka, Satoko and Nomoto, Hisayo and Ohta, Fumiko and Oishi, Kazuko and Rigoutsos, Isidore and Sano, Masako and Sasaki, Akane and Sasakura, Yasunori and Shoguchi, Eiichi and Shin-i, Tadasu and Spagnuolo, Antoinetta and Stainier, Didier and Suzuki, Miho M and Tassy, Olivier and Takatori, Naohito and Tokuoka, Miki and Yagi, Kasumi and Yoshizaki, Fumiko and Wada, 
Shuichi and Zhang, Cindy and Hyatt, P Douglas and Larimer, Frank and Detter, Chris and Doggett, Norman and Glavina, Tijana and Hawkins, Trevor and Richardson, Paul and Lucas, Susan and Kohara, Yuji and Levine, Michael and Satoh, Nori and Rokhsar, Daniel S} } @article {38295, title = {Genome sequence and comparative analysis of the model rodent malaria parasite {Plasmodium} yoelii yoelii}, journal = {Nature}, volume = {419}, year = {2002}, doi = {10.1038/nature01099}, abstract = {Species of malaria parasite that infect rodents have long been used as models for malaria disease research. Here we report the whole-genome shotgun sequence of one species, Plasmodium yoelii yoelii, and comparative studies with the genome of the human malaria parasite Plasmodium falciparum clone 3D7. A synteny map of 2,212 P. y. yoelii contiguous DNA sequences (contigs) aligned to 14 P. falciparum chromosomes reveals marked conservation of gene synteny within the body of each chromosome. Of about 5,300 P. falciparum genes, more than 3,300 P. y. yoelii orthologues of predominantly metabolic function were identified. Over 800 copies of a variant antigen gene located in subtelomeric regions were found. This is the first genome sequence of a model eukaryotic parasite, and it provides insight into the use of such systems in the modelling of Plasmodium biology and disease.}, issn = {0028-0836}, author = {Carlton, Jane M. and Angiuoli, Samuel V. and Suh, Bernard B. and Kooij, Taco W. and Pertea, Mihaela and Silva, Joana C. and Ermolaeva, Maria D. and Allen, Jonathan E. and Selengut, J. and Koo, Hean L. and Peterson, Jeremy D. and Pop, M. and Kosack, Daniel S. and Shumway, Martin F. and Bidwell, Shelby L. and Shallom, Shamira J. and van Aken, Susan E. and Riedmuller, Steven B. and Feldblyum, Tamara V. and Cho, Jennifer K. and Quackenbush, John and Sedegah, Martha and Shoaibi, Azadeh and Cummings, Leda M. and Florens, Laurence and Yates, John R. and Raine, J. Dale and Sinden, Robert E. 
and Harris, Michael A. and Cunningham, Deirdre A. and Preiser, Peter R. and Bergman, Lawrence W. and Vaidya, Akhil B. and van Lin, Leo H. and Janse, Chris J. and Waters, Andrew P. and Smith, Hamilton O. and White, Owen R. and Salzberg, Steven L. and Venter, J. Craig and Fraser, Claire M. and Hoffman, Stephen L. and Gardner, Malcolm J. and Carucci, Daniel J.} } @article {38304, title = {Genome sequence of the human malaria parasite {Plasmodium} falciparum}, journal = {Nature}, volume = {419}, year = {2002}, url = {http://www.ncbi.nlm.nih.gov/pubmed/12368864?dopt=Abstract}, doi = {10.1038/nature01097}, abstract = {The parasite Plasmodium falciparum is responsible for hundreds of millions of cases of malaria, and kills more than one million African children annually. Here we report an analysis of the genome sequence of P. falciparum clone 3D7. The 23-megabase nuclear genome consists of 14 chromosomes, encodes about 5,300 genes, and is the most (A + T)-rich genome sequenced to date. Genes involved in antigenic variation are concentrated in the subtelomeric regions of the chromosomes. Compared to the genomes of free-living eukaryotic microbes, the genome of this intracellular parasite encodes fewer enzymes and transporters, but a large proportion of genes are devoted to immune evasion and host-parasite interactions. Many nuclear-encoded proteins are targeted to the apicoplast, an organelle involved in fatty-acid and isoprenoid metabolism. 
The genome sequence provides the foundation for future studies of this organism, and is being exploited in the search for new drugs and vaccines to fight malaria.}, keywords = {Animals, Chromosome Structures, DNA Repair, DNA Replication, DNA, Protozoan, Evolution, Molecular, Genome, Protozoan, HUMANS, Malaria Vaccines, Malaria, Falciparum, Membrane Transport Proteins, Molecular Sequence Data, Plasmodium falciparum, Plastids, Proteome, Protozoan Proteins, Recombination, Genetic, Sequence Analysis, DNA}, author = {Gardner, Malcolm J. and Hall, Neil and Fung, Eula and White, Owen and Berriman, Matthew and Hyman, Richard W. and Carlton, Jane M. and Pain, Arnab and Nelson, Karen E. and Bowman, Sharen and Paulsen, Ian T. and James, Keith and Eisen, Jonathan A. and Rutherford, Kim and Salzberg, Steven L. and Craig, Alister and Kyes, Sue and Chan, Man-Suen and Nene, Vishvanath and Shallom, Shamira J. and Suh, Bernard and Peterson, Jeremy and Angiuoli, Sam and Pertea, Mihaela and Allen, Jonathan and Selengut, J. and Haft, Daniel and Mather, Michael W. and Vaidya, Akhil B. and Martin, David M. A. and Fairlamb, Alan H. and Fraunholz, Martin J. and Roos, David S. and Ralph, Stuart A. and McFadden, Geoffrey I. and Cummings, Leda M. and Subramanian, G. Mani and Mungall, Chris and Venter, J. Craig and Carucci, Daniel J. and Hoffman, Stephen L. and Newbold, Chris and Davis, Ronald W. and Fraser, Claire M. and Barrell, Bart} } @article {38334, title = {Identification of non-autonomous {non-LTR} retrotransposons in the genome of {Trypanosoma} cruzi}, journal = {Molecular and Biochemical Parasitology}, volume = {124}, year = {2002}, doi = {10.1016/S0166-6851(02)00167-6}, abstract = {As observed for most eukaryotic cells, trypanosomatids contains non-LTR retrotransposons randomly inserted in the nuclear genome. 
Autonomous retroelements which, code for their own transposition, have been characterized in Trypanosoma brucei (ingi) and Trypanosoma cruzi (L1Tc), whereas non-autonomous retroelements have only been characterized in T. brucei (RIME). Here, we have characterized in the genome of Trypanosoma cruzi four complete copies of a non-autonomous non-LTR retrotransposon, called NARTc. This 0.26 kb NARTc element has the characteristics of non-LTR retrotransposons: the presence a poly(dA) tail and of a short flanking duplicated motif. Analysis of the Genome Survey Sequence databases indicated that the Trypanosoma cruzi haploid genome contains about 140 NARTc copies and about twice as many L1Tc copies. Interestingly, the NARTc and L1Tc retroelements share, with the Trypanosoma brucei ingi and RIME retrotransposons, a common sequence (the first 45 bp with 91\% identity), whereas the remaining sequences are very divergent. This suggests that these four trypanosome non-LTR retrotransposons were derived from the same common ancester and the sequence of their 5{\textquoteright}-extremity may have a functional role. In addition, the genome of Leishmania major contains the same conserved motif present in the trypanosome retroelements, whicle no transposable elements have been detected so far in Leishmania sp.}, keywords = {Ingi, L1Tc, Non-LTR retrotransposon, RIME, Trypanosoma brucei, Trypanosoma cruzi}, issn = {0166-6851}, author = {Bringaud, Frederic and Garc{\'\i}a-P{\'e}rez, Jos{\'e} Luis and Heras, Sara R. and Ghedin, Elodie and El-Sayed, Najib M. and Andersson, Bj{\"o}rn and Baltz, Th{\'e}o and Lopez, Manuel C.} } @article {49630, title = {Identification of non-autonomous {non-LTR} retrotransposons in the genome of {Trypanosoma} cruzi.}, journal = {Molecular and Biochemical Parasitology}, volume = {124}, year = {2002}, month = sep # "--" # oct, pages = {73--78}, abstract = {

As observed for most eukaryotic cells, trypanosomatids contains non-LTR retrotransposons randomly inserted in the nuclear genome. Autonomous retroelements which, code for their own transposition, have been characterized in Trypanosoma brucei (ingi) and Trypanosoma cruzi (L1Tc), whereas non-autonomous retroelements have only been characterized in T. brucei (RIME). Here, we have characterized in the genome of Trypanosoma cruzi four complete copies of a non-autonomous non-LTR retrotransposon, called NARTc. This 0.26 kb NARTc element has the characteristics of non-LTR retrotransposons: the presence a poly(dA) tail and of a short flanking duplicated motif. Analysis of the Genome Survey Sequence databases indicated that the Trypanosoma cruzi haploid genome contains about 140 NARTc copies and about twice as many L1Tc copies. Interestingly, the NARTc and L1Tc retroelements share, with the Trypanosoma brucei ingi and RIME retrotransposons, a common sequence (the first 45 bp with 91\% identity), whereas the remaining sequences are very divergent. This suggests that these four trypanosome non-LTR retrotransposons were derived from the same common ancester and the sequence of their 5{\textquoteright}-extremity may have a functional role. In addition, the genome of Leishmania major contains the same conserved motif present in the trypanosome retroelements, whicle no transposable elements have been detected so far in Leishmania sp.

}, keywords = {Animals, Base Sequence, Computational Biology, Genome, Protozoan, Long Interspersed Nucleotide Elements, Molecular Sequence Data, Retroelements, Short Interspersed Nucleotide Elements, Trypanosoma cruzi}, issn = {0166-6851}, author = {Bringaud, Frederic and Garc{\'\i}a-P{\'e}rez, Jos{\'e} Luis and Heras, Sara R and Ghedin, Elodie and El-Sayed, Najib M and Andersson, Bj{\"o}rn and Baltz, Th{\'e}o and Lopez, Manuel C} } @article {38406, title = {A new, expressed multigene family containing a hot spot for insertion of retroelements is associated with polymorphic subtelomeric regions of {Trypanosoma} brucei}, journal = {Eukaryotic Cell}, volume = {1}, year = {2002}, author = {Bringaud, F. and Biteau, N. and Melville, S. E. and Hez, S. and El-Sayed, Najib M. and Leech, V. and Berriman, M. and Hall, N. and Donelson, J. E. and Baltz, T.} } @article {49631, title = {A new, expressed multigene family containing a hot spot for insertion of retroelements is associated with polymorphic subtelomeric regions of {Trypanosoma} brucei.}, journal = {Eukaryotic Cell}, volume = {1}, year = {2002}, month = feb, pages = {137--151}, abstract = {

We describe a novel gene family that forms clusters in subtelomeric regions of Trypanosoma brucei chromosomes and partially accounts for the observed clustering of retrotransposons. The ingi and ribosomal inserted mobile element (RIME) non-LTR retrotransposons share 250 bp at both extremities and are the most abundant putatively mobile elements, with about 500 copies per haploid genome. From cDNA clones and subsequently in the T. brucei genomic DNA databases, we identified 52 homologous gene and pseudogene sequences, 16 of which contain a RIME and/or ingi retrotransposon inserted at exactly the same relative position. Here these genes are called the RHS family, for retrotransposon hot spot. Comparison of the protein sequences encoded by RHS genes (21 copies) and pseudogenes (24 copies) revealed a conserved central region containing an ATP/GTP-binding motif and the RIME/ingi insertion site. The RHS proteins share between 13 and 96\% identity, and six subfamilies, RHS1 to RHS6, can be defined on the basis of their divergent C-terminal domains. Immunofluorescence and Western blot analyses using RHS subfamily-specific immune sera show that RHS proteins are constitutively expressed and occur mainly in the nucleus. Analysis of Genome Survey Sequence databases indicated that the Trypanosoma brucei diploid genome contains about 280 RHS (pseudo)genes. Among the 52 identified RHS (pseudo)genes, 48 copies are in three RHS clusters located in subtelomeric regions of chromosomes Ia and II and adjacent to the active bloodstream form expression site in T. brucei strain TREU927/4 GUTat10.1. RHS genes comprise the remaining sequence of the size-polymorphic "repetitive region" described for T. brucei chromosome I, and a homologous gene family is present in the Trypanosoma cruzi genome.

}, keywords = {Amino Acid Sequence, Animals, Base Sequence, Cloning, Molecular, DNA Primers, DNA, Protozoan, Escherichia coli, Genes, Protozoan, Molecular Sequence Data, Multigene Family, Mutagenesis, Insertional, Phylogeny, Polymorphism, Genetic, Protozoan Proteins, Pseudogenes, Retroelements, sequence alignment, Sequence Homology, Amino Acid, Telomere, Trypanosoma brucei brucei, Trypanosoma cruzi}, issn = {1535-9778}, author = {Bringaud, Frederic and Biteau, Nicolas and Melville, Sara E and Hez, St{\'e}phanie and El-Sayed, Najib M and Leech, Vanessa and Berriman, Matthew and Hall, Neil and Donelson, John E and Baltz, Th{\'e}o} } @article {38492, title = {Sequence of Plasmodium falciparum chromosomes 2, 10, 11 and 14}, journal = {Nature}, volume = {419}, year = {2002}, note = {http://www.ncbi.nlm.nih.gov/pubmed/12368868?dopt=Abstract}, type = {10.1038/nature01094}, abstract = {The mosquito-borne malaria parasite Plasmodium falciparum kills an estimated 0.7--2.7 million people every year, primarily children in sub-Saharan Africa. Without effective interventions, a variety of factors---including the spread of parasites resistant to antimalarial drugs and the increasing insecticide resistance of mosquitoes---may cause the number of malaria cases to double over the next two decades. To stimulate basic research and facilitate the development of new drugs and vaccines, the genome of Plasmodium falciparum clone 3D7 has been sequenced using a chromosome-by-chromosome shotgun strategy. We report here the nucleotide sequences of chromosomes 10, 11 and 14, and a re-analysis of the chromosome 2 sequence. These chromosomes represent about 35\% of the 23-megabase P. falciparum genome.}, keywords = {Animals, Chromosomes, DNA, Protozoan, Genome, Protozoan, Plasmodium falciparum, Proteome, Protozoan Proteins, Sequence Analysis, DNA}, author = {Gardner, Malcolm J. and Shallom, Shamira J. and Carlton, Jane M. and Salzberg, Steven L. 
and Nene, Vishvanath and Shoaibi, Azadeh and Ciecko, Anne and Lynn, Jeffery and Rizzo, Michael and Weaver, Bruce and Jarrahi, Behnam and Brenner, Michael and Parvizi, Babak and Tallon, Luke and Moazzez, Azita and Granger, David and Fujii, Claire and Hansen, Cheryl and Pederson, James and Feldblyum, Tamara and Peterson, Jeremy and Suh, Bernard and Angiuoli, Sam and Pertea, Mihaela and Allen, Jonathan and J. Selengut and White, Owen and Cummings, Leda M. and Smith, Hamilton O. and Adams, Mark D. and Venter, J. Craig and Carucci, Daniel J. and Hoffman, Stephen L. and Fraser, Claire M.} } @article {38551, title = {Trypanosoma cruzi: RNA structure and post-transcriptional control of tubulin gene expression}, journal = {Experimental Parasitology}, volume = {102}, year = {2002}, type = {16/S0014-4894(03)00034-1}, abstract = {Changes in tubulin expression are among the biochemical and morphological adaptations that occur during the life cycle of Trypanosomatids. To investigate the mechanism responsible for the differential accumulation of tubulin mRNAs in Trypanosoma cruzi, we determine the sequences of [alpha]- and [beta]-tubulin transcripts and analyzed their expression during the life cycle of the parasite. Two [beta]-tubulin mRNAs of 1.9 and 2.3~kb were found to differ mainly by an additional 369 nucleotides at the end of the 3{\textquoteright} untranslated region (UTR). Although their transcription rates are similar in epimastigotes and amastigotes, [alpha]- and [beta]-tubulin transcripts are 3- to 6-fold more abundant in epimastigotes than in trypomastigotes and amastigotes. Accordingly, the half-lives of [alpha]- and [beta]-tubulin mRNAs are significantly higher in epimastigotes than in amastigotes. 
Transient transfection experiments indicated that positive regulatory elements occur in the 3{\textquoteright} UTR plus downstream intergenic region of the [alpha]-tubulin gene and that both positive and negative elements occur in the equivalent regions of the [beta]-tubulin gene. Index Descriptions and Abbreviations: Kinetoplastida; Trypanosoma cruzi; tubulin; gene regulation; PCR, polymerase chain reaction; UTR, untranslated region; IR, intergenic region; SL, spliced leader; BAC, bacterial artificial chromosome.}, isbn = {0014-4894}, author = {Bartholomeu, Daniella C. and Silva, Rosiane A. and Galv{\~a}o, Lucia M. C. and Najib M. El-Sayed and Donelson, John E. and Teixeira, Santuza M. R.} } @article {49632, title = {Trypanosoma cruzi: RNA structure and post-transcriptional control of tubulin gene expression.}, journal = {Exp Parasitol}, volume = {102}, year = {2002}, month = {2002 Nov-Dec}, pages = {123-33}, abstract = {

Changes in tubulin expression are among the biochemical and morphological adaptations that occur during the life cycle of Trypanosomatids. To investigate the mechanism responsible for the differential accumulation of tubulin mRNAs in Trypanosoma cruzi, we determined the sequences of alpha- and beta-tubulin transcripts and analyzed their expression during the life cycle of the parasite. Two beta-tubulin mRNAs of 1.9 and 2.3 kb were found to differ mainly by an additional 369 nucleotides at the end of the 3{\textquoteright} untranslated region (UTR). Although their transcription rates are similar in epimastigotes and amastigotes, alpha- and beta-tubulin transcripts are 3- to 6-fold more abundant in epimastigotes than in trypomastigotes and amastigotes. Accordingly, the half-lives of alpha- and beta-tubulin mRNAs are significantly higher in epimastigotes than in amastigotes. Transient transfection experiments indicated that positive regulatory elements occur in the 3{\textquoteright} UTR plus downstream intergenic region of the alpha-tubulin gene and that both positive and negative elements occur in the equivalent regions of the beta-tubulin gene.

}, keywords = {Animals, Base Sequence, Blotting, Northern, DNA, Complementary, DNA, Protozoan, Gene Expression Regulation, Half-Life, Life Cycle Stages, Molecular Sequence Data, RNA Processing, Post-Transcriptional, RNA, Messenger, RNA, Protozoan, Transcription, Genetic, Transfection, Trypanosoma cruzi, Tubulin}, issn = {0014-4894}, author = {Bartholomeu, Daniella C and Silva, Rosiane A and Galv{\~a}o, Lucia M C and el-Sayed, Najib M A and Donelson, John E and Teixeira, Santuza M R} } @conference {49569, title = {Automatically tracking and analyzing the behavior of live insect colonies}, booktitle = {Proceedings of the fifth international conference on Autonomous agents - AGENTS {\textquoteright}01}, year = {2001}, publisher = {ACM Press}, organization = {ACM Press}, address = {New York, New York, USA}, isbn = {158113326X}, doi = {10.1145/375735.376434}, url = {http://portal.acm.org/citation.cfm?doid=375735.376434}, author = {Balch, Tucker and Khan, Zia and Veloso, Manuela} } @proceedings {38226, title = {Efficient perspective-accurate silhouette computation and applications}, year = {2001}, month = {2001}, publisher = {ACM}, type = {10.1145/378583.378618}, address = {New York, NY, USA}, abstract = {Silhouettes are perceptually and geometrically salient features of geometric models. Hence a number of graphics and visualization applications need to find them to aid further processing. The efficient computation of silhouettes, especially in the context of perspective projection, is known to be difficult. This paper presents a novel efficient and practical algorithm to compute silhouettes from a sequence of viewpoints under perspective projection. Parallel projection is a special case of this algorithm. 
Our approach is based on a point-plane duality in three dimensions, which allows an efficient computation of the \emph{changes} in the silhouette of a polygonal model between consecutive frames. In addition, we present several applications of our technique to problems from computer graphics and medical visualization. We also provide experimental data that show the efficiency of our approach. million vertices on an SGI Onyx workstation.}, keywords = {rendering, silhouette, simplification}, isbn = {1-58113-357-X}, author = {M. Pop and Duncan, Christian and Barequet, Gill and Goodrich, Michael and Huang, Wenjing and Kumar, Subodh} } @article {49692, title = {The genome sequence of Drosophila melanogaster.}, journal = {Science}, volume = {287}, year = {2000}, month = {2000 Mar 24}, pages = {2185-95}, abstract = {

The fly Drosophila melanogaster is one of the most intensively studied organisms in biology and serves as a model system for the investigation of many developmental and cellular processes common to higher eukaryotes, including humans. We have determined the nucleotide sequence of nearly all of the approximately 120-megabase euchromatic portion of the Drosophila genome using a whole-genome shotgun sequencing strategy supported by extensive clone-based sequence and a high-quality bacterial artificial chromosome physical map. Efforts are under way to close the remaining gaps; however, the sequence is of sufficient accuracy and contiguity to be declared substantially complete and to support an initial analysis of genome structure and preliminary gene annotation and interpretation. The genome encodes approximately 13,600 genes, somewhat fewer than the smaller Caenorhabditis elegans genome, but with comparable functional diversity.

}, keywords = {Animals, Biological Transport, Chromatin, Cloning, Molecular, Computational Biology, Contig Mapping, Cytochrome P-450 Enzyme System, DNA Repair, DNA Replication, Drosophila melanogaster, Euchromatin, Gene Library, Genes, Insect, Genome, Heterochromatin, Insect Proteins, Nuclear Proteins, Protein Biosynthesis, Sequence Analysis, DNA, Transcription, Genetic}, issn = {0036-8075}, author = {Adams, M D and Celniker, S E and Holt, R A and Evans, C A and Gocayne, J D and Amanatides, P G and Scherer, S E and Li, P W and Hoskins, R A and Galle, R F and George, R A and Lewis, S E and Richards, S and Ashburner, M and Henderson, S N and Sutton, G G and Wortman, J R and Yandell, M D and Zhang, Q and Chen, L X and Brandon, R C and Rogers, Y H and Blazej, R G and Champe, M and Pfeiffer, B D and Wan, K H and Doyle, C and Baxter, E G and Helt, G and Nelson, C R and Gabor, G L and Abril, J F and Agbayani, A and An, H J and Andrews-Pfannkoch, C and Baldwin, D and Ballew, R M and Basu, A and Baxendale, J and Bayraktaroglu, L and Beasley, E M and Beeson, K Y and Benos, P V and Berman, B P and Bhandari, D and Bolshakov, S and Borkova, D and Botchan, M R and Bouck, J and Brokstein, P and Brottier, P and Burtis, K C and Busam, D A and Butler, H and Cadieu, E and Center, A and Chandra, I and Cherry, J M and Cawley, S and Dahlke, C and Davenport, L B and Davies, P and de Pablos, B and Delcher, A and Deng, Z and Mays, A D and Dew, I and Dietz, S M and Dodson, K and Doup, L E and Downes, M and Dugan-Rocha, S and Dunkov, B C and Dunn, P and Durbin, K J and Evangelista, C C and Ferraz, C and Ferriera, S and Fleischmann, W and Fosler, C and Gabrielian, A E and Garg, N S and Gelbart, W M and Glasser, K and Glodek, A and Gong, F and Gorrell, J H and Gu, Z and Guan, P and Harris, M and Harris, N L and Harvey, D and Heiman, T J and Hernandez, J R and Houck, J and Hostin, D and Houston, K A and Howland, T J and Wei, M H and Ibegwam, C and Jalali, M and Kalush, F and Karpen, G H and Ke, 
Z and Kennison, J A and Ketchum, K A and Kimmel, B E and Kodira, C D and Kraft, C and Kravitz, S and Kulp, D and Lai, Z and Lasko, P and Lei, Y and Levitsky, A A and Li, J and Li, Z and Liang, Y and Lin, X and Liu, X and Mattei, B and McIntosh, T C and McLeod, M P and McPherson, D and Merkulov, G and Milshina, N V and Mobarry, C and Morris, J and Moshrefi, A and Mount, S M and Moy, M and Murphy, B and Murphy, L and Muzny, D M and Nelson, D L and Nelson, D R and Nelson, K A and Nixon, K and Nusskern, D R and Pacleb, J M and Palazzolo, M and Pittman, G S and Pan, S and Pollard, J and Puri, V and Reese, M G and Reinert, K and Remington, K and Saunders, R D and Scheeler, F and Shen, H and Shue, B C and Sid{\'e}n-Kiamos, I and Simpson, M and Skupski, M P and Smith, T and Spier, E and Spradling, A C and Stapleton, M and Strong, R and Sun, E and Svirskas, R and Tector, C and Turner, R and Venter, E and Wang, A H and Wang, X and Wang, Z Y and Wassarman, D A and Weinstock, G M and Weissenbach, J and Williams, S M and Worley, K C and Wu, D and Yang, S and Yao, Q A and Ye, J and Yeh, R F and Zaveri, J S and Zhan, M and Zhang, G and Zhao, Q and Zheng, L and Zheng, X H and Zhong, F N and Zhong, W and Zhou, X and Zhu, S and Zhu, X and Smith, H O and Gibbs, R A and Myers, E W and Rubin, G M and Venter, J C} } @article {38360, title = {Ligand-Receptor Pairing Via Tree Comparison}, journal = {Journal of Computational BiologyJournal of Computational Biology}, volume = {7}, year = {2000}, type = {10.1089/10665270050081388}, abstract = {This paper introduces a novel class of tree comparison problems strongly motivated by an important and cost intensive step in drug discovery pipeline viz., mapping cell bound receptors to the ligands they bind to and vice versa. Tree comparison studies motivated by problems such as virus-host tree comparison, gene-species tree comparison and consensus tree problem have been reported. 
None of these studies are applicable in our context because in all these problems, there is a well-defined mapping of the nodes the trees are built on across the set of trees being compared. A new class of tree comparison problems arises in cases where finding the correspondence among the nodes of the trees being compared is itself the problem. The problem arises while trying to find the interclass correspondence between the members of a pair of coevolving classes, e.g., cell bound receptors and their ligands. Given the evolution of the two classes, the combinatorial problem is to find a mapping among the leaves of the two trees that optimizes a given cost function. In this work we formulate various combinatorial optimization problems motivated by the aforementioned biological problem for the first time. We present hardness results, give an efficient algorithm for a restriction of the problem and demonstrate its applicability.}, isbn = {1066-5277, 1557-8666}, author = {Bafna, Vineet and Sridhar Hannenhalli and Rice, Ken and Vawter, Lisa} } @article {49627, title = {Genetic nomenclature for Trypanosoma and Leishmania.}, journal = {Mol Biochem Parasitol}, volume = {97}, year = {1998}, month = {1998 Nov 30}, pages = {221-4}, keywords = {Animals, Leishmania, Terminology as Topic, Trypanosoma}, issn = {0166-6851}, author = {Clayton, C and Adams, M and Almeida, R and Baltz, T and Barrett, M and Bastien, P and Belli, S and Beverley, S and Biteau, N and Blackwell, J and Blaineau, C and Boshart, M and Bringaud, F and Cross, G and Cruz, A and Degrave, W and Donelson, J and El-Sayed, N and Fu, G and Ersfeld, K and Gibson, W and Gull, K and Ivens, A and Kelly, J and Vanhamme, L} } @article {38548, title = {Trends in the early careers of life scientists - Preface and executive summary}, journal = {Mol Biol Cell}, volume = {9}, year = {1998}, author = {Tilghman, S. and Astin, H. S. and Brinkley, W. and Chilton, M. D. and Michael P. Cummings and Ehrenberg, R. G. 
and Fox, M. F. and Glenn, K. and Green, P. J. and Hans, S. and Kelman, A. and LaPidus, J. and Levin, B. and McIntosh, J. R. and Riecken, H. and Stephen, P. E.} } @article {38175, title = {Computer models: A new approach to the investigation of disease}, journal = {MD Computing}, volume = {14}, year = {1997}, author = {Reggia, James A. and Ruppin, E. and Berndt, R. S.} } @article {38361, title = {Local rules for protein folding on a triangular lattice and generalized hydrophobicity in the HP model}, journal = {Journal of Computational Biology}, volume = {4}, year = {1997}, author = {Agarwala, R. and Batzoglou, S. and Dan{\v c}{\'\i}k, V. and Decatur, S. E. and Sridhar Hannenhalli and Farach, M. and Muthukrishnan, S. and Skiena, S.} } @article {38527, title = {Testing simple polygons}, journal = {Computational Geometry}, volume = {8}, year = {1997}, type = {10.1016/S0925-7721(96)00015-6}, abstract = {We consider the problem of verifying a simple polygon in the plane using {\textquotedblleft}test points{\textquotedblright}. A test point is a geometric probe that takes as input a point in Euclidean space, and returns {\textquotedblleft}+{\textquotedblright} if the point is inside the object being probed or {\textquotedblleft}-{\textquotedblright} if it is outside. A verification procedure takes as input a description of a target object, including its location and orientation, and it produces a set of test points that are used to verify whether a test object matches the description. We give a procedure for verifying an n-sided, non-degenerate, simple target polygon using 5n test points. This testing strategy works even if the test polygon has n + 1 vertices, and we show a lower bound of 3n + 1 test points for this case. We also give algorithms using O(n) test points for simple polygons that may be degenerate and for test polygons that may have up to n + 2 vertices. 
All of these algorithms work for polygons with holes. We also discuss extensions of our results to higher dimensions.}, keywords = {probing, Testing, Verifying}, isbn = {0925-7721}, author = {Arkin, Esther M. and Belleville, Patrice and Mitchell, Joseph S. B. and Mount, Dave and Romanik, Kathleen and Salzberg, Steven and Souvaine, Diane} } @article {38245, title = {Evolutionary biology of parasitic platyhelminths: The role of molecular phylogenetics}, journal = {Parasitol Today}, volume = {12}, year = {1996}, abstract = {As our appreciation of the diversity within the flatworms has grown, so too has our curiosity about the ways in which these varied creatures are related to one another. In particular, the parasitic groups (trematodes, cestodes and monogeneans) have been the focus of enquiry. Until recently, morphology, anatomy and life histories have provided the raw data for building hypotheses on relationships. Now, ultrastructural evidence, and most recently, molecular data from nucleic acid sequences, have been brought to bear on the topic. Here, David Blair, Andr{\'e}s Campos, Michael Cummings and Juan Pedro Laclette discuss the ways in which molecular data, in particular, are helping us recognize the various lineages of flatworms.}, author = {Blair, D. and Campos, A. and Michael P. Cummings and Laclette, J. P.} } @inbook {38257, title = {Fast sorting by reversal}, booktitle = {Combinatorial Pattern Matching}, series = {Lecture Notes in Computer Science}, volume = {1075}, year = {1996}, publisher = {Springer Berlin / Heidelberg}, organization = {Springer Berlin / Heidelberg}, abstract = {Analysis of genomes evolving by inversions leads to a combinatorial problem of sorting by reversals studied in detail recently. 
Following a series of work recently, Hannenhalli and Pevzner developed the first polynomial algorithm for the problem of sorting signed permutations by reversals and proposed an $O(n^4)$ implementation of the algorithm. In this paper we exploit a few combinatorial properties of the cycle graph of a permutation and propose an $O(n^2 \alpha(n))$ implementation of the algorithm, where $\alpha$ is the inverse Ackerman function. Besides making this algorithm practical, our technique improves implementations of the other rearrangement distance problems.}, isbn = {978-3-540-61258-2}, author = {Berman, Piotr and Sridhar Hannenhalli}, editor = {Hirschberg, Dan and Myers, Gene} } @inbook {38346, title = {Inferring phylogenies from DNA sequence data: The effects of sampling}, booktitle = {New Uses for New Phylogenies}, year = {1996}, publisher = {Oxford University Press}, organization = {Oxford University Press}, author = {Otto, S. P. and Michael P. Cummings and Wakeley, J.}, editor = {Harvey, P. H. and Leigh Brown, A. J. and Maynard Smith, J. and Nee, S.} } @article {38145, title = {cDNA expressed sequence tags of Trypanosoma brucei rhodesiense provide new insights into the biology of the parasite}, journal = {Molecular and Biochemical Parasitology}, volume = {73}, year = {1995}, type = {16/0166-6851(95)00098-L}, abstract = {A total of 518 expressed sequence tags (ESTs) have been generated from clones randomly selected from a cDNA library and a spliced leader sub-library of a Trypanosoma brucei rhodesiense bloodstream clone. 205 (39\%) of the clones were identified based on matches to 113 unique genes in the public databases. Of these, 71 cDNAs display significant similarities to genes in unrelated organisms encoding metabolic enzymes, signal transduction proteins, transcription factors, ribosomal proteins, histones, a proliferation-associated protein and thimet oligopeptidase, among others. 
313 of the cDNAs are not related to any other sequences in the databases. These cDNA ESTs provide new avenues of research for exploring both the novel trypanosome-specific genes and the genome organization of this parasite, as well as a resource for identifying trypanosome homologs to genes expressed in other organisms.}, keywords = {cDNA, Expressed sequence tag, Trypanosoma brucei rhodesiense}, isbn = {0166-6851}, author = {Najib M. El-Sayed and Alarcon, Clara M. and Beck, John C. and Sheffield, Val C. and Donelson, John E.} } @article {38558, title = {Unsupervised learning of disambiguation rules for part of speech tagging}, journal = {Proceedings of the third workshop on very large corpora}, volume = {30}, year = {1995}, publisher = {Somerset, New Jersey: Association for Computational Linguistics}, author = {Brill, E. and M. Pop} } @article {38336, title = {Identification of the calcium-binding protein calgranulin in the matrix of struvite stones}, journal = {Journal of endourology / Endourological Society}, volume = {8}, year = {1994}, note = {http://www.ncbi.nlm.nih.gov/pubmed/8061680?dopt=Abstract}, abstract = {The identification of calcium-binding proteins in urine and kidney stones has led to a closer look at the role of matrix proteins in urolithiasis. We analyzed five struvite stones for protein content and identified two bands (8 and 14 KDa) that were confirmed by gel electrophoresis and amino acid sequencing to be calgranulin. This protein, which is known by several other names, has bacteriostatic antifungal activity. 
Its role in the formation of struvite stones warrants further investigation.}, keywords = {Amino Acid Sequence, Calcium-Binding Proteins, Cell Adhesion Molecules, Neuronal, Electrophoresis, Enzyme-Linked Immunosorbent Assay, HUMANS, Kidney Calculi, Leukocyte L1 Antigen Complex, Magnesium Compounds, Molecular Sequence Data, Phosphates}, author = {Bennett, J. and Dretler, S. P. and J. Selengut and Orme-Johnson, W. H.} } @proceedings {38502, title = {A SIMD solution to the sequence comparison problem on the MGAP}, year = {1994}, month = {1994}, publisher = {IEEE}, type = {10.1109/ASAP.1994.331791}, abstract = {Molecular biologists frequently compare an unknown biosequence with a set of other known biosequences to find the sequence which is maximally similar, with the hope that what is true of one sequence, either physically or functionally, could be true of its analogue. Even though efficient dynamic programming algorithms exist for the problem, when the size of the database is large, the time required is quite long, even for moderate length sequences. In this paper, we present an efficient pipelined SIMD solution to the sequence alignment problem on the Micro-Grain Array Processor (MGAP), a fine-grained massively parallel array of processors with nearest-neighbor connections. The algorithm compares K sequences of length O(M) with the actual sequence of length N, in O(M+N+K) time with O(MN) processors, which is AT-optimal. 
The implementation on the MGAP computes at the rate of about 0.1 million comparisons per second for sequences of length 128}, keywords = {AT-optimal algorithm, Biological information theory, biology computing, biosequence comparison problem, computational complexity, Computer science, Costs, database size, Databases, DNA computing, dynamic programming, dynamic programming algorithms, fine-grained massively parallel processor array, Genetics, Heuristic algorithms, maximally similar sequence, MGAP parallel computer, Micro-Grain Array Processor, Military computing, molecular biology, molecular biophysics, Nearest neighbor searches, nearest-neighbor connections, Parallel algorithms, pipeline processing, pipelined SIMD solution, sequence alignment problem, sequences}, isbn = {0-8186-6517-3}, author = {Borah, M. and Bajwa, R. S. and Sridhar Hannenhalli and Irwin, M. J.} } @article {49702, title = {Splicing signals in Drosophila: intron size, information content, and consensus sequences.}, journal = {Nucleic Acids Res}, volume = {20}, year = {1992}, month = {1992 Aug 25}, pages = {4255-62}, abstract = {

A database of 209 Drosophila introns was extracted from Genbank (release number 64.0) and examined by a number of methods in order to characterize features that might serve as signals for messenger RNA splicing. A tight distribution of sizes was observed: while the smallest introns in the database are 51 nucleotides, more than half are less than 80 nucleotides in length, and most of these have lengths in the range of 59-67 nucleotides. Drosophila splice sites found in large and small introns differ in only minor ways from each other and from those found in vertebrate introns. However, larger introns have greater pyrimidine-richness in the region between 11 and 21 nucleotides upstream of 3{\textquoteright} splice sites. The Drosophila branchpoint consensus matrix resembles C T A A T (in which branch formation occurs at the underlined A), and differs from the corresponding mammalian signal in the absence of G at the position immediately preceding the branchpoint. The distribution of occurrences of this sequence suggests a minimum distance between 5{\textquoteright} splice sites and branchpoints of about 38 nucleotides, and a minimum distance between 3{\textquoteright} splice sites and branchpoints of 15 nucleotides. The methods we have used detect no information in exon sequences other than in the few nucleotides immediately adjacent to the splice sites. However, Drosophila resembles many other species in that there is a discontinuity in A + T content between exons and introns, which are A + T rich.

}, keywords = {Animals, Base Sequence, Consensus Sequence, Databases, Factual, Drosophila, Introns, Molecular Sequence Data, RNA Splicing, RNA, Messenger, software}, issn = {0305-1048}, author = {Mount, S M and Burks, C and Hertz, G and Stormo, G D and White, O and Fields, C} } @article {49715, title = {Pseudogenes for human small nuclear RNA U3 appear to arise by integration of self-primed reverse transcripts of the RNA into new chromosomal sites.}, journal = {Cell}, volume = {32}, year = {1983}, month = {1983 Feb}, pages = {461-72}, abstract = {

We find that both human and rat U3 snRNA can function as self-priming templates for AMV reverse transcriptase in vitro. The 74 base cDNA is primed by the 3{\textquoteright} end of intact U3 snRNA, and spans the characteristically truncated 69 or 70 base U3 sequence found in four different human U3 pseudogenes. The ability of human and rat U3 snRNA to self-prime is consistent with a U3 secondary structure model derived by a comparison between rat U3 snRNA and the homologous D2 snRNA from Dictyostelium discoideum. We propose that U3 pseudogenes are generated in vivo by integration of a self-primed cDNA copy of U3 snRNA at new chromosomal sites. We also consider the possibility that the same cDNA mediates gene conversion at the 5{\textquoteright} end of bona fide U3 genes where, over the entire region spanned by the U3 cDNA, the two rat U3 sequence variants U3A and U3B are identical.

}, keywords = {Animals, Base Sequence, DNA, genes, HUMANS, Nucleic Acid Conformation, Rats, Recombination, Genetic, Repetitive Sequences, Nucleic Acid, RNA, RNA, Small Nuclear, RNA-Directed DNA Polymerase, Templates, Genetic, Transcription, Genetic}, issn = {0092-8674}, author = {Bernstein, L B and Mount, S M and Weiner, A M} } @article {49718, title = {Structure and function of small ribonucleoproteins from eukaryotic cells.}, journal = {Princess Takamatsu Symp}, volume = {12}, year = {1982}, month = {1982}, pages = {101-7}, abstract = {

Autoantibodies from patients with systemic lupus erythematosus and other related diseases have been used to identify and study small RNA-protein complexes from mammalian cells. Properties of three previously described and several new classes of small ribonucleoproteins (RNPs) are reviewed. The sequence of Drosophila U1 RNA reveals that the region proposed to pair with 5{\textquoteright} splice junctions is conserved, while that proposed to interact with 3{\textquoteright} junctions diverges; this forces some revision of the model for U1 small nuclear (sn)RNP participation in hnRNA splicing. Further characterization of the Ro and La small RNPs has shown that the Ro small cytoplasmic (sc)RNPs are a subclass of La RNPs. Both tRNA and 5S rRNA precursors are at least transiently associated with the La protein. This raises the possibility that the La protein may be an RNA polymerase III transcription factor.

}, keywords = {Antigen-Antibody Complex, Autoantibodies, HUMANS, Lupus Erythematosus, Systemic, Nucleoproteins, Ribonucleoproteins, RNA Polymerase III, Transcription, Genetic}, author = {Steitz, J A and Berg, C and Gottlieb, E and Hardin, J A and Hashimoto, C and Hendrick, J P and Hinterberger, M and Krikeles, M and Lerner, M R and Mount, S M} } @article {49721, title = {Are snRNPs involved in splicing?}, journal = {Nature}, volume = {283}, year = {1980}, month = {1980 Jan 10}, pages = {220-4}, keywords = {Animals, Base Sequence, Cell Line, Chickens, Erythrocytes, HUMANS, Liver, Lupus Erythematosus, Systemic, Molecular Weight, Nucleic Acid Precursors, Nucleoproteins, Ribonucleoproteins, RNA, Heterogeneous Nuclear, Species Specificity}, issn = {0028-0836}, author = {Lerner, M R and Boyle, J A and Mount, S M and Wolin, S L and Steitz, J A} } @article {49628, title = {Detection of alloantigens during preimplantation development and early trophoblast differentiation in the mouse by immunoperoxidase labeling.}, journal = {J Exp Med}, volume = {143}, year = {1976}, month = {1976 Feb 1}, pages = {348-59}, abstract = {

An immunoperoxidase-labeling technique allowing visualization of antibody binding to the cell surface at the electron microscopical level has been employed in an analysis of H-2 and non-H-2 alloantigen expression on the early mouse embryo. The presence of non-H-2 antigenic determinants has been confirmed on eight-cell, morula, and blastocyst stages of development. Contrary to previous reports, however, low levels of H-2 antigen have also been detected on the blastocyst. This is the earliest stage at which H-2 has been shown to be expressed on the fertilized mouse egg and may reflect the greater resolution of the immunoperoxidase technique. Using two different models to study the critical peri-implantation stages, those of experimentally induced blastocyst activation and blastocyst outgrowth in vitro, it has been demonstrated that antigen loss occurs on the trophectoderm at the time of implantation, and that this is not necessarily dependent upon maternal influence. It is suggested that the loss may be an important factor in the prevention of maternal immune rejection during the establishment of the fetal allograft. The two major components of the early postimplantation conceptus display a striking differential in antigenic status. The embryonic sac shows a high degree of peroxidase labeling, while the ectoplacental cone trophoblast is unlabeled. These findings add support to the concept of antigenic neutrality of the early trophoblast and its role in the maintenance of a normal fetomaternal immunological equilibrium.

}, keywords = {Animals, Binding Sites, Antibody, Blastocyst, Cell Differentiation, Cell Membrane, Embryo Implantation, Embryonic Development, Epitopes, Female, Histocompatibility Antigens, HLA Antigens, Horseradish Peroxidase, Mice, Mice, Inbred Strains, Pregnancy, Pregnancy, Animal, Trophoblasts}, issn = {0022-1007}, author = {Searle, R F and Sellens, M H and Elson, J and Jenkinson, E J and Billington, W D} }