% Entries normalised to one field per line, authors in "Last, First" form,
% page ranges with a double hyphen, and acronyms brace-protected in titles.
% Exporter month values such as Mar-12-2017 are not valid months and contradict
% the year field; they are preserved verbatim in the ignored internal-note field.

@article{49818,
  author = {Karathia, Hiren and Kingsford, Carl and Girvan, Michelle and Hannenhalli, Sridhar},
  title = {A pathway-centric view of spatial proximity in the {3D} nucleome across cell lines},
  journal = {Scientific Reports},
  volume = {6},
  pages = {39279},
  year = {2016},
  doi = {10.1038/srep39279},
  url = {http://www.nature.com/articles/srep39279},
  internal-note = {month value from export, not a valid month: Mar-12-2017},
}

@article{49792,
  author = {Nguyen, Nam-phuong and Warnow, Tandy and Pop, Mihai and White, Bryan},
  title = {A perspective on {16S} {rRNA} operational taxonomic unit clustering using sequence similarity},
  journal = {npj Biofilms and Microbiomes},
  volume = {2},
  pages = {16004},
  year = {2016},
  doi = {10.1038/npjbiofilms.2016.4},
  url = {http://www.nature.com/articles/npjbiofilms20164},
  internal-note = {month value from export, not a valid month: Aug-04-2017},
}

@article{49815,
  author = {Persi, Erez and Wolf, Yuri I. and Koonin, Eugene V},
  title = {Positive and strongly relaxed purifying selection drive the evolution of repeats in proteins},
  journal = {Nature Communications},
  volume = {7},
  pages = {13570},
  year = {2016},
  doi = {10.1038/ncomms13570},
  url = {http://www.nature.com/doifinder/10.1038/ncomms13570},
  internal-note = {month value from export, not a valid month: Jun-11-2017},
}

% The last author was exported as "Bravo, {\'e}ctor Corrada" (leading H lost, surname split).
@article{49755,
  author = {Wagner, Justin and Paulson, Joseph N. and Wang, Xiao and Bhattacharjee, Bobby and Corrada Bravo, H{\'e}ctor},
  title = {Privacy-Preserving Microbiome Analysis Using Secure Computation},
  journal = {Bioinformatics},
  pages = {btw073},
  year = {2016},
  issn = {1367-4803},
  doi = {10.1093/bioinformatics/btw073},
  url = {http://bioinformatics.oxfordjournals.org/lookup/doi/10.1093/bioinformatics/btw073},
  internal-note = {month value from export, day/month order ambiguous: Nov-02-2016},
}

% The exported url held two URLs run together; the first one is kept.
% NOTE(review): journal name inferred from the TCBB DOI and ISSN 1545-5963 - confirm.
@article{49592,
  author = {Wang, Kun and Das, Avinash and Xiong, Zheng-Mei and Cao, Kan and Hannenhalli, Sridhar},
  title = {Phenotype-Dependent Coexpression Gene Clusters: Application to Normal and Premature Ageing},
  journal = {IEEE/ACM Transactions on Computational Biology and Bioinformatics},
  volume = {12},
  pages = {30--39},
  year = {2015},
  month = jan,
  issn = {1545-5963},
  doi = {10.1109/TCBB.2014.2359446},
  url = {http://ieeexplore.ieee.org/lpdocs/epic03/wrapper.htm?arnumber=6948331},
}

@article{49513,
  author = {Brown, Tyler S. and Jacob, Christopher G and Silva, Joana C and Takala-Harrison, Shannon and Djimd{\'e}, Abdoulaye and Dondorp, Arjen M and Fukuda, Mark and Noedl, Harald and Nyunt, Myaing Myaing and Kyaw, Myat Phone and Mayxay, Mayfong and Hien, Tran Tinh and Plowe, Christopher V and Cummings, Michael P.},
  title = {Plasmodium falciparum field isolates from areas of repeated emergence of drug resistant malaria show no evidence of hypermutator phenotype},
  journal = {Infection, Genetics and Evolution},
  volume = {30},
  pages = {318--322},
  year = {2015},
  month = mar,
  issn = {1567-1348},
  doi = {10.1016/j.meegid.2014.12.010},
}

% Preprint of entry 49755; the journal is taken from the biorxiv.org url.
@article{49754,
  author = {Wagner, Justin and Paulson, Joseph N. and Wang, Xiao-Shun and Bhattacharjee, Bobby and Corrada Bravo, Hector},
  title = {Privacy-Preserving Microbiome Analysis Using Secure Computation},
  journal = {bioRxiv},
  year = {2015},
  doi = {10.1101/025999},
  url = {http://biorxiv.org/lookup/doi/10.1101/025999},
}

% NOTE(review): journal name inferred from the mcp DOI prefix and ISSN 1535-9484 - confirm.
@article{49577,
  title = {Proteomics-based metabolic modeling reveals that fatty acid oxidation ({FAO}) controls endothelial cell ({EC}) permeability},
  journal = {Molecular \& Cellular Proteomics},
  volume = {14},
  year = {2015},
  month = mar,
  pages = {621--634},
  abstract = {

Endothelial cells (ECs) play a key role to maintain the functionality of blood vessels. Altered EC permeability causes severe impairment in vessel stability and is a hallmark of pathologies such as cancer and thrombosis. Integrating label-free quantitative proteomics data into genome-wide metabolic modeling, we built up a model that predicts the metabolic fluxes in ECs when cultured on a tridimensional matrix and organize into a vascular-like network. We discovered how fatty acid oxidation increases when ECs are assembled into a fully formed network that can be disrupted by inhibiting CPT1A, the fatty acid oxidation rate-limiting enzyme. Acute CPT1A inhibition reduces cellular ATP levels and oxygen consumption, which are restored by replenishing the tricarboxylic acid cycle. Remarkably, global phosphoproteomic changes measured upon acute CPT1A inhibition pinpointed altered calcium signaling. Indeed, CPT1A inhibition increases intracellular calcium oscillations. Finally, inhibiting CPT1A induces hyperpermeability in vitro and leakage of blood vessel in vivo, which were restored blocking calcium influx or replenishing the tricarboxylic acid cycle. Fatty acid oxidation emerges as central regulator of endothelial functions and blood vessel stability and druggable pathway to control pathological vascular permeability.

},
  issn = {1535-9484},
  doi = {10.1074/mcp.M114.045575},
  author = {Patella, Francesca and Schug, Zachary T and Persi, Erez and Neilson, Lisa J and Erami, Zahra and Avanzato, Daniele and Maione, Federica and Hernandez-Fernaud, Juan R and Mackay, Gillian and Zheng, Liang and Reid, Steven and Frezza, Christian and Giraudo, Enrico and Fiorio Pla, Alessandra and Anderson, Kurt and Ruppin, Eytan and Gottlieb, Eyal and Zanivan, Sara},
}

% The exported month field only repeated the year (2014) and was dropped.
% Journal casing follows the DOI (eLife).
@article{49724,
  title = {Phenotype-based cell-specific metabolic modeling reveals metabolic liabilities of cancer},
  journal = {eLife},
  volume = {3},
  year = {2014},
  abstract = {

Utilizing molecular data to derive functional physiological models tailored for specific cancer cells can facilitate the use of individually tailored therapies. To this end we present an approach termed PRIME for generating cell-specific genome-scale metabolic models (GSMMs) based on molecular and phenotypic data. We build >280 models of normal and cancer cell-lines that successfully predict metabolic phenotypes in an individual manner. We utilize this set of cell-specific models to predict drug targets that selectively inhibit cancerous but not normal cell proliferation. The top predicted target, MLYCD, is experimentally validated and the metabolic effects of MLYCD depletion investigated. Furthermore, we tested cell-specific predicted responses to the inhibition of metabolic enzymes, and successfully inferred the prognosis of cancer patients based on their PRIME-derived individual GSMMs. These results lay a computational basis and a counterpart experimental proof of concept for future personalized metabolic modeling applications, enhancing the search for novel selective anticancer therapies.

},
  keywords = {algorithms, Antineoplastic Agents, Biomarkers, Tumor, Carboxy-Lyases, Cell Line, Tumor, Cell Proliferation, Citric Acid Cycle, Fatty Acids, Gene Knockdown Techniques, Genome, Human, HUMANS, Lymphocytes, Models, Biological, Neoplasms, Oxidation-Reduction, PHENOTYPE, Precision Medicine},
  issn = {2050-084X},
  doi = {10.7554/eLife.03641},
  author = {Yizhak, Keren and Gaude, Edoardo and Le D{\'e}v{\'e}dec, Sylvia and Waldman, Yedael Y and Stein, Gideon Y and van de Water, Bob and Frezza, Christian and Ruppin, Eytan},
}

% The exported month "2014 Aug 28" is split into the month macro and an ignored day field.
% The abbreviated page range 1199-209 is expanded.
@article{49726,
  title = {Predicting cancer-specific vulnerability via data-driven detection of synthetic lethality},
  journal = {Cell},
  volume = {158},
  year = {2014},
  month = aug,
  day = {28},
  pages = {1199--1209},
  abstract = {

Synthetic lethality occurs when the inhibition of two genes is lethal while the inhibition of each single gene is not. It can be harnessed to selectively treat cancer by identifying inactive genes in a given cancer and targeting their synthetic lethal (SL) partners. We present a data-driven computational pipeline for the genome-wide identification of SL interactions in cancer by analyzing large volumes of cancer genomic data. First, we show that the approach successfully captures known SL partners of tumor suppressors and oncogenes. We then validate SL predictions obtained for the tumor suppressor VHL. Next, we construct a genome-wide network of SL interactions in cancer and demonstrate its value in predicting gene essentiality and clinical prognosis. Finally, we identify synthetic lethality arising from gene overactivation and use it to predict drug efficacy. These results form a computational basis for exploiting synthetic lethality to uncover cancer-specific susceptibilities.

},
  keywords = {Breast Neoplasms, Cell Line, Tumor, Computational Biology, Data Mining, Genes, Tumor Suppressor, HUMANS, Neoplasms, Oncogenes, RNA, Small Interfering, workflow},
  issn = {1097-4172},
  doi = {10.1016/j.cell.2014.07.027},
  author = {Jerby-Arnon, Livnat and Pfetzer, Nadja and Waldman, Yedael Y and McGarry, Lynn and James, Daniel and Shanks, Emma and Seashore-Ludlow, Brinton and Weinstock, Adam and Geiger, Tamar and Clemons, Paul A and Gottlieb, Eyal and Ruppin, Eytan},
}

% Entries 49545 and 49738 describe the same paper (identical DOI).
% 49545 was exported without a journal and with the month value May-11-2015,
% which contradicts its year; journal and month are taken from 49738.
@article{49545,
  author = {Khan, Z. and Ford, M. J. and Cusanovich, D. A. and Mitrano, A. and Pritchard, J. K. and Gilad, Y.},
  title = {Primate Transcript and Protein Expression Levels Evolve Under Compensatory Selection Pressures},
  journal = {Science},
  volume = {342},
  pages = {1100--1104},
  year = {2013},
  month = nov,
  issn = {0036-8075},
  doi = {10.1126/science.1242379},
  url = {http://www.sciencemag.org/cgi/doi/10.1126/science.1242379},
}

@article{49738,
  title = {Primate transcript and protein expression levels evolve under compensatory selection pressures},
  journal = {Science},
  volume = {342},
  year = {2013},
  month = nov,
  day = {29},
  pages = {1100--1104},
  abstract = {

Changes in gene regulation have likely played an important role in the evolution of primates. Differences in messenger RNA (mRNA) expression levels across primates have often been documented; however, it is not yet known to what extent measurements of divergence in mRNA levels reflect divergence in protein expression levels, which are probably more important in determining phenotypic differences. We used high-resolution, quantitative mass spectrometry to collect protein expression measurements from human, chimpanzee, and rhesus macaque lymphoblastoid cell lines and compared them to transcript expression data from the same samples. We found dozens of genes with significant expression differences between species at the mRNA level yet little or no difference in protein expression. Overall, our data suggest that protein expression levels evolve under stronger evolutionary constraint than mRNA levels.

},
  keywords = {Animals, Evolution, Molecular, Gene Expression Regulation, HUMANS, Macaca mulatta, Pan troglodytes, Protein Biosynthesis, RNA, Messenger, Selection, Genetic, Species Specificity, Transcription, Genetic},
  issn = {1095-9203},
  doi = {10.1126/science.1242379},
  author = {Khan, Zia and Ford, Michael J and Cusanovich, Darren A and Mitrano, Amy and Pritchard, Jonathan K and Gilad, Yoav},
}

% The export doubled the journal name, stored the DOI in the type field and the
% PubMed link in note; these are moved to journal, doi and url respectively.
% "H{\'e}ctor Corrada Bravo" in First-Last form parses with surname Bravo only,
% so the comma form is used.
@article{38421,
  author = {Shi, Weiliang and Wahba, Grace and Irizarry, Rafael A. and Corrada Bravo, H{\'e}ctor and Wright, Stephen J.},
  title = {The partitioned {LASSO}-patternsearch algorithm with application to gene expression data},
  journal = {BMC Bioinformatics},
  volume = {13},
  year = {2012},
  doi = {10.1186/1471-2105-13-98},
  url = {http://www.ncbi.nlm.nih.gov/pubmed/22587526?dopt=Abstract},
  abstract = {BACKGROUND: In systems biology, the task of reverse engineering gene pathways from data has been limited not just by the curse of dimensionality (the interaction space is huge) but also by systematic error in the data. The gene expression barcode reduces spurious association driven by batch effects and probe effects. The binary nature of the resulting expression calls lends itself perfectly to modern regularization approaches that thrive in high-dimensional settings. RESULTS: The Partitioned LASSO-Patternsearch algorithm is proposed to identify patterns of multiple dichotomous risk factors for outcomes of interest in genomic studies. A partitioning scheme is used to identify promising patterns by solving many LASSO-Patternsearch subproblems in parallel. All variables that survive this stage proceed to an aggregation stage where the most significant patterns are identified by solving a reduced LASSO-Patternsearch problem in just these variables. This approach was applied to genetic data sets with expression levels dichotomized by gene expression bar code. Most of the genes and second-order interactions thus selected and are known to be related to the outcomes.
CONCLUSIONS: We demonstrate with simulations and data analyses that the proposed method not only selects variables and patterns more accurately, but also provides smaller models with better prediction accuracy, in comparison to several alternative methodologies.},
  keywords = {algorithms, Breast Neoplasms, Computer simulation, Female, Gene expression, Gene Expression Profiling, Genomics, HUMANS, Models, Genetic},
}

% The journal was missing; it uses the spelling entry 49747 in this file uses for the same ISSN (1091-6490).
@article{49531,
  title = {Plasmodium falciparum merozoite surface protein 1 blocks the proinflammatory protein {S100P}},
  journal = {Proc Natl Acad Sci U S A},
  volume = {109},
  year = {2012},
  month = apr,
  day = {3},
  pages = {5429--5434},
  abstract = {

The malaria parasite, Plasmodium falciparum, and the human immune system have coevolved to ensure that the parasite is not eliminated and reinfection is not resisted. This relationship is likely mediated through a myriad of host-parasite interactions, although surprisingly few such interactions have been identified. Here we show that the 33-kDa fragment of P. falciparum merozoite surface protein 1 (MSP1(33)), an abundant protein that is shed during red blood cell invasion, binds to the proinflammatory protein, S100P. MSP1(33) blocks S100P-induced NF$\kappa$B activation in monocytes and chemotaxis in neutrophils. Remarkably, S100P binds to both dimorphic alleles of MSP1, estimated to have diverged >27 Mya, suggesting an ancient, conserved relationship between these parasite and host proteins that may serve to attenuate potentially damaging inflammatory responses.

},
  keywords = {Amino Acid Sequence, Animals, Calcium-Binding Proteins, Chromatography, Gel, Electrophoresis, Polyacrylamide Gel, Enzyme-Linked Immunosorbent Assay, HUMANS, Merozoite Surface Protein 1, Microscopy, Confocal, Molecular Sequence Data, Neoplasm Proteins, Plasmodium falciparum, Sequence Homology, Amino Acid, Surface Plasmon Resonance},
  issn = {1091-6490},
  doi = {10.1073/pnas.1202689109},
  author = {Waisberg, Michael and Cerqueira, Gustavo C and Yager, Stephanie B and Francischetti, Ivo M B and Lu, Jinghua and Gera, Nidhi and Srinivasan, Prakash and Miura, Kazutoyo and Rada, Balazs and Lukszo, Jan and Barbian, Kent D and Leto, Thomas L and Porcella, Stephen F and Narum, David L and El-Sayed, Najib and Miller, Louis H and Pierce, Susan K},
}

% The export repeated the journal name four times, stored the two ISSNs in isbn
% and the DOI in type. The first author's compound surname is braced so it
% matches entry 38446 (Constantin de Magny) and is not split into a von part.
% NOTE(review): "i.e. found" in the exported abstract is read as "is found" - confirm.
@article{38440,
  author = {{Constantin de Magny}, Guillaume and Mozumder, Pronob K. and Grim, Christopher J. and Hasan, Nur A. and Naser, M. Niamul and Alam, Munirul and Sack, Bradley and Huq, Anwar and Colwell, Rita R.},
  title = {Population Dynamics of {Vibrio} cholerae and Cholera in the {Bangladesh} {Sundarbans}: Role of Zooplankton Diversity},
  journal = {Applied and Environmental Microbiology},
  year = {2011},
  issn = {0099-2240, 1098-5336},
  doi = {10.1128/AEM.01472-10},
  abstract = {Vibrio cholerae, a bacterium autochthonous to the aquatic environment, is the causative agent of cholera, a severe watery, life-threatening diarrhoeal disease occurring predominantly in developing countries. V. cholerae, including both serogroup O1 and O139, is found in association with crustacean zooplankton, mainly copepods, and notably in ponds, rivers, and estuarine systems globally. The incidence of cholera and occurrence of V. cholerae pathogenic strains with zooplankton were studied in two areas of Bangladesh: Bakerganj and Mathbaria. Chitinous zooplankton communities of several bodies of water were analyzed in order to understand the interaction of zooplankton population composition with the population dynamics of pathogenic V. cholerae and incidence of cholera. Two dominant zooplankton groups were found to be consistently associated with detection of V. cholerae and/or occurrence of cholera cases, namely rotifers, and cladocerans, in addition to copepods. Local differences indicate there are subtle ecological factors that can influence interactions between V. cholerae, its plankton hosts, and the incidence of cholera.},
}

% The exported month field only repeated the year (2011) and was dropped.
@article{49728,
  title = {Predicting selective drug targets in cancer through metabolic networks},
  journal = {Mol Syst Biol},
  volume = {7},
  year = {2011},
  pages = {501},
  abstract = {

The interest in studying metabolic alterations in cancer and their potential role as novel targets for therapy has been rejuvenated in recent years. Here, we report the development of the first genome-scale network model of cancer metabolism, validated by correctly identifying genes essential for cellular proliferation in cancer cell lines. The model predicts 52 cytostatic drug targets, of which 40\% are targeted by known, approved or experimental anticancer drugs, and the rest are new. It further predicts combinations of synthetic lethal drug targets, whose synergy is validated using available drug efficacy and gene expression measurements across the NCI-60 cancer cell line collection. Finally, potential selective treatments for specific cancers that depend on cancer type-specific downregulation of gene expression and somatic mutations are compiled.

},
  keywords = {Cell Line, Tumor, Cell Proliferation, Computational Biology, Cytostatic Agents, Down-Regulation, Drug Delivery Systems, Gene Expression Regulation, Neoplastic, HUMANS, Metabolic Networks and Pathways, Models, Biological, Neoplasms, RNA, Small Interfering},
  issn = {1744-4292},
  doi = {10.1038/msb.2011.35},
  author = {Folger, Ori and Jerby, Livnat and Frezza, Christian and Gottlieb, Eyal and Ruppin, Eytan and Shlomi, Tomer},
}

% Entries 38452 and 49777 describe the same paper (identical DOI).
% In 38452 the export doubled the journal name, stored the DOI in type and the
% PubMed link in note; the article number (pages) is taken from 49777.
@article{38452,
  author = {Basu, Malay K. and Selengut, J. and Haft, Daniel H.},
  title = {{ProPhylo}: partial phylogenetic profiling to guide protein family construction and assignment of biological process},
  journal = {BMC Bioinformatics},
  volume = {12},
  pages = {434},
  year = {2011},
  doi = {10.1186/1471-2105-12-434},
  url = {http://www.ncbi.nlm.nih.gov/pubmed/22070167?dopt=Abstract},
  abstract = {BACKGROUND: Phylogenetic profiling is a technique of scoring co-occurrence between a protein family and some other trait, usually another protein family, across a set of taxonomic groups. In spite of several refinements in recent years, the technique still invites significant improvement. To be its most effective, a phylogenetic profiling algorithm must be able to examine co-occurrences among protein families whose boundaries are uncertain within large homologous protein superfamilies. RESULTS: Partial Phylogenetic Profiling (PPP) is an iterative algorithm that scores a given taxonomic profile against the taxonomic distribution of families for all proteins in a genome. The method works through optimizing the boundary of each protein family, rather than by relying on prebuilt protein families or fixed sequence similarity thresholds. Double Partial Phylogenetic Profiling (DPPP) is a related procedure that begins with a single sequence and searches for optimal granularities for its surrounding protein family in order to generate the best query profiles for PPP.
We present ProPhylo, a high-performance software package for phylogenetic profiling studies through creating individually optimized protein family boundaries. ProPhylo provides precomputed databases for immediate use and tools for manipulating the taxonomic profiles used as queries. CONCLUSION: ProPhylo results show universal markers of methanogenesis, a new DNA phosphorothioation-dependent restriction enzyme, and efficacy in guiding protein family construction. The software and the associated databases are freely available under the open source Perl Artistic License from ftp://ftp.jcvi.org/pub/data/ppp/.},
  keywords = {algorithms, Archaea, Archaeal Proteins, DNA, Methane, Phylogeny, software},
}

% The exported month field only repeated the year (2011) and was dropped.
@article{49777,
  title = {{ProPhylo}: partial phylogenetic profiling to guide protein family construction and assignment of biological process},
  journal = {BMC Bioinformatics},
  volume = {12},
  year = {2011},
  pages = {434},
  abstract = {

BACKGROUND: Phylogenetic profiling is a technique of scoring co-occurrence between a protein family and some other trait, usually another protein family, across a set of taxonomic groups. In spite of several refinements in recent years, the technique still invites significant improvement. To be its most effective, a phylogenetic profiling algorithm must be able to examine co-occurrences among protein families whose boundaries are uncertain within large homologous protein superfamilies.

RESULTS: Partial Phylogenetic Profiling (PPP) is an iterative algorithm that scores a given taxonomic profile against the taxonomic distribution of families for all proteins in a genome. The method works through optimizing the boundary of each protein family, rather than by relying on prebuilt protein families or fixed sequence similarity thresholds. Double Partial Phylogenetic Profiling (DPPP) is a related procedure that begins with a single sequence and searches for optimal granularities for its surrounding protein family in order to generate the best query profiles for PPP. We present ProPhylo, a high-performance software package for phylogenetic profiling studies through creating individually optimized protein family boundaries. ProPhylo provides precomputed databases for immediate use and tools for manipulating the taxonomic profiles used as queries.

CONCLUSION: ProPhylo results show universal markers of methanogenesis, a new DNA phosphorothioation-dependent restriction enzyme, and efficacy in guiding protein family construction. The software and the associated databases are freely available under the open source Perl Artistic License from ftp://ftp.jcvi.org/pub/data/ppp/.

},
  keywords = {algorithms, Archaea, Archaeal Proteins, DNA, Methane, Phylogeny, software},
  issn = {1471-2105},
  doi = {10.1186/1471-2105-12-434},
  author = {Basu, Malay K and Selengut, Jeremy D and Haft, Daniel H},
}

% In the entries below the export doubled every journal name, stored DOIs in
% the type field and ISSNs in isbn; these are moved to journal, doi and issn.

@article{38444,
  author = {Navlakha, S. and Kingsford, Carl},
  title = {The power of protein interaction networks for associating genes with diseases},
  journal = {Bioinformatics},
  volume = {26},
  year = {2010},
}

% NOTE(review): the export had "Jongsik, Chun"; the family name is presumably Chun - confirm.
@article{38448,
  author = {Haley, Bradd J. and Grim, Christopher J. and Hasan, Nur A. and Taviani, Elisa and Chun, Jongsik and Brettin, Thomas S. and Bruce, David C. and Challacombe, Jean F. and Detter, J. Chris and Han, Cliff S. and Huq, Anwar and Nair, G. Balakrish and Colwell, Rita R.},
  title = {The pre-seventh pandemic {Vibrio} cholerae {BX} 330286 {El} {Tor} genome: evidence for the environment as a genome reservoir},
  journal = {Environmental Microbiology Reports},
  volume = {2},
  year = {2010},
  issn = {1758-2229},
  doi = {10.1111/j.1758-2229.2010.00141.x},
  abstract = {Vibrio cholerae O1 El Tor BX 330286 was isolated from a water sample in Australia in 1986, 9 years after an indigenous outbreak of cholera occurred in that region. This environmental strain encodes virulence factors highly similar to those of clinical strains, suggesting an ability to cause disease in humans. We demonstrate its high similarity in gene content and genome-wide nucleotide sequence to clinical V. cholerae strains, notably to pre-seventh pandemic O1 El Tor strains isolated in 1910 (V. cholerae NCTC 8457) and 1937 (V. cholerae MAK 757), as well as seventh pandemic strains isolated after 1960 globally. Here we demonstrate that this strain represents a transitory clone with shared characteristics between pre-seventh and seventh pandemic strains of V. cholerae. Interestingly, this strain was isolated 25 years after the beginning of the seventh pandemic, suggesting the environment as a genome reservoir in areas where cholera does not occur in sporadic, endemic or epidemic form.},
}

@article{38420,
  author = {Nagarajan, Niranjan and Pop, M.},
  title = {Parametric Complexity of Sequence Assembly: Theory and Applications to Next Generation Sequencing},
  journal = {Journal of Computational Biology},
  volume = {16},
  year = {2009},
  issn = {1066-5277, 1557-8666},
  doi = {10.1089/cmb.2009.0005},
  abstract = {In recent years, a flurry of new DNA sequencing technologies have altered the landscape of genomics, providing a vast amount of sequence information at a fraction of the costs that were previously feasible. The task of assembling these sequences into a genome has, however, still remained an algorithmic challenge that is in practice answered by heuristic solutions. In order to design better assembly algorithms and exploit the characteristics of sequence data from new technologies, we need an improved understanding of the parametric complexity of the assembly problem. In this article, we provide a first theoretical study in this direction, exploring the connections between repeat complexity, read lengths, overlap lengths and coverage in determining the {\textquotedblleft}hard{\textquotedblright} instances of the assembly problem. Our work suggests at least two ways in which existing assemblers can be extended in a rigorous fashion, in addition to delineating directions for future theoretical investigations.},
}

% "H{\'e}ctor Corrada Bravo" in First-Last form parses with surname Bravo only,
% so the comma form is used.
@article{38432,
  author = {Eng, K. H. and Corrada Bravo, H{\'e}ctor and Keles, S.},
  title = {A phylogenetic mixture model for the evolution of gene expression},
  journal = {Molecular Biology and Evolution},
  volume = {26},
  year = {2009},
}

% Entries 49558 and 49748 describe the same paper (identical DOI).
% 49558 was exported without a journal and with the month value Jan-07-2009;
% journal and month are taken from 49748.
@article{49558,
  author = {Khan, Z. and Bloom, J. S. and Kruglyak, L. and Singh, M.},
  title = {A practical algorithm for finding maximal exact matches in large sequence datasets using sparse suffix arrays},
  journal = {Bioinformatics},
  volume = {25},
  pages = {1609--1616},
  year = {2009},
  month = jul,
  issn = {1367-4803},
  doi = {10.1093/bioinformatics/btp275},
  url = {http://bioinformatics.oxfordjournals.org/cgi/doi/10.1093/bioinformatics/btp275},
}

@article{49748,
  title = {A practical algorithm for finding maximal exact matches in large sequence datasets using sparse suffix arrays},
  journal = {Bioinformatics},
  volume = {25},
  year = {2009},
  month = jul,
  day = {1},
  pages = {1609--1616},
  abstract = {

MOTIVATION: High-throughput sequencing technologies place ever increasing demands on existing algorithms for sequence analysis. Algorithms for computing maximal exact matches (MEMs) between sequences appear in two contexts where high-throughput sequencing will vastly increase the volume of sequence data: (i) seeding alignments of high-throughput reads for genome assembly and (ii) designating anchor points for genome-genome comparisons.

RESULTS: We introduce a new algorithm for finding MEMs. The algorithm leverages a sparse suffix array (SA), a text index that stores every K-th position of the text. In contrast to a full text index that stores every position of the text, a sparse SA occupies much less memory. Even though we use a sparse index, the output of our algorithm is the same as a full text index algorithm as long as the space between the indexed suffixes is not greater than a minimum length of a MEM. By relying on partial matches and additional text scanning between indexed positions, the algorithm trades memory for extra computation. The reduced memory usage makes it possible to determine MEMs between significantly longer sequences.

AVAILABILITY: Source code for the algorithm is available under a BSD open source license at http://compbio.cs.princeton.edu/mems. The implementation can serve as a drop-in replacement for the MEMs algorithm in MUMmer 3.

},
  keywords = {algorithms, Base Sequence, Genomics, sequence alignment, Sequence Analysis, DNA},
  issn = {1367-4811},
  doi = {10.1093/bioinformatics/btp275},
  author = {Khan, Zia and Bloom, Joshua S and Kruglyak, Leonid and Singh, Mona},
}

% The export doubled the journal name and stored the DOI in the type field.
% The first author's compound surname is braced so that "Constantin de" is not
% parsed as a von part and the name sorts under Constantin.
@article{38446,
  author = {{Constantin de Magny}, G. and Long, W. and Brown, C. W. and Hood, R. R. and Huq, A. and Murtugudde, R. and Colwell, Rita R.},
  title = {Predicting the distribution of {Vibrio} spp. in the {Chesapeake} {Bay}: a {Vibrio} cholerae case study},
  journal = {EcoHealth},
  volume = {6},
  year = {2009},
  doi = {10.1007/s10393-009-0273-6},
  abstract = {Vibrio cholerae, the causative agent of cholera, is a naturally occurring inhabitant of the Chesapeake Bay and serves as a predictor for other clinically important vibrios, including Vibrio parahaemolyticus and Vibrio vulnificus. A system was constructed to predict the likelihood of the presence of V. cholerae in surface waters of the Chesapeake Bay, with the goal to provide forecasts of the occurrence of this and related pathogenic Vibrio spp. Prediction was achieved by driving an available multivariate empirical habitat model estimating the probability of V. cholerae within a range of temperatures and salinities in the Bay, with hydrodynamically generated predictions of ambient temperature and salinity. The experimental predictions provided both an improved understanding of the in situ variability of V. cholerae, including identification of potential hotspots of occurrence, and usefulness as an early warning system. With further development of the system, prediction of the probability of the occurrence of related pathogenic vibrios in the Chesapeake Bay, notably V. parahaemolyticus and V. vulnificus, will be possible, as well as its transport to any geographical location where sufficient relevant data are available.},
}

% The exported month "2009 Sep 15" is split into the month macro and an ignored day field.
% The abbreviated page range 15544-8 is expanded.
@article{49747,
  title = {Protein quantification across hundreds of experimental conditions},
  journal = {Proc Natl Acad Sci U S A},
  volume = {106},
  year = {2009},
  month = sep,
  day = {15},
  pages = {15544--15548},
  abstract = {

Quantitative studies of protein abundance rarely span more than a small number of experimental conditions and replicates. In contrast, quantitative studies of transcript abundance often span hundreds of experimental conditions and replicates. This situation exists, in part, because extracting quantitative data from large proteomics datasets is significantly more difficult than reading quantitative data from a gene expression microarray. To address this problem, we introduce two algorithmic advances in the processing of quantitative proteomics data. First, we use space-partitioning data structures to handle the large size of these datasets. Second, we introduce techniques that combine graph-theoretic algorithms with space-partitioning data structures to collect relative protein abundance data across hundreds of experimental conditions and replicates. We validate these algorithmic techniques by analyzing several datasets and computing both internal and external measures of quantification accuracy. We demonstrate the scalability of these techniques by applying them to a large dataset that comprises a total of 472 experimental conditions and replicates.

}, keywords = {algorithms, Animals, Automatic Data Processing, Chromatography, Liquid, Databases, Factual, Fungal Proteins, HUMANS, Isotopes, Mice, Proteins, proteomics, Tandem Mass Spectrometry}, issn = {1091-6490}, doi = {10.1073/pnas.0904100106}, author = {Khan, Zia and Bloom, Joshua S and Garcia, Benjamin A and Singh, Mona and Kruglyak, Leonid} } @article {49557, title = {Protein quantification across hundreds of experimental conditions}, volume = {106}, year = {2009}, month = {Mar-09-2010}, pages = {15544 - 15548}, issn = {0027-8424}, doi = {10.1073/pnas.0904100106}, url = {http://www.pnas.org/cgi/doi/10.1073/pnas.0904100106}, author = {Khan, Z. and Bloom, J. S. and Garcia, B. A. and Singh, M. and Kruglyak, L.} } @article {38453, title = {PTM-Switchboard{\textemdash}a database of posttranslational modifications of transcription factors, the mediating enzymes and target genes}, journal = {Nucleic acids researchNucleic Acids Research}, volume = {37}, year = {2009}, publisher = {Oxford Univ Press}, author = {Everett, L. and Vo, A. and Sridhar Hannenhalli} } @article {38441, title = {Position and distance specificity are important determinants of cis-regulatory motifs in addition to evolutionary conservation}, journal = {Nucleic Acids ResearchNucleic Acids Research}, volume = {35}, year = {2007}, type = {10.1093/nar/gkm201}, abstract = {Computational discovery of cis-regulatory elements remains challenging. To cope with the high false positives, evolutionary conservation is routinely used. However, conservation is only one of the attributes of cis-regulatory elements and is neither necessary nor sufficient. Here, we assess two additional attributes{\textemdash}positional and inter-motif distance specificity{\textemdash}that are critical for interactions between transcription factors. 
We first show that for a greater than expected fraction of known motifs, the genes that contain the motifs in their promoters in a position-specific or distance-specific manner are related, both in function and/or in expression pattern. We then use the position and distance specificity to discover novel motifs. Our work highlights the importance of distance and position specificity, in addition to the evolutionary conservation, in discovering cis-regulatory motifs.}, author = {Vardhanabhuti, Saran and Wang, Junwen and Sridhar Hannenhalli} } @article {38427, title = {Patterns of sequence conservation in presynaptic neural genes}, journal = {Genome Biol}, volume = {7}, year = {2006}, author = {Hadley, D. and Murphy, T. and Valladares, O. and Sridhar Hannenhalli and Ungar, L. and Kim, J. and Bucan, M. and others} } @inproceedings {49851, title = {Procrastination leads to efficient filtration for local multiple alignment}, booktitle = {International Workshop on Algorithms in Bioinformatics}, year = {2006}, publisher = {Springer Berlin Heidelberg}, organization = {Springer Berlin Heidelberg}, author = {Darling, Aaron E and Todd Treangen and Zhang, Louxin and Kuiken, Carla and Messeguer, Xavier and Perna, Nicole T} } @book {49867, title = {Procrastination Leads to Efficient Filtration for Local Multiple Alignment}, volume = {4175}, year = {2006}, pages = {126--137}, publisher = {Springer Berlin Heidelberg}, organization = {Springer Berlin Heidelberg}, address = {Berlin, Heidelberg}, isbn = {978-3-540-39583-6}, issn = {0302-9743}, doi = {10.1007/11851561_12}, url = {http://link.springer.com/10.1007/11851561_12}, author = {Darling, Aaron E. 
and Todd Treangen and Zhang, Louxin and Kuiken, Carla and Messeguer, Xavier and Perna, Nicole T.} } @article {38425, title = {Pathogenic Vibrio species in the marine and estuarine environment}, journal = {Oceans and Health: Pathogens in the Marine Environment}, year = {2005}, doi = {10.1007/0-387-23709-7_9}, abstract = {The genus Vibrio includes more than 30 species, at least 12 of which are pathogenic to humans and/or have been associated with foodborne diseases (Chakraborty et al., 1997). Among these species, Vibrio cholerae, serogroups O1 and O139, are the most important, since they are associated with epidemic and pandemic diarrhea outbreaks in many parts of the world (Centers for Disease Control and Prevention, 1995; Kaper et al., 1995). However, other species of vibrios capable of causing diarrheal disease in humans have received greater attention in the last decade. These include Vibrio parahaemolyticus, a leading cause of foodborne disease outbreaks in Japan and Korea (Lee et al., 2001), Vibrio vulnificus, Vibrio alginolyticus, Vibrio damsela, Vibrio fluvialis, Vibrio furnissii, Vibrio hollisae, Vibrio metschnikovii, and Vibrio mimicus (Altekruse et al., 2000; H{\o}i et al., 1997). In the USA, Vibrio species have been estimated to be the cause of about 8000 illnesses annually (Mead et al., 1999).}, author = {Pruzzo, C. and Huq, A. and Rita R. Colwell and Donelli, G.} } @article {38443, title = {Post-transcriptional Control in Mammalian Dendrites}, year = {2005}, author = {Simola, D. F. and Dalva, M. and Sridhar Hannenhalli and Liebhaber, S. and Bucan, M. 
and Ungar, L.} } @article {38450, title = {Promoter architecture and response to a positive regulator of archaeal transcription}, journal = {Molecular Microbiology}, volume = {56}, year = {2005}, doi = {10.1111/j.1365-2958.2005.04563.x}, abstract = {The archaeal transcription apparatus is chimeric: its core components (RNA polymerase and basal factors) closely resemble those of eukaryotic RNA polymerase II, but the putative archaeal transcriptional regulators are overwhelmingly of bacterial type. Particular interest attaches to how these bacterial-type effectors, especially activators, regulate a eukaryote-like transcription system. The hyperthermophilic archaeon Methanocaldococcus jannaschii encodes a potent transcriptional activator, Ptr2, related to the Lrp/AsnC family of bacterial regulators. Ptr2 activates rubredoxin 2 (rb2) transcription through a bipartite upstream activating site (UAS), and conveys its stimulatory effects on its cognate transcription machinery through direct recruitment of the TATA binding protein (TBP). A functional dissection of the highly constrained architecture of the rb2 promoter shows that a {\textquoteleft}one-site{\textquoteright} minimal UAS suffices for activation by Ptr2, and specifies the required placement of this site. The presence of such a simplified UAS upstream of the natural rubrerythrin (rbr) promoter also suffices for positive regulation by Ptr2 in vitro, and TBP recruitment remains the primary means of transcriptional activation at this promoter.}, issn = {1365-2958}, author = {Ouhammouch, Mohamed and Langham, Geoffrey E. and Hausner, Winfried and Simpson, Anjana J. and Najib M. El-Sayed and Geiduschek, E. 
Peter} } @article {38418, title = {Pandemic strains of O3:K6 Vibrio parahaemolyticus in the aquatic environment of Bangladesh}, journal = {Canadian Journal of Microbiology}, volume = {50}, year = {2004}, abstract = {A total of 1500 environmental strains of Vibrio parahaemolyticus, isolated from the aquatic environment of Bangladesh, were screened for the presence of a major V. parahaemolyticus virulence factor, the thermostable direct haemolysin (tdh) gene, by the colony blot hybridization method using a digoxigenin-labeled tdh gene probe. Of 1500 strains, 5 carried the tdh sequence, which was further confirmed by PCR using primers specific for the tdh gene. Examination by PCR confirmed that the 5 strains were V. parahaemolyticus and lacked the thermostable direct haemolysin-related haemolysin (trh) gene, the alternative major virulence gene known to be absent in pandemic strains. All 5 strains gave positive Kanagawa phenomenon reaction with characteristic beta-haemolysis on Wagatsuma agar medium. Southern blot analysis of the HindIII-digested chromosomal DNA demonstrated, in all 5 strains, the presence of 2 tdh genes common to strains positive for Kanagawa phenomenon. However, the 5 strains were found to belong to 3 different serotypes (O3:K29, O4:K37, and O3:K6). The 2 with pandemic serotype O3:K6 gave positive results in group-specific PCR and ORF8 PCR assays, characteristics unique to the pandemic clone. Clonal variations among the 5 isolates were analyzed by comparing RAPD and ribotyping patterns. Results showed different patterns for the 3 serotypes, but the pattern was identical among the O3:K6 strains. This is the first report on the isolation of pandemic O3:K6 strains of V. parahaemolyticus from the aquatic environment of Bangladesh.}, author = {Islam, M. S. and Tasmin, Rizwana and Khan, Sirajul Islam and Bakht, Habibul B. M. and Mahmood, Zahid Hayat and Rahman, M. Ziaur and Bhuiyan, Nurul Amin
and Nishibuchi, Mitsuaki and Nair, G. Balakrish and Sack, R. Bradley and Huq, Anwar and Rita R. Colwell and Sack, David A.} } @inbook {49520, title = {PHYLIP (Phylogeny Inference Package)}, year = {2004}, publisher = {John Wiley \& Sons, Inc.}, organization = {John Wiley \& Sons, Inc.}, address = {Hoboken, NJ, USA}, doi = {10.1002/0471650129.dob0534}, author = {Michael P. Cummings}, editor = {Hancock, John M. and Zvelebil, Marketa J.} } @article {38437, title = {Polylysogeny and prophage induction by secondary infection in Vibrio cholerae}, journal = {Environmental Microbiology}, volume = {6}, year = {2004}, doi = {10.1111/j.1462-2920.2004.00603.x}, abstract = {Strains of Vibrio cholerae O1, biotypes El Tor and classical, were infected with a known temperate phage (ΦP15) and monitored over a 15-day period for prophage induction. Over the course of the experiment two morphologically and three genomically distinct virus-like particles were observed from the phage-infected El Tor strain by transmission electron microscopy and field inversion gel electrophoresis, respectively, whereas only one phage, ΦP15, was observed from the infected classical strain. In the uninfected El Tor culture one prophage was spontaneously induced after 6~days. No induction in either strain was observed after treatment with mitomycin C. Data indicate that El Tor biotypes of V. cholerae may be polylysogenic and that secondary infection can promote multiple prophage induction. These traits may be important in the transfer of genetic material among V. cholerae by providing an environmentally relevant route for multiple prophage propagation and transmission.}, issn = {1462-2920}, author = {Espeland, Eric M. and Lipp, Erin K. and Huq, Anwar and Rita R. 
Colwell} } @article {38424, title = {Pathogenic Potential of Environmental Vibrio Cholerae Strains Carrying Genetic Variants of the Toxin-Coregulated Pilus Pathogenicity Island}, journal = {Infection and Immunity}, volume = {71}, year = {2003}, doi = {10.1128/IAI.71.2.1020-1025.2003}, abstract = {The major virulence factors of toxigenic Vibrio cholerae are cholera toxin (CT), which is encoded by a lysogenic bacteriophage (CTXΦ), and toxin-coregulated pilus (TCP), an essential colonization factor which is also the receptor for CTXΦ. The genes for the biosynthesis of TCP are part of a larger genetic element known as the TCP pathogenicity island. To assess their pathogenic potential, we analyzed environmental strains of V. cholerae carrying genetic variants of the TCP pathogenicity island for colonization of infant mice, susceptibility to CTXΦ, and diarrheagenicity in adult rabbits. Analysis of 14 environmental strains, including 3 strains carrying a new allele of the tcpA gene, 9 strains carrying a new allele of the toxT gene, and 2 strains carrying conventional tcpA and toxT genes, showed that all strains colonized infant mice with various efficiencies in competition with a control El Tor biotype strain of V. cholerae O1. Five of the 14 strains were susceptible to CTXΦ, and these transductants produced CT and caused diarrhea in adult rabbits. These results suggested that the new alleles of the tcpA and toxT genes found in environmental strains of V. cholerae encode biologically active gene products. Detection of functional homologs of the TCP island genes in environmental strains may have implications for understanding the origin and evolution of virulence genes of V. cholerae.}, issn = {0019-9567, 1098-5522}, author = {Faruque, Shah M. and Kamruzzaman, M. and Meraj, Ismail M. and Chowdhury, Nityananda and Nair, G. Balakrish and Sack, R. Bradley and Rita R. 
Colwell and Sack, David A.} } @article {38429, title = {Persistence of adhesive properties in Vibrio cholerae after long-term exposure to sea water}, journal = {Environmental Microbiology}, volume = {5}, year = {2003}, doi = {10.1046/j.1462-2920.2003.00498.x}, abstract = {The effect of exposure to artificial sea water (ASW) on the ability of classical Vibrio cholerae O1 cells to interact with chitin-containing substrates and human intestinal cells was studied. Incubation of vibrios in ASW at 5{\textdegree}C and 18{\textdegree}C resulted in two kinds of cell responses: the viable but non-culturable (VBNC) state (i.e.~<0.1 colony forming unit ml-1) at 5{\textdegree}C, and starvation (i.e. maintenance of culturability of the population) at 18{\textdegree}C. The latter remained rod shaped and, after 40~days{\textquoteright} incubation, presented a 47{\textendash}58\% reduction in the number of cells attached to chitin, a 48{\textendash}53\% reduction in the number of bacteria adhering to copepods, and a 48{\textendash}54\% reduction in the number of bacteria adhering to human cultured intestinal cells, compared to control cells not suspended in ASW. Bacteria suspended in ASW at 5{\textdegree}C became coccoid and, after 40~days, showed 34{\textendash}42\% fewer cells attached to chitin, 52{\textendash}55\% fewer adhering to copepods, and 45{\textendash}48\% fewer cells adhering to intestinal cell monolayers, compared to controls. Sarkosyl-insoluble membrane proteins that bind chitin particles were isolated and analysed by SDS-PAGE. After 40~days incubation in ASW at both 5{\textdegree}C and 18{\textdegree}C vibrios expressed chitin-binding ligands similar to bacteria harvested in the stationary growth phase. 
It is concluded that as vibrios do not lose adhesive properties after long-term exposure to ASW, it is important to include methods for VBNC bacteria when testing environmental and clinical samples for purposes of public health safety.}, issn = {1462-2920}, author = {Pruzzo, Carla and Tarsi, Renato and Del Mar Lle{\`o}, Maria and Signoretto, Caterina and Zampini, Massimiliano and Pane, Luigi and Rita R. Colwell and Canepari, Pietro} } @article {38431, title = {Phylogenetic analysis reveals five independent transfers of the chloroplast gene {rbcL} to the mitochondrial genome in angiosperms}, journal = {Curr Genet}, volume = {43}, year = {2003}, doi = {10.1007/s00294-003-0378-3}, abstract = {We used the chloroplast gene rbcL as a model to study the frequency and relative timing of transfer of chloroplast sequences to the mitochondrial genome. Southern blot survey of 20 mitochondrial DNAs confirmed three previously reported groups of plants containing rbcL in their mitochondrion, while PCR studies identified a new mitochondrial rbcL. Published and newly determined mitochondrial and chloroplast rbcL sequences were used to reconstruct rbcL phylogeny. The results imply five or six separate interorganellar transfers of rbcL among the angiosperms examined, and hundreds of successful transfers across all flowering plants. By taxonomic criteria, the crucifer transfer is the most ancient, two separate transfers within the grass family are of intermediate ancestry, and the morning-glory transfer is most recent. All five mitochondrial copies of rbcL examined exhibit insertion and/or deletion events that disrupt the reading frame (three are grossly truncated); and all are elevated in the proportion of nonsynonymous substitutions, providing clear evidence that these sequences are pseudogenes.}, author = {Michael P. Cummings and Nugent, J. M. and Olmstead, R. G. and Palmer, J. 
D.} } @article {38445, title = {Predictability of Vibrio Cholerae in Chesapeake Bay}, journal = {Applied and Environmental Microbiology}, volume = {69}, year = {2003}, doi = {10.1128/AEM.69.5.2773-2785.2003}, abstract = {Vibrio cholerae is autochthonous to natural waters and can pose a health risk when it is consumed via untreated water or contaminated shellfish. The correlation between the occurrence of V. cholerae in Chesapeake Bay and environmental factors was investigated over a 3-year period. Water and plankton samples were collected monthly from five shore sampling sites in northern Chesapeake Bay (January 1998 to February 2000) and from research cruise stations on a north-south transect (summers of 1999 and 2000). Enrichment was used to detect culturable V. cholerae, and 21.1\% (n = 427) of the samples were positive. As determined by serology tests, the isolates did not belong to serogroup O1 or O139 associated with cholera epidemics. A direct fluorescent-antibody assay was used to detect V. cholerae O1, and 23.8\% (n = 412) of the samples were positive. V. cholerae was more frequently detected during the warmer months and in northern Chesapeake Bay, where the salinity is lower. Statistical models successfully predicted the presence of V. cholerae as a function of water temperature and salinity. Temperatures above 19{\textdegree}C and salinities between 2 and 14 ppt yielded at least a fourfold increase in the number of detectable V. cholerae. The results suggest that salinity variation in Chesapeake Bay or other parameters associated with Susquehanna River inflow contribute to the variability in the occurrence of V. cholerae and that salinity is a useful indicator. Under scenarios of global climate change, increased climate variability, accompanied by higher stream flow rates and warmer temperatures, could favor conditions that increase the occurrence of V. 
cholerae in Chesapeake Bay.}, issn = {0099-2240, 1098-5336}, author = {Louis, Val{\'e}rie R. and Russek-Cohen, Estelle and Choopun, Nipa and Rivera, Irma N. G. and Gangle, Brian and Jiang, Sunny C. and Rubin, Andrea and Patz, Jonathan A. and Huq, Anwar and Rita R. Colwell} } @article {38430, title = {Phylogenetic analysis based on 18S ribosomal RNA gene sequences supports the existence of class Polyacanthocephala (Acanthocephala)}, journal = {Mol Phylogenet Evol}, volume = {23}, year = {2002}, doi = {10.1016/S1055-7903(02)00020-9}, abstract = {Members of phylum Acanthocephala are parasites of vertebrates and arthropods and are distributed worldwide. The phylum has traditionally been divided into three classes, Archiacanthocephala, Palaeacanthocephala, and Eoacanthocephala; a fourth class, Polyacanthocephala, has been recently proposed. However, erection of this new class, based on morphological characters, has been controversial. We sequenced the near complete 18S rRNA gene of Polyacanthorhynchus caballeroi (Polyacanthocephala) and Rhadinorhynchus sp. (Palaeacanthocephala); these sequences were aligned with another 21 sequences of acanthocephalans representing the three widely recognized classes of the phylum and with 16 sequences from outgroup taxa. Phylogenetic relationships inferred by maximum-likelihood and maximum-parsimony analyses showed Archiacanthocephala as the most basal group within the phylum, whereas classes Polyacanthocephala + Eoacanthocephala formed a monophyletic clade, with Palaeacanthocephala as its sister group. These results are consistent with the view of Polyacanthocephala representing an independent class within Acanthocephala.}, author = {Garc{\'\i}a-Varela, M. and Michael P. Cummings and P{\'e}rez-Ponce de Le{\'o}n, G. and Gardner, S. L. and Laclette, J. P.} } @article {38447, title = {Predicting Transcription Factor Synergism}, journal = {Nucleic Acids Research},
volume = {30}, year = {2002}, doi = {10.1093/nar/gkf535}, abstract = {Transcriptional regulation is mediated by a battery of transcription factor (TF) proteins, that form complexes involving protein{\textendash}protein and protein{\textendash}DNA interactions. Individual TFs bind to their cognate cis-elements or transcription factor-binding sites (TFBS). TFBS are organized on the DNA proximal to the gene in groups confined to a few hundred base pair regions. These groups are referred to as modules. Various modules work together to provide the combinatorial regulation of gene transcription in response to various developmental and environmental conditions. The sets of modules constitute a promoter model. Determining the TFs that preferentially work in concert as part of a module is an essential component of understanding transcriptional regulation. The TFs that act synergistically in such a fashion are likely to have their cis-elements co-localized on the genome at specific distances apart. We exploit this notion to predict TF pairs that are likely to be part of a transcriptional module on the human genome sequence. The computational method is validated statistically, using known interacting pairs extracted from the literature. There are 251 TFBS pairs up to 50 bp apart and 70 TFBS pairs up to 200 bp apart that score higher than any of the known synergistic pairs. Further investigation of 50 pairs randomly selected from each of these two sets using PubMed queries provided additional supporting evidence from the existing biological literature suggesting TF synergism for these novel pairs.}, issn = {0305-1048, 1362-4962}, author = {Sridhar Hannenhalli and Levy, Samuel} } @proceedings {38449, title = {Proceedings of the sixth annual international conference on Computational biology}, year = {2002}, publisher = {ACM}, editor = {Myers, G. and Sridhar Hannenhalli and Sankoff, D. and Istrail, S. and Pevzner, P. 
and Waterman, M.} } @article {38454, title = {Purification and properties of the extracellular lipase, LipA, of Acinetobacter sp. RAG-1}, journal = {European Journal of Biochemistry}, volume = {269}, year = {2002}, doi = {10.1046/j.1432-1033.2002.03235.x}, abstract = {An extracellular lipase, LipA, extracted from Acinetobacter sp. RAG-1 grown on hexadecane was purified and properties of the enzyme investigated. The enzyme is released into the growth medium during the transition to stationary phase. The lipase was harvested from cells grown to stationary phase, and purified with 22\% yield and > 10-fold purification. The protein demonstrates little affinity for anion exchange resins, with contaminating proteins removed by passing crude supernatants over a Mono Q column. The lipase was bound to a butyl Sepharose column and eluted in a Triton~X-100 gradient. The molecular mass (33~kDa) was determined employing SDS/PAGE. LipA was found to be stable at pH~5.8{\textendash}9.0, with optimal activity at 9.0. The lipase remained active at temperatures up to 70~{\textdegree}C, with maximal activity observed at 55~{\textdegree}C. LipA is active against a wide range of fatty acid esters of p-nitrophenyl, but preferentially attacks medium length acyl chains (C6, C8). The enzyme demonstrates hydrolytic activity in emulsions of both medium and long chain triglycerides, as demonstrated by zymogram analysis. RAG-1 lipase is stabilized by Ca2+, with no loss in activity observed in preparations containing the cation, compared to a 70\% loss over 30~h without Ca2+. The lipase is strongly inhibited by EDTA, Hg2+, and Cu2+, but shows no loss in activity after incubation with other metals or inhibitors examined in this study. The protein retains more than 75\% of its initial activity after exposure to organic solvents, but is rapidly deactivated by pyridine. RAG-1 lipase offers potential for use as a biocatalyst.}, keywords = {Acinetobacter sp. 
RAG-1, LipA, lipase, protein purification, zymogram}, issn = {1432-1033}, author = {Snellman, Erick A. and Sullivan, Elise R. and Rita R. Colwell} } @article {38451, title = {Promoter prediction in the human genome}, journal = {Bioinformatics}, volume = {17}, year = {2001}, doi = {10.1093/bioinformatics/17.suppl_1.S90}, abstract = {Computational prediction of eukaryotic polII promoters has been one of the most elusive problems despite considerable effort devoted to the study. Researchers have looked for various types of signals around the transcriptional start site (TSS), viz. oligo-nucleotide statistics, potential binding sites for core factors, clusters of binding sites, proximity to CpG islands etc.. The proximity of CpG islands to gene starts is now a well established fact, although until recently, it was based on very little genomic data. In this work we explore the possibility of enhancing the promoter prediction accuracy by combining CpG island information with a few other, biologically motivated, seemingly independent signals, that cover most of the known knowledge. We benchmarked the method on a much larger genomic datasets compared to previous studies. We were able to improve slightly upon current prediction accuracy. Furthermore, we observe that CpG islands are the most dominant signals and the other signals do not improve the prediction. This suggests that the computational prediction of promoters for genes with no associated CpG-island (typically having tissue-specific expression) looking only at the immediate neighborhood of the TSS may not even be possible. 
We suggest some biological experiments and studies to better understand the biology of transcription.}, issn = {1367-4803, 1460-2059}, author = {Sridhar Hannenhalli and Levy, S.} } @article {38433, title = {Phylogenetic relationships of Acanthocephala based on analysis of 18S ribosomal RNA gene sequences}, journal = {J Mol Evol}, volume = {50}, year = {2000}, abstract = {Acanthocephala (thorny-headed worms) is a phylum of endoparasites of vertebrates and arthropods, included among the most phylogenetically basal triploblastic pseudocoelomates. The phylum is divided into three classes: Archiacanthocephala, Palaeacanthocephala, and Eoacanthocephala. These classes are distinguished by morphological characters such as location of lacunar canals, persistence of ligament sacs in females, number and type of cement glands in males, number and size of proboscis hooks, host taxonomy, and ecology. To understand better the phylogenetic relationships within Acanthocephala, and between Acanthocephala and Rotifera, we sequenced the nearly complete 18S rRNA genes of nine species from the three classes of Acanthocephala and four species of Rotifera from the classes Bdelloidea and Monogononta. Phylogenetic relationships were inferred by maximum-likelihood analyses of these new sequences and others previously determined. The analyses showed that Acanthocephala is the sister group to a clade including Eoacanthocephala and Palaeacanthocephala. Archiacanthocephala exhibited a slower rate of evolution at the nucleotide level, as evidenced by shorter branch lengths for the group. We found statistically significant support for the monophyly of Rotifera, represented in our analysis by species from the clade Eurotatoria, which includes the classes Bdelloidea and Monogononta. Eurotatoria also appears as the sister group to Acanthocephala.}, author = {Garc{\'\i}a-Varela, M. and P{\'e}rez-Ponce de Le{\'o}n, G. and de la Torre, P. and Michael P. Cummings and Sarma, S. S. 
and Laclette, J. P.} } @article {38434, title = {Phylogenetic relationships of {Phytophthora} species based on ribosomal ITS I DNA sequence analysis with emphasis on Waterhouse groups V and VI}, journal = {Mycol Res}, volume = {104}, year = {2000}, abstract = {Phylogenetic relationships among Phytophthora species were investigated by sequence analysis of the internal transcribed spacer region I of the ribosomal DNA repeat unit. The extensive collection of isolates included taxa from all six morphological groups recognized by Waterhouse (1963) including molecular groups previously identified using isozymes and mtDNA restriction fragment length polymorphisms. Similar to previous studies, the inferred relationships indicated that molecular groups of P. cryptogea/drechsleri-like and P. megasperma-like taxa are polyphyletic. Morphological groups V and VI, which are differentiated by the presence of amphigynous or paragynous antheridia, are not monophyletic: species of the two groups are interspersed in the tree. Species with papillate and semi-papillate sporangia (groups I-IV) clustered together and this cluster was distinct from those of species with non-papillate sporangia. There was no congruence between the mode of antheridial attachment, sporangial caducity, or homo- or heterothallic habit and the molecular grouping of the species. Our study provides evidence that the antheridial position together with homo- or heterothallic habit does not reflect phylogenetic relationships within Phytophthora. Consequently, confirming studies done previously (Cooke \& Duncan 1997), this study provides evidence that the morphological characters used in Phytophthora taxonomy are of limited value for deducing phylogenetic relationships, because they exhibit convergent evolution.}, author = {F{\"o}rster, H. and Michael P. Cummings and Coffey, M. 
D.} } @article {49690, title = {Pre-messenger RNA processing factors in the Drosophila genome.}, journal = {J Cell Biol}, volume = {150}, year = {2000}, month = jul, pages = {F37--44}, keywords = {Animals, Drosophila melanogaster, Genome, Genomic Library, HUMANS, RNA Precursors, RNA, Messenger}, issn = {0021-9525}, author = {Mount, S M and Salz, H K} } @article {38423, title = {Pathogenic mechanisms in ischemic damage: a computational study}, journal = {Computers in Biology and Medicine}, volume = {29}, year = {1999}, author = {Ruppin, E. and Ofer, E. and Reggia, James A. and Revett, K.} } @article {38428, title = {Penumbral tissue damage following acute stroke: a computational investigation}, journal = {Progress in Brain Research}, volume = {121}, year = {1999}, author = {Ruppin, E. and Revett, K. and Ofer, E. and Goodall, S. and Reggia, James A.} } @article {38435, title = {Phylogenetic relationships of platyhelminthes based on 18S ribosomal gene sequences}, journal = {Mol Phylogenet Evol}, volume = {10}, year = {1998}, doi = {10.1006/mpev.1997.0483}, abstract = {Nucleotide sequences of 18S ribosomal RNA from 71 species of Platyhelminthes, the flatworms, were analyzed using maximum likelihood, and the resulting phylogenetic trees were compared with previous phylogenetic hypotheses. Analyses including 15 outgroup species belonging to eight other phyla show that Platyhelminthes are monophyletic with the exception of a sequence putatively from Acoela sp., Lecithoepitheliata, Polycladida, Tricladida, Trematoda (Aspidobothrii + Digenea), Monogenea, and Cestoda (Gyrocotylidea + Amphilinidea + Eucestoda) are monophyletic groups. Catenulids form the sister group to the rest of platyhelminths, whereas a complex clade formed by Acoela, Tricladida, "Dalyellioida", and perhaps "Typhloplanoida" is sister to Neodermata. 
"Typhloplanoida" does not appear to be monophyletic; Fecampiida does not appear to belong within "Dalyellioida," nor Kalyptorhynchia within "Typhloplanoida." Trematoda is the sister group to the rest of Neodermata, and Monogenea is sister group to Cestoda. Within Trematoda, Aspidobothrii is the sister group of Digenea and Heronimidae is the most basal family in Digenea. Our trees support the hypothesis that parasitism evolved at least twice in Platyhelminthes, once in the ancestor to Neodermata and again in the ancestor of Fecampiida, independently to the ancestor of putatively parasitic "Dalyellioida."}, author = {Campos, A. and Michael P. Cummings and Reyes, J. L. and Laclette, J. P.} } @article {38436, title = {Pigment composition of putatively achlorophyllous angiosperms}, journal = {Plant Syst Evol}, volume = {210}, year = {1998}, abstract = {Chlorophyll and carotenoid pigment composition was determined for ten species of putatively achlorophyllous angiosperms using high-performance liquid chromatography. Four families were represented: Lennoaceae (Pholisma arenarium); Monotropaceae (Allotropa virgata, Monotropa uniflora, Pterospora andromedea, Sarcodes sanguinea); Orobanchaceae (Epifagus virginiana, Orobanche cooperi, O. uniflora); Orchidaceae (Cephalanthera austinae, Corallorhiza maculata). Chlorophyll a was detected in all taxa, but chlorophyll b was only detected in Corallorhiza maculata. The relative amount of chlorophyll and chlorophyll-related pigments in these plants is greatly reduced compared to fully autotrophic angiosperms.}, keywords = {Angiospermae, carotenoid, chlorophyll, high-performance liquid chromatography, Lennoaceae, Monotropaceae, Orchidaceae, Orobanchaceae, pigment}, author = {Michael P. Cummings and Welschmeyer, N. 
A.} } @article {38422, title = {Pathogenesis of schizophrenic delusions and hallucinations: a neural model}, journal = {Schizophrenia Bulletin}, volume = {22}, year = {1996}, author = {Ruppin, E. and Reggia, James A. and Horn, D.} } @article {38439, title = {Polynomial-time algorithm for computing translocation distance between genomes}, journal = {Discrete Applied Mathematics}, volume = {71}, year = {1996}, doi = {10.1016/S0166-218X(96)00061-3}, abstract = {With the advent of large-scale DNA physical mapping and sequencing, studies of genome rearrangements are becoming increasingly important in evolutionary molecular biology. From a computational perspective, the study of evolution based on rearrangements leads to a rearrangement distance problem, i.e., computing the minimum number of rearrangement events required to transform one genome into another. Different types of rearrangement events give rise to a spectrum of interesting combinatorial problems. The complexity of most of these problems is unknown. Multichromosomal genomes frequently evolve by a rearrangement event called translocation which exchanges genetic material between different chromosomes. In this paper we study the translocation distance problem, modeling the evolution of genomes evolving by translocations. The translocation distance problem was recently studied for the first time by Kececioglu and Ravi, who gave a 2-approximation algorithm for computing translocation distance. In this paper we prove a duality theorem leading to a polynomial time algorithm for computing translocation distance for the case when the orientations of the genes are known. 
This leads to an algorithm generating a most parsimonious (shortest) scenario, transforming one genome into another by translocations.}, isbn = {0166-218X}, author = {Sridhar Hannenhalli} } @article {38442, title = {Positional sequencing by hybridization}, journal = {Computer applications in the biosciences : CABIOSComputer applications in the biosciences : CABIOS}, volume = {12}, year = {1996}, type = {10.1093/bioinformatics/12.1.19}, abstract = {Sequencing by hybridization (SBH) is a promising alternative to the classical DNA sequencing approaches. However, the resolving power of SBH is rather low: with 64kb sequencing chips, unknown DNA fragments only as long as 200 bp can be reconstructed in a single SBH experiment. To improve the resolving power of SBH, positional SBH (PSBH) has recently been suggested; this allows (with additional experimental work) approximate positions of every l-tuple in a target DNA fragment to be measured. We study the positional Eulerian path problem motivated by PSBH. The input to the positional eulerian path problem is an Eulerian graph G( V, E) in which every edge has an associated range of integers and the problem is to find an Eulerian path el, {\textellipsis}, e|E| in G such that the range of ei, contains i. We show that the positional Eulerian path problem is NP-complete even when the maximum out-degree (in-degree) of any vertex in the graph is 2. On a positive note we present polynomial algorithms to solve a special case of PSBH (bounded PSBH), where the range of the allowed positions for any edge is bounded by a constant (it corresponds to accurate experimental measurements of positions in PSBH). Moreover, if the positions of every l-tuple in an unknown DNA fragment of length n are measured with O(log n) error, then our algorithm runs in polynomial time. 
We also present an estimate of the resolving power of PSBH for a more realistic case when positions are measured with Θ(n) error.}, author = {Sridhar Hannenhalli and Feldman, William and Lewis, Herbert F. and Skiena, Steven S. and Pevzner, Pavel A.} } @article {38426, title = {Patterns of functional damage in neural network models of associative memory}, journal = {Neural computationNeural computation}, volume = {7}, year = {1995}, author = {Ruppin, E. and Reggia, James A.} } @article {38438, title = {Polynomial-time algorithm for computing translocation distance between genomes}, journal = {Combinatorial Pattern MatchingCombinatorial Pattern Matching}, year = {1995}, publisher = {Springer}, author = {Sridhar Hannenhalli} } @article {49699, title = {P element-mediated in vivo deletion analysis of white-apricot: deletions between direct repeats are strongly favored.}, journal = {Genetics}, volume = {136}, year = {1994}, month = {1994 Mar}, pages = {1001-11}, abstract = {

We have isolated and characterized deletions arising within a P transposon, P[hswa], in the presence of P transposase. P[hswa] carries white-apricot (wa) sequences, including a complete copia element, under the control of an hsp70 promoter, and resembles the original wa allele in eye color phenotype. In the presence of P transposase, P[hswa] shows a high overall rate (approximately 3\%) of germline mutations that result in increased eye pigmentation. Of 234 derivatives of P[hswa] with greatly increased eye pigmentation, at least 205 carried deletions within copia. Of these, 201 were precise deletions between the directly repeated 276-nucleotide copia long terminal repeats (LTRs), and four were unique deletions. High rates of transposase-induced precise deletion were observed within another P transposon carrying unrelated 599 nucleotide repeats (yeast 2 mu FLP; recombinase target sites) separated by 5.7 kb. Our observation that P element-mediated deletion formation occurs preferentially between direct repeats suggests general methods for controlling deletion formation.

}, keywords = {Alleles, Animals, Animals, Genetically Modified, Base Sequence, Crosses, Genetic, DNA, DNA Transposable Elements, Drosophila, Eye Color, Female, Genes, Insect, Male, Molecular Sequence Data, Nucleotidyltransferases, PHENOTYPE, Recombination, Genetic, Repetitive Sequences, Nucleic Acid, Sequence Deletion, Transformation, Genetic, Transposases}, issn = {0016-6731}, author = {Kurkulos, M and Weinberg, J M and Roy, D and Mount, S M} } @article {49522, title = {Patterns and processes of sequence evolution: plant organelle genomes and copia-like retrotransposons}, year = {1992}, type = {phd}, author = {Michael P. Cummings} } @proceedings {38419, title = {Parallel transitive closure computations using topological sort}, year = {1991}, month = {1991}, publisher = {IEEE}, type = {10.1109/PDIS.1991.183079}, abstract = {Deals with parallel transitive closure computations. The sort-based approaches introduced sorts the tuples of the relation into topological order, and the sorted relation is then horizontally partitioned and distributed across several processing nodes of a message passing multiprocessor system. This data partitioning strategy allows the transitive closure computation of the local data fragments to be computed in parallel with no interprocessor communication. The construction of the final result then requires only a small number of joins. Extensive analytical results are included in the paper as well. They show that the proposed techniques leads to a speedup that is essentially linear with the number of processors. 
Its performance is significantly better than the recently published hashless parallel algorithm}, keywords = {Computer science, Concurrent computing, data partitioning, Database systems, database theory, deductive databases, File systems, horizontal partitioning, joins, local data fragments, message passing multiprocessor system, Multiprocessing systems, Parallel algorithms, PARALLEL PROCESSING, parallel programming, parallel transitive closure, processing nodes, relation tuples, Relational databases, sorting, topological sort}, isbn = {0-8186-2295-4}, author = {Hua, K. A. and Sridhar Hannenhalli} } @article {49703, title = {Polyadenylylation in copia requires unusually distant upstream sequences.}, journal = {Proc Natl Acad Sci U S A}, volume = {88}, year = {1991}, month = {1991 Apr 15}, pages = {3038-42}, abstract = {

Retroviruses and related genetic elements generate terminally redundant RNA products by differential polyadenylylation within a long terminal repeat. Expression of the white-apricot (wa) allele of Drosophila melanogaster, which carries an insertion of the 5.1-kilobase retrovirus-like transposable element copia in a small intron, is influenced by signals within copia. By using this indicator, we have isolated a 518-base-pair deletion, 312 base pairs upstream of the copia polyadenylylation site, that is phenotypically like much larger deletions and eliminates RNA species polyadenylylated in copia. This requirement of distant upstream sequences for copia polyadenylylation has implications for the expression of many genetic elements bearing long terminal repeats.

}, keywords = {Animals, Base Sequence, Blotting, Northern, DNA Transposable Elements, Drosophila melanogaster, Eye Color, Molecular Sequence Data, Oligonucleotides, Polymerase Chain Reaction, Regulatory Sequences, Nucleic Acid, Repetitive Sequences, Nucleic Acid, RNA Processing, Post-Transcriptional, RNA, Messenger}, issn = {0027-8424}, author = {Kurkulos, M and Weinberg, J M and Pepling, M E and Mount, S M} } @article {49708, title = {Partial revertants of the transposable element-associated suppressible allele white-apricot in Drosophila melanogaster: structures and responsiveness to genetic modifiers.}, journal = {Genetics}, volume = {118}, year = {1988}, month = {1988 Feb}, pages = {221-34}, abstract = {

The eye color phenotype of white-apricot (wa), a mutant allele of the white locus caused by the insertion of the transposable element copia into a small intron, is suppressed by the extragenic suppressor suppressor-of-white-apricot (su(wa] and enhanced by the extragenic enhancers suppressor-of-forked su(f] and Enhancer-of-white-apricot (E(wa]. Derivatives of wa have been analyzed molecularly and genetically in order to correlate the structure of these derivatives with their response to modifiers. Derivatives in which the copia element is replaced precisely by a solo long terminal repeat (sLTR) were generated in vitro and returned to the germline by P-element mediated transformation; flies carrying this allele within a P transposon show a nearly wild-type phenotype and no response to either su(f) or su(wa). In addition, eleven partial phenotypic revertants of wa were analyzed. Of these, one appears to be a duplication of a large region which includes wa, three are new alleles of su(wa), two are sLTR derivatives whose properties confirm results obtained using transformation, and five are secondary insertions into the copia element within wa. One of these, waR84h, differs from wa by the insertion of the most 3{\textquoteright} 83 nucleotides of the I factor. The five insertion derivatives show a variety of phenotypes and modes of interaction with su[f) and su(wa). The eye pigmentation of waR84h is affected by su(f) and E(wa), but not su(wa). These results demonstrate that copia (as opposed to the interruption of white sequences) is essential for the wa phenotype and its response to genetic modifiers, and that there are multiple mechanisms for the alteration of the wa phenotype by modifiers.

}, keywords = {Alleles, Animals, Base Sequence, DNA Transposable Elements, Drosophila melanogaster, Enhancer Elements, Genetic, GENOTYPE, Molecular Sequence Data, Mutation, Suppression, Genetic}, issn = {0016-6731}, author = {Mount, S M and Green, M M and Rubin, G M} } @article {49715, title = {Pseudogenes for human small nuclear RNA U3 appear to arise by integration of self-primed reverse transcripts of the RNA into new chromosomal sites.}, journal = {Cell}, volume = {32}, year = {1983}, month = {1983 Feb}, pages = {461-72}, abstract = {

We find that both human and rat U3 snRNA can function as self-priming templates for AMV reverse transcriptase in vitro. The 74 base cDNA is primed by the 3{\textquoteright} end of intact U3 snRNA, and spans the characteristically truncated 69 or 70 base U3 sequence found in four different human U3 pseudogenes. The ability of human and rat U3 snRNA to self-prime is consistent with a U3 secondary structure model derived by a comparison between rat U3 snRNA and the homologous D2 snRNA from Dictyostelium discoideum. We propose that U3 pseudogenes are generated in vivo by integration of a self-primed cDNA copy of U3 snRNA at new chromosomal sites. We also consider the possibility that the same cDNA mediates gene conversion at the 5{\textquoteright} end of bona fide U3 genes where, over the entire region spanned by the U3 cDNA, the two rat U3 sequence variants U3A and U3B are identical.

}, keywords = {Animals, Base Sequence, DNA, genes, HUMANS, Nucleic Acid Conformation, Rats, Recombination, Genetic, Repetitive Sequences, Nucleic Acid, RNA, RNA, Small Nuclear, RNA-Directed DNA Polymerase, Templates, Genetic, Transcription, Genetic}, issn = {0092-8674}, author = {Bernstein, L B and Mount, S M and Weiner, A M} }