@article{49649,
  author   = {Severance, Scott and Rajagopal, Abbhirami and Rao, Anita U and Cerqueira, Gustavo C and Mitreva, Makedonka and El-Sayed, Najib M and Krause, Michael and Hamza, Iqbal},
  title    = {Genome-wide analysis reveals novel genes essential for heme homeostasis in {Caenorhabditis} elegans},
  journal  = {PLoS Genet},
  volume   = {6},
  year     = {2010},
  month    = jul,
  pages    = {e1001044},
  issn     = {1553-7404},
  doi      = {10.1371/journal.pgen.1001044},
  keywords = {Animals, Caenorhabditis elegans, Dose-Response Relationship, Drug, Gene Expression Profiling, Gene Expression Regulation, genes, Genome-Wide Association Study, Heme, Homeostasis, HUMANS, Leishmania, Nematoda, Trypanosoma},
  abstract = {Heme is a cofactor in proteins that function in almost all sub-cellular compartments and in many diverse biological processes. Heme is produced by a conserved biosynthetic pathway that is highly regulated to prevent the accumulation of heme---a cytotoxic, hydrophobic tetrapyrrole. Caenorhabditis elegans and related parasitic nematodes do not synthesize heme, but instead require environmental heme to grow and develop. Heme homeostasis in these auxotrophs is, therefore, regulated in accordance with available dietary heme. We have capitalized on this auxotrophy in C. elegans to study gene expression changes associated with precisely controlled dietary heme concentrations. RNA was isolated from cultures containing 4, 20, or 500 $\mu$M heme; derived cDNA probes were hybridized to Affymetrix C. elegans expression arrays. We identified 288 heme-responsive genes (hrgs) that were differentially expressed under these conditions. Of these genes, 42\% had putative homologs in humans, while genomes of medically relevant heme auxotrophs revealed homologs for 12\% in both Trypanosoma and Leishmania and 24\% in parasitic nematodes. Depletion of each of the 288 hrgs by RNA-mediated interference (RNAi) in a transgenic heme-sensor worm strain identified six genes that regulated heme homeostasis. In addition, seven membrane-spanning transporters involved in heme uptake were identified by RNAi knockdown studies using a toxic heme analog. Comparison of genes that were positive in both of the RNAi screens resulted in the identification of three genes in common that were vital for organismal heme homeostasis in C. elegans. Collectively, our results provide a catalog of genes that are essential for metazoan heme homeostasis and demonstrate the power of C. elegans as a genetic animal model to dissect the regulatory circuits which mediate heme trafficking in both vertebrate hosts and their parasites, which depend on environmental heme for survival.},
}

% NOTE(review): in entry 38290 the author names "Hug, Anwar" and "Munik, A. C."
% look like misspellings (possibly "Huq" and "Munk"); left as exported,
% confirm against the publication before correcting.
@article{38290,
  author   = {Brettin, Thomas S. and Bruce, David C. and Challacombe, Jean F. and Detter, John C. and Han, Cliff S. and Munik, A. C. and Chertkov, Olga and Meincke, Linda and Saunders, Elizabeth and Choi, Seon Y. and Haley, Bradd J. and Taviani, Elisa and Jeon, Yoon-Seong and Kim, Dong Wook and Lee, Jae-Hak and Walters, Ronald A. and Hug, Anwar and Colwell, Rita R.},
  title    = {Genome assortment, not serogroup, defines {Vibrio} cholerae pandemic strains},
  journal  = {Nature},
  year     = {2009},
  keywords = {59, CHOLERA, genes, Genetics, GENOTYPE, ISLANDS, ORIGIN, PHENOTYPE, PUBLIC HEALTH, recombination, STRAINS, Toxins},
  abstract = {Vibrio cholerae, the causative agent of cholera, is a bacterium autochthonous to the aquatic environment, and a serious public health threat. V. cholerae serogroup O1 is responsible for the previous two cholera pandemics, in which classical and El Tor biotypes were dominant in the 6th and the current 7th pandemics, respectively. Cholera researchers continually face newly emerging and re-emerging pathogenic clones carrying combinations of new serogroups as well as of phenotypic and genotypic properties. These genotype and phenotype changes have hampered control of the disease. Here we compare the complete genome sequences of 23 strains of V. cholerae isolated from a variety of sources and geographical locations over the past 98 years in an effort to elucidate the evolutionary mechanisms governing genetic diversity and genesis of new pathogenic clones. The genome-based phylogeny revealed 12 distinct V. cholerae phyletic lineages, of which one, designated the V. cholerae core genome (CG), comprises both O1 classical and El Tor biotypes. All 7th pandemic clones share nearly identical gene content, i.e., the same genome backbone. The transition from 6th to 7th pandemic strains is defined here as a `shift' between pathogenic clones belonging to the same O1 serogroup, but from significantly different phyletic lineages within the CG clade. In contrast, transition among clones during the present 7th pandemic period can be characterized as a `drift' between clones, differentiated mainly by varying composition of laterally transferred genomic islands, resulting in emergence of variants, exemplified by V. cholerae serogroup O139 and V. cholerae O1 El Tor hybrid clones that produce cholera toxin of classical biotype. Based on the comprehensive comparative genomics presented in this study it is concluded that V. cholerae undergoes extensive genetic recombination via lateral gene transfer, and, therefore, genome assortment, not serogroup, should be used to define pathogenic V. cholerae clones.},
}

@inproceedings{38343,
  author    = {Ghodsi, M. and Pop, M.},
  title     = {Inexact Local Alignment Search over Suffix Arrays},
  booktitle = {2009 {IEEE} International Conference on Bioinformatics and Biomedicine ({BIBM} '09)},
  year      = {2009},
  publisher = {IEEE},
  isbn      = {978-0-7695-3885-3},
  doi       = {10.1109/BIBM.2009.25},
  keywords  = {bacteria, Bioinformatics, biology computing, Computational Biology, Costs, DNA, DNA homology searches, DNA sequences, Educational institutions, generalized heuristic, genes, Genetics, genome alignment, Genomics, human, inexact local alignment search, inexact seeds, local alignment, local alignment tools, memory efficient suffix array, microorganisms, molecular biophysics, mouse, Organisms, Sensitivity and Specificity, sequences, suffix array, USA Councils},
  abstract  = {We describe an algorithm for finding approximate seeds for DNA homology searches. In contrast to previous algorithms that use exact or spaced seeds, our approximate seeds may contain insertions and deletions. We present a generalized heuristic for finding such seeds efficiently and prove that the heuristic does not affect sensitivity. We show how to adapt this algorithm to work over the memory efficient suffix array with provably minimal overhead in running time. We demonstrate the effectiveness of our algorithm on two tasks: whole genome alignment of bacteria and alignment of the DNA sequences of 177 genes that are orthologous in human and mouse. We show our algorithm achieves better sensitivity and uses less memory than other commonly used local alignment tools.},
}

@inproceedings{38218,
  author    = {Hochheiser, H. and Baehrecke, E. H. and Mount, Stephen M. and Shneiderman, Ben},
  title     = {Dynamic querying for pattern identification in microarray and genomic data},
  booktitle = {2003 International Conference on Multimedia and Expo ({ICME} '03)},
  volume    = {3},
  year      = {2003},
  publisher = {IEEE},
  isbn      = {0-7803-7965-9},
  doi       = {10.1109/ICME.2003.1221346},
  keywords  = {Bioinformatics, data sets, Displays, dynamic querying, expression profiles, Frequency, Gene expression, genes, Genetics, genomic data, Genomics, linear ordered sequences, macromolecules, medical signal processing, Mice, Microarray, pattern identification, pattern recognition, premRNA splicing, Query processing, sequences, Signal processing, splicing, TimeSearcher},
  abstract  = {Data sets involving linear ordered sequences are a recurring theme in bioinformatics. Dynamic query tools that support exploration of these data sets can be useful for identifying patterns of interest. This paper describes the use of one such tool - timesearcher - to interactively explore linear sequence data sets taken from two bioinformatics problems. Microarray time course data sets involve expression levels for large numbers of genes over multiple time points. Timesearcher can be used to interactively search these data sets for genes with expression profiles of interest. The occurrence frequencies of short sequences of DNA in aligned exons can be used to identify sequences that play a role in the pre-mRNA splicing. Timesearcher can be used to search these data sets for candidate splicing signals.},
}

@article{49687,
  author   = {Dehal, Paramvir and Satou, Yutaka and Campbell, Robert K and Chapman, Jarrod
              and Degnan, Bernard and De Tomaso, Anthony and Davidson, Brad and Di Gregorio, Anna
              and Gelpke, Maarten and Goodstein, David M and Harafuji, Naoe and Hastings, Kenneth E M
              and Ho, Isaac and Hotta, Kohji and Huang, Wayne and Kawashima, Takeshi
              and Lemaire, Patrick and Martinez, Diego and Meinertzhagen, Ian A and Necula, Simona
              and Nonaka, Masaru and Putnam, Nik and Rash, Sam and Saiga, Hidetoshi
              and Satake, Masanobu and Terry, Astrid and Yamada, Lixy and Wang, Hong-Gang
              and Awazu, Satoko and Azumi, Kaoru and Boore, Jeffrey and Branno, Margherita
              and Chin-Bow, Stephen and DeSantis, Rosaria and Doyle, Sharon and Francino, Pilar
              and Keys, David N and Haga, Shinobu and Hayashi, Hiroko and Hino, Kyosuke
              and Imai, Kaoru S and Inaba, Kazuo and Kano, Shungo and Kobayashi, Kenji
              and Kobayashi, Mari and Lee, Byung-In and Makabe, Kazuhiro W and Manohar, Chitra
              and Matassi, Giorgio and Medina, Monica and Mochizuki, Yasuaki and Mount, Steve
              and Morishita, Tomomi and Miura, Sachiko and Nakayama, Akie and Nishizaka, Satoko
              and Nomoto, Hisayo and Ohta, Fumiko and Oishi, Kazuko and Rigoutsos, Isidore
              and Sano, Masako and Sasaki, Akane and Sasakura, Yasunori and Shoguchi, Eiichi
              and Shin-i, Tadasu and Spagnuolo, Antoinetta and Stainier, Didier and Suzuki, Miho M
              and Tassy, Olivier and Takatori, Naohito and Tokuoka, Miki and Yagi, Kasumi
              and Yoshizaki, Fumiko and Wada, Shuichi and Zhang, Cindy and Hyatt, P Douglas
              and Larimer, Frank and Detter, Chris and Doggett, Norman and Glavina, Tijana
              and Hawkins, Trevor and Richardson, Paul and Lucas, Susan and Kohara, Yuji
              and Levine, Michael and Satoh, Nori and Rokhsar, Daniel S},
  title    = {The draft genome of {Ciona} intestinalis: insights into chordate and vertebrate origins},
  journal  = {Science},
  volume   = {298},
  year     = {2002},
  month    = dec # "~13",
  pages    = {2157--2167},
  issn     = {1095-9203},
  doi      = {10.1126/science.1080049},
  keywords = {Alleles, Animals, Apoptosis, Base Sequence, Cellulose, Central Nervous System, Ciona intestinalis, Computational Biology, Endocrine System, Gene Dosage, Gene Duplication, genes, Genes, Homeobox, Genome, Heart, Immunity, Molecular Sequence Data, Multigene Family, Muscle Proteins, Organizers, Embryonic, Phylogeny, Polymorphism, Genetic, Proteins, Sequence Analysis, DNA, Sequence Homology, Nucleic Acid, Species Specificity, Thyroid Gland, Urochordata, Vertebrates},
  abstract = {The first chordates appear in the fossil record at the time of the Cambrian explosion, nearly 550 million years ago. The modern ascidian tadpole represents a plausible approximation to these ancestral chordates. To illuminate the origins of chordate and vertebrates, we generated a draft of the protein-coding portion of the genome of the most studied ascidian, Ciona intestinalis. The Ciona genome contains approximately 16,000 protein-coding genes, similar to the number in other invertebrates, but only half that found in vertebrates. Vertebrate gene families are typically found in simplified form in Ciona, suggesting that ascidians contain the basic ancestral complement of genes involved in cell signaling and development. The ascidian genome has also acquired a number of lineage-specific innovations, including a group of genes engaged in cellulose metabolism that are related to those in bacteria and fungi.},
}

@article{49689,
  author   = {Mount, S M},
  title    = {Genomic sequence, splicing, and gene annotation},
  journal  = {Am J Hum Genet},
  volume   = {67},
  year     = {2000},
  month    = oct,
  pages    = {788--792},
  issn     = {0002-9297},
  doi      = {10.1086/303098},
  keywords = {Animals, Consensus Sequence, Exons, genes, Genome, Genomics, HUMANS, Nucleotides, Regulatory Sequences, Nucleic Acid, RNA Splice Sites, RNA Splicing, Untranslated Regions},
}

@article{49704,
  author   = {Lo, P C and Mount, S M},
  title    = {{Drosophila} melanogaster genes for {U1} {snRNA} variants and their expression during development},
  journal  = {Nucleic Acids Res},
  volume   = {18},
  year     = {1990},
  month    = dec # "~11",
  pages    = {6971--6979},
  issn     = {0305-1048},
  keywords = {Animals, Base Sequence, Blotting, Southern, Cloning, Molecular, Drosophila melanogaster, Gene Expression Regulation, genes, Genetic Variation, Molecular Sequence Data, Nucleic Acid Conformation, Pseudogenes, Restriction Mapping, RNA, Small Nuclear},
  abstract = {We have cloned and characterized a complete set of seven U1-related sequences from Drosophila melanogaster. These sequences are located at the three cytogenetic loci 21D, 82E, and 95C. Three of these sequences have been previously studied: one U1 gene at 21D which encodes the prototype U1 sequence (U1a), one U1 gene at 82E which encodes a U1 variant with a single nucleotide substitution (U1b), and a pseudogene at 82E. The four previously uncharacterized genes are another U1b gene at 82E, two additional U1a genes at 95C, and a U1 gene at 95C which encodes a new variant (U1c) with a distinct single nucleotide change relative to U1a. Three blocks of 5' flanking sequence similarity are common to all six full length genes. Using specific primer extension assays, we have observed that the U1b RNA is expressed in Drosophila Kc cells and is associated with snRNP proteins, suggesting that the U1b-containing snRNP particles are able to participate in the process of pre-mRNA splicing. We have also examined the expression throughout Drosophila development of the two U1 variants relative to the prototype sequence. The U1c variant is undetectable by our methods, while the U1b variant exhibits a primarily embryonic pattern reminiscent of the expression of certain U1 variants in sea urchin, Xenopus, and mouse.},
}

@article{49707,
  author   = {Pepling, M and Mount, S M},
  title    = {Sequence of a {cDNA} from the {Drosophila} melanogaster white gene},
  journal  = {Nucleic Acids Res},
  volume   = {18},
  year     = {1990},
  month    = mar # "~25",
  pages    = {1633},
  issn     = {0305-1048},
  keywords = {Amino Acid Sequence, Animals, Base Sequence, DNA, Drosophila melanogaster, Eye Color, genes, Molecular Sequence Data},
}

@article{49706,
  author   = {Mancebo, R and Lo, P C and Mount, S M},
  title    = {Structure and expression of the {Drosophila} melanogaster gene for the {U1} small nuclear ribonucleoprotein particle {70K} protein},
  journal  = {Mol Cell Biol},
  volume   = {10},
  year     = {1990},
  month    = jun,
  pages    = {2492--2502},
  issn     = {0270-7306},
  keywords = {Amino Acid Sequence, Animals, Base Sequence, Blotting, Northern, Blotting, Southern, Cloning, Molecular, DNA, Drosophila melanogaster, Gene expression, Gene Library, genes, HUMANS, Molecular Sequence Data, Molecular Weight, Oligonucleotide Probes, Poly A, Ribonucleoproteins, Ribonucleoproteins, Small Nuclear, RNA, RNA, Messenger, Sequence Homology, Nucleic Acid, Xenopus},
  abstract = {A genomic clone encoding the Drosophila U1 small nuclear ribonucleoprotein particle 70K protein was isolated by hybridization with a human U1 small nuclear ribonucleoprotein particle 70K protein cDNA. Southern blot and in situ hybridizations showed that this U1 70K gene is unique in the Drosophila genome, residing at cytological position 27D1,2. Polyadenylated transcripts of 1.9 and 3.1 kilobases were observed. While the 1.9-kilobase mRNA is always more abundant, the ratio of these two transcripts is developmentally regulated. Analysis of cDNA and genomic sequences indicated that these two RNAs encode an identical protein with a predicted molecular weight of 52,879. Comparison of the U1 70K proteins predicted from Drosophila, human, and Xenopus cDNAs revealed 68\% amino acid identity in the most amino-terminal 214 amino acids, which include a sequence motif common to many proteins which bind RNA. The carboxy-terminal half is less well conserved but is highly charged and contains distinctive arginine-rich regions in all three species. These arginine-rich regions contain stretches of arginine-serine dipeptides like those found in transformer, transformer-2, and suppressor-of-white-apricot proteins, all of which have been identified as regulators of mRNA splicing in Drosophila melanogaster.},
}

@article{49709,
  author   = {Mount, S M},
  title    = {Sequence similarity},
  journal  = {Nature},
  volume   = {325},
  year     = {1987},
  month    = feb # "~5--11",
  pages    = {487},
  issn     = {0028-0836},
  doi      = {10.1038/325487c0},
  keywords = {Adenosine Triphosphate, Amino Acid Sequence, Animals, Bacterial Proteins, Biological Transport, Active, Carrier Proteins, Drosophila melanogaster, genes, HUMANS, Pigments, Biological, Sequence Homology, Nucleic Acid},
}

@article{49715,
  author   = {Bernstein, L B and Mount, S M and Weiner, A M},
  title    = {Pseudogenes for human small nuclear {RNA} {U3} appear to arise by integration of self-primed reverse transcripts of the {RNA} into new chromosomal sites},
  journal  = {Cell},
  volume   = {32},
  year     = {1983},
  month    = feb,
  pages    = {461--472},
  issn     = {0092-8674},
  keywords = {Animals, Base Sequence, DNA, genes, HUMANS, Nucleic Acid Conformation, Rats, Recombination, Genetic, Repetitive Sequences, Nucleic Acid, RNA, RNA, Small Nuclear, RNA-Directed DNA Polymerase, Templates, Genetic, Transcription, Genetic},
  abstract = {We find that both human and rat U3 snRNA can function as self-priming templates for AMV reverse transcriptase in vitro. The 74 base cDNA is primed by the 3' end of intact U3 snRNA, and spans the characteristically truncated 69 or 70 base U3 sequence found in four different human U3 pseudogenes. The ability of human and rat U3 snRNA to self-prime is consistent with a U3 secondary structure model derived by a comparison between rat U3 snRNA and the homologous D2 snRNA from Dictyostelium discoideum. We propose that U3 pseudogenes are generated in vivo by integration of a self-primed cDNA copy of U3 snRNA at new chromosomal sites. We also consider the possibility that the same cDNA mediates gene conversion at the 5' end of bona fide U3 genes where, over the entire region spanned by the U3 cDNA, the two rat U3 sequence variants U3A and U3B are identical.},
}

@article{49717,
  author   = {Mount, S M},
  title    = {A catalogue of splice junction sequences},
  journal  = {Nucleic Acids Res},
  volume   = {10},
  year     = {1982},
  month    = jan # "~22",
  pages    = {459--472},
  issn     = {0305-1048},
  keywords = {Animals, Base Sequence, genes, Genes, Viral, HUMANS, Repetitive Sequences, Nucleic Acid, RNA Splicing, Species Specificity},
  abstract = {Splice junction sequences from a large number of nuclear and viral genes encoding protein have been collected. The sequence CAAG/GTAGAGT was found to be a consensus of 139 exon-intron boundaries (or donor sequences) and (TC)nNCTAG/G was found to be a consensus of 130 intron-exon boundaries (or acceptor sequences). The possible role of splice junction sequences as signals for processing is discussed.},
}