@article{38529,
  title    = {{TIGRFAMs} and {Genome Properties} in 2013},
  journal  = {Nucleic Acids Research},
  volume   = {41},
  year     = {2013},
  url      = {http://www.ncbi.nlm.nih.gov/pubmed/23197656?dopt=Abstract},
  doi      = {10.1093/nar/gks1234},
  abstract = {TIGRFAMs, available online at http://www.jcvi.org/tigrfams is a database of protein family definitions. Each entry features a seed alignment of trusted representative sequences, a hidden Markov model (HMM) built from that alignment, cutoff scores that let automated annotation pipelines decide which proteins are members, and annotations for transfer onto member proteins. Most TIGRFAMs models are designated equivalog, meaning they assign a specific name to proteins conserved in function from a common ancestral sequence. Models describing more functionally heterogeneous families are designated subfamily or domain, and assign less specific but more widely applicable annotations. The Genome Properties database, available at http://www.jcvi.org/genome-properties, specifies how computed evidence, including TIGRFAMs HMM results, should be used to judge whether an enzymatic pathway, a protein complex or another type of molecular subsystem is encoded in a genome. TIGRFAMs and Genome Properties content are developed in concert because subsystems reconstruction for large numbers of genomes guides selection of seed alignment sequences and cutoff values during protein family construction. Both databases specialize heavily in bacterial and archaeal subsystems. At present, 4284 models appear in TIGRFAMs, while 628 systems are described by Genome Properties. Content derives both from subsystem discovery work and from biocuration of the scientific literature.},
  keywords = {Databases, Protein, Genome, Archaeal, Genome, Bacterial, Genomics, Internet, Markov chains, Molecular Sequence Annotation, Proteins, sequence alignment},
  author   = {Haft, Daniel H. and Selengut, J. and Richter, Roland A. and Harkins, Derek and Basu, Malay K. and Beck, Erin},
}

@article{38119,
  title    = {Archaeosortases and exosortases are widely distributed systems linking membrane transit with posttranslational modification},
  journal  = {Journal of Bacteriology},
  volume   = {194},
  year     = {2012},
  url      = {http://www.ncbi.nlm.nih.gov/pubmed/22037399?dopt=Abstract},
  doi      = {10.1128/JB.06026-11},
  abstract = {Multiple new prokaryotic C-terminal protein-sorting signals were found that reprise the tripartite architecture shared by LPXTG and PEP-CTERM: motif, TM helix, basic cluster. Defining hidden Markov models were constructed for all. PGF-CTERM occurs in 29 archaeal species, some of which have more than 50 proteins that share the domain. PGF-CTERM proteins include the major cell surface protein in Halobacterium, a glycoprotein with a partially characterized diphytanylglyceryl phosphate linkage near its C terminus. Comparative genomics identifies a distant exosortase homolog, designated archaeosortase A (ArtA), as the likely protein-processing enzyme for PGF-CTERM. Proteomics suggests that the PGF-CTERM region is removed. Additional systems include VPXXXP-CTERM/archeaosortase B in two of the same archaea and PEF-CTERM/archaeosortase C in four others. Bacterial exosortases often fall into subfamilies that partner with very different cohorts of extracellular polymeric substance biosynthesis proteins; several species have multiple systems. Variant systems include the VPDSG-CTERM/exosortase C system unique to certain members of the phylum Verrucomicrobia, VPLPA-CTERM/exosortase D in several alpha- and deltaproteobacterial species, and a dedicated (single-target) VPEID-CTERM/exosortase E system in alphaproteobacteria. Exosortase-related families XrtF in the class Flavobacteria and XrtG in Gram-positive bacteria mark distinctive conserved gene neighborhoods.
A picture emerges of an ancient and now well-differentiated superfamily of deeply membrane-embedded protein-processing enzymes. Their target proteins are destined to transit cellular membranes during their biosynthesis, during which most undergo additional posttranslational modifications such as glycosylation.},
  keywords = {Amino Acid Sequence, Aminoacyltransferases, Archaeal Proteins, Bacterial Proteins, Cell Membrane, Cysteine Endopeptidases, Gene Expression Regulation, Archaeal, Gene Expression Regulation, Bacterial, Gene Expression Regulation, Enzymologic, Molecular Sequence Data, Protein Processing, Post-Translational},
  author   = {Haft, Daniel H. and Payne, Samuel H. and Selengut, J.},
}

@article{38316,
  title    = {Genomic insights to {SAR86}, an abundant and uncultivated marine bacterial lineage},
  journal  = {The ISME Journal},
  volume   = {6},
  year     = {2012},
  url      = {http://www.ncbi.nlm.nih.gov/pubmed/22170421?dopt=Abstract},
  doi      = {10.1038/ismej.2011.189},
  abstract = {Bacteria in the 16S rRNA clade SAR86 are among the most abundant uncultivated constituents of microbial assemblages in the surface ocean for which little genomic information is currently available. Bioinformatic techniques were used to assemble two nearly complete genomes from marine metagenomes and single-cell sequencing provided two more partial genomes. Recruitment of metagenomic data shows that these SAR86 genomes substantially increase our knowledge of non-photosynthetic bacteria in the surface ocean. Phylogenomic analyses establish SAR86 as a basal and divergent lineage of γ-proteobacteria, and the individual genomes display a temperature-dependent distribution. Modestly sized at 1.25-1.7 Mbp, the SAR86 genomes lack several pathways for amino-acid and vitamin synthesis as well as sulfate reduction, trends commonly observed in other abundant marine microbes. SAR86 appears to be an aerobic chemoheterotroph with the potential for proteorhodopsin-based ATP generation, though the apparent lack of a retinal biosynthesis pathway may require it to scavenge exogenously-derived pigments to utilize proteorhodopsin. The genomes contain an expanded capacity for the degradation of lipids and carbohydrates acquired using a wealth of tonB-dependent outer membrane receptors. Like the abundant planktonic marine bacterial clade SAR11, SAR86 exhibits metabolic streamlining, but also a distinct carbon compound specialization, possibly avoiding competition.},
  keywords = {Computational Biology, Gammaproteobacteria, Genome, Bacterial, Genomic Library, metagenomics, Oceans and Seas, Phylogeny, plankton, Rhodopsin, RNA, Ribosomal, 16S, Seawater},
  author   = {Dupont, Chris L. and Rusch, Douglas B. and Yooseph, Shibu and Lombardo, Mary-Jane and Richter, R. Alexander and Valas, Ruben and Novotny, Mark and Yee-Greenbaum, Joyclyn and Selengut, J. and Haft, Dan H. and Halpern, Aaron L. and Lasken, Roger S. and Nealson, Kenneth and Friedman, Robert and Venter, J. Craig},
}

@article{38352,
  title    = {{InterPro} in 2011: new developments in the family and domain prediction database},
  journal  = {Nucleic Acids Research},
  volume   = {40},
  year     = {2012},
  url      = {http://www.ncbi.nlm.nih.gov/pubmed/22096229?dopt=Abstract},
  doi      = {10.1093/nar/gkr948},
  abstract = {InterPro (http://www.ebi.ac.uk/interpro/) is a database that integrates diverse information about protein families, domains and functional sites, and makes it freely available to the public via Web-based interfaces and services. Central to the database are diagnostic models, known as signatures, against which protein sequences can be searched to determine their potential function. InterPro has utility in the large-scale analysis of whole genomes and meta-genomes, as well as in characterizing individual protein sequences.
Herein we give an overview of new developments in the database and its associated software since 2009, including updates to database content, curation processes and Web and programmatic interfaces.},
  keywords = {Databases, Protein, Protein Structure, Tertiary, Proteins, Sequence Analysis, Protein, software, Terminology as Topic, User-Computer Interface},
  author   = {Hunter, Sarah and Jones, Philip and Mitchell, Alex and Apweiler, Rolf and Attwood, Teresa K. and Bateman, Alex and Bernard, Thomas and Binns, David and Bork, Peer and Burge, Sarah and de Castro, Edouard and Coggill, Penny and Corbett, Matthew and Das, Ujjwal and Daugherty, Louise and Duquenne, Lauranne and Finn, Robert D. and Fraser, Matthew and Gough, Julian and Haft, Daniel and Hulo, Nicolas and Kahn, Daniel and Kelly, Elizabeth and Letunic, Ivica and Lonsdale, David and Lopez, Rodrigo and Madera, Martin and Maslen, John and McAnulla, Craig and McDowall, Jennifer and McMenamin, Conor and Mi, Huaiyu and Mutowo-Muellenet, Prudence and Mulder, Nicola and Natale, Darren and Orengo, Christine and Pesseat, Sebastien and Punta, Marco and Quinn, Antony F. and Rivoire, Catherine and Sangrador-Vegas, Amaia and Selengut, J. and Sigrist, Christian J. A. and Scheremetjew, Maxim and Tate, John and Thimmajanarthanan, Manjulapramila and Thomas, Paul D. and Wu, Cathy H. and Yeats, Corin and Yong, Siew-Yit},
}

@article{38573,
  title    = {Whole genome analysis of {Leptospira licerasiae} provides insight into leptospiral evolution and pathogenicity},
  journal  = {PLoS Neglected Tropical Diseases},
  volume   = {6},
  year     = {2012},
  url      = {http://www.ncbi.nlm.nih.gov/pubmed/23145189?dopt=Abstract},
  doi      = {10.1371/journal.pntd.0001853},
  abstract = {The whole genome analysis of two strains of the first intermediately pathogenic leptospiral species to be sequenced (Leptospira licerasiae strains VAR010 and MMD0835) provides insight into their pathogenic potential and deepens our understanding of leptospiral evolution. Comparative analysis of eight leptospiral genomes shows the existence of a core leptospiral genome comprising 1547 genes and 452 conserved genes restricted to infectious species (including L. licerasiae) that are likely to be pathogenicity-related. Comparisons of the functional content of the genomes suggests that L. licerasiae retains several proteins related to nitrogen, amino acid and carbohydrate metabolism which might help to explain why these Leptospira grow well in artificial media compared with pathogenic species. L. licerasiae strains VAR010(T) and MMD0835 possess two prophage elements. While one element is circular and shares homology with LE1 of L. biflexa, the second is cryptic and homologous to a previously identified but unnamed region in L. interrogans serovars Copenhageni and Lai. We also report a unique O-antigen locus in L. licerasiae comprised of a 6-gene cluster that is unexpectedly short compared with L. interrogans in which analogous regions may include >90 such genes. Sequence homology searches suggest that these genes were acquired by lateral gene transfer (LGT). Furthermore, seven putative genomic islands ranging in size from 5 to 36 kb are present also suggestive of antecedent LGT.
How Leptospira become naturally competent remains to be determined, but considering the phylogenetic origins of the genes comprising the O-antigen cluster and other putative laterally transferred genes, L. licerasiae must be able to exchange genetic material with non-invasive environmental bacteria. The data presented here demonstrate that L. licerasiae is genetically more closely related to pathogenic than to saprophytic Leptospira and provide insight into the genomic bases for its infectiousness and its unique antigenic characteristics.},
  keywords = {DNA, Bacterial, Evolution, Molecular, Gene Transfer, Horizontal, Genome, Bacterial, Genomic islands, HUMANS, Leptospira, Molecular Sequence Data, Multigene Family, Prophages, Sequence Analysis, DNA, Virulence factors},
  author   = {Ricaldi, Jessica N. and Fouts, Derrick E. and Selengut, J. and Harkins, Derek M. and Patra, Kailash P. and Moreno, Angelo and Lehmann, Jason S. and Purushe, Janaki and Sanka, Ravi and Torres, Michael and Webster, Nicholas J. and Vinetz, Joseph M. and Matthias, Michael A.},
}

@article{38452,
  title    = {{ProPhylo}: partial phylogenetic profiling to guide protein family construction and assignment of biological process},
  journal  = {BMC Bioinformatics},
  volume   = {12},
  year     = {2011},
  url      = {http://www.ncbi.nlm.nih.gov/pubmed/22070167?dopt=Abstract},
  doi      = {10.1186/1471-2105-12-434},
  abstract = {BACKGROUND: Phylogenetic profiling is a technique of scoring co-occurrence between a protein family and some other trait, usually another protein family, across a set of taxonomic groups. In spite of several refinements in recent years, the technique still invites significant improvement. To be its most effective, a phylogenetic profiling algorithm must be able to examine co-occurrences among protein families whose boundaries are uncertain within large homologous protein superfamilies. RESULTS: Partial Phylogenetic Profiling (PPP) is an iterative algorithm that scores a given taxonomic profile against the taxonomic distribution of families for all proteins in a genome. The method works through optimizing the boundary of each protein family, rather than by relying on prebuilt protein families or fixed sequence similarity thresholds. Double Partial Phylogenetic Profiling (DPPP) is a related procedure that begins with a single sequence and searches for optimal granularities for its surrounding protein family in order to generate the best query profiles for PPP. We present ProPhylo, a high-performance software package for phylogenetic profiling studies through creating individually optimized protein family boundaries. ProPhylo provides precomputed databases for immediate use and tools for manipulating the taxonomic profiles used as queries. CONCLUSION: ProPhylo results show universal markers of methanogenesis, a new DNA phosphorothioation-dependent restriction enzyme, and efficacy in guiding protein family construction. The software and the associated databases are freely available under the open source Perl Artistic License from ftp://ftp.jcvi.org/pub/data/ppp/.},
  keywords = {algorithms, Archaea, Archaeal Proteins, DNA, Methane, Phylogeny, software},
  author   = {Basu, Malay K. and Selengut, J. and Haft, Daniel H.},
}

@article{38506,
  title    = {{Sites Inferred by Metabolic Background Assertion Labeling} ({SIMBAL}): adapting the {Partial Phylogenetic Profiling} algorithm to scan sequences for signatures that predict protein function},
  journal  = {BMC Bioinformatics},
  volume   = {11},
  year     = {2010},
  url      = {http://www.ncbi.nlm.nih.gov/pubmed/20102603?dopt=Abstract},
  doi      = {10.1186/1471-2105-11-52},
  abstract = {BACKGROUND: Comparative genomics methods such as phylogenetic profiling can mine powerful inferences from inherently noisy biological data sets.
We introduce Sites Inferred by Metabolic Background Assertion Labeling (SIMBAL), a method that applies the Partial Phylogenetic Profiling (PPP) approach locally within a protein sequence to discover short sequence signatures associated with functional sites. The approach is based on the basic scoring mechanism employed by PPP, namely the use of binomial distribution statistics to optimize sequence similarity cutoffs during searches of partitioned training sets. RESULTS: Here we illustrate and validate the ability of the SIMBAL method to find functionally relevant short sequence signatures by application to two well-characterized protein families. In the first example, we partitioned a family of ABC permeases using a metabolic background property (urea utilization). Thus, the TRUE set for this family comprised members whose genome of origin encoded a urea utilization system. By moving a sliding window across the sequence of a permease, and searching each subsequence in turn against the full set of partitioned proteins, the method found which local sequence signatures best correlated with the urea utilization trait. Mapping of SIMBAL "hot spots" onto crystal structures of homologous permeases reveals that the significant sites are gating determinants on the cytosolic face rather than, say, docking sites for the substrate-binding protein on the extracellular face. In the second example, we partitioned a protein methyltransferase family using gene proximity as a criterion. In this case, the TRUE set comprised those methyltransferases encoded near the gene for the substrate RF-1. SIMBAL identifies sequence regions that map onto the substrate-binding interface while ignoring regions involved in the methyltransferase reaction mechanism in general. Neither method for training set construction requires any prior experimental characterization.
CONCLUSIONS: SIMBAL shows that, in functionally divergent protein families, selected short sequences often significantly outperform their full-length parent sequence for making functional predictions by sequence similarity, suggesting avenues for improved functional classifiers. When combined with structural data, SIMBAL affords the ability to localize and model functional sites.},
  keywords = {algorithms, Amino Acid Sequence, Gene Expression Profiling, Molecular Sequence Data, Phylogeny, Proteins, Sequence Analysis, Protein, Structure-Activity Relationship},
  author   = {Selengut, J. and Rusch, Douglas B. and Haft, Daniel H.},
}

@article{38556,
  title    = {Unexpected abundance of coenzyme {F(420)}-dependent enzymes in {Mycobacterium tuberculosis} and other actinobacteria},
  journal  = {Journal of Bacteriology},
  volume   = {192},
  year     = {2010},
  url      = {http://www.ncbi.nlm.nih.gov/pubmed/20675471?dopt=Abstract},
  doi      = {10.1128/JB.00425-10},
  abstract = {Regimens targeting Mycobacterium tuberculosis, the causative agent of tuberculosis (TB), require long courses of treatment and a combination of three or more drugs. An increase in drug-resistant strains of M. tuberculosis demonstrates the need for additional TB-specific drugs. A notable feature of M. tuberculosis is coenzyme F(420), which is distributed sporadically and sparsely among prokaryotes. This distribution allows for comparative genomics-based investigations. Phylogenetic profiling (comparison of differential gene content) based on F(420) biosynthesis nominated many actinobacterial proteins as candidate F(420)-dependent enzymes. Three such families dominated the results: the luciferase-like monooxygenase (LLM), pyridoxamine 5{\textquoteright}-phosphate oxidase (PPOX), and deazaflavin-dependent nitroreductase (DDN) families. The DDN family was determined to be limited to F(420)-producing species.
The LLM and PPOX families were observed in F(420)-producing species as well as species lacking F(420) but were particularly numerous in many actinobacterial species, including M. tuberculosis. Partitioning the LLM and PPOX families based on an organism{\textquoteright}s ability to make F(420) allowed the application of the SIMBAL (sites inferred by metabolic background assertion labeling) profiling method to identify F(420)-correlated subsequences. These regions were found to correspond to flavonoid cofactor binding sites. Significantly, these results showed that M. tuberculosis carries at least 28 separate F(420)-dependent enzymes, most of unknown function, and a paucity of flavin mononucleotide (FMN)-dependent proteins in these families. While prevalent in mycobacteria, markers of F(420) biosynthesis appeared to be absent from the normal human gut flora. These findings suggest that M. tuberculosis relies heavily on coenzyme F(420) for its redox reactions. This dependence and the cofactor{\textquoteright}s rarity may make F(420)-related proteins promising drug targets.},
  keywords = {Actinobacteria, Amino Acid Sequence, Binding Sites, Coenzymes, Flavonoids, Gene Expression Profiling, Gene Expression Regulation, Bacterial, Genome, Bacterial, molecular biology, Molecular Sequence Data, Molecular Structure, Mycobacterium tuberculosis, Phylogeny, Protein Conformation, Riboflavin},
  author   = {Selengut, J. and Haft, Daniel H.},
}

@article{38353,
  title    = {{InterPro}: the integrative protein signature database},
  journal  = {Nucleic Acids Research},
  volume   = {37},
  year     = {2009},
  url      = {http://www.ncbi.nlm.nih.gov/pubmed/18940856?dopt=Abstract},
  doi      = {10.1093/nar/gkn785},
  abstract = {The InterPro database (http://www.ebi.ac.uk/interpro/) integrates together predictive models or {\textquoteright}signatures{\textquoteright} representing protein domains, families and functional sites from multiple, diverse source databases: Gene3D, PANTHER, Pfam, PIRSF, PRINTS, ProDom, PROSITE, SMART, SUPERFAMILY and TIGRFAMs. Integration is performed manually and approximately half of the total approximately 58,000 signatures available in the source databases belong to an InterPro entry. Recently, we have started to also display the remaining un-integrated signatures via our web interface. Other developments include the provision of non-signature data, such as structural data, in new XML files on our FTP site, as well as the inclusion of matchless UniProtKB proteins in the existing match XML files. The web interface has been extended and now links out to the ADAN predicted protein-protein interaction database and the SPICE and Dasty viewers. The latest public release (v18.0) covers 79.8\% of UniProtKB (v14.1) and consists of 16 549 entries. InterPro data may be accessed either via the web address above, via web services, by downloading files by anonymous FTP or by using the InterProScan search software (http://www.ebi.ac.uk/Tools/InterProScan/).},
  keywords = {Databases, Protein, Proteins, Sequence Analysis, Protein, Systems Integration},
  author   = {Hunter, Sarah and Apweiler, Rolf and Attwood, Teresa K. and Bairoch, Amos and Bateman, Alex and Binns, David and Bork, Peer and Das, Ujjwal and Daugherty, Louise and Duquenne, Lauranne and Finn, Robert D. and Gough, Julian and Haft, Daniel and Hulo, Nicolas and Kahn, Daniel and Kelly, Elizabeth and Laugraud, Aur{\'e}lie and Letunic, Ivica and Lonsdale, David and Lopez, Rodrigo and Madera, Martin and Maslen, John and McAnulla, Craig and McDowall, Jennifer and Mistry, Jaina and Mitchell, Alex and Mulder, Nicola and Natale, Darren and Orengo, Christine and Quinn, Antony F. and Selengut, J. and Sigrist, Christian J. A. and Thimma, Manjula and Thomas, Paul D. and Valentin, Franck and Wilson, Derek and Wu, Cathy H. and Yeats, Corin},
}

@article{38528,
  title    = {Three genomes from the phylum {Acidobacteria} provide insight into the lifestyles of these microorganisms in soils},
  journal  = {Applied and Environmental Microbiology},
  volume   = {75},
  year     = {2009},
  url      = {http://www.ncbi.nlm.nih.gov/pubmed/19201974?dopt=Abstract},
  doi      = {10.1128/AEM.02294-08},
  abstract = {The complete genomes of three strains from the phylum Acidobacteria were compared. Phylogenetic analysis placed them as a unique phylum. They share genomic traits with members of the Proteobacteria, the Cyanobacteria, and the Fungi. The three strains appear to be versatile heterotrophs. Genomic and culture traits indicate the use of carbon sources that span simple sugars to more complex substrates such as hemicellulose, cellulose, and chitin. The genomes encode low-specificity major facilitator superfamily transporters and high-affinity ABC transporters for sugars, suggesting that they are best suited to low-nutrient conditions. They appear capable of nitrate and nitrite reduction but not N(2) fixation or denitrification. The genomes contained numerous genes that encode siderophore receptors, but no evidence of siderophore production was found, suggesting that they may obtain iron via interaction with other microorganisms.
The presence of cellulose synthesis genes and a large class of novel high-molecular-weight excreted proteins suggests potential traits for desiccation resistance, biofilm formation, and/or contribution to soil structure. Polyketide synthase and macrolide glycosylation genes suggest the production of novel antimicrobial compounds. Genes that encode a variety of novel proteins were also identified. The abundance of acidobacteria in soils worldwide and the breadth of potential carbon use by the sequenced strains suggest significant and previously unrecognized contributions to the terrestrial carbon cycle. Combining our genomic evidence with available culture traits, we postulate that cells of these isolates are long-lived, divide slowly, exhibit slow metabolic rates under low-nutrient conditions, and are well equipped to tolerate fluctuations in soil hydration.},
  keywords = {Anti-Bacterial Agents, bacteria, Biological Transport, Carbohydrate Metabolism, Cyanobacteria, DNA, Bacterial, Fungi, Genome, Bacterial, Macrolides, Molecular Sequence Data, Nitrogen, Phylogeny, Proteobacteria, Sequence Analysis, DNA, Sequence Homology, Soil Microbiology},
  author   = {Ward, Naomi L. and Challacombe, Jean F. and Janssen, Peter H. and Henrissat, Bernard and Coutinho, Pedro M. and Wu, Martin and Xie, Gary and Haft, Daniel H. and Sait, Michelle and Badger, Jonathan and Barabote, Ravi D. and Bradley, Brent and Brettin, Thomas S. and Brinkac, Lauren M. and Bruce, David and Creasy, Todd and Daugherty, Sean C. and Davidsen, Tanja M. and DeBoy, Robert T. and Detter, J. Chris and Dodson, Robert J. and Durkin, A. Scott and Ganapathy, Anuradha and Gwinn-Giglio, Michelle and Han, Cliff S. and Khouri, Hoda and Kiss, Hajnalka and Kothari, Sagar P. and Madupu, Ramana and Nelson, Karen E. and Nelson, William C. and Paulsen, Ian and Penn, Kevin and Ren, Qinghu and Rosovitz, M. J. and Selengut, J. and Shrivastava, Susmita and Sullivan, Steven A. and Tapia, Roxanne and Thompson, L. Sue and Watkins, Kisha L. and Yang, Qi and Yu, Chunhui and Zafar, Nikhat and Zhou, Liwei and Kuske, Cheryl R.},
}

@article{38383,
  title    = {The minimum information about a genome sequence ({MIGS}) specification},
  journal  = {Nature Biotechnology},
  volume   = {26},
  year     = {2008},
  url      = {http://www.ncbi.nlm.nih.gov/pubmed/18464787?dopt=Abstract},
  doi      = {10.1038/nbt1360},
  abstract = {With the quantity of genomic data increasing at an exponential rate, it is imperative that these data be captured electronically, in a standard format. Standardization activities must proceed within the auspices of open-access and international working bodies. To tackle the issues surrounding the development of better descriptions of genomic investigations, we have formed the Genomic Standards Consortium (GSC). Here, we introduce the minimum information about a genome sequence (MIGS) specification with the intent of promoting participation in its development and discussing the resources that will be required to develop improved mechanisms of metadata capture and exchange. As part of its wider goals, the GSC also supports improving the {\textquoteright}transparency{\textquoteright} of the information contained in existing genomic databases.},
  keywords = {Chromosome mapping, Databases, Factual, information dissemination, Information Storage and Retrieval, Information Theory, Internationality},
  author   = {Field, Dawn and Garrity, George and Gray, Tanya and Morrison, Norman and Selengut, J. and Sterk, Peter and Tatusova, Tatiana and Thomson, Nicholas and Allen, Michael J. and Angiuoli, Samuel V.
and Ashburner, Michael and Axelrod, Nelson and Baldauf, Sandra and Ballard, Stuart and Boore, Jeffrey and Cochrane, Guy and Cole, James and Dawyndt, Peter and De Vos, Paul and DePamphilis, Claude and Edwards, Robert and Faruque, Nadeem and Feldman, Robert and Gilbert, Jack and Gilna, Paul and Gl{\"o}ckner, Frank Oliver and Goldstein, Philip and Guralnick, Robert and Haft, Dan and Hancock, David and Hermjakob, Henning and Hertz-Fowler, Christiane and Hugenholtz, Phil and Joint, Ian and Kagan, Leonid and Kane, Matthew and Kennedy, Jessie and Kowalchuk, George and Kottmann, Renzo and Kolker, Eugene and Kravitz, Saul and Kyrpides, Nikos and Leebens-Mack, Jim and Lewis, Suzanna E. and Li, Kelvin and Lister, Allyson L. and Lord, Phillip and Maltsev, Natalia and Markowitz, Victor and Martiny, Jennifer and Methe, Barbara and Mizrachi, Ilene and Moxon, Richard and Nelson, Karen and Parkhill, Julian and Proctor, Lita and White, Owen and Sansone, Susanna-Assunta and Spiers, Andrew and Stevens, Robert and Swift, Paul and Taylor, Chris and Tateno, Yoshio and Tett, Adrian and Turner, Sarah and Ussery, David and Vaughan, Bob and Ward, Naomi and Whetzel, Trish and San Gil, Ingio and Wilson, Gareth and Wipat, Anil},
}

@article{38296,
  title    = {Genome sequence and identification of candidate vaccine antigens from the animal pathogen {Dichelobacter nodosus}},
  journal  = {Nature Biotechnology},
  volume   = {25},
  year     = {2007},
  url      = {http://www.ncbi.nlm.nih.gov/pubmed/17468768?dopt=Abstract},
  doi      = {10.1038/nbt1302},
  abstract = {Dichelobacter nodosus causes ovine footrot, a disease that leads to severe economic losses in the wool and meat industries. We sequenced its 1.4-Mb genome, the smallest known genome of an anaerobe. It differs markedly from small genomes of intracellular bacteria, retaining greater biosynthetic capabilities and lacking any evidence of extensive ongoing genome reduction. Comparative genomic microarray studies and bioinformatic analysis suggested that, despite its small size, almost 20\% of the genome is derived from lateral gene transfer. Most of these regions seem to be associated with virulence. Metabolic reconstruction indicated unsuspected capabilities, including carbohydrate utilization, electron transfer and several aerobic pathways. Global transcriptional profiling and bioinformatic analysis enabled the prediction of virulence factors and cell surface proteins. Screening of these proteins against ovine antisera identified eight immunogenic proteins that are candidate antigens for a cross-protective vaccine.},
  keywords = {Animals, Antigens, Chromosome mapping, Dichelobacter nodosus, Foot Rot, Genome, Bacterial, Sequence Analysis, DNA},
  author   = {Myers, Garry S. A. and Parker, Dane and Al-Hasani, Keith and Kennan, Ruth M. and Seemann, Torsten and Ren, Qinghu and Badger, Jonathan H. and Selengut, J. and DeBoy, Robert T. and Tettelin, Herv{\'e} and Boyce, John D. and McCarl, Victoria P. and Han, Xiaoyan and Nelson, William C. and Madupu, Ramana and Mohamoud, Yasmin and Holley, Tara and Fedorova, Nadia and Khouri, Hoda and Bottomley, Steven P. and Whittington, Richard J. and Adler, Ben and Songer, J. Glenn and Rood, Julian I. and Paulsen, Ian T.},
}

@article{38530,
  title    = {{TIGRFAMs} and {Genome Properties}: tools for the assignment of molecular function and biological process in prokaryotic genomes},
  journal  = {Nucleic Acids Research},
  volume   = {35},
  year     = {2007},
  url      = {http://www.ncbi.nlm.nih.gov/pubmed/17151080?dopt=Abstract},
  doi      = {10.1093/nar/gkl1043},
  abstract = {TIGRFAMs is a collection of protein family definitions built to aid in high-throughput annotation of specific protein functions.
Each family is based on a hidden Markov model (HMM), where both cutoff scores and membership in the seed alignment are chosen so that the HMMs can classify numerous proteins according to their specific molecular functions. Most TIGRFAMs models describe {\textquoteright}equivalog{\textquoteright} families, where both orthology and lateral gene transfer may be part of the evolutionary history, but where a single molecular function has been conserved. The Genome Properties system contains a queriable set of metabolic reconstructions, genome metrics and extractions of information from the scientific literature. Its genome-by-genome assertions of whether or not specific structures, pathways or systems are present provide high-level conceptual descriptions of genomic content. These assertions enable comparative genomics, provide a meaningful biological context to aid in manual annotation, support assignments of Gene Ontology (GO) biological process terms and help validate HMM-based predictions of protein function. The Genome Properties system is particularly useful as a generator of phylogenetic profiles, through which new protein family functions may be discovered. The TIGRFAMs and Genome Properties systems can be accessed at http://www.tigr.org/TIGRFAMs and http://www.tigr.org/Genome_Properties.}, keywords = {Archaeal Proteins, Bacterial Proteins, Databases, Protein, Genome, Bacterial, Genomics, Internet, Phylogeny, software, User-Computer Interface}, author = {J. Selengut and Haft, Daniel H. and Davidsen, Tanja and Ganapathy, Anurhada and Gwinn-Giglio, Michelle and Nelson, William C. and Richter, R. 
Alexander and White, Owen} } @article {38159, title = {Comparative genomic evidence for a close relationship between the dimorphic prosthecate bacteria {Hyphomonas} neptunium and {Caulobacter} crescentus}, journal = {Journal of Bacteriology}, volume = {188}, year = {2006}, url = {http://www.ncbi.nlm.nih.gov/pubmed/16980487?dopt=Abstract}, doi = {10.1128/JB.00111-06}, abstract = {The dimorphic prosthecate bacteria (DPB) are alpha-proteobacteria that reproduce in an asymmetric manner rather than by binary fission and are of interest as simple models of development. Prior to this work, the only member of this group for which genome sequence was available was the model freshwater organism Caulobacter crescentus. Here we describe the genome sequence of Hyphomonas neptunium, a marine member of the DPB that differs from C. crescentus in that H. neptunium uses its stalk as a reproductive structure. Genome analysis indicates that this organism shares more genes with C. crescentus than it does with Silicibacter pomeroyi (a closer relative according to 16S rRNA phylogeny), that it relies upon a heterotrophic strategy utilizing a wide range of substrates, that its cell cycle is likely to be regulated in a similar manner to that of C. crescentus, and that the outer membrane complements of H. neptunium and C. crescentus are remarkably similar. H. neptunium swarmer cells are highly motile via a single polar flagellum. With the exception of cheY and cheR, genes required for chemotaxis were absent in the H. neptunium genome. Consistent with this observation, H. neptunium swarmer cells did not respond to any chemotactic stimuli that were tested, which suggests that H. neptunium motility is a random dispersal mechanism for swarmer cells rather than a stimulus-controlled navigation system for locating specific environments. In addition to providing insights into bacterial development, the H.
neptunium genome will provide an important resource for the study of other interesting biological processes including chromosome segregation, polar growth, and cell aging.}, keywords = {Alphaproteobacteria, Bacterial Outer Membrane Proteins, Caulobacter crescentus, cell cycle, Chemotaxis, DNA, Bacterial, Flagella, Genome, Bacterial, Microbial Viability, Molecular Sequence Data, Movement, Sequence Analysis, DNA, Sequence Homology, signal transduction}, author = {Badger, Jonathan H. and Hoover, Timothy R. and Brun, Yves V. and Weiner, Ronald M. and Laub, Michael T. and Alexandre, Gladys and Mr{\'a}zek, Jan and Ren, Qinghu and Paulsen, Ian T. and Nelson, Karen E. and Khouri, Hoda M. and Radune, Diana and Sosa, Julia and Dodson, Robert J. and Sullivan, Steven A. and Rosovitz, M. J. and Madupu, Ramana and Brinkac, Lauren M. and Durkin, A. Scott and Daugherty, Sean C. and Kothari, Sagar P. and Giglio, Michelle Gwinn and Zhou, Liwei and Haft, Daniel H. and Selengut, J. and Davidsen, Tanja M. and Yang, Qi and Zafar, Nikhat and Ward, Naomi L.} } @article {38161, title = {Comparative genomics of emerging human ehrlichiosis agents}, journal = {PLoS Genetics}, volume = {2}, year = {2006}, url = {http://www.ncbi.nlm.nih.gov/pubmed/16482227?dopt=Abstract}, doi = {10.1371/journal.pgen.0020021}, abstract = {Anaplasma (formerly Ehrlichia) phagocytophilum, Ehrlichia chaffeensis, and Neorickettsia (formerly Ehrlichia) sennetsu are intracellular vector-borne pathogens that cause human ehrlichiosis, an emerging infectious disease. We present the complete genome sequences of these organisms along with comparisons to other organisms in the Rickettsiales order. Ehrlichia spp. and Anaplasma spp. display a unique large expansion of immunodominant outer membrane proteins facilitating antigenic variation. All Rickettsiales have a diminished ability to synthesize amino acids compared to their closest free-living relatives.
Unlike members of the Rickettsiaceae family, these pathogenic Anaplasmataceae are capable of making all major vitamins, cofactors, and nucleotides, which could confer a beneficial role in the invertebrate vector or the vertebrate host. Further analysis identified proteins potentially involved in vacuole confinement of the Anaplasmataceae, a life cycle involving a hematophagous vector, vertebrate pathogenesis, human pathogenesis, and lack of transovarial transmission. These discoveries provide significant insights into the biology of these obligate intracellular pathogens.}, keywords = {Animals, Biotin, DNA Repair, Ehrlichia, Ehrlichiosis, Genome, Genomics, HUMANS, Models, Biological, Phylogeny, Rickettsia, Ticks}, author = {Dunning Hotopp, Julie C. and Lin, Mingqun and Madupu, Ramana and Crabtree, Jonathan and Angiuoli, Samuel V. and Eisen, Jonathan A. and Seshadri, Rekha and Ren, Qinghu and Wu, Martin and Utterback, Teresa R. and Smith, Shannon and Lewis, Matthew and Khouri, Hoda and Zhang, Chunbin and Niu, Hua and Lin, Quan and Ohashi, Norio and Zhi, Ning and Nelson, William and Brinkac, Lauren M. and Dodson, Robert J. and Rosovitz, M. J. and Sundaram, Jaideep and Daugherty, Sean C. and Davidsen, Tanja and Durkin, Anthony S. and Gwinn, Michelle and Haft, Daniel H. and Selengut, J. and Sullivan, Steven A. and Zafar, Nikhat and Zhou, Liwei and Benahmed, Faiza and Forberger, Heather and Halpin, Rebecca and Mulligan, Stephanie and Robinson, Jeffrey and White, Owen and Rikihisa, Yasuko and Tettelin, Herv{\'e}} } @article {38247, title = {Exopolysaccharide-associated protein sorting in environmental organisms: the {PEP-CTERM/EpsH} system.
{Application} of a novel phylogenetic profiling heuristic}, journal = {BMC Biology}, volume = {4}, year = {2006}, url = {http://www.ncbi.nlm.nih.gov/pubmed/16930487?dopt=Abstract}, doi = {10.1186/1741-7007-4-29}, abstract = {BACKGROUND: Protein translocation to the proper cellular destination may be guided by various classes of sorting signals recognizable in the primary sequence. Detection in some genomes, but not others, may reveal sorting system components by comparison of the phylogenetic profile of the class of sorting signal to that of various protein families. RESULTS: We describe a short C-terminal homology domain, sporadically distributed in bacteria, with several key characteristics of protein sorting signals. The domain includes a near-invariant motif Pro-Glu-Pro (PEP). This possible recognition or processing site is followed by a predicted transmembrane helix and a cluster rich in basic amino acids. We designate this domain PEP-CTERM. It tends to occur multiple times in a genome if it occurs at all, with a median count of eight instances; Verrucomicrobium spinosum has sixty-five. PEP-CTERM-containing proteins generally contain an N-terminal signal peptide and exhibit high diversity and little homology to known proteins. All bacteria with PEP-CTERM have both an outer membrane and exopolysaccharide (EPS) production genes. By a simple heuristic for screening phylogenetic profiles in the absence of pre-formed protein families, we discovered that a homolog of the membrane protein EpsH (exopolysaccharide locus protein H) occurs in a species when PEP-CTERM domains are found. The EpsH family contains invariant residues consistent with a transpeptidase function. Most PEP-CTERM proteins are encoded by single-gene operons preceded by large intergenic regions. In the Proteobacteria, most of these upstream regions share a DNA sequence, a probable cis-regulatory site that contains a sigma-54 binding motif.
The phylogenetic profile for this DNA sequence exactly matches that of three proteins: a sigma-54-interacting response regulator (PrsR), a transmembrane histidine kinase (PrsK), and a TPR protein (PrsT). CONCLUSION: These findings are consistent with the hypothesis that PEP-CTERM and EpsH form a protein export sorting system, analogous to the LPXTG/sortase system of Gram-positive bacteria, and correlated to EPS expression. It occurs preferentially in bacteria from sediments, soils, and biofilms. The novel method that led to these findings, partial phylogenetic profiling, requires neither global sequence clustering nor arbitrary similarity cutoffs and appears to be a rapid, effective alternative to other profiling methods.}, keywords = {Amino Acid Motifs, Amino Acid Sequence, bacteria, Bacterial Proteins, Biofilms, Genome, Bacterial, Markov chains, Molecular Sequence Data, Phylogeny, Polysaccharides, Bacterial, Protein Sorting Signals, Protein Transport, Seawater, sequence alignment, Soil Microbiology}, author = {Haft, Daniel H. and Paulsen, Ian T. and Ward, Naomi and Selengut, J.} } @article {38227, title = {{eGenomics}: Cataloguing our Complete Genome Collection}, journal = {Comparative and Functional Genomics}, volume = {6}, year = {2005}, url = {http://www.ncbi.nlm.nih.gov/pubmed/18629208?dopt=Abstract}, doi = {10.1002/cfg.494}, author = {Field, Dawn and Garrity, George and Morrison, Norman and Selengut, J.
and Sterk, Peter and Tatusova, Tatiana and Thomson, Nick} } @article {38287, title = {Genome analysis of multiple pathogenic isolates of {Streptococcus} agalactiae: implications for the microbial ``pan-genome''}, journal = {Proceedings of the National Academy of Sciences of the United States of America}, volume = {102}, year = {2005}, url = {http://www.ncbi.nlm.nih.gov/pubmed/16172379?dopt=Abstract}, doi = {10.1073/pnas.0506758102}, abstract = {The development of efficient and inexpensive genome sequencing methods has revolutionized the study of human bacterial pathogens and improved vaccine design. Unfortunately, the sequence of a single genome does not reflect how genetic variability drives pathogenesis within a bacterial species and also limits genome-wide screens for vaccine candidates or for antimicrobial targets. We have generated the genomic sequence of six strains representing the five major disease-causing serotypes of Streptococcus agalactiae, the main cause of neonatal infection in humans. Analysis of these genomes and those available in databases showed that the S. agalactiae species can be described by a pan-genome consisting of a core genome shared by all isolates, accounting for approximately 80\% of any single genome, plus a dispensable genome consisting of partially shared and strain-specific genes. Mathematical extrapolation of the data suggests that the gene reservoir available for inclusion in the S. agalactiae pan-genome is vast and that unique genes will continue to be identified even after sequencing hundreds of genomes.}, keywords = {Amino Acid Sequence, Bacterial Capsules, Base Sequence, Gene expression, Genes, Bacterial, Genetic Variation, Genome, Bacterial, Molecular Sequence Data, Phylogeny, sequence alignment, Sequence Analysis, DNA, Streptococcus agalactiae, virulence}, author = {Tettelin, Herv{\'e} and Masignani, Vega and Cieslewicz, Michael J.
and Donati, Claudio and Medini, Duccio and Ward, Naomi L. and Angiuoli, Samuel V. and Crabtree, Jonathan and Jones, Amanda L. and Durkin, A. Scott and DeBoy, Robert T. and Davidsen, Tanja M. and Mora, Marirosa and Scarselli, Maria and Margarit y Ros, Immaculada and Peterson, Jeremy D. and Hauser, Christopher R. and Sundaram, Jaideep P. and Nelson, William C. and Madupu, Ramana and Brinkac, Lauren M. and Dodson, Robert J. and Rosovitz, Mary J. and Sullivan, Steven A. and Daugherty, Sean C. and Haft, Daniel H. and Selengut, J. and Gwinn, Michelle L. and Zhou, Liwei and Zafar, Nikhat and Khouri, Hoda and Radune, Diana and Dimitrov, George and Watkins, Kisha and O{\textquoteright}Connor, Kevin J. B. and Smith, Shannon and Utterback, Teresa R. and White, Owen and Rubens, Craig E. and Grandi, Guido and Madoff, Lawrence C. and Kasper, Dennis L. and Telford, John L. and Wessels, Michael R. and Rappuoli, Rino and Fraser, Claire M.} } @article {38294, title = {{Genome Properties}: a system for the investigation of prokaryotic genetic content for microbiology, genome annotation and comparative genomics}, journal = {Bioinformatics}, volume = {21}, year = {2005}, url = {http://www.ncbi.nlm.nih.gov/pubmed/15347579?dopt=Abstract}, doi = {10.1093/bioinformatics/bti015}, abstract = {MOTIVATION: The presence or absence of metabolic pathways and structures provide a context that makes protein annotation far more reliable. Compiling such information across microbial genomes improves the functional classification of proteins and provides a valuable resource for comparative genomics. RESULTS: We have created a Genome Properties system to present key aspects of prokaryotic biology using standardized computational methods and controlled vocabularies. Properties reflect gene content, phenotype, phylogeny and computational analyses.
The results of searches using hidden Markov models allow many properties to be deduced automatically, especially for families of proteins (equivalogs) conserved in function since their last common ancestor. Additional properties are derived from curation, published reports and other forms of evidence. Genome Properties system was applied to 156 complete prokaryotic genomes, and is easily mined to find differences between species, correlations between metabolic features and families of uncharacterized proteins, or relationships among properties. AVAILABILITY: Genome Properties can be found at http://www.tigr.org/Genome_Properties SUPPLEMENTARY INFORMATION: http://www.tigr.org/tigr-scripts/CMR2/genome_properties_references.spl.}, keywords = {Chromosome mapping, database management systems, Databases, Genetic, documentation, Gene Expression Profiling, Gene Expression Regulation, Genomics, Information Storage and Retrieval, Microbiological Techniques, natural language processing, Prokaryotic Cells, Proteome, signal transduction, software, User-Computer Interface, Vocabulary, Controlled}, author = {Haft, Daniel H. and Selengut, J. and Brinkac, Lauren M. and Zafar, Nikhat and White, Owen} } @article {38325, title = {A guild of 45 {CRISPR}-associated ({Cas}) protein families and multiple {CRISPR/Cas} subtypes exist in prokaryotic genomes}, journal = {PLoS Computational Biology}, volume = {1}, year = {2005}, url = {http://www.ncbi.nlm.nih.gov/pubmed/16292354?dopt=Abstract}, doi = {10.1371/journal.pcbi.0010060}, abstract = {Clustered regularly interspaced short palindromic repeats (CRISPRs) are a family of DNA direct repeats found in many prokaryotic genomes. Repeats of 21-37 bp typically show weak dyad symmetry and are separated by regularly sized, nonrepetitive spacer sequences. Four CRISPR-associated (Cas) protein families, designated Cas1 to Cas4, are strictly associated with CRISPR elements and always occur near a repeat cluster.
Some spacers originate from mobile genetic elements and are thought to confer "immunity" against the elements that harbor these sequences. In the present study, we have systematically investigated uncharacterized proteins encoded in the vicinity of these CRISPRs and found many additional protein families that are strictly associated with CRISPR loci across multiple prokaryotic species. Multiple sequence alignments and hidden Markov models have been built for 45 Cas protein families. These models identify family members with high sensitivity and selectivity and classify key regulators of development, DevR and DevS, in Myxococcus xanthus as Cas proteins. These identifications show that CRISPR/cas gene regions can be quite large, with up to 20 different, tandem-arranged cas genes next to a repeat cluster or filling the region between two repeat clusters. Distinctive subsets of the collection of Cas proteins recur in phylogenetically distant species and correlate with characteristic repeat periodicity. The analyses presented here support initial proposals of mobility of these units, along with the likelihood that loci of different subtypes interact with one another as well as with host cell defensive, replicative, and regulatory systems. It is evident from this analysis that CRISPR/cas loci are larger, more complex, and more heterogeneous than previously appreciated.}, keywords = {Genes, Archaeal, Genes, Bacterial, Genes, Fungal, Genome, Genome, Bacterial, Haloarcula marismortui, Markov chains, Multigene Family, Oligonucleotide Array Sequence Analysis, Phylogeny, Prokaryotic Cells, Proteins, Repetitive Sequences, Nucleic Acid, Yersinia pestis}, author = {Haft, Daniel H. and Selengut, J. and Mongodin, Emmanuel F. and Nelson, Karen E.} } @article {38575, title = {Whole-genome sequence analysis of {Pseudomonas} syringae pv.
phaseolicola {1448A} reveals divergence among pathovars in genes involved in virulence and transposition}, journal = {Journal of Bacteriology}, volume = {187}, year = {2005}, url = {http://www.ncbi.nlm.nih.gov/pubmed/16159782?dopt=Abstract}, doi = {10.1128/JB.187.18.6488-6498.2005}, abstract = {Pseudomonas syringae pv. phaseolicola, a gram-negative bacterial plant pathogen, is the causal agent of halo blight of bean. In this study, we report on the genome sequence of P. syringae pv. phaseolicola isolate 1448A, which encodes 5,353 open reading frames (ORFs) on one circular chromosome (5,928,787 bp) and two plasmids (131,950 bp and 51,711 bp). Comparative analyses with a phylogenetically divergent pathovar, P. syringae pv. tomato DC3000, revealed a strong degree of conservation at the gene and genome levels. In total, 4,133 ORFs were identified as putative orthologs in these two pathovars using a reciprocal best-hit method, with 3,941 ORFs present in conserved, syntenic blocks. Although these two pathovars are highly similar at the physiological level, they have distinct host ranges; 1448A causes disease in beans, and DC3000 is pathogenic on tomato and Arabidopsis. Examination of the complement of ORFs encoding virulence, fitness, and survival factors revealed a substantial, but not complete, overlap between these two pathovars. Another distinguishing feature between the two pathovars is their distinctive sets of transposable elements. With access to a fifth complete pseudomonad genome sequence, we were able to identify 3,567 ORFs that likely comprise the core Pseudomonas genome and 365 ORFs that are P. syringae specific.}, keywords = {Bacterial Proteins, DNA, Bacterial, Genes, Bacterial, Genome, Bacterial, Molecular Sequence Data, Pseudomonas syringae, Species Specificity, virulence}, author = {Joardar, Vinita and Lindeberg, Magdalen and Jackson, Robert W. and Selengut, J. and Dodson, Robert and Brinkac, Lauren M. and Daugherty, Sean C.
and DeBoy, Robert and Durkin, A. Scott and Giglio, Michelle Gwinn and Madupu, Ramana and Nelson, William C. and Rosovitz, M. J. and Sullivan, Steven and Crabtree, Jonathan and Creasy, Todd and Davidsen, Tanja and Haft, Dan H. and Zafar, Nikhat and Zhou, Liwei and Halpin, Rebecca and Holley, Tara and Khouri, Hoda and Feldblyum, Tamara and White, Owen and Fraser, Claire M. and Chatterjee, Arun K. and Cartinhour, Sam and Schneider, David J. and Mansfield, John and Collmer, Alan and Buell, C. Robin} } @article {38165, title = {Comparison of the genome of the oral pathogen {Treponema} denticola with other spirochete genomes}, journal = {Proceedings of the National Academy of Sciences of the United States of America}, volume = {101}, year = {2004}, url = {http://www.ncbi.nlm.nih.gov/pubmed/15064399?dopt=Abstract}, doi = {10.1073/pnas.0307639101}, abstract = {We present the complete 2,843,201-bp genome sequence of Treponema denticola (ATCC 35405) an oral spirochete associated with periodontal disease. Analysis of the T. denticola genome reveals factors mediating coaggregation, cell signaling, stress protection, and other competitive and cooperative measures, consistent with its pathogenic nature and lifestyle within the mixed-species environment of subgingival dental plaque. Comparisons with previously sequenced spirochete genomes revealed specific factors contributing to differences and similarities in spirochete physiology as well as pathogenic potential. The T. denticola genome is considerably larger in size than the genome of the related syphilis-causing spirochete Treponema pallidum. The differences in gene content appear to be attributable to a combination of three phenomena: genome reduction, lineage-specific expansions, and horizontal gene transfer.
Genes lost due to reductive evolution appear to be largely involved in metabolism and transport, whereas some of the genes that have arisen due to lineage-specific expansions are implicated in various pathogenic interactions, and genes acquired via horizontal gene transfer are largely phage-related or of unknown function.}, keywords = {ATP-Binding Cassette Transporters, Bacterial Proteins, Base Sequence, Borrelia burgdorferi, Genes, Bacterial, Genome, Bacterial, Leptospira interrogans, Models, Genetic, Molecular Sequence Data, Mouth, Sequence Homology, Amino Acid, Treponema, Treponema pallidum}, author = {Seshadri, Rekha and Myers, Garry S. A. and Tettelin, Herv{\'e} and Eisen, Jonathan A. and Heidelberg, John F. and Dodson, Robert J. and Davidsen, Tanja M. and DeBoy, Robert T. and Fouts, Derrick E. and Haft, Dan H. and Selengut, J. and Ren, Qinghu and Brinkac, Lauren M. and Madupu, Ramana and Kolonay, Jamie and Durkin, A. Scott and Daugherty, Sean C. and Shetty, Jyoti and Shvartsbeyn, Alla and Gebregeorgis, Elizabeth and Geer, Keita and Tsegaye, Getahun and Malek, Joel and Ayodeji, Bola and Shatsman, Sofiya and McLeod, Michael P. and Smajs, David and Howell, Jerrilyn K. and Pal, Sangita and Amin, Anita and Vashisth, Pankaj and McNeill, Thomas Z. and Xiang, Qin and Sodergren, Erica and Baca, Ernesto and Weinstock, George M. and Norris, Steven J. and Fraser, Claire M. and Paulsen, Ian T.} } @article {38302, title = {Genome sequence of {Silicibacter} pomeroyi reveals adaptations to the marine environment}, journal = {Nature}, volume = {432}, year = {2004}, url = {http://www.ncbi.nlm.nih.gov/pubmed/15602564?dopt=Abstract}, doi = {10.1038/nature03170}, abstract = {Since the recognition of prokaryotes as essential components of the oceanic food web, bacterioplankton have been acknowledged as catalysts of most major biogeochemical processes in the sea.
Studying heterotrophic bacterioplankton has been challenging, however, as most major clades have never been cultured or have only been grown to low densities in sea water. Here we describe the genome sequence of Silicibacter pomeroyi, a member of the marine Roseobacter clade (Fig. 1), the relatives of which comprise approximately 10-20\% of coastal and oceanic mixed-layer bacterioplankton. This first genome sequence from any major heterotrophic clade consists of a chromosome (4,109,442 base pairs) and megaplasmid (491,611 base pairs). Genome analysis indicates that this organism relies upon a lithoheterotrophic strategy that uses inorganic compounds (carbon monoxide and sulphide) to supplement heterotrophy. Silicibacter pomeroyi also has genes advantageous for associations with plankton and suspended particles, including genes for uptake of algal-derived compounds, use of metabolites from reducing microzones, rapid growth and cell-density-dependent regulation. This bacterium has a physiology distinct from that of marine oligotrophs, adding a new strategy to the recognized repertoire for coping with a nutrient-poor ocean.}, keywords = {Adaptation, Physiological, Carrier Proteins, Genes, Bacterial, Genome, Bacterial, marine biology, Molecular Sequence Data, Oceans and Seas, Phylogeny, plankton, RNA, Ribosomal, 16S, Roseobacter, Seawater}, author = {Moran, Mary Ann and Buchan, Alison and Gonz{\'a}lez, Jos{\'e} M. and Heidelberg, John F. and Whitman, William B. and Kiene, Ronald P. and Henriksen, James R. and King, Gary M. and Belas, Robert and Fuqua, Clay and Brinkac, Lauren and Lewis, Matt and Johri, Shivani and Weaver, Bruce and Pai, Grace and Eisen, Jonathan A. and Rahe, Elisha and Sheldon, Wade M. and Ye, Wenying and Miller, Todd R. and Carlton, Jane and Rasko, David A. and Paulsen, Ian T. and Ren, Qinghu and Daugherty, Sean C. and DeBoy, Robert T. and Dodson, Robert J. and Durkin, A. Scott and Madupu, Ramana and Nelson, William C. and Sullivan, Steven A.
and Rosovitz, M. J. and Haft, Daniel H. and Selengut, J. and Ward, Naomi} } @article {38303, title = {The genome sequence of the anaerobic, sulfate-reducing bacterium {Desulfovibrio} vulgaris {Hildenborough}}, journal = {Nature Biotechnology}, volume = {22}, year = {2004}, url = {http://www.ncbi.nlm.nih.gov/pubmed/15077118?dopt=Abstract}, doi = {10.1038/nbt959}, abstract = {Desulfovibrio vulgaris Hildenborough is a model organism for studying the energy metabolism of sulfate-reducing bacteria (SRB) and for understanding the economic impacts of SRB, including biocorrosion of metal infrastructure and bioremediation of toxic metal ions. The 3,570,858 base pair (bp) genome sequence reveals a network of novel c-type cytochromes, connecting multiple periplasmic hydrogenases and formate dehydrogenases, as a key feature of its energy metabolism. The relative arrangement of genes encoding enzymes for energy transduction, together with inferred cellular location of the enzymes, provides a basis for proposing an expansion to the {\textquoteleft}hydrogen-cycling{\textquoteright} model for increasing energy efficiency in this bacterium. Plasmid-encoded functions include modification of cell surface components, nitrogen fixation and a type-III protein secretion system. This genome sequence represents a substantial step toward the elucidation of pathways for reduction (and bioremediation) of pollutants such as uranium and chromium and offers a new starting point for defining this organism{\textquoteright}s complex anaerobic respiration.}, keywords = {Desulfovibrio vulgaris, Energy Metabolism, Genome, Bacterial, Molecular Sequence Data}, author = {Heidelberg, John F. and Seshadri, Rekha and Haveman, Shelley A. and Hemme, Christopher L. and Paulsen, Ian T. and Kolonay, James F. and Eisen, Jonathan A. and Ward, Naomi and Methe, Barbara and Brinkac, Lauren M. and Daugherty, Sean C. and DeBoy, Robert T. and Dodson, Robert J. and Durkin, A.
Scott and Madupu, Ramana and Nelson, William C. and Sullivan, Steven A. and Fouts, Derrick and Haft, Daniel H. and Selengut, J. and Peterson, Jeremy D. and Davidsen, Tanja M. and Zafar, Nikhat and Zhou, Liwei and Radune, Diana and Dimitrov, George and Hance, Mark and Tran, Kevin and Khouri, Hoda and Gill, John and Utterback, Terry R. and Feldblyum, Tamara V. and Wall, Judy D. and Voordouw, Gerrit and Fraser, Claire M.} } @article {38514, title = {Structural flexibility in the {Burkholderia} mallei genome}, journal = {Proceedings of the National Academy of Sciences of the United States of America}, volume = {101}, year = {2004}, url = {http://www.ncbi.nlm.nih.gov/pubmed/15377793?dopt=Abstract}, doi = {10.1073/pnas.0403306101}, abstract = {The complete genome sequence of Burkholderia mallei ATCC 23344 provides insight into this highly infectious bacterium{\textquoteright}s pathogenicity and evolutionary history. B. mallei, the etiologic agent of glanders, has come under renewed scientific investigation as a result of recent concerns about its past and potential future use as a biological weapon. Genome analysis identified a number of putative virulence factors whose function was supported by comparative genome hybridization and expression profiling of the bacterium in hamster liver in vivo. The genome contains numerous insertion sequence elements that have mediated extensive deletions and rearrangements of the genome relative to Burkholderia pseudomallei. The genome also contains a vast number (>12,000) of simple sequence repeats. Variation in simple sequence repeats in key genes can provide a mechanism for generating antigenic variation that may account for the mammalian host{\textquoteright}s inability to mount a durable adaptive immune response to a B.
mallei infection.}, keywords = {Animals, Base Composition, Base Sequence, Burkholderia mallei, Chromosomes, Bacterial, Cricetinae, Genome, Bacterial, Glanders, Liver, Mesocricetus, Molecular Sequence Data, Multigene Family, Oligonucleotide Array Sequence Analysis, Open Reading Frames, virulence}, author = {Nierman, William C. and DeShazer, David and Kim, H. Stanley and Tettelin, Herv{\'e} and Nelson, Karen E. and Feldblyum, Tamara and Ulrich, Ricky L. and Ronning, Catherine M. and Brinkac, Lauren M. and Daugherty, Sean C. and Davidsen, Tanja D. and DeBoy, Robert T. and Dimitrov, George and Dodson, Robert J. and Durkin, A. Scott and Gwinn, Michelle L. and Haft, Daniel H. and Khouri, Hoda and Kolonay, James F. and Madupu, Ramana and Mohammoud, Yasmin and Nelson, William C. and Radune, Diana and Romero, Claudia M. and Sarria, Saul and Selengut, J. and Shamblin, Christine and Sullivan, Steven A. and White, Owen and Yu, Yan and Zafar, Nikhat and Zhou, Liwei and Fraser, Claire M.} } @article {38574, title = {Whole genome comparisons of serotype 4b and 1/2a strains of the food-borne pathogen {Listeria} monocytogenes reveal new insights into the core genome components of this species}, journal = {Nucleic Acids Research}, volume = {32}, year = {2004}, url = {http://www.ncbi.nlm.nih.gov/pubmed/15115801?dopt=Abstract}, doi = {10.1093/nar/gkh562}, abstract = {The genomes of three strains of Listeria monocytogenes that have been associated with food-borne illness in the USA were subjected to whole genome comparative analysis. A total of 51, 97 and 69 strain-specific genes were identified in L.monocytogenes strains F2365 (serotype 4b, cheese isolate), F6854 (serotype 1/2a, frankfurter isolate) and H7858 (serotype 4b, meat isolate), respectively. Eighty-three genes were restricted to serotype 1/2a and 51 to serotype 4b strains.
These strain- and serotype-specific genes probably contribute to observed differences in pathogenicity, and the ability of the organisms to survive and grow in their respective environmental niches. The serotype 1/2a-specific genes include an operon that encodes the rhamnose biosynthetic pathway that is associated with teichoic acid biosynthesis, as well as operons for five glycosyl transferases and an adenine-specific DNA methyltransferase. A total of 8603 and 105 050 high quality single nucleotide polymorphisms (SNPs) were found on the draft genome sequences of strain H7858 and strain F6854, respectively, when compared with strain F2365. Whole genome comparative analyses revealed that the L.monocytogenes genomes are essentially syntenic, with the majority of genomic differences consisting of phage insertions, transposable elements and SNPs.}, keywords = {Base Composition, Chromosomes, Bacterial, DNA Transposable Elements, Food Microbiology, Genes, Bacterial, Genome, Bacterial, Genomics, Listeria monocytogenes, Meat, Open Reading Frames, Physical Chromosome Mapping, Polymorphism, Single Nucleotide, Prophages, Serotyping, Species Specificity, Synteny, virulence}, author = {Nelson, Karen E. and Fouts, Derrick E. and Mongodin, Emmanuel F. and Ravel, Jacques and DeBoy, Robert T. and Kolonay, James F. and Rasko, David A. and Angiuoli, Samuel V. and Gill, Steven R. and Paulsen, Ian T. and Peterson, Jeremy and White, Owen and Nelson, William C. and Nierman, William and Beanan, Maureen J. and Brinkac, Lauren M. and Daugherty, Sean C. and Dodson, Robert J. and Durkin, A. Scott and Madupu, Ramana and Haft, Daniel H. and Selengut, J. and Van Aken, Susan and Khouri, Hoda and Fedorova, Nadia and Forberger, Heather and Tran, Bao and Kathariou, Sophia and Wonderling, Laura D. and Uhlich, Gaylen A. and Bayles, Darrell O. and Luchansky, John B.
and Fraser, Claire M.} } @article {38578, title = {X-ray crystal structure of the hypothetical phosphotyrosine phosphatase {MDP-1} of the haloacid dehalogenase superfamily}, journal = {Biochemistry}, volume = {43}, year = {2004}, url = {http://www.ncbi.nlm.nih.gov/pubmed/15461449?dopt=Abstract}, doi = {10.1021/bi0490688}, abstract = {The haloacid dehalogenase (HAD) superfamily is comprised of structurally homologous enzymes that share several conserved sequence motifs (loops I-IV) in their active site. The majority of HAD members are phosphohydrolases and may be divided into three subclasses depending on domain organization. In classes I and II, a mobile "cap" domain reorients upon substrate binding, closing the active site to bulk solvent. Members of the third class lack this additional domain. Herein, we report the 1.9 {\AA} X-ray crystal structures of a member of the third subclass, magnesium-dependent phosphatase-1 (MDP-1) both in its unliganded form and with the product analogue, tungstate, bound to the active site. The secondary structure of MDP-1 is similar to that of the "core" domain of other type I and type II HAD members with the addition of a small, 28-amino acid insert that does not close down to exclude bulk solvent in the presence of ligand. In addition, the monomeric oligomeric state of MDP-1 does not allow the participation of a second subunit in the formation and solvent protection of the active site. The binding sites for the phosphate portion of the substrate and Mg(II) cofactor are also similar to those of other HAD members, with all previously observed contacts conserved. Unlike other subclass III HAD members, MDP-1 appears to be equally able to dephosphorylate phosphotyrosine and closed-ring phosphosugars. Modeling of possible substrates in the active site of MDP-1 reveals very few potential interactions with the substrate leaving group.
The mapping of conserved residues in sequences of MDP-1 from different eukaryotic organisms reveals that they colocalize to a large region on the surface of the protein outside the active site. This observation combined with the modeling studies suggests that the target of MDP-1 is most likely a phosphotyrosine in an unknown protein rather than a small sugar-based substrate.}, keywords = {Amino Acid Sequence, Animals, Binding Sites, Crystallography, X-Ray, HUMANS, Hydrogen-Ion Concentration, Hydrolases, Magnesium, Mice, Models, Molecular, Molecular Sequence Data, Phosphoprotein Phosphatases, Phosphotyrosine, Protein Phosphatase 1, Protein Structure, Quaternary, Protein Structure, Tertiary, sequence alignment, Solvents, Substrate Specificity}, author = {Peisach, Ezra and Selengut, J. and Dunaway-Mariano, Debra and Allen, Karen N.} } @article {38168, title = {The complete genome sequence of the Arabidopsis and tomato pathogen Pseudomonas syringae pv. tomato DC3000}, journal = {Proceedings of the National Academy of Sciences of the United States of AmericaProceedings of the National Academy of Sciences of the United States of America}, volume = {100}, year = {2003}, note = {http://www.ncbi.nlm.nih.gov/pubmed/12928499?dopt=Abstract}, type = {10.1073/pnas.1731982100}, abstract = {We report the complete genome sequence of the model bacterial pathogen Pseudomonas syringae pathovar tomato DC3000 (DC3000), which is pathogenic on tomato and Arabidopsis thaliana. The DC3000 genome (6.5 megabases) contains a circular chromosome and two plasmids, which collectively encode 5,763 ORFs. We identified 298 established and putative virulence genes, including several clusters of genes encoding 31 confirmed and 19 predicted type III secretion system effector proteins. Many of the virulence genes were members of paralogous families and also were proximal to mobile elements, which collectively comprise 7\% of the DC3000 genome.
The bacterium possesses a large repertoire of transporters for the acquisition of nutrients, particularly sugars, as well as genes implicated in attachment to plant surfaces. Over 12\% of the genes are dedicated to regulation, which may reflect the need for rapid adaptation to the diverse environments encountered during epiphytic growth and pathogenesis. Comparative analyses confirmed a high degree of similarity with two sequenced pseudomonads, Pseudomonas putida and Pseudomonas aeruginosa, yet revealed 1,159 genes unique to DC3000, of which 811 lack a known function.}, keywords = {Arabidopsis, Base Sequence, Biological Transport, Genome, Bacterial, Lycopersicon esculentum, Molecular Sequence Data, Plant Growth Regulators, Plasmids, Pseudomonas, Reactive Oxygen Species, Siderophores, virulence}, author = {Buell, C. Robin and Joardar, Vinita and Lindeberg, Magdalen and J. Selengut and Paulsen, Ian T. and Gwinn, Michelle L. and Dodson, Robert J. and DeBoy, Robert T. and Durkin, A. Scott and Kolonay, James F. and Madupu, Ramana and Daugherty, Sean and Brinkac, Lauren and Beanan, Maureen J. and Haft, Daniel H. and Nelson, William C. and Davidsen, Tanja and Zafar, Nikhat and Zhou, Liwei and Liu, Jia and Yuan, Qiaoping and Khouri, Hoda and Fedorova, Nadia and Tran, Bao and Russell, Daniel and Berry, Kristi and Utterback, Teresa and Aken, Susan E. van and Feldblyum, Tamara V. and D{\textquoteright}Ascenzo, Mark and Deng, Wen-Ling and Ramos, Adela R. and Alfano, James R. and Cartinhour, Samuel and Chatterjee, Arun K. and Delaney, Terrence P. and Lazarowitz, Sondra G. and Martin, Gregory B. and Schneider, David J. and Tang, Xiaoyan and Bender, Carol L. and White, Owen and Fraser, Claire M. 
and Collmer, Alan} } @article {38291, title = {Genome of Geobacter sulfurreducens: metal reduction in subsurface environments}, journal = {Science (New York, N.Y.)Science (New York, N.Y.)}, volume = {302}, year = {2003}, note = {http://www.ncbi.nlm.nih.gov/pubmed/14671304?dopt=Abstract}, type = {10.1126/science.1088727}, abstract = {The complete genome sequence of Geobacter sulfurreducens, a delta-proteobacterium, reveals unsuspected capabilities, including evidence of aerobic metabolism, one-carbon and complex carbon metabolism, motility, and chemotactic behavior. These characteristics, coupled with the possession of many two-component sensors and many c-type cytochromes, reveal an ability to create alternative, redundant, electron transport networks and offer insights into the process of metal ion reduction in subsurface environments. As well as playing roles in the global cycling of metals and carbon, this organism clearly has the potential for use in bioremediation of radioactive metals and in the generation of electricity.}, keywords = {Acetates, Acetyl Coenzyme A, Aerobiosis, Anaerobiosis, Bacterial Proteins, Carbon, Chemotaxis, Chromosomes, Bacterial, Cytochromes c, Electron Transport, Energy Metabolism, Genes, Bacterial, Genes, Regulator, Genome, Bacterial, Geobacter, Hydrogen, Metals, Movement, Open Reading Frames, Oxidation-Reduction, Phylogeny}, author = {Meth{\'e}, B. A. and Nelson, K. E. and Eisen, J. A. and Paulsen, I. T. and Nelson, W. and Heidelberg, J. F. and Wu, D. and Wu, M. and Ward, N. and Beanan, M. J. and Dodson, R. J. and Madupu, R. and Brinkac, L. M. and Daugherty, S. C. and DeBoy, R. T. and Durkin, A. S. and Gwinn, M. and Kolonay, J. F. and Sullivan, S. A. and Haft, D. H. and J. Selengut and Davidsen, T. M. and Zafar, N. and White, O. and Tran, B. and Romero, C. and Forberger, H. A. and Weidman, J. and Khouri, H. and Feldblyum, T. V. and Utterback, T. R. and Van Aken, S. E. and Lovley, D. R. and Fraser, C. 
M.} } @article {38531, title = {The TIGRFAMs database of protein families}, journal = {Nucleic acids researchNucleic Acids Research}, volume = {31}, year = {2003}, note = {http://www.ncbi.nlm.nih.gov/pubmed/12520025?dopt=Abstract}, abstract = {TIGRFAMs is a collection of manually curated protein families consisting of hidden Markov models (HMMs), multiple sequence alignments, commentary, Gene Ontology (GO) assignments, literature references and pointers to related TIGRFAMs, Pfam and InterPro models. These models are designed to support both automated and manually curated annotation of genomes. TIGRFAMs contains models of full-length proteins and shorter regions at the levels of superfamilies, subfamilies and equivalogs, where equivalogs are sets of homologous proteins conserved with respect to function since their last common ancestor. The scope of each model is set by raising or lowering cutoff scores and choosing members of the seed alignment to group proteins sharing specific function (equivalog) or more general properties. The overall goal is to provide information with maximum utility for the annotation process. TIGRFAMs is thus complementary to Pfam, whose models typically achieve broad coverage across distant homologs but end at the boundaries of conserved structural domains. The database currently contains over 1600 protein families. TIGRFAMs is available for searching or downloading at www.tigr.org/TIGRFAMs.}, keywords = {Animals, Databases, Protein, Markov chains, Mixed Function Oxygenases, Phylogeny, Proteins, Pyruvate Carboxylase, Sequence Homology, Amino Acid}, author = {Haft, Daniel H. and J. 
Selengut and White, Owen} } @article {38536, title = {The transcription factor Eyes absent is a protein tyrosine phosphatase}, journal = {NatureNature}, volume = {426}, year = {2003}, note = {http://www.ncbi.nlm.nih.gov/pubmed/14628053?dopt=Abstract}, type = {10.1038/nature02097}, abstract = {Post-translational modifications provide sensitive and flexible mechanisms to dynamically modulate protein function in response to specific signalling inputs. In the case of transcription factors, changes in phosphorylation state can influence protein stability, conformation, subcellular localization, cofactor interactions, transactivation potential and transcriptional output. Here we show that the evolutionarily conserved transcription factor Eyes absent (Eya) belongs to the phosphatase subgroup of the haloacid dehalogenase (HAD) superfamily, and propose a function for it as a non-thiol-based protein tyrosine phosphatase. Experiments performed in cultured Drosophila cells and in vitro indicate that Eyes absent has intrinsic protein tyrosine phosphatase activity and can autocatalytically dephosphorylate itself. Confirming the biological significance of this function, mutations that disrupt the phosphatase active site severely compromise the ability of Eyes absent to promote eye specification and development in Drosophila. 
Given the functional importance of phosphorylation-dependent modulation of transcription factor activity, this evidence for a nuclear transcriptional coactivator with intrinsic phosphatase activity suggests an unanticipated method of fine-tuning transcriptional regulation.}, keywords = {Amino Acid Motifs, Amino Acid Sequence, Animals, Antibodies, Phospho-Specific, Drosophila melanogaster, Drosophila Proteins, Embryonic Induction, eye, Eye Proteins, Gene Expression Regulation, Kinetics, Mice, Models, Molecular, Molecular Sequence Data, Mutation, Phosphorylation, Protein Conformation, Protein Tyrosine Phosphatases, Substrate Specificity, Transcription Factors}, author = {Tootle, Tina L. and Silver, Serena J. and Davies, Erin L. and Newman, Victoria and Latek, Robert R. and Mills, Ishara A. and J. Selengut and Parlikar, Beth E. W. and Rebay, Ilaria} } @article {38295, title = {Genome sequence and comparative analysis of the model rodent malaria parasite Plasmodium yoelii yoelii}, journal = {NatureNature}, volume = {419}, year = {2002}, type = {10.1038/nature01099}, abstract = {Species of malaria parasite that infect rodents have long been used as models for malaria disease research. Here we report the whole-genome shotgun sequence of one species, Plasmodium yoelii yoelii, and comparative studies with the genome of the human malaria parasite Plasmodium falciparum clone 3D7. A synteny map of 2,212 P. y. yoelii contiguous DNA sequences (contigs) aligned to 14 P. falciparum chromosomes reveals marked conservation of gene synteny within the body of each chromosome. Of about 5,300 P. falciparum genes, more than 3,300 P. y. yoelii orthologues of predominantly metabolic function were identified. Over 800 copies of a variant antigen gene located in subtelomeric regions were found. 
This is the first genome sequence of a model eukaryotic parasite, and it provides insight into the use of such systems in the modelling of Plasmodium biology and disease.}, isbn = {0028-0836}, author = {Carlton, Jane M. and Angiuoli, Samuel V. and Suh, Bernard B. and Kooij, Taco W. and Pertea, Mihaela and Silva, Joana C. and Ermolaeva, Maria D. and Allen, Jonathan E. and J. Selengut and Koo, Hean L. and Peterson, Jeremy D. and M. Pop and Kosack, Daniel S. and Shumway, Martin F. and Bidwell, Shelby L. and Shallom, Shamira J. and Aken, Susan E. van and Riedmuller, Steven B. and Feldblyum, Tamara V. and Cho, Jennifer K. and Quackenbush, John and Sedegah, Martha and Shoaibi, Azadeh and Cummings, Leda M. and Florens, Laurence and Yates, John R. and Raine, J. Dale and Sinden, Robert E. and Harris, Michael A. and Cunningham, Deirdre A. and Preiser, Peter R. and Bergman, Lawrence W. and Vaidya, Akhil B. and Lin, Leo H. van and Janse, Chris J. and Waters, Andrew P. and Smith, Hamilton O. and White, Owen R. and Salzberg, Steven L. and Venter, J. Craig and Fraser, Claire M. and Hoffman, Stephen L. and Gardner, Malcolm J. and Carucci, Daniel J.} } @article {38304, title = {Genome sequence of the human malaria parasite Plasmodium falciparum}, journal = {NatureNature}, volume = {419}, year = {2002}, note = {http://www.ncbi.nlm.nih.gov/pubmed/12368864?dopt=Abstract}, type = {10.1038/nature01097}, abstract = {The parasite Plasmodium falciparum is responsible for hundreds of millions of cases of malaria, and kills more than one million African children annually. Here we report an analysis of the genome sequence of P. falciparum clone 3D7. The 23-megabase nuclear genome consists of 14 chromosomes, encodes about 5,300 genes, and is the most (A + T)-rich genome sequenced to date. Genes involved in antigenic variation are concentrated in the subtelomeric regions of the chromosomes. 
Compared to the genomes of free-living eukaryotic microbes, the genome of this intracellular parasite encodes fewer enzymes and transporters, but a large proportion of genes are devoted to immune evasion and host-parasite interactions. Many nuclear-encoded proteins are targeted to the apicoplast, an organelle involved in fatty-acid and isoprenoid metabolism. The genome sequence provides the foundation for future studies of this organism, and is being exploited in the search for new drugs and vaccines to fight malaria.}, keywords = {Animals, Chromosome Structures, DNA Repair, DNA Replication, DNA, Protozoan, Evolution, Molecular, Genome, Protozoan, HUMANS, Malaria Vaccines, Malaria, Falciparum, Membrane Transport Proteins, Molecular Sequence Data, Plasmodium falciparum, Plastids, Proteome, Protozoan Proteins, Recombination, Genetic, Sequence Analysis, DNA}, author = {Gardner, Malcolm J. and Hall, Neil and Fung, Eula and White, Owen and Berriman, Matthew and Hyman, Richard W. and Carlton, Jane M. and Pain, Arnab and Nelson, Karen E. and Bowman, Sharen and Paulsen, Ian T. and James, Keith and Eisen, Jonathan A. and Rutherford, Kim and Salzberg, Steven L. and Craig, Alister and Kyes, Sue and Chan, Man-Suen and Nene, Vishvanath and Shallom, Shamira J. and Suh, Bernard and Peterson, Jeremy and Angiuoli, Sam and Pertea, Mihaela and Allen, Jonathan and J. Selengut and Haft, Daniel and Mather, Michael W. and Vaidya, Akhil B. and Martin, David M. A. and Fairlamb, Alan H. and Fraunholz, Martin J. and Roos, David S. and Ralph, Stuart A. and McFadden, Geoffrey I. and Cummings, Leda M. and Subramanian, G. Mani and Mungall, Chris and Venter, J. Craig and Carucci, Daniel J. and Hoffman, Stephen L. and Newbold, Chris and Davis, Ronald W. and Fraser, Claire M. 
and Barrell, Bart} } @article {38492, title = {Sequence of Plasmodium falciparum chromosomes 2, 10, 11 and 14}, journal = {NatureNature}, volume = {419}, year = {2002}, note = {http://www.ncbi.nlm.nih.gov/pubmed/12368868?dopt=Abstract}, type = {10.1038/nature01094}, abstract = {The mosquito-borne malaria parasite Plasmodium falciparum kills an estimated 0.7-2.7 million people every year, primarily children in sub-Saharan Africa. Without effective interventions, a variety of factors-including the spread of parasites resistant to antimalarial drugs and the increasing insecticide resistance of mosquitoes-may cause the number of malaria cases to double over the next two decades. To stimulate basic research and facilitate the development of new drugs and vaccines, the genome of Plasmodium falciparum clone 3D7 has been sequenced using a chromosome-by-chromosome shotgun strategy. We report here the nucleotide sequences of chromosomes 10, 11 and 14, and a re-analysis of the chromosome 2 sequence. These chromosomes represent about 35\% of the 23-megabase P. falciparum genome.}, keywords = {Animals, Chromosomes, DNA, Protozoan, Genome, Protozoan, Plasmodium falciparum, Proteome, Protozoan Proteins, Sequence Analysis, DNA}, author = {Gardner, Malcolm J. and Shallom, Shamira J. and Carlton, Jane M. and Salzberg, Steven L. and Nene, Vishvanath and Shoaibi, Azadeh and Ciecko, Anne and Lynn, Jeffery and Rizzo, Michael and Weaver, Bruce and Jarrahi, Behnam and Brenner, Michael and Parvizi, Babak and Tallon, Luke and Moazzez, Azita and Granger, David and Fujii, Claire and Hansen, Cheryl and Pederson, James and Feldblyum, Tamara and Peterson, Jeremy and Suh, Bernard and Angiuoli, Sam and Pertea, Mihaela and Allen, Jonathan and J. Selengut and White, Owen and Cummings, Leda M. and Smith, Hamilton O. and Adams, Mark D. and Venter, J. Craig and Carucci, Daniel J. and Hoffman, Stephen L. 
and Fraser, Claire M.} } @article {38368, title = {MDP-1 is a new and distinct member of the haloacid dehalogenase family of aspartate-dependent phosphohydrolases}, journal = {BiochemistryBiochemistry}, volume = {40}, year = {2001}, note = {http://www.ncbi.nlm.nih.gov/pubmed/11601995?dopt=Abstract}, abstract = {MDP-1 is a eukaryotic magnesium-dependent acid phosphatase with little sequence homology to previously characterized phosphatases. The presence of a conserved motif (Asp-X-Asp-X-Thr) in the N terminus of MDP-1 suggested a relationship to the haloacid dehalogenase (HAD) superfamily, which contains a number of magnesium-dependent acid phosphatases. These phosphatases utilize an aspartate nucleophile and contain a number of conserved active-site residues and hydrophobic patches, which can be plausibly aligned with conserved residues in MDP-1. Seven site-specific point mutants of MDP-1 were produced by modifying the catalytic aspartate, serine, and lysine residues to asparagine or glutamate, alanine, and arginine, respectively. The activity of these mutants confirms the assignment of MDP-1 as a member of the HAD superfamily. Detailed comparison of the sequence of the 15 MDP-1 sequences from various organisms with other HAD superfamily sequences suggests that MDP-1 is not closely related to any particular member of the superfamily. The crystal structures of several HAD family enzymes identify a domain proximal to the active site responsible for important interactions with low molecular weight substrates. The absence of this domain or any other that might perform the same function in MDP-1 suggests an "open" active site capable of interactions with large substrates such as proteins. 
This suggestion was experimentally confirmed by demonstration that MDP-1 is competent to catalyze the dephosphorylation of tyrosine-phosphorylated proteins.}, keywords = {Amino Acid Motifs, Amino Acid Sequence, Animals, Aspartic Acid, Catalytic Domain, HUMANS, Hydrolases, Mice, Molecular Sequence Data, Multigene Family, Mutagenesis, Site-Directed, Phosphoprotein Phosphatases, Protein Structure, Tertiary, Protein Tyrosine Phosphatases, Rats, Saccharomyces cerevisiae, sequence alignment, Sequence Homology, Amino Acid}, author = {J. Selengut} } @article {38143, title = {Carbonic anhydrase III: the phosphatase activity is extrinsic}, journal = {Archives of biochemistry and biophysicsArchives of biochemistry and biophysics}, volume = {377}, year = {2000}, note = {http://www.ncbi.nlm.nih.gov/pubmed/10845711?dopt=Abstract}, type = {10.1006/abbi.2000.1793}, abstract = {The carbonic anhydrases reversibly hydrate carbon dioxide to yield bicarbonate and hydrogen ion. They have a variety of physiological functions, although the specific roles of each of the 10 known isozymes are unclear. Carbonic anhydrase isozyme III is particularly rich in skeletal muscle and adipocytes, and it is unique among the isozymes in also exhibiting phosphatase activity. Previously published studies provided evidence that the phosphatase activity was intrinsic to carbonic anhydrase III, that it had specificity for tyrosine phosphate, and that activity was regulated by reversible glutathionylation of cysteine186. To study the mechanism of this phosphatase, we cloned and expressed the rat liver carbonic anhydrase III. The purified recombinant had the same specific activity as the carbonic anhydrase purified from rat liver, but it had virtually no phosphatase activity. We attempted to identify an activator of the phosphatase in rat liver and found a protein of approximately 14 kDa, the amount of which correlated with the phosphatase activity of the carbonic anhydrase III fractions. 
It was identified as liver fatty acid binding protein, which was then purified to test for activity as an activator of the phosphatase and for protein-protein interaction, but neither binding nor activation could be demonstrated. Immunoprecipitation experiments established that carbonic anhydrase III could be separated from the phosphatase activity. Finally, adding additional purification steps completely separated the phosphatase activity from the carbonic anhydrase activity. We conclude that the phosphatase activity previously considered to be intrinsic to carbonic anhydrase III is actually extrinsic. Thus, this isozyme exhibits only the carbon dioxide hydratase and esterase activities characteristic of the other mammalian isozymes, and the phosphatase previously shown to be activated by glutathionylation is not carbonic anhydrase III.}, keywords = {Animals, Carbonic Anhydrases, Chromatography, High Pressure Liquid, Cloning, Molecular, Enzyme Activation, Glutathione, Kinetics, Liver, Male, Muscles, Phosphoric Monoester Hydrolases, Precipitin Tests, Rabbits, Rats, Rats, Inbred F344, Recombinant Proteins, Spectrometry, Mass, Matrix-Assisted Laser Desorption-Ionization, Time factors}, author = {Kim, G. and J. Selengut and Levine, R. L.} } @article {38369, title = {MDP-1: A novel eukaryotic magnesium-dependent phosphatase}, journal = {BiochemistryBiochemistry}, volume = {39}, year = {2000}, note = {http://www.ncbi.nlm.nih.gov/pubmed/10889041?dopt=Abstract}, abstract = {We report here the purification, cloning, expression, and characterization of a novel phosphatase, MDP-1. In the course of investigating the reported acid phosphatase activity of carbonic anhydrase III preparations, several discrete phosphatases were discerned. One of these, a magnesium-dependent species of 18.6 kDa, was purified to homogeneity and yielded several peptide sequences from which the parent gene was identified by database searching. 
Although orthologous genes were identified in fungi and plants as well as mammalian species, there was no apparent homology to any known family of phosphatases. The enzyme was expressed in Escherichia coli with a fusion tag and purified by affinity methods. The recombinant enzyme showed magnesium-dependent acid phosphatase activity comparable to the originally isolated rabbit protein. The enzyme catalyzes the rapid hydrolysis of p-nitrophenyl phosphate, ribose-5-phosphate, and phosphotyrosine. The selectivity for phosphotyrosine over phosphoserine or phosphothreonine is considerable, but the enzyme did not show activity toward five phosphotyrosine-containing peptides. None of the various substrates assayed (including various nucleotide, sugar, amino acid and peptide phosphates, phosphoinositides, and phosphodiesters) exhibited K(M) values lower than 1 mM, and many showed negligible rates of hydrolysis. The enzyme is inhibited by vanadate and fluoride but not by azide, cyanide, calcium, lithium, or tartaric acid. Chemical labeling, refolding, dialysis, and mutagenesis experiments suggest that the enzymatic mechanism is not dependent on cysteine, histidine, or nonmagnesium metal ions. In recognition of these observations, the enzyme has been given the name magnesium-dependent phosphatase-1 (MDP-1).}, keywords = {Amino Acid Sequence, Animals, Catalysis, Cations, Chromatography, Affinity, Cloning, Molecular, Cysteine, Enzyme Inhibitors, Histidine, Hydrogen-Ion Concentration, Magnesium, Mice, Molecular Sequence Data, Phosphoprotein Phosphatases, Protein Phosphatase 1, Rabbits, Sequence Analysis, Protein, Sequence Homology, Amino Acid, Substrate Specificity}, author = {J. Selengut and Levine, R. 
L.} } @article {38220, title = {The effect of calprotectin on the nucleation and growth of struvite crystals as assayed by light microscopy in real-time}, journal = {The Journal of urologyThe Journal of urology}, volume = {159}, year = {1998}, note = {http://www.ncbi.nlm.nih.gov/pubmed/9507889?dopt=Abstract}, abstract = {PURPOSE: To use light microscopy to observe the urease-induced growth of struvite crystals in real-time, and to compare the effects of various proteins on that growth. MATERIALS AND METHODS: Artificial urine, with and without citrate, and a minimal urine solution containing only urea and the components of struvite and apatite were incubated with urease and test proteins in the depressions of culture slides. The number and size of rectangular and X-shaped struvite crystals were recorded using a low-power phase contrast microscope. RESULTS: The formation of crystalline struvite appears to occur after the formation of an amorphous calcium- and magnesium-containing phase. The extent of this amorphous phase is dependent on the presence of calcium and citrate, both of which strongly promote its formation over the crystalline phase. alpha-globulin, gamma-globulin and chymotrypsin inhibitor all result in the same amount of crystalline struvite as bovine serum albumin which is used as a control. Calprotectin, on the other hand, causes consistent and significant reductions in the number and size of struvite crystals under a wide range of conditions. No changes in the morphology of the struvite crystals were observed. CONCLUSIONS: Calprotectin, the dominant protein of infection stone matrix, has distinctive properties which affect the formation and growth of struvite crystals. The presence of citrate in synthetic urine dramatically reduces the number of struvite crystals observed. 
The present method for observing the effects of putative infection stone inhibitors appears to have merit.}, keywords = {Crystallization, Dose-Response Relationship, Drug, Leukocyte L1 Antigen Complex, Magnesium Compounds, Neural Cell Adhesion Molecules, Phosphates, Time factors}, author = {Asakura, H. and J. Selengut and Orme-Johnson, W. H. and Dretler, S. P.} } @article {38336, title = {Identification of the calcium-binding protein calgranulin in the matrix of struvite stones}, journal = {Journal of endourology / Endourological SocietyJournal of endourology / Endourological Society}, volume = {8}, year = {1994}, note = {http://www.ncbi.nlm.nih.gov/pubmed/8061680?dopt=Abstract}, abstract = {The identification of calcium-binding proteins in urine and kidney stones has led to a closer look at the role of matrix proteins in urolithiasis. We analyzed five struvite stones for protein content and identified two bands (8 and 14 KDa) that were confirmed by gel electrophoresis and amino acid sequencing to be calgranulin. This protein, which is known by several other names, has bacteriostatic antifungal activity. Its role in the formation of struvite stones warrants further investigation.}, keywords = {Amino Acid Sequence, Calcium-Binding Proteins, Cell Adhesion Molecules, Neuronal, Electrophoresis, Enzyme-Linked Immunosorbent Assay, HUMANS, Kidney Calculi, Leukocyte L1 Antigen Complex, Magnesium Compounds, Molecular Sequence Data, Phosphates}, author = {Bennett, J. and Dretler, S. P. and J. Selengut and Orme-Johnson, W. H.} }