@comment{Haft et al. 2005: the Genome Properties system paper. The DOI is kept in the doi field and the PubMed abstract link in the url field.}
@article{38294,
  author   = {Haft, Daniel H. and Selengut, J. and Brinkac, Lauren M. and Zafar, Nikhat and White, Owen},
  title    = {{Genome Properties}: a system for the investigation of prokaryotic genetic content for microbiology, genome annotation and comparative genomics},
  journal  = {Bioinformatics (Oxford, England)},
  volume   = {21},
  year     = {2005},
  doi      = {10.1093/bioinformatics/bti015},
  url      = {http://www.ncbi.nlm.nih.gov/pubmed/15347579?dopt=Abstract},
  abstract = {MOTIVATION: The presence or absence of metabolic pathways and structures provide a context that makes protein annotation far more reliable. Compiling such information across microbial genomes improves the functional classification of proteins and provides a valuable resource for comparative genomics. RESULTS: We have created a Genome Properties system to present key aspects of prokaryotic biology using standardized computational methods and controlled vocabularies. Properties reflect gene content, phenotype, phylogeny and computational analyses. The results of searches using hidden Markov models allow many properties to be deduced automatically, especially for families of proteins (equivalogs) conserved in function since their last common ancestor. Additional properties are derived from curation, published reports and other forms of evidence. Genome Properties system was applied to 156 complete prokaryotic genomes, and is easily mined to find differences between species, correlations between metabolic features and families of uncharacterized proteins, or relationships among properties. AVAILABILITY: Genome Properties can be found at http://www.tigr.org/Genome_Properties SUPPLEMENTARY INFORMATION: http://www.tigr.org/tigr-scripts/CMR2/genome_properties_references.spl.},
  keywords = {Chromosome mapping, database management systems, Databases, Genetic, documentation, Gene Expression Profiling, Gene Expression Regulation, Genomics, Information Storage and Retrieval, Microbiological Techniques, natural language processing, Prokaryotic Cells, Proteome, signal transduction, software, User-Computer Interface, Vocabulary, Controlled},
}

@comment{Haft et al. 2005: survey of 45 CRISPR-associated (Cas) protein families. The DOI is kept in the doi field and the PubMed abstract link in the url field.}
@article{38325,
  author   = {Haft, Daniel H. and Selengut, J. and Mongodin, Emmanuel F. and Nelson, Karen E.},
  title    = {A guild of 45 {CRISPR}-associated ({Cas}) protein families and multiple {CRISPR/Cas} subtypes exist in prokaryotic genomes},
  journal  = {PLOS Computational Biology},
  volume   = {1},
  year     = {2005},
  doi      = {10.1371/journal.pcbi.0010060},
  url      = {http://www.ncbi.nlm.nih.gov/pubmed/16292354?dopt=Abstract},
  abstract = {Clustered regularly interspaced short palindromic repeats (CRISPRs) are a family of DNA direct repeats found in many prokaryotic genomes. Repeats of 21-37 bp typically show weak dyad symmetry and are separated by regularly sized, nonrepetitive spacer sequences. Four CRISPR-associated (Cas) protein families, designated Cas1 to Cas4, are strictly associated with CRISPR elements and always occur near a repeat cluster. Some spacers originate from mobile genetic elements and are thought to confer "immunity" against the elements that harbor these sequences. In the present study, we have systematically investigated uncharacterized proteins encoded in the vicinity of these CRISPRs and found many additional protein families that are strictly associated with CRISPR loci across multiple prokaryotic species. Multiple sequence alignments and hidden Markov models have been built for 45 Cas protein families. These models identify family members with high sensitivity and selectivity and classify key regulators of development, DevR and DevS, in Myxococcus xanthus as Cas proteins. These identifications show that CRISPR/cas gene regions can be quite large, with up to 20 different, tandem-arranged cas genes next to a repeat cluster or filling the region between two repeat clusters. Distinctive subsets of the collection of Cas proteins recur in phylogenetically distant species and correlate with characteristic repeat periodicity. The analyses presented here support initial proposals of mobility of these units, along with the likelihood that loci of different subtypes interact with one another as well as with host cell defensive, replicative, and regulatory systems. It is evident from this analysis that CRISPR/cas loci are larger, more complex, and more heterogeneous than previously appreciated.},
  keywords = {Genes, Archaeal, Genes, Bacterial, Genes, Fungal, Genome, Genome, Bacterial, Haloarcula marismortui, Markov chains, Multigene Family, Oligonucleotide Array Sequence Analysis, Phylogeny, Prokaryotic Cells, Proteins, Repetitive Sequences, Nucleic Acid, Yersinia pestis},
}