% Bibliography database, one field per line.
% Citation keys (numeric) are unchanged so existing citations keep resolving.
% Lines starting with a percent sign sit outside every entry and are ignored.
% Conventions used below:
%   - month uses the unquoted three-letter macros (jan .. dec)
%   - page ranges use a double hyphen
%   - DOIs live in the doi field (bare, no resolver prefix), ISSNs in issn
%   - names are written as Last, First

% NOTE(review): the page range and the _14 DOI suffix suggest this record is a
% single chapter, not a whole book. The entry type is kept because the source
% record carries no booktitle; confirm and convert if appropriate.
% The original doi and url fields each contained two values fused together;
% the chapter DOI and the first URL are kept.
@book{49820,
  author       = {Ghurye, Jay and Pop, Mihai},
  title        = {Better Identification of Repeats in Metagenomic Scaffolding},
  series       = {Lecture Notes in Computer Science},
  volume       = {9838},
  year         = {2016},
  pages        = {174--184},
  publisher    = {Springer International Publishing},
  organization = {Springer International Publishing},
  address      = {Cham},
  isbn         = {978-3-319-43680-7},
  issn         = {0302-9743},
  doi          = {10.1007/978-3-319-43681-4_14},
  url          = {http://link.springer.com/10.1007/978-3-319-43681-4},
}

% NOTE(review): month reconstructed from the garbled value Dec-10-2015, read as
% day 12 / month 10 by analogy with the PNAS records below; confirm.
@article{49623,
  author       = {Das, Avinash and Morley, Michael and Moravec, Christine S. and Tang, W. H. W. and Hakonarson, Hakon and Ashley, Euan A. and Brandimarto, Jeffrey and Hu, Ray and Li, Mingyao and Li, Hongzhe and Liu, Yichuan and Qu, Liming and Sanchez, Pablo and Margulies, Kenneth B. and Cappola, Thomas P. and Jensen, Shane and Hannenhalli, Sridhar},
  title        = {{Bayesian} integration of genetics and epigenetics detects causal regulatory {SNPs} underlying expression variability},
  journal      = {Nature Communications},
  volume       = {6},
  year         = {2015},
  month        = oct,
  pages        = {8555},
  doi          = {10.1038/ncomms9555},
  url          = {http://www.nature.com/doifinder/10.1038/ncomms9555},
}

% The journal field was missing; it is taken from the DOI prefix.
@article{49599,
  author       = {Ye, Chengxi and Hsiao, Chiaowen and Corrada Bravo, Hector},
  title        = {{BlindCall}: ultra-fast base-calling of high-throughput sequencing data by blind deconvolution},
  journal      = {Bioinformatics},
  volume       = {30},
  year         = {2014},
  month        = may,
  pages        = {1214--1219},
  issn         = {1367-4811},
  doi          = {10.1093/bioinformatics/btu010},
  keywords     = {algorithms, High-Throughput Nucleotide Sequencing, HUMANS, Probability, Reproducibility of Results, Sequence Analysis, DNA, software, Time factors},
  abstract     = {
MOTIVATION: Base-calling of sequencing data produced by high-throughput sequencing platforms is a fundamental process in current bioinformatics analysis. However, existing third-party probabilistic or machine-learning methods that significantly improve the accuracy of base-calls on these platforms are impractical for production use due to their computational inefficiency.
RESULTS: We directly formulate base-calling as a blind deconvolution problem and implemented BlindCall as an efficient solver to this inverse problem. BlindCall produced base-calls at accuracy comparable to state-of-the-art probabilistic methods while processing data at rates 10 times faster in most cases. The computational complexity of BlindCall scales linearly with read length making it better suited for new long-read sequencing technologies.
},
}

% Records 49741, 49551 and 49550 describe the same article (identical DOI).
% All three are kept because any of the keys may be cited elsewhere.
@article{49741,
  author       = {Lee, Song Hee and Kalejta, Robert F and Kerry, Julie and Semmes, Oliver John and O'Connor, Christine M and Khan, Zia and Garcia, Benjamin A and Shenk, Thomas and Murphy, Eain},
  title        = {{BclAF1} restriction factor is neutralized by proteasomal degradation and {microRNA} repression during human cytomegalovirus infection},
  journal      = {Proceedings of the National Academy of Sciences},
  volume       = {109},
  year         = {2012},
  month        = jun,
  pages        = {9575--9580},
  issn         = {1091-6490},
  doi          = {10.1073/pnas.1207496109},
  keywords     = {Cytomegalovirus, Cytomegalovirus Infections, Genes, Immediate-Early, HUMANS, Hydrolysis, MicroRNAs, Proteasome Endopeptidase Complex, Repressor Proteins, Tumor Suppressor Proteins},
  abstract     = {Cell proteins can restrict the replication of viruses. Here, we identify the cellular BclAF1 protein as a human cytomegalovirus restriction factor and describe two independent mechanisms the virus uses to decrease its steady-state levels. Immediately following infection, the viral pp71 and UL35 proteins, which are delivered to cells within virions, direct the proteasomal degradation of BclAF1. Although BclAF1 reaccumulates through the middle stages of infection, it is subsequently down-regulated at late times by miR-UL112-1, a virus-encoded microRNA. In the absence of BclAF1 neutralization, viral gene expression and replication are inhibited. These data identify two temporally and mechanistically distinct functions used by human cytomegalovirus to down-regulate a cellular antiviral protein.
},
}

% Duplicate of 49741 / 49550. The journal field was missing and is copied from 49550.
@article{49551,
  author       = {Lee, S. H. and Kalejta, R. F. and Kerry, J. and Semmes, O. J. and O'Connor, C. M. and Khan, Z. and Garcia, B. A. and Shenk, T. and Murphy, E.},
  title        = {{BclAF1} restriction factor is neutralized by proteasomal degradation and {microRNA} repression during human cytomegalovirus infection},
  journal      = {Proceedings of the National Academy of Sciences},
  volume       = {109},
  year         = {2012},
  month        = jun,
  pages        = {9575--9580},
  issn         = {0027-8424},
  doi          = {10.1073/pnas.1207496109},
  url          = {http://www.pnas.org/cgi/doi/10.1073/pnas.1207496109},
}

% Duplicate of 49741 / 49551.
@article{49550,
  author       = {Lee, S. H. and Kalejta, R. F. and Kerry, J. and Semmes, O. J. and O'Connor, C. M. and Khan, Z. and Garcia, B. A. and Shenk, T. and Murphy, E.},
  title        = {{BclAF1} restriction factor is neutralized by proteasomal degradation and {microRNA} repression during human cytomegalovirus infection},
  journal      = {Proceedings of the National Academy of Sciences},
  volume       = {109},
  year         = {2012},
  month        = jun,
  pages        = {9575--9580},
  issn         = {0027-8424},
  doi          = {10.1073/pnas.1207496109},
  url          = {http://www.pnas.org/cgi/doi/10.1073/pnas.1207496109},
}

@article{38128,
  author       = {Ayres, Daniel L. and Darling, Aaron and Zwickl, Derrick J. and Beerli, Peter and Holder, Mark T. and Lewis, Paul O. and Huelsenbeck, John P. and Ronquist, Fredrik and Swofford, David L. and Cummings, Michael P. and Rambaut, Andrew and Suchard, Marc A.},
  title        = {{BEAGLE}: An Application Programming Interface and High-Performance Computing Library for Statistical Phylogenetics},
  journal      = {Systematic Biology},
  volume       = {61},
  year         = {2012},
  issn         = {1063-5157, 1076-836X},
  doi          = {10.1093/sysbio/syr100},
  keywords     = {Bayesian phylogenetics, gpu, maximum likelihood, parallel computing},
  abstract     = {Phylogenetic inference is fundamental to our understanding of most aspects of the origin and evolution of life, and in recent years, there has been a concentration of interest in statistical approaches such as Bayesian inference and maximum likelihood estimation. Yet, for large data sets and realistic or interesting models of evolution, these approaches remain computationally demanding. High-throughput sequencing can yield data for thousands of taxa, but scaling to such problems using serial computing often necessitates the use of nonstatistical or approximate approaches. The recent emergence of graphics processing units (GPUs) provides an opportunity to leverage their excellent floating-point computational performance to accelerate statistical phylogenetic inference. A specialized library for phylogenetic calculation would allow existing software packages to make more effective use of available computer hardware, including GPUs. Adoption of a common library would also make it easier for other emerging computing architectures, such as field programmable gate arrays, to be used in the future. We present BEAGLE, an application programming interface (API) and library for high-performance statistical phylogenetic inference. The API provides a uniform interface for performing phylogenetic likelihood calculations on a variety of compute hardware platforms. The library includes a set of efficient implementations and can currently exploit hardware including GPUs using NVIDIA CUDA, central processing units (CPUs) with Streaming SIMD Extensions and related processor supplementary instruction sets, and multicore CPUs via OpenMP. To demonstrate the advantages of a common API, we have incorporated the library into several popular phylogenetic software packages. The BEAGLE library is free open source software licensed under the Lesser GPL and available from http://beagle-lib.googlecode.com. An example client program is available as public domain software.},
}

@article{38133,
  author       = {Gevers, Dirk and Pop, M. and Schloss, Patrick D. and Huttenhower, Curtis},
  title        = {Bioinformatics for the {Human Microbiome Project}},
  journal      = {PLOS Computational Biology},
  volume       = {8},
  year         = {2012},
  publisher    = {Public Library of Science},
  issn         = {1553-7358},
}

@article{38125,
  author       = {Rasko, D. A. and Worsham, P. L. and Abshire, T. G. and Stanley, S. T. and Bannan, J. D. and Wilson, M. R. and Langham, R. J. and Decker, R. S. and Jiang, L. and Read, T. D. and others},
  title        = {{Bacillus} anthracis comparative genome analysis in support of the {Amerithrax} investigation},
  journal      = {Proceedings of the National Academy of Sciences},
  volume       = {108},
  year         = {2011},
  publisher    = {National Acad Sciences},
}

@article{38127,
  author       = {Koren, Sergey and Treangen, Todd and Pop, M.},
  title        = {{Bambus} 2: Scaffolding Metagenomes},
  journal      = {Bioinformatics},
  volume       = {27},
  year         = {2011},
  issn         = {1367-4803, 1460-2059},
  doi          = {10.1093/bioinformatics/btr520},
  abstract     = {Motivation: Sequencing projects increasingly target samples from non-clonal sources. In particular, metagenomics has enabled scientists to begin to characterize the structure of microbial communities. The software tools developed for assembling and analyzing sequencing data for clonal organisms are, however, unable to adequately process data derived from non-clonal sources. Results: We present a new scaffolder, Bambus 2, to address some of the challenges encountered when analyzing metagenomes. Our approach relies on a combination of a novel method for detecting genomic repeats and algorithms that analyze assembly graphs to identify biologically meaningful genomic variants. We compare our software to current assemblers using simulated and real data. We demonstrate that the repeat detection algorithms have higher sensitivity than current approaches without sacrificing specificity. In metagenomic datasets, the scaffolder avoids false joins between distantly related organisms while obtaining long-range contiguity. Bambus 2 represents a first step toward automated metagenomic assembly. Availability: Bambus 2 is open source and available from http://amos.sf.net. Contact: mpop@umiacs.umd.edu Supplementary Information: Supplementary data are available at Bioinformatics online.},
}

@article{38140,
  author       = {Cummings, Michael P. and Temple, G. G.},
  title        = {Broader incorporation of bioinformatics in education: opportunities and challenges},
  journal      = {Brief Bioinform},
  volume       = {11},
  year         = {2010},
  abstract     = {The major opportunities for broader incorporation of bioinformatics in education can be placed into three general categories: general applicability of bioinformatics in life science and related curricula; inherent fit of bioinformatics for promoting student learning in most biology programs; and the general experience and associated comfort students have with computers and technology. Conversely, the major challenges for broader incorporation of bioinformatics in education can be placed into three general categories: required infrastructure and logistics; instructor knowledge of bioinformatics and continuing education; and the breadth of bioinformatics, and the diversity of students and educational objectives. Broader incorporation of bioinformatics at all education levels requires overcoming the challenges to using transformative computer-requiring learning activities, assisting faculty in collecting assessment data on mastery of student learning outcomes, as well as creating more faculty development opportunities that span diverse skill levels, with an emphasis placed on providing resource materials that are kept up-to-date as the field and tools change.},
}

@article{38135,
  author       = {Jakupciak, John P. and Colwell, Rita R.},
  title        = {Biological agent detection technologies},
  journal      = {Molecular Ecology Resources},
  volume       = {9},
  year         = {2009},
  issn         = {1755-0998},
  doi          = {10.1111/j.1755-0998.2009.02632.x},
  keywords     = {barcoding, biological agent, DETECTION, identification, sequencing},
  abstract     = {The challenge for first responders, physicians in the emergency room, public health personnel, as well as for food manufacturers, distributors and retailers is accurate and reliable identification of pathogenic agents and their corresponding diseases. This is the weakest point in biological agent detection capability today. There is intense research for new molecular detection technologies that could be used for very accurate detection of pathogens that would be a concern to first responders. These include the need for sensors for multiple applications as varied as understanding the ecology of pathogenic micro-organisms, forensics, environmental sampling for detect-to-treat applications, biological sensors for {\textquoteleft}detect to warn{\textquoteright} in infrastructure protection, responses to reports of {\textquoteleft}suspicious powders{\textquoteright}, and customs and borders enforcement, to cite a few examples. The benefits of accurate detection include saving millions of dollars annually by reducing disruption of the workforce and the national economy and improving delivery of correct countermeasures to those who are most in need of the information to provide protective and/or response measures.},
}

@article{38130,
  author       = {Huq, Anwar and Whitehouse, Chris A. and Grim, Christopher J. and Alam, Munirul and Colwell, Rita R.},
  title        = {Biofilms in water, its role and impact in human disease transmission},
  journal      = {Current Opinion in Biotechnology},
  volume       = {19},
  year         = {2008},
  issn         = {0958-1669},
  doi          = {10.1016/j.copbio.2008.04.005},
  abstract     = {Understanding the mechanism of biofilm formation is the first step in determining its function and, thereby, its impact and role in the environment. Extensive studies accomplished during the past few years have elucidated the genetics and biochemistry of biofilm formation. Cell-to-cell communication, that is, quorum sensing, is a key factor in the initiation of biofilm. Occurrence of viable but nonculturable bacteria, including Vibrio cholerae in biofilms has been reported and most likely such cells were overlooked previously because appropriate methods of detection were not employed. For this reason discovery and investigation of this important bacterial ecological niche in the environment were impeded.},
}

@article{38132,
  author       = {Pop, M. and Salzberg, Steven L.},
  title        = {Bioinformatics challenges of new sequencing technology},
  journal      = {Trends in Genetics},
  volume       = {24},
  year         = {2008},
  issn         = {0168-9525},
  doi          = {10.1016/j.tig.2007.12.006},
  abstract     = {New DNA sequencing technologies can sequence up to one billion bases in a single day at low cost, putting large-scale sequencing within the reach of many scientists. Many researchers are forging ahead with projects to sequence a range of species using the new technologies. However, these new technologies produce read lengths as short as 35--40 nucleotides, posing challenges for genome assembly and annotation. Here we review the challenges and describe some of the bioinformatics systems that are being proposed to solve them. We specifically address issues arising from using these technologies in assembly projects, both de novo and for resequencing purposes, as well as efforts to improve genome annotation in the fragmented assemblies produced by short read lengths.},
}

% NOTE(review): the title looks like a section heading rather than an article
% title; left as exported.
@article{38134,
  author       = {Hannenhalli, Sridhar},
  title        = {BIOINFORMATICS REVIEW},
  journal      = {Bioinformatics},
  volume       = {24},
  year         = {2008},
}

@article{38129,
  author       = {Grand, Joanna and Cummings, Michael P. and Rebelo, Tony G. and Ricketts, Taylor H. and Neel, Maile C.},
  title        = {Biased data reduce efficiency and effectiveness of conservation reserve networks},
  journal      = {Ecology Letters},
  volume       = {10},
  year         = {2007},
  issn         = {1461-0248},
  doi          = {10.1111/j.1461-0248.2007.01025.x},
  keywords     = {Bias, biodiversity conservation, complementarity, efficiency, marxan, rarity, reserve networks, reserve selection algorithms, species detection},
  abstract     = {Complementarity-based reserve selection algorithms efficiently prioritize sites for biodiversity conservation, but they are data-intensive and most regions lack accurate distribution maps for the majority of species. We explored implications of basing conservation planning decisions on incomplete and biased data using occurrence records of the plant family Proteaceae in South Africa. Treating this high-quality database as {\textquoteleft}complete{\textquoteright}, we introduced three realistic sampling biases characteristic of biodiversity databases: a detectability sampling bias and two forms of roads sampling bias. We then compared reserve networks constructed using complete, biased, and randomly sampled data. All forms of biased sampling performed worse than both the complete data set and equal-effort random sampling. Biased sampling failed to detect a median of 1{\textendash}5\% of species, and resulted in reserve networks that were 9{\textendash}17\% larger than those designed with complete data. Spatial congruence and the correlation of irreplaceability scores between reserve networks selected with biased and complete data were low. Thus, reserve networks based on biased data require more area to protect fewer species and identify different locations than those selected with randomly sampled or complete data.},
}

@article{38136,
  author       = {Lee, S. and Wang, T. D. and Hashmi, N. and Cummings, Michael P.},
  title        = {{Bio-STEER}: A {Semantic Web} workflow tool for {Grid} computing in the life sciences},
  journal      = {Future Generation Comp Syst},
  volume       = {23},
  year         = {2007},
  doi          = {10.1016/j.future.2006.07.011},
  keywords     = {client/server, distributed, ENVIRONMENTS, integrated, interface, management, semantics, services, systems, user, web-base, workflow},
  abstract     = {Life science research is becoming evermore computationally intensive. Hence, from a computational resource perspective, Grid computing provides a logical approach to meeting many of the computational needs of life science research. However, there are several barriers to the widespread use of Grid computing in life sciences. In this paper, we attempt to address one particular barrier: the difficulty of using Grid computing by life scientists. Life science research often involves connecting multiple applications together to form a workflow. This process of constructing a workflow is complex. When combined with the difficulty of using Grid services, composing a meaningful workflow using Grid services can present a challenge to life scientists. Our proposed solution is a Semantic Web-enabled computing environment, called Bio-STEER. In BioSTEER, bioinformatics Grid services are mapped to Semantic Web services, described in OWL-S. We also defined an ontology in OWL to model bioinformatics applications. A graphical user interface helps to construct a scientific workflow by showing a list of services that are semantically sound: that is, the output of one service is semantically compatible with the input of the connecting service. Bio-STEER can help users take full advantage of Grid services through a user-friendly graphical user interface (GUI), which allows them to easily construct the workflows they need. (c) 2006 Elsevier B.V. All rights reserved.},
}

% NOTE(review): this record has authors and a paper-level DOI, so it is
% presumably a single conference paper rather than a whole proceedings volume.
% The entry type is kept because no booktitle is available here; confirm.
% The exported month field only repeated the year and has been dropped.
@proceedings{38139,
  author       = {Shneiderman, Ben and Colwell, Rita R. and Diamond, Sara and Greenhalgh, Paul and Wulf, William},
  title        = {Bridging art and science with creativity support tools},
  year         = {2007},
  publisher    = {ACM},
  address      = {New York, NY, USA},
  isbn         = {978-1-59593-712-4},
  doi          = {10.1145/1254960.1255044},
}

% NOTE(review): article entry without a journal field; the venue is not
% recorded in the source data.
@article{38131,
  author       = {Simola, D. F. and Bucan, M. and Dalva, M. and Hannenhalli, Sridhar and Liebhaber, S. and Ungar, L.},
  title        = {Bioinformatic Prediction of {mRNA} Targets of the {Fragile X} Mental Retardation Protein},
  year         = {2005},
}

% Contribution to an edited volume (author plus editor plus booktitle), hence
% incollection rather than inbook.
@incollection{49521,
  author       = {Cummings, Michael P.},
  title        = {{BAMBE}, {DnaSP}, {ENCprime/SeqCount}, {LAMARC}, {MacClade}, {MEGA}, {Modeltest}, {MrBayes}, {PAML}, {PAUP*}, {PHYLIP}, r8s, readseq, {Seq-Gen}, {Sites}, {TreeView}},
  booktitle    = {Dictionary of Bioinformatics},
  editor       = {Hancock, J. M. and Zvelebil, M. J.},
  year         = {2004},
  pages        = {39--40, 123--124, 146, 288--289, 305, 318, 337, 352, 388, 392, 398--399, 455, 457, 502, 522, 568},
  publisher    = {Wiley-Liss},
  organization = {Wiley-Liss},
  address      = {Hoboken},
}

% NOTE(review): 38138 and 49519 look like two records of the same Heredity 93
% (2004) piece; both are kept because either key may be cited elsewhere.
% The abstract below reads like the journal's own description, not a summary
% of the article; left as exported.
@article{38138,
  author       = {Cummings, Michael P.},
  title        = {A book like its cover},
  journal      = {Heredity},
  volume       = {93},
  year         = {2004},
  issn         = {0018-067X},
  doi          = {10.1038/sj.hdy.6800475},
  keywords     = {animal and plant breeding, biometrical and statistical genetics, cytogenetics, ecological, eukaryotes, Genetics, Genomics, human population genetics, population and evolutionary genetics, post-genomics},
  abstract     = {An official journal of the Genetics Society, Heredity publishes high-quality articles describing original research and theoretical insights in all areas of genetics. Research papers are complimented by News \& Commentary articles and reviews, keeping researchers and students abreast of hot topics in the field.},
}

@article{49519,
  author       = {Cummings, Michael P.},
  title        = {A book like its cover --- {The Phylogenetic Handbook: A Practical Approach to DNA and Protein Phylogeny}, Edited by {M. Salemi} and {A.-M. Vandamme}},
  journal      = {Heredity},
  volume       = {93},
  year         = {2004},
  month        = aug,
  pages        = {234--235},
}

@article{38126,
  author       = {Hannenhalli, Sridhar and Hayes, William S. and Hatzigeorgiou, Artemis G. and Fickett, James W.},
  title        = {Bacterial Start Site Prediction},
  journal      = {Nucleic Acids Research},
  volume       = {27},
  year         = {1999},
  issn         = {0305-1048, 1362-4962},
  doi          = {10.1093/nar/27.17.3577},
  abstract     = {With the growing number of completely sequenced bacterial genes, accurate gene prediction in bacterial genomes remains an important problem. Although the existing tools predict genes in bacterial genomes with high overall accuracy, their ability to pinpoint the translation start site remains unsatisfactory. In this paper, we present a novel approach to bacterial start site prediction that takes into account multiple features of a potential start site, viz., ribosome binding site (RBS) binding energy, distance of the RBS from the start codon, distance from the beginning of the maximal ORF to the start codon, the start codon itself and the coding/non-coding potential around the start site. Mixed integer programing was used to optimize the discriminatory system. The accuracy of this approach is up to 90\%, compared to 70\%, using the most common tools in fully automated mode (that is, without expert human post-processing of results). The approach is evaluated using Bacillus subtilis, Escherichia coli and Pyrococcus furiosus. These three genomes cover a broad spectrum of bacterial genomes, since B.subtilis is a Gram-positive bacterium, E.coli is a Gram-negative bacterium and P.furiosus is an archaebacterium. A significant problem is generating a set of {\textquoteleft}true{\textquoteright} start sites for algorithm training, in the absence of experimental work. We found that sequence conservation between P.furiosus and the related Pyrococcus horikoshii clearly delimited the gene start in many cases, providing a sufficient training set.},
}