@article {49605,
	title = {Minfi: a flexible and comprehensive {Bioconductor} package for the analysis of {Infinium} {DNA} methylation microarrays},
	journal = {Bioinformatics},
	volume = {30},
	year = {2014},
	month = may,
	pages = {1363--1369},
	abstract = {MOTIVATION: The recently released Infinium HumanMethylation450 array (the {\textquoteleft}450k{\textquoteright} array) provides a high-throughput assay to quantify DNA methylation (DNAm) at \~{}450 000 loci across a range of genomic features. Although less comprehensive than high-throughput sequencing-based techniques, this product is more cost-effective and promises to be the most widely used DNAm high-throughput measurement technology over the next several years. RESULTS: Here we describe a suite of computational tools that incorporate state-of-the-art statistical techniques for the analysis of DNAm data. The software is structured to easily adapt to future versions of the technology. We include methods for preprocessing, quality assessment and detection of differentially methylated regions from the kilobase to the megabase scale. We show how our software provides a powerful and flexible development platform for future methods. We also illustrate how our methods empower the technology to make discoveries previously thought to be possible only with sequencing-based methods. AVAILABILITY AND IMPLEMENTATION: http://bioconductor.org/packages/release/bioc/html/minfi.html. CONTACT: khansen@jhsph.edu; rafa@jimmy.harvard.edu SUPPLEMENTARY INFORMATION: Supplementary data are available at Bioinformatics online.},
	keywords = {Aged, algorithms, Colonic Neoplasms, DNA Methylation, Genome, High-Throughput Nucleotide Sequencing, HUMANS, Oligonucleotide Array Sequence Analysis, Polymorphism, Single Nucleotide, software},
	issn = {1367-4811},
	doi = {10.1093/bioinformatics/btu049},
	author = {Aryee, Martin J and Jaffe, Andrew E and Corrada-Bravo, Hector and Ladd-Acosta, Christine and Feinberg, Andrew P and Hansen, Kasper D and Irizarry, Rafael A}
}
@article {38276,
	title = {Gene expression anti-profiles as a basis for accurate universal cancer signatures},
	journal = {BMC Bioinformatics},
	volume = {13},
	year = {2012},
	url = {http://www.ncbi.nlm.nih.gov/pubmed/23088656?dopt=Abstract},
	doi = {10.1186/1471-2105-13-272},
	abstract = {BACKGROUND: Early screening for cancer is arguably one of the greatest public health advances over the last fifty years. However, many cancer screening tests are invasive (digital rectal exams), expensive (mammograms, imaging) or both (colonoscopies). This has spurred growing interest in developing genomic signatures that can be used for cancer diagnosis and prognosis. However, progress has been slowed by heterogeneity in cancer profiles and the lack of effective computational prediction tools for this type of data. RESULTS: We developed anti-profiles as a first step towards translating experimental findings suggesting that stochastic across-sample hyper-variability in the expression of specific genes is a stable and general property of cancer into predictive and diagnostic signatures. Using single-chip microarray normalization and quality assessment methods, we developed an anti-profile for colon cancer in tissue biopsy samples. To demonstrate the translational potential of our findings, we applied the signature developed in the tissue samples, without any further retraining or normalization, to screen patients for colon cancer based on genomic measurements from peripheral blood in an independent study (AUC of 0.89). This method achieved higher accuracy than the signature underlying commercially available peripheral blood screening tests for colon cancer (AUC of 0.81). We also confirmed the existence of hyper-variable genes across a range of cancer types and found that a significant proportion of tissue-specific genes are hyper-variable in cancer. Based on these observations, we developed a universal cancer anti-profile that accurately distinguishes cancer from normal regardless of tissue type (ten-fold cross-validation AUC > 0.92). CONCLUSIONS: We have introduced anti-profiles as a new approach for developing cancer genomic signatures that specifically takes advantage of gene expression heterogeneity. We have demonstrated that anti-profiles can be successfully applied to develop peripheral-blood based diagnostics for cancer and used anti-profiles to develop a highly accurate universal cancer signature. By using single-chip normalization and quality assessment methods, no further retraining of signatures developed by the anti-profile approach would be required before their application in clinical settings. Our results suggest that anti-profiles may be used to develop inexpensive and non-invasive universal cancer screening tests.},
	keywords = {Area Under Curve, Colonic Neoplasms, Gene Expression Profiling, Genetic Variation, Genomics, HUMANS, Oligonucleotide Array Sequence Analysis, Prognosis, Transcriptome, Tumor Markers, Biological},
	author = {Corrada Bravo, H{\'e}ctor and Pihur, Vasyl and McCall, Matthew and Irizarry, Rafael A. and Leek, Jeffrey T.}
}
@article {38522,
	title = {Tackling the widespread and critical impact of batch effects in high-throughput data},
	journal = {Nature Reviews Genetics},
	volume = {11},
	year = {2010},
	url = {http://www.ncbi.nlm.nih.gov/pubmed/20838408?dopt=Abstract},
	doi = {10.1038/nrg2825},
	abstract = {High-throughput technologies are widely used, for example to assay genetic variants, gene and protein expression, and epigenetic modifications. One often overlooked complication with such studies is batch effects, which occur because measurements are affected by laboratory conditions, reagent lots and personnel differences. This becomes a major problem when batch effects are correlated with an outcome of interest and lead to incorrect conclusions. Using both published studies and our own analyses, we argue that batch effects (as well as other technical and biological artefacts) are widespread and critical to address. We review experimental and computational approaches for doing so.},
	keywords = {biotechnology, Computational Biology, Genomics, Oligonucleotide Array Sequence Analysis, Periodicals as Topic, Research Design, Sequence Analysis, DNA},
	author = {Leek, Jeffrey T. and Scharpf, Robert B. and Corrada Bravo, H{\'e}ctor and Simcha, David and Langmead, Benjamin and Johnson, W. Evan and Geman, Donald and Baggerly, Keith and Irizarry, Rafael A.}
}
@article {49749,
	title = {Measuring differential gene expression by short read sequencing: quantitative comparison to 2-channel gene expression microarrays},
	journal = {BMC Genomics},
	volume = {10},
	year = {2009},
	pages = {221},
	abstract = {BACKGROUND: High-throughput cDNA synthesis and sequencing of poly(A)-enriched RNA is rapidly emerging as a technology competing to replace microarrays as a quantitative platform for measuring gene expression. RESULTS: Consequently, we compared full length cDNA sequencing to 2-channel gene expression microarrays in the context of measuring differential gene expression. Because of its comparable cost to a gene expression microarray, our study focused on the data obtainable from a single lane of an Illumina 1 G sequencer. We compared sequencing data to a highly replicated microarray experiment profiling two divergent strains of S. cerevisiae. CONCLUSION: Using a large number of quantitative PCR (qPCR) assays, more than previous studies, we found that neither technology is decisively better at measuring differential gene expression. Further, we report sequencing results from a diploid hybrid of two strains of S. cerevisiae that indicate full length cDNA sequencing can discover heterozygosity and measure quantitative allele-specific expression simultaneously.},
	keywords = {algorithms, DNA, Complementary, DNA, Fungal, Gene Expression Profiling, Oligonucleotide Array Sequence Analysis, Saccharomyces cerevisiae, sequence alignment, Sequence Analysis, DNA},
	issn = {1471-2164},
	doi = {10.1186/1471-2164-10-221},
	author = {Bloom, Joshua S and Khan, Zia and Kruglyak, Leonid and Singh, Mona and Caudy, Amy A}
}
@article {49643,
	title = {{Schistosoma} mansoni: Microarray analysis of gene expression induced by host sex},
	journal = {Experimental Parasitology},
	volume = {120},
	year = {2008},
	month = dec,
	pages = {357--363},
	abstract = {Schistosoma mansoni is a digenetic trematode and a human parasite responsible for high social and economic impact. Although some authors have studied the effect of host hormones on parasites, not much is known about the effects of host sex on gene expression in Schistosomes. In order to study gene transcripts associated with the host sex, we compared the gene expression profiles of both male and female unisexual adult S. mansoni parasites raised on either male or female hosts, using DNA microarrays. Our results show that host sex caused differential expression of at least 11 genes in female parasites and of 134 in male parasites. Of the differentially expressed genes in female worms, 10 were preferentially expressed in female worms from male mice, while of the 134 differentially expressed genes in male parasites, 79 (59\%) were preferentially expressed in worms from female mice. Further investigation of the role of each of those genes will help understand better their importance in the pathogenesis of Schistosomiasis.},
	keywords = {Animals, Biomphalaria, Female, Gene expression, Host-Parasite Interactions, Male, Mice, Oligonucleotide Array Sequence Analysis, Reverse Transcriptase Polymerase Chain Reaction, RNA, Helminth, Schistosoma mansoni, Schistosomiasis mansoni, Sex Factors},
	issn = {1090-2449},
	doi = {10.1016/j.exppara.2008.09.005},
	author = {Waisberg, M and Lobo, F P and Cerqueira, G C and Passos, L K J and Carvalho, O S and El-Sayed, N M and Franco, G R}
}
@article {38325,
	title = {A guild of 45 {CRISPR}-associated ({Cas}) protein families and multiple {CRISPR/Cas} subtypes exist in prokaryotic genomes},
	journal = {PLoS Computational Biology},
	volume = {1},
	year = {2005},
	url = {http://www.ncbi.nlm.nih.gov/pubmed/16292354?dopt=Abstract},
	doi = {10.1371/journal.pcbi.0010060},
	abstract = {Clustered regularly interspaced short palindromic repeats (CRISPRs) are a family of DNA direct repeats found in many prokaryotic genomes. Repeats of 21-37 bp typically show weak dyad symmetry and are separated by regularly sized, nonrepetitive spacer sequences. Four CRISPR-associated (Cas) protein families, designated Cas1 to Cas4, are strictly associated with CRISPR elements and always occur near a repeat cluster. Some spacers originate from mobile genetic elements and are thought to confer "immunity" against the elements that harbor these sequences. In the present study, we have systematically investigated uncharacterized proteins encoded in the vicinity of these CRISPRs and found many additional protein families that are strictly associated with CRISPR loci across multiple prokaryotic species. Multiple sequence alignments and hidden Markov models have been built for 45 Cas protein families. These models identify family members with high sensitivity and selectivity and classify key regulators of development, DevR and DevS, in Myxococcus xanthus as Cas proteins. These identifications show that CRISPR/cas gene regions can be quite large, with up to 20 different, tandem-arranged cas genes next to a repeat cluster or filling the region between two repeat clusters. Distinctive subsets of the collection of Cas proteins recur in phylogenetically distant species and correlate with characteristic repeat periodicity. The analyses presented here support initial proposals of mobility of these units, along with the likelihood that loci of different subtypes interact with one another as well as with host cell defensive, replicative, and regulatory systems. It is evident from this analysis that CRISPR/cas loci are larger, more complex, and more heterogeneous than previously appreciated.},
	keywords = {Genes, Archaeal, Genes, Bacterial, Genes, Fungal, Genome, Genome, Bacterial, Haloarcula marismortui, Markov chains, Multigene Family, Oligonucleotide Array Sequence Analysis, Phylogeny, Prokaryotic Cells, Proteins, Repetitive Sequences, Nucleic Acid, Yersinia pestis},
	author = {Haft, Daniel H. and Selengut, J. and Mongodin, Emmanuel F. and Nelson, Karen E.}
}
@article {38514,
	title = {Structural flexibility in the {Burkholderia} mallei genome},
	journal = {Proceedings of the National Academy of Sciences of the United States of America},
	volume = {101},
	year = {2004},
	url = {http://www.ncbi.nlm.nih.gov/pubmed/15377793?dopt=Abstract},
	doi = {10.1073/pnas.0403306101},
	abstract = {The complete genome sequence of Burkholderia mallei ATCC 23344 provides insight into this highly infectious bacterium{\textquoteright}s pathogenicity and evolutionary history. B. mallei, the etiologic agent of glanders, has come under renewed scientific investigation as a result of recent concerns about its past and potential future use as a biological weapon. Genome analysis identified a number of putative virulence factors whose function was supported by comparative genome hybridization and expression profiling of the bacterium in hamster liver in vivo. The genome contains numerous insertion sequence elements that have mediated extensive deletions and rearrangements of the genome relative to Burkholderia pseudomallei. The genome also contains a vast number (>12,000) of simple sequence repeats. Variation in simple sequence repeats in key genes can provide a mechanism for generating antigenic variation that may account for the mammalian host{\textquoteright}s inability to mount a durable adaptive immune response to a B. mallei infection.},
	keywords = {Animals, Base Composition, Base Sequence, Burkholderia mallei, Chromosomes, Bacterial, Cricetinae, Genome, Bacterial, Glanders, Liver, Mesocricetus, Molecular Sequence Data, Multigene Family, Oligonucleotide Array Sequence Analysis, Open Reading Frames, virulence},
	author = {Nierman, William C. and DeShazer, David and Kim, H. Stanley and Tettelin, Herv{\'e} and Nelson, Karen E. and Feldblyum, Tamara and Ulrich, Ricky L. and Ronning, Catherine M. and Brinkac, Lauren M. and Daugherty, Sean C. and Davidsen, Tanja D. and DeBoy, Robert T. and Dimitrov, George and Dodson, Robert J. and Durkin, A. Scott and Gwinn, Michelle L. and Haft, Daniel H. and Khouri, Hoda and Kolonay, James F. and Madupu, Ramana and Mohammoud, Yasmin and Nelson, William C. and Radune, Diana and Romero, Claudia M. and Sarria, Saul and Selengut, J. and Shamblin, Christine and Sullivan, Steven A. and White, Owen and Yu, Yan and Zafar, Nikhat and Zhou, Liwei and Fraser, Claire M.}
}
@article {49629,
	title = {Analysis of stage-specific gene expression in the bloodstream and the procyclic form of {Trypanosoma} brucei using a genomic {DNA}-microarray},
	journal = {Molecular and Biochemical Parasitology},
	volume = {123},
	year = {2002},
	month = aug,
	pages = {115--123},
	abstract = {A microarray comprising 21,024 different PCR products spotted on glass slides was constructed for gene expression studies on Trypanosoma brucei. The arrayed fragments were generated from a T. brucei shotgun clone library, which had been prepared from randomly sheared and size-fractionated genomic DNA. For the identification of stage-specific gene activity, total RNA from in vitro cultures of the human, long slender form and the insect, procyclic form of the parasite was labelled and hybridised to the microarray. Approximately 75\% of the genomic fragments produced a signal and about 2\% exhibited significant differences between the transcript levels in the bloodstream and procyclic forms. A few results were confirmed by Northern blot analysis or reverse-transcription and PCR. Three hundred differentially regulated clones have been selected for sequencing. So far, of 33 clones that showed about 2-fold or more over-expression in bloodstream forms, 15 contained sequences similar to those of VSG expression sites and at least six others appeared non-protein-coding. Of 29 procyclic-specific clones, at least eight appeared not to be protein-coding. A surprisingly large proportion of known regulated genes was already identified in this small sample, and some new ones were found, illustrating the utility of genomic arrays.},
	keywords = {Animals, Blotting, Northern, Escherichia coli, Gene expression, Gene Expression Profiling, Genes, Protozoan, HUMANS, Life Cycle Stages, Molecular Sequence Data, Oligonucleotide Array Sequence Analysis, Polymerase Chain Reaction, Transcription, Genetic, Trypanosoma brucei brucei},
	issn = {0166-6851},
	author = {Diehl, Susanne and Diehl, Frank and El-Sayed, Najib M and Clayton, Christine and Hoheisel, J{\"o}rg D}
}