Mercurial > repos > rnateam > chipseeker
changeset 1:95f779f4adb7 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/chipseeker commit 3419a5a5e19a93369c8c20a39babe5636a309292
author | rnateam |
---|---|
date | Tue, 29 May 2018 15:08:04 -0400 |
parents | 58ef4507ce5a |
children | cb133602cd9b |
files | chipseeker.R chipseeker.xml test-data/cached_locally/gene_sets.loc test-data/cached_locally/ref.gtf test-data/in.bed test-data/in.diffbind test-data/in.gtf test-data/in.interval test-data/out.int test-data/out.pdf test-data/out.tab test-data/outflank.tab test-data/outint.int test-data/outint.tab test-data/outtss.tab tool-data/gene_sets.loc.sample tool_data_table_conf.xml.sample tool_data_table_conf.xml.test |
diffstat | 18 files changed, 333 insertions(+), 87 deletions(-) [+] |
line wrap: on
line diff
# chipseeker.R -- source of the file added as b/chipseeker.R by this changeset.
#
# Annotates ChIP peaks with ChIPseeker against a TxDb built from a GTF file.
# Writes the annotated peaks to "out.tab" (tab-separated, 0-based start) and,
# when -p/--plots is supplied, a set of summary plots to "out.pdf".

# On any error: write the message to stderr and exit with status 1 without
# saving the workspace.
options(
  show.error.messages = FALSE,
  error = function() {
    cat(geterrmessage(), file = stderr())
    q("no", 1, FALSE)
  }
)

# we need that to not crash galaxy with an UTF8 error on German LC settings.
loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")

suppressPackageStartupMessages({
  library(ChIPseeker)
  library(GenomicFeatures)
  library(optparse)
})

# Command-line interface. The defaults mirror the values the Galaxy wrapper
# passes by default (TSS region -3kb/+3kb, flank distance 5000, interval
# output), so the script also behaves sensibly when an option is omitted.
option_list <- list(
  make_option(c("-i", "--infile"), type = "character",
    help = "Peaks file to be annotated"),
  make_option(c("-G", "--gtf"), type = "character",
    help = "GTF to create TxDb."),
  make_option(c("-u", "--upstream"), type = "integer", default = 3000L,
    help = "TSS upstream region [default %default]"),
  make_option(c("-d", "--downstream"), type = "integer", default = 3000L,
    help = "TSS downstream region [default %default]"),
  make_option(c("-F", "--flankgeneinfo"), type = "logical",
    help = "Add flanking gene info"),
  make_option(c("-D", "--flankgenedist"), type = "integer", default = 5000L,
    help = "Flanking gene distance [default %default]"),
  make_option(c("-f", "--format"), type = "character", default = "interval",
    help = "Output format (interval or tabular) [default %default]."),
  make_option(c("-p", "--plots"), type = "character",
    help = "PDF of plots.")
)

parser <- OptionParser(
  usage = "%prog [options] file",
  option_list = option_list
)
args <- parse_args(parser)

# Fail early with a clear message instead of an obscure downstream error.
if (is.null(args$infile) || is.null(args$gtf)) {
  stop("Both --infile and --gtf must be supplied.", call. = FALSE)
}

out_format <- args$format
tss_region <- c(-args$upstream, args$downstream)

peaks <- readPeakFile(args$infile)

# Make TxDb from GTF
txdb <- makeTxDbFromGFF(args$gtf, format = "gtf")

# Flanking-gene arguments are only forwarded when -F was given explicitly.
if (!is.null(args$flankgeneinfo)) {
  peak_anno <- annotatePeak(
    peaks,
    TxDb = txdb,
    tssRegion = tss_region,
    addFlankGeneInfo = args$flankgeneinfo,
    flankDistance = args$flankgenedist
  )
} else {
  peak_anno <- annotatePeak(peaks, TxDb = txdb, tssRegion = tss_region)
}

# Convert from 1-based to 0-based format
res <- as.GRanges(peak_anno)
metacols <- mcols(res)

# Subtract an integer (1L) so Start stays integer: a double such as 100000
# would be written by write.table() as 1e+05.
coords <- data.frame(
  Chrom = seqnames(res),
  Start = start(res) - 1L,
  End = end(res)
)

if (identical(out_format, "interval")) {
  # Collapse all metadata columns into one pipe-separated Comment column.
  # NOTE(review): apply() coerces the data frame to a character matrix, so
  # numeric columns are formatted to a common width (the left-padded values
  # seen in the Comment field). Kept as is because the expected test outputs
  # contain that padding.
  comment <- apply(
    as.data.frame(metacols), 1,
    function(row) paste(row, collapse = "|")
  )
  resout <- data.frame(coords, Comment = comment)
} else {
  # Any other format value yields one output column per metadata column.
  resout <- data.frame(coords, metacols)
}

write.table(
  resout,
  file = "out.tab", sep = "\t", row.names = FALSE, quote = FALSE
)

if (!is.null(args$plots)) {
  pdf("out.pdf", width = 14)
  plotAnnoPie(peak_anno)
  # ggplot objects are not auto-printed inside a braced block, so they must
  # be printed explicitly to reach the PDF device.
  print(plotAnnoBar(peak_anno))
  vennpie(peak_anno)
  upsetplot(peak_anno)
  print(plotDistToTSS(
    peak_anno,
    title = "Distribution of transcription factor-binding loci\nrelative to TSS"
  ))
  dev.off()
}
--- a/chipseeker.xml Thu May 24 18:25:40 2018 -0400 +++ b/chipseeker.xml Tue May 29 15:08:04 2018 -0400 @@ -1,89 +1,75 @@ -<tool id="chipseeker" name="ChIPseeker" version="1.14.2"> +<tool id="chipseeker" name="ChIPseeker" version="1.14.2.1"> <description>for ChIP peak annotation and visualization</description> <requirements> <requirement type="package" version="1.14.2">bioconductor-chipseeker</requirement> - <requirement type="package" version="3.4.0">bioconductor-txdb.hsapiens.ucsc.hg38.knowngene</requirement> - <requirement type="package" version="3.2.2">bioconductor-txdb.hsapiens.ucsc.hg19.knowngene</requirement> - <requirement type="package" version="3.4.0">bioconductor-txdb.Mmusculus.UCSC.mm10.knownGene</requirement> - <requirement type="package" version="3.5.0">bioconductor-org.hs.eg.db</requirement> - <requirement type="package" version="3.5.0">bioconductor-org.mm.eg.db</requirement> + <requirement type="package" version="1.4.4">r-optparse</requirement> </requirements> <version_command><![CDATA[ -echo $(R --version | grep version | grep -v GNU)", ChIPseeker version" $(R --vanilla --slave -e "library(ChIPseeker); cat(sessionInfo()\$otherPkgs\$ChIPseeker\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", TxDb.Hsapiens.UCSC.hg38.knownGene version" $(R --vanilla --slave -e "library(TxDb.Hsapiens.UCSC.hg38.knownGene); cat(sessionInfo()\$otherPkgs\$TxDb.Hsapiens.UCSC.hg38.knownGene\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", TxDb.Hsapiens.UCSC.hg19.knownGene version" $(R --vanilla --slave -e "library(TxDb.Hsapiens.UCSC.hg19.knownGene); cat(sessionInfo()\$otherPkgs\$TxDb.Hsapiens.UCSC.hg19.knownGene\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", TxDb.Mmusculus.UCSC.mm10.knownGene version" $(R --vanilla --slave -e "library(TxDb.Mmusculus.UCSC.mm10.knownGene); cat(sessionInfo()\$otherPkgs\$TxDb.Mmusculus.UCSC.mm10.knownGene\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", org.Hs.eg.db version" $(R --vanilla --slave -e "library(org.Hs.eg.db); 
cat(sessionInfo()\$otherPkgs\$org.Hs.eg.db\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", org.Mm.eg.db version" $(R --vanilla --slave -e "library(org.Mm.eg.db); cat(sessionInfo()\$otherPkgs\$org.Mm.eg.db\$Version)" 2> /dev/null | grep -v -i "WARNING: ") +echo $(R --version | grep version | grep -v GNU)", ChIPseeker version" $(R --vanilla --slave -e "library(ChIPseeker); cat(sessionInfo()\$otherPkgs\$ChIPseeker\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", optparse version" $(R --vanilla --slave -e "library(optparse); cat(sessionInfo()\$otherPkgs\$optparse\$Version)" 2> /dev/null | grep -v -i "WARNING: ") ]]></version_command> <command detect_errors="exit_code"><![CDATA[ + #set gtf = "refgtf" + #if $gtf_source.gtf_source_select == "history": + ln -s '${gtf_source.gtf_hist}' $gtf && + #else if $gtf_source.gtf_source_select == "cached": + ln -s '${gtf_source.gtf_builtin.fields.path}' $gtf && + #end if + #if $rscript: - cp '${chipseeker_script}' '${out_rscript}' && + cp '$__tool_directory__/chipseeker.R' '$out_rscript' && #end if - Rscript '${chipseeker_script}' + + Rscript '$__tool_directory__/chipseeker.R' + + -i '$peaks' + -G '$gtf' + -u $upstream + -d $downstream + #if $flankgeneinfo: + -F $flankgeneinfo + -D $flankgenedist + #end if + -f $format + -p $pdf ]]> </command> - <configfiles> - <configfile name="chipseeker_script"><![CDATA[ -options( show.error.messages=F, error = function () { cat( geterrmessage(), file=stderr() ); q( "no", 1, F ) } ) - -# we need that to not crash galaxy with an UTF8 error on German LC settings. 
-loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") - -suppressPackageStartupMessages(library(ChIPseeker)) - -genome <- "${genome}" - -if (genome == "hg38") { - suppressPackageStartupMessages({ - library(TxDb.Hsapiens.UCSC.hg38.knownGene) - library(org.Hs.eg.db) - }) - txdb <- TxDb.Hsapiens.UCSC.hg38.knownGene - annodb <- "org.Hs.eg.db" -} else if (genome == "hg19") { - suppressPackageStartupMessages({ - library(TxDb.Hsapiens.UCSC.hg19.knownGene) - library(org.Hs.eg.db) - }) - txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene - annodb <- "org.Hs.eg.db" -} else if (genome == "mm10") { - suppressPackageStartupMessages({ - library(TxDb.Mmusculus.UCSC.mm10.knownGene) - library(org.Mm.eg.db) - }) - txdb <- TxDb.Mmusculus.UCSC.mm10.knownGene - annodb <- "org.Mm.eg.db" -} else { - cat(paste("Genome not supported", genome)) -} - -peaks <- readPeakFile('$peaks_file') -peakAnno <- annotatePeak(peaks, TxDb=txdb, annoDb=annodb) -write.table(peakAnno, file='$out_tab', sep="\t", row.names=FALSE, quote=FALSE) - -if (!is.null("${pdf}")) { - pdf("out.pdf", width=14) - plotAnnoPie(peakAnno) - plotAnnoBar(peakAnno) - vennpie(peakAnno) - upsetplot(peakAnno) - plotDistToTSS(peakAnno, title="Distribution of transcription factor-binding loci\nrelative to TSS") - dev.off() -} - ]]></configfile> - </configfiles> - <inputs> - <param name="peaks_file" type="data" format="bed" label="Peaks file" help="A peaks file in BED format." /> - <param name="genome" type="select" label="Genome" help="Select the genome. Options are hg38, hg19 or mm10."> - <option value="hg38">hg38</option> - <option value="hg19">hg19</option> - <option value="mm10">mm10</option> + <param name="peaks" type="data" format="bed,interval" label="Peaks file" help="A peaks file in BED format." 
/> + <conditional name="gtf_source"> + <param name="gtf_source_select" type="select" label="Annotation source" help="Select a GTF to use for annotation source."> + <option value="cached" selected="true">Use a built-in GTF</option> + <option value="history">Use a GTF from history</option> + </param> + <when value="cached"> + <param name="gtf_builtin" type="select" label="Select a built-in GTF" help="If the GTF file for your transcriptome of interest is not listed, contact your Galaxy administrator"> + <options from_data_table="gene_sets"> + <filter type="sort_by" column="2" /> + <validator type="no_options" message="No GTF file is available." /> + </options> + </param> + </when> + <when value="history"> + <param name="gtf_hist" type="data" format="gtf" label="Select a history GTF" /> + </when> + </conditional> + <param name="upstream" type="integer" min="0" value="3000" label="TSS upstream region" help="User can define TSS (transcription start site) region, by default TSS is defined from -3kb to +3kb." /> + <param name="downstream" type="integer" min="0" value="3000" label="TSS downstream region" help="User can define TSS (transcription start site) region, by default TSS is defined from -3kb to +3kb."/> + <param name="flankgeneinfo" type="boolean" truevalue="True" falsevalue="False" checked="False" label="Add flanking gene information?" help="If specified all genes within the flanking gene distance are reported for each peak. Default: No."/> + <param name="flankgenedist" type="integer" min="0" value="5000" label="Flanking gene distance" help="If flanking gene info is turned on the flanking distance can be specified. Default: 5000."/> + <param name="format" type="select" label="Output Format"> + <option value="interval" selected="True">Interval</option> + <option value="tabular">Tabular (tab-separated)</option> </param> - - <param name="pdf" type="boolean" truevalue="True" falsevalue="" checked="True" label="Output a PDF file of plots?" 
help="Default: Yes" /> + <param name="pdf" type="boolean" truevalue="True" falsevalue="" checked="True" label="Output PDF of plots?" help="Default: Yes" /> <param name="rscript" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="False" label="Output Rscript?" help="If this option is set to Yes, the Rscript used to annotate the IDs will be provided as a text file in the output. Default: No" /> </inputs> <outputs> - <data name="out_tab" format="tabular" from_work_dir="out.tab" label="${tool.name} on ${on_string}: Annotated Peaks" /> + <data name="out_tab" format="interval" from_work_dir="out.tab" label="${tool.name} on ${on_string}: Annotated Peaks" > + <change_format> + <when input="format" value="tabular" format="tabular" /> + </change_format> + </data> <data name="out_plots" format="pdf" from_work_dir="out.pdf" label="${tool.name} on ${on_string}: Plots"> <filter>pdf</filter> </data> @@ -93,12 +79,13 @@ </outputs> <tests> - <!-- Ensure outputs work --> + <!-- Ensure bed and GTF inputs and all outputs work --> <test expect_num_outputs="3"> - <param name="peaks_file" value="in.diffbind" ftype="bed"/> - <param name="genome" value="hg19"/> + <param name="peaks" value="in.bed" ftype="bed"/> + <param name="gtf_source_select" value="history"/> + <param name="gtf_hist" value="in.gtf"/> <param name="rscript" value="True"/> - <output name="out_tab" file="out.tab" /> + <output name="out_tab" ftype="interval" file="out.int" /> <output name="out_plots" file="out.pdf" compare="sim_size"/> <output name="out_rscript" > <assert_contents> @@ -106,6 +93,43 @@ </assert_contents> </output> </test> + <!-- Ensure built-in GTF works --> + <test expect_num_outputs="2"> + <param name="peaks" value="in.interval" ftype="interval"/> + <param name="gtf_source_select" value="cached"/> + <output name="out_tab" ftype="interval" file="outint.int" /> + <output name="out_plots" file="out.pdf" compare="sim_size"/> + </test> + <!-- Ensure tabular output works --> + <test 
expect_num_outputs="2"> + <param name="peaks" value="in.interval" ftype="interval"/> + <param name="gtf_source_select" value="history"/> + <param name="gtf_hist" value="in.gtf"/> + <param name="format" value="tabular"/> + <output name="out_tab" ftype="tabular" file="outint.tab" /> + <output name="out_plots" file="out.pdf" compare="sim_size"/> + </test> + <!-- Ensure TSS region specification works --> + <test expect_num_outputs="2"> + <param name="peaks" value="in.interval" ftype="interval"/> + <param name="gtf_source_select" value="history"/> + <param name="gtf_hist" value="in.gtf"/> + <param name="upstream" value="1000" /> + <param name="downstream" value="1000" /> + <param name="format" value="tabular"/> + <output name="out_tab" ftype="tabular" file="outtss.tab" /> + <output name="out_plots" file="out.pdf" compare="sim_size"/> + </test> + <!-- Ensure flanking genes works --> + <test expect_num_outputs="2"> + <param name="peaks" value="in.interval" ftype="interval"/> + <param name="gtf_source_select" value="history"/> + <param name="gtf_hist" value="in.gtf"/> + <param name="flankgeneinfo" value="True" /> + <param name="format" value="tabular"/> + <output name="out_tab" ftype="tabular" file="outflank.tab" /> + <output name="out_plots" file="out.pdf" compare="sim_size"/> + </test> </tests> <help><![CDATA[ @@ -120,7 +144,21 @@ **Inputs** -A peaks file in BED format e.g from MACS2 or DiffBind. +A peaks file in BED or Interval format e.g from MACS2 or DiffBind. + +Example: + + ===== ====== ====== ======== ===== ====== + Chrom Start End Name Score Strand + ===== ====== ====== ======== ===== ====== + 18 394599 396513 DiffBind 0 . + 18 111566 112005 DiffBind 0 . + 18 346463 347342 DiffBind 0 . + 18 399013 400382 DiffBind 0 . + 18 371109 372102 DiffBind 0 . + ===== ====== ====== ======== ===== ====== + +A GTF file for annotation. 
----- @@ -128,10 +166,56 @@ This tool outputs - * a table of annotated peaks + * a file of annotated peaks in Interval or Tabular format * a PDF of plots * the R script used by this tool +**Annotated peaks** + +Annotation similar to below will be added to the input file. + +Example - **Interval format**: + + ===== ====== ====== ===================================================================================================================================================== + Chrom Start End Comment + ===== ====== ====== ===================================================================================================================================================== + 18 394599 396513 DiffBind|0|.|Intron (ENST00000400256/ENSG00000158270, intron 1 of 1)|1|346465|400382|53918|2|ENSG00000158270|ENST00000400256| 3869 + 18 111566 112005 DiffBind|0|.|Promoter (<=1kb)|1|111568|112005| 438|1|ENSG00000263006|ENST00000608049| 0 + 18 346463 347342 DiffBind|0|.|Exon (ENST00000400256/ENSG00000158270, exon 1 of 1)|1|346465|400382|53918|2|ENSG00000158270|ENST00000400256|53040 + 18 399013 400382 DiffBind|0|.|Promoter (<=1kb)|1|346465|400382|53918|2|ENSG00000158270|ENST00000400256| 0 + 18 371109 372102 DiffBind|0|.|Intron (ENST00000400256/ENSG00000158270, intron 1 of 1)|1|346465|400382|53918|2|ENSG00000158270|ENST00000400256|28280 + ===== ====== ====== ===================================================================================================================================================== + + Columns contain the following data: + +* **Chrom**: Chromosome name +* **Start**: Start position of site +* **End**: End position of site +* **Comment**: The pipe ("|") separated values in this column correspond to: + + * *<Any additional input columns>* + * *annotation* (Promoter, 5’ UTR, 3’ UTR, Exon, Intron, Downstream, Intergenic) + * *geneChr* + * *geneStart* + * *geneEnd* + * *geneLength* + * *geneStrand* + * *geneId* + * *transcriptId* + * *distanceToTSS* + +Example - 
**Tabular format**: + + ===== ====== ====== ======== ====== ====== =========================================== ======================================================= ======= ========= ======= ========== ========== =============== =============== ============= + Chrom Start End Name Score Strand Comment annotation geneChr geneStart geneEnd geneLength geneStrand geneId transcriptId distanceToTSS + ===== ====== ====== ======== ====== ====== =========================================== ======================================================= ======= ========= ======= ========== ========== =============== =============== ============= + 18 394599 396513 DiffBind 0 . 1914|7.15|5.55|7.89|-2.35|7.06e-24|9.84e-21 Intron (ENST00000400256/ENSG00000158270, intron 1 of 1) 1 346465 400382 53918 2 ENSG00000158270 ENST00000400256 3869 + 18 111566 112005 DiffBind 0 . 439|5.71|6.53|3.63|2.89|1.27e-08|8.88e-06 Promoter (<=1kb) 1 111568 112005 438 1 ENSG00000263006 ENST00000608049 0 + 18 346463 347342 DiffBind 0 . 879|5|5.77|3.24|2.52|6.51e-06|0.00303 Exon (ENST00000400256/ENSG00000158270, exon 1 of 1) 1 346465 400382 53918 2 ENSG00000158270 ENST00000400256 53040 + 18 399013 400382 DiffBind 0 . 1369|7.62|7|8.05|-1.04|1.04e-05|0.00364 Promoter (<=1kb) 1 346465 400382 53918 2 ENSG00000158270 ENST00000400256 0 + 18 371109 372102 DiffBind 0 . 993|4.63|3.07|5.36|-2.3|8.1e-05|0.0226 Intron (ENST00000400256/ENSG00000158270, intron 1 of 1) 1 346465 400382 53918 2 ENSG00000158270 ENST00000400256 28280 + ===== ====== ====== ======== ====== ====== =========================================== ======================================================= ======= ========= ======= ========== ========== =============== =============== ============= + .. _ChIPseeker: https://bioconductor.org/packages/release/bioc/html/ChIPseeker.html .. _`ChIPseeker vignette`: http://bioconductor.org/packages/release/bioc/vignettes/ChIPseeker/inst/doc/ChIPseeker.html
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/cached_locally/gene_sets.loc Tue May 29 15:08:04 2018 -0400 @@ -0,0 +1,1 @@ +hg38 hg38 hg38GTF ${__HERE__}/ref.gtf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/cached_locally/ref.gtf Tue May 29 15:08:04 2018 -0400 @@ -0,0 +1,17 @@ +18 pseudogene gene 111568 112005 . + . gene_id "ENSG00000263006"; gene_name "ROCK1P1"; gene_source "havana"; gene_biotype "pseudogene"; +18 processed_transcript transcript 111568 112005 . + . gene_id "ENSG00000263006"; transcript_id "ENST00000608049"; gene_name "ROCK1P1"; gene_source "havana"; gene_biotype "pseudogene"; transcript_name "ROCK1P1-003"; transcript_source "havana"; +18 protein_coding gene 346465 347342 . - . gene_id "ENSG00000158270"; gene_name "COLEC12"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; +18 protein_coding gene 371111 372102 . - . gene_id "ENSG00000158270"; gene_name "COLEC12"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; +18 protein_coding gene 394601 396513 . - . gene_id "ENSG00000158270"; gene_name "COLEC12"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; +18 protein_coding gene 399015 400382 . - . gene_id "ENSG00000158270"; gene_name "COLEC12"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; +18 protein_coding transcript 346465 347342 . - . gene_id "ENSG00000158270"; transcript_id "ENST00000400256"; gene_name "COLEC12"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "COLEC12-001"; transcript_source "ensembl_havana"; tag "CCDS"; ccds_id "CCDS32782"; +18 protein_coding transcript 371111 372102 . - . gene_id "ENSG00000158270"; transcript_id "ENST00000400256"; gene_name "COLEC12"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "COLEC12-001"; transcript_source "ensembl_havana"; tag "CCDS"; ccds_id "CCDS32782"; +18 protein_coding transcript 394601 396513 . - . 
gene_id "ENSG00000158270"; transcript_id "ENST00000400256"; gene_name "COLEC12"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "COLEC12-001"; transcript_source "ensembl_havana"; tag "CCDS"; ccds_id "CCDS32782"; +18 protein_coding transcript 399015 400382 . - . gene_id "ENSG00000158270"; transcript_id "ENST00000400256"; gene_name "COLEC12"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "COLEC12-001"; transcript_source "ensembl_havana"; tag "CCDS"; ccds_id "CCDS32782"; +18 protein_coding exon 346465 347341 . - . gene_id "ENSG00000158270"; transcript_id "ENST00000400256"; exon_number "5"; gene_name "COLEC12"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "COLEC12-001"; transcript_source "ensembl_havana"; tag "CCDS"; ccds_id "CCDS32782"; exon_id "ENSE00003544566"; +18 protein_coding CDS 346465 347341 . - 2 gene_id "ENSG00000158270"; transcript_id "ENST00000400256"; exon_number "5"; gene_name "COLEC12"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "COLEC12-001"; transcript_source "ensembl_havana"; tag "CCDS"; ccds_id "CCDS32782"; protein_id "ENSP00000383115"; +18 retained_intron transcript 346465 347342 . - . gene_id "ENSG00000158270"; transcript_id "ENST00000582147"; gene_name "COLEC12"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "COLEC12-002"; transcript_source "havana"; +18 retained_intron transcript 371111 372102 . - . gene_id "ENSG00000158270"; transcript_id "ENST00000582147"; gene_name "COLEC12"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "COLEC12-002"; transcript_source "havana"; +18 retained_intron transcript 394601 396513 . - . gene_id "ENSG00000158270"; transcript_id "ENST00000582147"; gene_name "COLEC12"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "COLEC12-002"; transcript_source "havana"; +18 retained_intron transcript 399015 400382 . 
- . gene_id "ENSG00000158270"; transcript_id "ENST00000582147"; gene_name "COLEC12"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "COLEC12-002"; transcript_source "havana"; +18 retained_intron exon 346465 347341 . - . gene_id "ENSG00000158270"; transcript_id "ENST00000582147"; exon_number "5"; gene_name "COLEC12"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "COLEC12-002"; transcript_source "havana"; exon_id "ENSE00003660294";
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/in.bed Tue May 29 15:08:04 2018 -0400 @@ -0,0 +1,6 @@ +Chrom Start End Name Score Strand +18 394599 396513 DiffBind 0 . +18 111566 112005 DiffBind 0 . +18 346463 347342 DiffBind 0 . +18 399013 400382 DiffBind 0 . +18 371109 372102 DiffBind 0 .
--- a/test-data/in.diffbind Thu May 24 18:25:40 2018 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,6 +0,0 @@ -seqnames start end width strand Conc Conc_Responsive Conc_Resistant Fold p.value FDR -chr18 394600 396513 1914 * 7.15 5.55 7.89 -2.35 7.06e-24 9.84e-21 -chr18 111567 112005 439 * 5.71 6.53 3.63 2.89 1.27e-08 8.88e-06 -chr18 346464 347342 879 * 5 5.77 3.24 2.52 6.51e-06 0.00303 -chr18 399014 400382 1369 * 7.62 7 8.05 -1.04 1.04e-05 0.00364 -chr18 371110 372102 993 * 4.63 3.07 5.36 -2.3 8.1e-05 0.0226
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/in.gtf Tue May 29 15:08:04 2018 -0400 @@ -0,0 +1,17 @@ +18 pseudogene gene 111568 112005 . + . gene_id "ENSG00000263006"; gene_name "ROCK1P1"; gene_source "havana"; gene_biotype "pseudogene"; +18 processed_transcript transcript 111568 112005 . + . gene_id "ENSG00000263006"; transcript_id "ENST00000608049"; gene_name "ROCK1P1"; gene_source "havana"; gene_biotype "pseudogene"; transcript_name "ROCK1P1-003"; transcript_source "havana"; +18 protein_coding gene 346465 347342 . - . gene_id "ENSG00000158270"; gene_name "COLEC12"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; +18 protein_coding gene 371111 372102 . - . gene_id "ENSG00000158270"; gene_name "COLEC12"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; +18 protein_coding gene 394601 396513 . - . gene_id "ENSG00000158270"; gene_name "COLEC12"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; +18 protein_coding gene 399015 400382 . - . gene_id "ENSG00000158270"; gene_name "COLEC12"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; +18 protein_coding transcript 346465 347342 . - . gene_id "ENSG00000158270"; transcript_id "ENST00000400256"; gene_name "COLEC12"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "COLEC12-001"; transcript_source "ensembl_havana"; tag "CCDS"; ccds_id "CCDS32782"; +18 protein_coding transcript 371111 372102 . - . gene_id "ENSG00000158270"; transcript_id "ENST00000400256"; gene_name "COLEC12"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "COLEC12-001"; transcript_source "ensembl_havana"; tag "CCDS"; ccds_id "CCDS32782"; +18 protein_coding transcript 394601 396513 . - . 
gene_id "ENSG00000158270"; transcript_id "ENST00000400256"; gene_name "COLEC12"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "COLEC12-001"; transcript_source "ensembl_havana"; tag "CCDS"; ccds_id "CCDS32782"; +18 protein_coding transcript 399015 400382 . - . gene_id "ENSG00000158270"; transcript_id "ENST00000400256"; gene_name "COLEC12"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "COLEC12-001"; transcript_source "ensembl_havana"; tag "CCDS"; ccds_id "CCDS32782"; +18 protein_coding exon 346465 347341 . - . gene_id "ENSG00000158270"; transcript_id "ENST00000400256"; exon_number "5"; gene_name "COLEC12"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "COLEC12-001"; transcript_source "ensembl_havana"; tag "CCDS"; ccds_id "CCDS32782"; exon_id "ENSE00003544566"; +18 protein_coding CDS 346465 347341 . - 2 gene_id "ENSG00000158270"; transcript_id "ENST00000400256"; exon_number "5"; gene_name "COLEC12"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "COLEC12-001"; transcript_source "ensembl_havana"; tag "CCDS"; ccds_id "CCDS32782"; protein_id "ENSP00000383115"; +18 retained_intron transcript 346465 347342 . - . gene_id "ENSG00000158270"; transcript_id "ENST00000582147"; gene_name "COLEC12"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "COLEC12-002"; transcript_source "havana"; +18 retained_intron transcript 371111 372102 . - . gene_id "ENSG00000158270"; transcript_id "ENST00000582147"; gene_name "COLEC12"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "COLEC12-002"; transcript_source "havana"; +18 retained_intron transcript 394601 396513 . - . gene_id "ENSG00000158270"; transcript_id "ENST00000582147"; gene_name "COLEC12"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "COLEC12-002"; transcript_source "havana"; +18 retained_intron transcript 399015 400382 . 
- . gene_id "ENSG00000158270"; transcript_id "ENST00000582147"; gene_name "COLEC12"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "COLEC12-002"; transcript_source "havana"; +18 retained_intron exon 346465 347341 . - . gene_id "ENSG00000158270"; transcript_id "ENST00000582147"; exon_number "5"; gene_name "COLEC12"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "COLEC12-002"; transcript_source "havana"; exon_id "ENSE00003660294";
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/in.interval Tue May 29 15:08:04 2018 -0400 @@ -0,0 +1,6 @@ +Chrom Start End Name Score Strand Comment +18 394599 396513 DiffBind 0 . 1914|7.15|5.55|7.89|-2.35|7.06e-24|9.84e-21 +18 111566 112005 DiffBind 0 . 439|5.71|6.53|3.63|2.89|1.27e-08|8.88e-06 +18 346463 347342 DiffBind 0 . 879|5|5.77|3.24|2.52|6.51e-06|0.00303 +18 399013 400382 DiffBind 0 . 1369|7.62|7|8.05|-1.04|1.04e-05|0.00364 +18 371109 372102 DiffBind 0 . 993|4.63|3.07|5.36|-2.3|8.1e-05|0.0226
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/out.int Tue May 29 15:08:04 2018 -0400 @@ -0,0 +1,6 @@ +Chrom Start End Comment +18 394599 396513 DiffBind|0|.|Intron (ENST00000400256/ENSG00000158270, intron 1 of 1)|1|346465|400382|53918|2|ENSG00000158270|ENST00000400256| 3869 +18 111566 112005 DiffBind|0|.|Promoter (<=1kb)|1|111568|112005| 438|1|ENSG00000263006|ENST00000608049| 0 +18 346463 347342 DiffBind|0|.|Exon (ENST00000400256/ENSG00000158270, exon 1 of 1)|1|346465|400382|53918|2|ENSG00000158270|ENST00000400256|53040 +18 399013 400382 DiffBind|0|.|Promoter (<=1kb)|1|346465|400382|53918|2|ENSG00000158270|ENST00000400256| 0 +18 371109 372102 DiffBind|0|.|Intron (ENST00000400256/ENSG00000158270, intron 1 of 1)|1|346465|400382|53918|2|ENSG00000158270|ENST00000400256|28280
--- a/test-data/out.tab Thu May 24 18:25:40 2018 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,6 +0,0 @@ -seqnames start end width strand width.1 strand.1 Conc Conc_Responsive Conc_Resistant Fold p.value FDR annotation geneChr geneStart geneEnd geneLength geneStrand geneId transcriptId distanceToTSS ENSEMBL SYMBOL GENENAME -chr18 394601 396513 1913 * 1914 * 7.15 5.55 7.89 -2.35 7.06e-24 9.84e-21 Intron (uc002kkm.3/81035, intron 2 of 9) 18 319355 500729 181375 2 81035 uc002kkm.3 104216 ENSG00000158270 COLEC12 collectin subfamily member 12 -chr18 111568 112005 438 * 439 * 5.71 6.53 3.63 2.89 1.27e-08 8.88e-06 Promoter (2-3kb) 18 109065 122222 13158 1 727758 uc002kke.3 2503 ENSG00000263006 ROCK1P1 Rho associated coiled-coil containing protein kinase 1 pseudogene 1 -chr18 346465 347342 878 * 879 * 5 5.77 3.24 2.52 6.51e-06 0.00303 Exon (uc002kkm.3/81035, exon 5 of 10) 18 225089 268059 42971 2 9984 uc002kkl.2 -78406 ENSG00000079134 THOC1 THO complex 1 -chr18 399015 400382 1368 * 1369 * 7.62 7 8.05 -1.04 1.04e-05 0.00364 Intron (uc002kkm.3/81035, intron 2 of 9) 18 319355 500729 181375 2 81035 uc002kkm.3 100347 ENSG00000158270 COLEC12 collectin subfamily member 12 -chr18 371111 372102 992 * 993 * 4.63 3.07 5.36 -2.3 8.1e-05 0.0226 Intron (uc002kkm.3/81035, intron 2 of 9) 18 225089 268059 42971 2 9984 uc002kkl.2 -103052 ENSG00000079134 THOC1 THO complex 1
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/outflank.tab Tue May 29 15:08:04 2018 -0400 @@ -0,0 +1,6 @@ +Chrom Start End Name Score Strand Comment annotation geneChr geneStart geneEnd geneLength geneStrand geneId transcriptId distanceToTSS flank_txIds flank_geneIds flank_gene_distances +18 394599 396513 DiffBind 0 . 1914|7.15|5.55|7.89|-2.35|7.06e-24|9.84e-21 Intron (ENST00000400256/ENSG00000158270, intron 1 of 1) 1 346465 400382 53918 2 ENSG00000158270 ENST00000400256 3869 ENST00000400256;ENST00000582147 ENSG00000158270;ENSG00000158270 0;0 +18 111566 112005 DiffBind 0 . 439|5.71|6.53|3.63|2.89|1.27e-08|8.88e-06 Promoter (<=1kb) 1 111568 112005 438 1 ENSG00000263006 ENST00000608049 0 ENST00000608049 ENSG00000263006 0 +18 346463 347342 DiffBind 0 . 879|5|5.77|3.24|2.52|6.51e-06|0.00303 Exon (ENST00000400256/ENSG00000158270, exon 1 of 1) 1 346465 400382 53918 2 ENSG00000158270 ENST00000400256 53040 ENST00000400256;ENST00000582147 ENSG00000158270;ENSG00000158270 0;0 +18 399013 400382 DiffBind 0 . 1369|7.62|7|8.05|-1.04|1.04e-05|0.00364 Promoter (<=1kb) 1 346465 400382 53918 2 ENSG00000158270 ENST00000400256 0 ENST00000400256;ENST00000582147 ENSG00000158270;ENSG00000158270 0;0 +18 371109 372102 DiffBind 0 . 993|4.63|3.07|5.36|-2.3|8.1e-05|0.0226 Intron (ENST00000400256/ENSG00000158270, intron 1 of 1) 1 346465 400382 53918 2 ENSG00000158270 ENST00000400256 28280 ENST00000400256;ENST00000582147 ENSG00000158270;ENSG00000158270 0;0
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/outint.int Tue May 29 15:08:04 2018 -0400 @@ -0,0 +1,6 @@ +Chrom Start End Comment +18 394599 396513 DiffBind|0|.|1914|7.15|5.55|7.89|-2.35|7.06e-24|9.84e-21|Intron (ENST00000400256/ENSG00000158270, intron 1 of 1)|1|346465|400382|53918|2|ENSG00000158270|ENST00000400256| 3869 +18 111566 112005 DiffBind|0|.|439|5.71|6.53|3.63|2.89|1.27e-08|8.88e-06|Promoter (<=1kb)|1|111568|112005| 438|1|ENSG00000263006|ENST00000608049| 0 +18 346463 347342 DiffBind|0|.|879|5|5.77|3.24|2.52|6.51e-06|0.00303|Exon (ENST00000400256/ENSG00000158270, exon 1 of 1)|1|346465|400382|53918|2|ENSG00000158270|ENST00000400256|53040 +18 399013 400382 DiffBind|0|.|1369|7.62|7|8.05|-1.04|1.04e-05|0.00364|Promoter (<=1kb)|1|346465|400382|53918|2|ENSG00000158270|ENST00000400256| 0 +18 371109 372102 DiffBind|0|.|993|4.63|3.07|5.36|-2.3|8.1e-05|0.0226|Intron (ENST00000400256/ENSG00000158270, intron 1 of 1)|1|346465|400382|53918|2|ENSG00000158270|ENST00000400256|28280
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/outint.tab Tue May 29 15:08:04 2018 -0400 @@ -0,0 +1,6 @@ +Chrom Start End Name Score Strand Comment annotation geneChr geneStart geneEnd geneLength geneStrand geneId transcriptId distanceToTSS +18 394599 396513 DiffBind 0 . 1914|7.15|5.55|7.89|-2.35|7.06e-24|9.84e-21 Intron (ENST00000400256/ENSG00000158270, intron 1 of 1) 1 346465 400382 53918 2 ENSG00000158270 ENST00000400256 3869 +18 111566 112005 DiffBind 0 . 439|5.71|6.53|3.63|2.89|1.27e-08|8.88e-06 Promoter (<=1kb) 1 111568 112005 438 1 ENSG00000263006 ENST00000608049 0 +18 346463 347342 DiffBind 0 . 879|5|5.77|3.24|2.52|6.51e-06|0.00303 Exon (ENST00000400256/ENSG00000158270, exon 1 of 1) 1 346465 400382 53918 2 ENSG00000158270 ENST00000400256 53040 +18 399013 400382 DiffBind 0 . 1369|7.62|7|8.05|-1.04|1.04e-05|0.00364 Promoter (<=1kb) 1 346465 400382 53918 2 ENSG00000158270 ENST00000400256 0 +18 371109 372102 DiffBind 0 . 993|4.63|3.07|5.36|-2.3|8.1e-05|0.0226 Intron (ENST00000400256/ENSG00000158270, intron 1 of 1) 1 346465 400382 53918 2 ENSG00000158270 ENST00000400256 28280
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/outtss.tab Tue May 29 15:08:04 2018 -0400 @@ -0,0 +1,6 @@ +Chrom Start End Name Score Strand Comment annotation geneChr geneStart geneEnd geneLength geneStrand geneId transcriptId distanceToTSS +18 394599 396513 DiffBind 0 . 1914|7.15|5.55|7.89|-2.35|7.06e-24|9.84e-21 Intron (ENST00000400256/ENSG00000158270, intron 1 of 1) 1 346465 400382 53918 2 ENSG00000158270 ENST00000400256 3869 +18 111566 112005 DiffBind 0 . 439|5.71|6.53|3.63|2.89|1.27e-08|8.88e-06 Promoter 1 111568 112005 438 1 ENSG00000263006 ENST00000608049 0 +18 346463 347342 DiffBind 0 . 879|5|5.77|3.24|2.52|6.51e-06|0.00303 Exon (ENST00000400256/ENSG00000158270, exon 1 of 1) 1 346465 400382 53918 2 ENSG00000158270 ENST00000400256 53040 +18 399013 400382 DiffBind 0 . 1369|7.62|7|8.05|-1.04|1.04e-05|0.00364 Promoter 1 346465 400382 53918 2 ENSG00000158270 ENST00000400256 0 +18 371109 372102 DiffBind 0 . 993|4.63|3.07|5.36|-2.3|8.1e-05|0.0226 Intron (ENST00000400256/ENSG00000158270, intron 1 of 1) 1 346465 400382 53918 2 ENSG00000158270 ENST00000400256 28280
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/gene_sets.loc.sample Tue May 29 15:08:04 2018 -0400 @@ -0,0 +1,15 @@ +# This is a sample file distributed with featureCounts that enables it and other tools to use gene/exon annotations in the GFF/GTF format. +# +# The gene_sets.loc file syntax is: +#<unique_build_id> <dbkey> <display_name> <path> +# +# Please ensure that the above fields are tab separated. +# +# In case you have TWO or MORE providers PER dbkey, the one mentioned +# first in the file should have the "default" priority. +# +#Example: +# +#Homo_sapiens.GRCh38.90 hg38 GRCh38 (hg38) annotation from Ensembl, release 90 /depot/data2/galaxy/hg38/gene_sets/Homo_sapiens.GRCh38.90.gtf +#Homo_sapiens.GRCh37.87 hg19 GRCh37 (hg19) annotation from Ensembl, release 87 /depot/data2/galaxy/hg19/gene_sets/Homo_sapiens.GRCh37.87.gtf +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_data_table_conf.xml.sample Tue May 29 15:08:04 2018 -0400 @@ -0,0 +1,7 @@ +<tables> + <!-- Locations of all gtf files with annotations of genome builds --> + <table name="gene_sets" comment_char="#" allow_duplicate_entries="False"> + <columns>value, dbkey, name, path</columns> + <file path="tool-data/gene_sets.loc" /> + </table> +</tables>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_data_table_conf.xml.test Tue May 29 15:08:04 2018 -0400 @@ -0,0 +1,6 @@ +<tables> + <table name="gene_sets" comment_char="#"> + <columns>value, dbkey, name, path</columns> + <file path="${__HERE__}/test-data/cached_locally/gene_sets.loc" /> + </table> +</tables>