Mercurial > repos > iuc > featurecounts
changeset 11:e803ca6407c0 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/featurecounts commit 4c095ddb0f2c369d2bf3fc5ca386f6981a2fa0eb
author | iuc |
---|---|
date | Fri, 16 Mar 2018 14:05:55 -0400 |
parents | 46cccc52be5f |
children | b714f4620411 |
files | featurecounts.xml gene_sets.loc gene_sets.loc.sample test-data/featurecounts_anno.loc test-data/gene_sets.loc test-data/output_1_jcounts_with_header.tab tool-data/featurecounts_anno.loc.sample tool_data_table_conf.xml.sample tool_data_table_conf.xml.test |
diffstat | 9 files changed, 120 insertions(+), 58 deletions(-) [+] |
line wrap: on
line diff
--- a/featurecounts.xml Sun Jan 14 09:23:49 2018 -0500 +++ b/featurecounts.xml Fri Mar 16 14:05:55 2018 -0400 @@ -1,4 +1,4 @@ -<tool id="featurecounts" name="featureCounts" version="1.6.0.2" profile="16.04"> +<tool id="featurecounts" name="featureCounts" version="1.6.0.3" profile="16.04"> <description>Measure gene expression in RNA-Seq experiments from SAM or BAM files.</description> <requirements> <requirement type="package" version="1.6.0">subread</requirement> @@ -6,21 +6,22 @@ <version_command>featureCounts -v 2>&1 | grep .</version_command> <command detect_errors="exit_code"><![CDATA[ + ## Export fc path for its built-in annotation + export FC_PATH=\$(command -v featureCounts | sed 's@/bin/featureCounts$@@') && ## Check whether all alignments are from the same type (bam || sam) featureCounts - #if $anno.anno_select=="gtf": - #if $anno.gtf_source.ref_source=="history": - -a '$anno.gtf_source.reference_gene_sets' - #else: - -a '$anno.gtf_source.reference_gene_sets_builtin.fields.path' - #end if + #if $anno.anno_select=="history": + -a '$anno.reference_gene_sets' + -F "GTF" + #elif $anno.anno_select=="cached": + -a '$anno.reference_gene_sets_builtin.fields.path' -F "GTF" #elif $anno.anno_select=="builtin": - -a \${FC_PATH}/annotation/${anno.genome}_RefSeq_exon.txt + -a \${FC_PATH}/annotation/${anno.bgenome}_RefSeq_exon.txt -F "SAF" #end if @@ -131,42 +132,42 @@ multiple="false" format="bam,sam" label="Alignment file" - help="The input alignment file(s) where the gene expression has to be counted. The file can have a SAM or BAM format; but ALL files must be in the same format" /> + help="The input alignment file(s) where the gene expression has to be counted. The file can have a SAM or BAM format; but ALL files must be in the same format. These files must have the database/genome attribute already specified e.g. hg38, not the default: ?" 
> + <validator type="unspecified_build"/> + </param> + <conditional name="anno"> <param name="anno_select" type="select" label="Gene annotation file"> <option value="builtin">featureCounts built-in</option> - <option value="gtf">GTF file</option> + <option value="cached" selected="True">locally cached</option> + <option value="history">in your history</option> </param> <when value="builtin"> - <param name="genome" type="select" label="Select built-in genome" help="Built-in gene annotations for genomes hg38, hg19, mm10 and mm9 are included in featureCounts"> - <option value="hg38">hg38</option> - <option value="hg19">hg19</option> - <option value="mm10">mm10</option> - <option value="mm9">mm9</option> + <param name="bgenome" type="select" label="Select built-in genome" help="Built-in gene annotations for genomes hg38, hg19, mm10 and mm9 are included in featureCounts"> + <options from_data_table="featurecounts_anno"> + <filter type="data_meta" key="dbkey" ref="alignment" column="0"/> + </options> </param> </when> - <when value="gtf"> - <conditional name="gtf_source"> - <param name="ref_source" type="select" label="Gene annotation file"> - <option value="cached">locally cached</option> - <option value="history">in your history</option> - </param> - <when value="cached"> - <param name="reference_gene_sets_builtin" type="select" label="Using locally cached annotation" help="If the annotation file you require is not listed here, please contact the Galaxy administrator"> - <options from_data_table="gene_sets"> - <filter type="sort_by" column="1" /> - <validator type="no_options" message="No annotations are available." /> - </options> - </param> - </when> - <when value="history"> - <param name="reference_gene_sets" - format="gff,gtf,gff3" - type="data" - label="Gene annotation file" - help="The program assumes that the provided annotation file is in GTF format. 
Make sure that the gene annotation file corresponds to the same reference genome as used for the alignment" /> - </when> - </conditional> + <when value="cached"> + <param name="reference_gene_sets_builtin" type="select" label="Using locally cached annotation" help="If the annotation file you require is not listed here, please contact the Galaxy administrator"> + <options from_data_table="gene_sets"> + <filter type="data_meta" key="dbkey" ref="alignment" column="0"/> + </options> + <validator type="no_options" message="An annotation file is not available for the build associated with the selected input file"/> + </param> + </when> + <when value="history"> + <param name="reference_gene_sets" + format="gff,gtf,gff3" + type="data" + label="Gene annotation file" + help="The program assumes that the provided annotation file is in GTF format. Make sure that the gene annotation file corresponds to the same reference genome as used for the alignment"> + <options> + <filter type="data_meta" key="dbkey" ref="alignment"/> + </options> + <validator type="no_options" message="The current history does not include a dataset with the build associated with the selected input file"/> + </param> </when> </conditional> @@ -270,7 +271,7 @@ label="On feature level" help="If specified, read summarization will be performed at the feature level. By default (-f is not specified), the read summarization is performed at the meta-feature level." 
/> - <param name ="contribute_to_multiple_features" + <param name="contribute_to_multiple_features" type="boolean" truevalue=" -O" falsevalue="" @@ -475,12 +476,11 @@ </outputs> <tests> <test expect_num_outputs="4"> - <param name="alignment" value="featureCounts_input1.bam" ftype="bam" /> - <param name="anno_select" value="gtf"/> - <param name="reference_gene_sets" value="featureCounts_guide.gff" ftype="gff" /> + <param name="alignment" value="featureCounts_input1.bam" ftype="bam" dbkey="hg38" /> + <param name="anno_select" value="history"/> + <param name="reference_gene_sets" value="featureCounts_guide.gff" ftype="gff" dbkey="hg38" /> <param name="format" value="tabdel_short_noheader" /> <param name="include_feature_length_file" value="true"/> - <param name="ref_source" value="history" /> <param name="count_exon_exon_junction_reads" value="-J"/> <output name="output_short" file="output_1_short.tab"> <metadata name="column_names" value="Geneid,featureCounts_input1.bam"/> @@ -493,12 +493,11 @@ </output> </test> <test expect_num_outputs="3"> - <param name="alignment" value="featureCounts_input1.bam" ftype="bam" /> - <param name="anno_select" value="gtf"/> - <param name="reference_gene_sets" value="featureCounts_guide.gff" ftype="gff" /> + <param name="alignment" value="featureCounts_input1.bam" ftype="bam" dbkey="hg38" /> + <param name="anno_select" value="history"/> + <param name="reference_gene_sets" value="featureCounts_guide.gff" ftype="gff" dbkey="hg38" /> <param name="format" value="tabdel_medium" /> <param name="include_feature_length_file" value="true"/> - <param name="ref_source" value="history" /> <output name="output_medium" file="output_1_medium.tab"> <metadata name="column_names" value="Geneid,featureCounts_input1.bam,Length"/> </output> @@ -507,12 +506,11 @@ </output> </test> <test expect_num_outputs="3"> - <param name="alignment" value="featureCounts_input1.bam" ftype="bam" /> - <param name="anno_select" value="gtf"/> - <param 
name="reference_gene_sets" value="featureCounts_guide.gff" ftype="gff" /> + <param name="alignment" value="featureCounts_input1.bam" ftype="bam" dbkey="hg38" /> + <param name="anno_select" value="history"/> + <param name="reference_gene_sets" value="featureCounts_guide.gff" ftype="gff" dbkey="hg38" /> <param name="format" value="tabdel_full" /> <param name="include_feature_length_file" value="true"/> - <param name="ref_source" value="history" /> <output name="output_full" file="output_1_full.tab"> <metadata name="column_names" value="Geneid,Chr,Start,End,Strand,Length,featureCounts_input1.bam"/> </output> @@ -524,12 +522,11 @@ </output> </test> <test expect_num_outputs="4"> - <param name="alignment" value="featureCounts_input1.bam" ftype="bam" /> - <param name="anno_select" value="gtf"/> - <param name="reference_gene_sets" value="featureCounts_guide.gff" ftype="gff" /> + <param name="alignment" value="featureCounts_input1.bam" ftype="bam" dbkey="hg38" /> + <param name="anno_select" value="history"/> + <param name="reference_gene_sets" value="featureCounts_guide.gff" ftype="gff" dbkey="hg38" /> <param name="format" value="tabdel_short" /> <param name="include_feature_length_file" value="true"/> - <param name="ref_source" value="history" /> <param name="count_exon_exon_junction_reads" value="-J"/> <output name="output_short" file="output_1_short_with_header.tab"> <metadata name="column_names" value="Geneid,featureCounts_input1.bam"/> @@ -541,17 +538,29 @@ <metadata name="column_names" value="PrimaryGene,SecondaryGene,Site1_chr,Site1_location,Site1_strand,Site2_chr,Site2_location,Site2_strand,featureCounts_input1.bam"/> </output> </test> - <!-- Ensure built-in annotation works --> + <!-- Ensure featureCounts built-in annotation works --> <test expect_num_outputs="2"> - <param name="alignment" value="pairend_strandspecific_51mer_hg19_chr1_1-100000.bam" ftype="bam" /> + <param name="alignment" value="pairend_strandspecific_51mer_hg19_chr1_1-100000.bam" ftype="bam" 
dbkey="hg19" /> <param name="anno_select" value="builtin"/> <param name="format" value="tabdel_short" /> - <param name="genome" value="hg19" /> <output name="output_short" file="output_builtin_hg19.tab"> <metadata name="column_names" value="Geneid,pairend_strandspecific_51mer_hg19_chr1_1-100000.bam"/> </output> <output name="output_summary" file="output_summary_builtin_hg19.tab"/> </test> + <!-- Ensure cached GTFs work --> + <test expect_num_outputs="3"> + <param name="alignment" value="featureCounts_input1.bam" ftype="bam" dbkey="hg38" /> + <param name="anno_select" value="cached"/> + <param name="format" value="tabdel_medium" /> + <param name="include_feature_length_file" value="true"/> + <output name="output_medium" file="output_1_medium.tab"> + <metadata name="column_names" value="Geneid,featureCounts_input1.bam,Length"/> + </output> + <output name="output_summary" file="output_1_summary.tab"> + <metadata name="column_names" value="Status,featureCounts_input1.bam"/> + </output> + </test> </tests> <help><![CDATA[
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/gene_sets.loc Fri Mar 16 14:05:55 2018 -0400 @@ -0,0 +1,1 @@ +hg38 hg38 hg38GTF ${__HERE__}/ref.gtf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/gene_sets.loc.sample Fri Mar 16 14:05:55 2018 -0400 @@ -0,0 +1,14 @@ +# This is a sample file distributed with featureCounts that enables it and other tools to use gene/exon annotations in the GFF/GTF format. +# +# The gene_sets.loc file syntax is: +#<unique_build_id> <dbkey> <display_name> <path> +# +# Please ensure that the above fields are tab separated. +# +# In case you have TWO or MORE providers PER dbkey, the one mentioned +# first in the file, should have the "default" priority. +# +#Example: +# +#Homo_sapiens.GRCh37.74 hg19 GRCh37 (hg19) annotation from Ensembl, release 74 /depot/data2/galaxy/hg19/gene_sets/Homo_sapiens.GRCh37.74.gtf +#Homo_sapiens.NCBI36.54 hg18 hg18 annotation from Ensembl, release 54 /depot/data2/galaxy/hg18/gene_sets/Homo_sapiens.NCBI36.54.gtf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/featurecounts_anno.loc Fri Mar 16 14:05:55 2018 -0400 @@ -0,0 +1,11 @@ +# This is a file distributed with featureCounts that enables use of featureCounts built-in RefSeq annotation. +# +# This .loc file syntax is 2 tab-separated columns: + +# <unique_build_id> <dbkey> +# + +hg38 hg38 +hg19 hg19 +mm10 mm10 +mm9 mm9
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/gene_sets.loc Fri Mar 16 14:05:55 2018 -0400 @@ -0,0 +1,1 @@ +hg38 hg38 hg38GTF ${__HERE__}/featureCounts_guide.gff
--- a/test-data/output_1_jcounts_with_header.tab Sun Jan 14 09:23:49 2018 -0500 +++ b/test-data/output_1_jcounts_with_header.tab Fri Mar 16 14:05:55 2018 -0400 @@ -1,1 +1,1 @@ -PrimaryGene SecondaryGenes Site1_chr Site1_location Site1_strand Site2_chr Site2_location Site2_strand featureCounts_input1.bam \ No newline at end of file +PrimaryGene SecondaryGenes Site1_chr Site1_location Site1_strand Site2_chr Site2_location Site2_strand featureCounts_input1.bam
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/featurecounts_anno.loc.sample Fri Mar 16 14:05:55 2018 -0400 @@ -0,0 +1,11 @@ +# This is a file distributed with featureCounts that enables use of featureCounts built-in RefSeq annotation. +# +# This .loc file syntax is 2 tab-separated columns: +# +# <unique_build_id> <dbkey> +# + +hg38 hg38 +hg19 hg19 +mm10 mm10 +mm9 mm9
--- a/tool_data_table_conf.xml.sample Sun Jan 14 09:23:49 2018 -0500 +++ b/tool_data_table_conf.xml.sample Fri Mar 16 14:05:55 2018 -0400 @@ -4,4 +4,9 @@ <columns>value, dbkey, name, path</columns> <file path="tool-data/gene_sets.loc" /> </table> + <!-- Location of featureCounts built-in annotation keys--> + <table name="featurecounts_anno" comment_char="#"> + <columns>value, dbkey</columns> + <file path="tool-data/featurecounts_anno.loc" /> + </table> </tables>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_data_table_conf.xml.test Fri Mar 16 14:05:55 2018 -0400 @@ -0,0 +1,10 @@ +<tables> + <table name="gene_sets" comment_char="#"> + <columns>value, dbkey, name, path</columns> + <file path="${__HERE__}/test-data/gene_sets.loc" /> + </table> + <table name="featurecounts_anno" comment_char="#"> + <columns>value, dbkey</columns> + <file path="${__HERE__}/test-data/featurecounts_anno.loc" /> + </table> +</tables>