Mercurial > repos > iuc > featurecounts

--- a/featurecounts.xml	Sun Jan 14 09:23:49 2018 -0500
+++ b/featurecounts.xml	Fri Mar 16 14:05:55 2018 -0400
@@ -1,4 +1,4 @@
-<tool id="featurecounts" name="featureCounts" version="1.6.0.2" profile="16.04">
+<tool id="featurecounts" name="featureCounts" version="1.6.0.3" profile="16.04">
     <description>Measure gene expression in RNA-Seq experiments from SAM or BAM files.</description>
     <requirements>
         <requirement type="package" version="1.6.0">subread</requirement>
@@ -6,21 +6,22 @@

     <version_command>featureCounts -v 2&gt;&amp;1 | grep .</version_command>
     <command detect_errors="exit_code"><![CDATA[
+
         ## Export fc path for its built-in annotation
+
         export FC_PATH=\$(command -v featureCounts | sed 's@/bin/featureCounts$@@') &&

         ## Check whether all alignments are from the same type (bam || sam)
         featureCounts

-            #if $anno.anno_select=="gtf":
-                #if $anno.gtf_source.ref_source=="history":
-                    -a '$anno.gtf_source.reference_gene_sets'
-                #else:
-                    -a '$anno.gtf_source.reference_gene_sets_builtin.fields.path'
-                #end if
+            #if $anno.anno_select=="history":
+                -a '$anno.reference_gene_sets'
+                -F "GTF"
+            #elif $anno.anno_select=="cached":
+                -a '$anno.reference_gene_sets_builtin.fields.path'
                 -F "GTF"
             #elif $anno.anno_select=="builtin":
-                -a \${FC_PATH}/annotation/${anno.genome}_RefSeq_exon.txt
+                -a \${FC_PATH}/annotation/${anno.bgenome}_RefSeq_exon.txt
                 -F "SAF"
             #end if

@@ -131,42 +132,42 @@
                multiple="false"
                format="bam,sam"
                label="Alignment file"
-               help="The input alignment file(s) where the gene expression has to be counted. The file can have a SAM or BAM format; but ALL files must be in the same format" />
+               help="The input alignment file(s) where the gene expression has to be counted. The file can have a SAM or BAM format; but ALL files must be in the same format. These files must have the database/genome attribute already specified e.g. hg38, not the default: ?" >
+               <validator type="unspecified_build"/>
+        </param>
+
         <conditional name="anno">
             <param name="anno_select" type="select" label="Gene annotation file">
                 <option value="builtin">featureCounts built-in</option>
-                <option value="gtf">GTF file</option>
+                <option value="cached" selected="True">locally cached</option>
+                <option value="history">in your history</option>
             </param>
             <when value="builtin">
-                <param name="genome" type="select" label="Select built-in genome" help="Built-in gene annotations for genomes hg38, hg19, mm10 and mm9 are included in featureCounts">
-                    <option value="hg38">hg38</option>
-                    <option value="hg19">hg19</option>
-                    <option value="mm10">mm10</option>
-                    <option value="mm9">mm9</option>
+                <param name="bgenome" type="select" label="Select built-in genome" help="Built-in gene annotations for genomes hg38, hg19, mm10 and mm9 are included in featureCounts">
+                    <options from_data_table="featurecounts_anno">
+                        <filter type="data_meta" key="dbkey" ref="alignment" column="0"/>
+                    </options>
                 </param>
             </when>
-            <when value="gtf">
-                <conditional name="gtf_source">
-                    <param name="ref_source" type="select" label="Gene annotation file">
-                        <option value="cached">locally cached</option>
-                        <option value="history">in your history</option>
-                    </param>
-                    <when value="cached">
-                        <param name="reference_gene_sets_builtin" type="select" label="Using locally cached annotation" help="If the annotation file you require is not listed here, please contact the Galaxy administrator">
-                            <options from_data_table="gene_sets">
-                                <filter type="sort_by" column="1" />
-                                <validator type="no_options" message="No annotations are available." />
-                            </options>
-                        </param>
-                    </when>
-                    <when value="history">
-                        <param name="reference_gene_sets"
-                               format="gff,gtf,gff3"
-                               type="data"
-                               label="Gene annotation file"
-                               help="The program assumes that the provided annotation file is in GTF format. Make sure that the gene annotation file corresponds to the same reference genome as used for the alignment" />
-                    </when>
-                </conditional>
+            <when value="cached">
+                <param name="reference_gene_sets_builtin" type="select" label="Using locally cached annotation" help="If the annotation file you require is not listed here, please contact the Galaxy administrator">
+                    <options from_data_table="gene_sets">
+                        <filter type="data_meta" key="dbkey" ref="alignment" column="0"/>
+                    </options>
+                    <validator type="no_options" message="An annotation file is not available for the build associated with the selected input file"/>
+                </param>
+            </when>
+            <when value="history">
+                <param name="reference_gene_sets"
+                       format="gff,gtf,gff3"
+                       type="data"
+                       label="Gene annotation file"
+                       help="The program assumes that the provided annotation file is in GTF format. Make sure that the gene annotation file corresponds to the same reference genome as used for the alignment">
+                    <options>
+                        <filter type="data_meta" key="dbkey" ref="alignment"/>
+                    </options>
+                    <validator type="no_options" message="The current history does not include a dataset with the build associated with the selected input file"/>
+                </param>
             </when>
         </conditional>

@@ -270,7 +271,7 @@
                 label="On feature level"
                 help="If specified, read summarization will be performed at the feature level. By default (-f is not specified), the read summarization is performed at the meta-feature level." />

-            <param name ="contribute_to_multiple_features"
+            <param name="contribute_to_multiple_features"
                 type="boolean"
                 truevalue=" -O"
                 falsevalue=""
@@ -475,12 +476,11 @@
     </outputs>
     <tests>
         <test expect_num_outputs="4">
-            <param name="alignment" value="featureCounts_input1.bam" ftype="bam" />
-            <param name="anno_select" value="gtf"/>
-            <param name="reference_gene_sets" value="featureCounts_guide.gff" ftype="gff" />
+            <param name="alignment" value="featureCounts_input1.bam" ftype="bam" dbkey="hg38" />
+            <param name="anno_select" value="history"/>
+            <param name="reference_gene_sets" value="featureCounts_guide.gff" ftype="gff" dbkey="hg38" />
             <param name="format" value="tabdel_short_noheader" />
             <param name="include_feature_length_file" value="true"/>
-            <param name="ref_source" value="history" />
             <param name="count_exon_exon_junction_reads" value="-J"/>
             <output name="output_short" file="output_1_short.tab">
                 <metadata name="column_names" value="Geneid,featureCounts_input1.bam"/>
@@ -493,12 +493,11 @@
             </output>
         </test>
         <test expect_num_outputs="3">
-            <param name="alignment" value="featureCounts_input1.bam" ftype="bam" />
-            <param name="anno_select" value="gtf"/>
-            <param name="reference_gene_sets" value="featureCounts_guide.gff" ftype="gff" />
+            <param name="alignment" value="featureCounts_input1.bam" ftype="bam" dbkey="hg38" />
+            <param name="anno_select" value="history"/>
+            <param name="reference_gene_sets" value="featureCounts_guide.gff" ftype="gff" dbkey="hg38" />
             <param name="format" value="tabdel_medium" />
             <param name="include_feature_length_file" value="true"/>
-            <param name="ref_source" value="history" />
             <output name="output_medium" file="output_1_medium.tab">
                 <metadata name="column_names" value="Geneid,featureCounts_input1.bam,Length"/>
             </output>
@@ -507,12 +506,11 @@
             </output>
         </test>
         <test expect_num_outputs="3">
-            <param name="alignment" value="featureCounts_input1.bam" ftype="bam" />
-            <param name="anno_select" value="gtf"/>
-            <param name="reference_gene_sets" value="featureCounts_guide.gff" ftype="gff" />
+            <param name="alignment" value="featureCounts_input1.bam" ftype="bam" dbkey="hg38" />
+            <param name="anno_select" value="history"/>
+            <param name="reference_gene_sets" value="featureCounts_guide.gff" ftype="gff" dbkey="hg38" />
             <param name="format" value="tabdel_full" />
             <param name="include_feature_length_file" value="true"/>
-            <param name="ref_source" value="history" />
             <output name="output_full" file="output_1_full.tab">
                 <metadata name="column_names" value="Geneid,Chr,Start,End,Strand,Length,featureCounts_input1.bam"/>
             </output>
@@ -524,12 +522,11 @@
             </output>
         </test>
         <test expect_num_outputs="4">
-            <param name="alignment" value="featureCounts_input1.bam" ftype="bam" />
-            <param name="anno_select" value="gtf"/>
-            <param name="reference_gene_sets" value="featureCounts_guide.gff" ftype="gff" />
+            <param name="alignment" value="featureCounts_input1.bam" ftype="bam" dbkey="hg38" />
+            <param name="anno_select" value="history"/>
+            <param name="reference_gene_sets" value="featureCounts_guide.gff" ftype="gff" dbkey="hg38" />
             <param name="format" value="tabdel_short" />
             <param name="include_feature_length_file" value="true"/>
-            <param name="ref_source" value="history" />
             <param name="count_exon_exon_junction_reads" value="-J"/>
             <output name="output_short" file="output_1_short_with_header.tab">
                 <metadata name="column_names" value="Geneid,featureCounts_input1.bam"/>
@@ -541,17 +538,29 @@
                 <metadata name="column_names" value="PrimaryGene,SecondaryGene,Site1_chr,Site1_location,Site1_strand,Site2_chr,Site2_location,Site2_strand,featureCounts_input1.bam"/>
             </output>
         </test>
-        <!-- Ensure built-in annotation works -->
+        <!-- Ensure featureCounts built-in annotation works -->
         <test expect_num_outputs="2">
-            <param name="alignment" value="pairend_strandspecific_51mer_hg19_chr1_1-100000.bam" ftype="bam" />
+            <param name="alignment" value="pairend_strandspecific_51mer_hg19_chr1_1-100000.bam" ftype="bam" dbkey="hg19" />
             <param name="anno_select" value="builtin"/>
             <param name="format" value="tabdel_short" />
-            <param name="genome" value="hg19" />
             <output name="output_short" file="output_builtin_hg19.tab">
                 <metadata name="column_names" value="Geneid,pairend_strandspecific_51mer_hg19_chr1_1-100000.bam"/>
             </output>
             <output name="output_summary" file="output_summary_builtin_hg19.tab"/>
         </test>
+        <!-- Ensure cached GTFs work -->
+        <test expect_num_outputs="3">
+            <param name="alignment" value="featureCounts_input1.bam" ftype="bam" dbkey="hg38" />
+            <param name="anno_select" value="cached"/>
+            <param name="format" value="tabdel_medium" />
+            <param name="include_feature_length_file" value="true"/>
+            <output name="output_medium" file="output_1_medium.tab">
+                <metadata name="column_names" value="Geneid,featureCounts_input1.bam,Length"/>
+            </output>
+            <output name="output_summary" file="output_1_summary.tab">
+                <metadata name="column_names" value="Status,featureCounts_input1.bam"/>
+            </output>
+        </test>
     </tests>

     <help><![CDATA[
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gene_sets.loc	Fri Mar 16 14:05:55 2018 -0400
@@ -0,0 +1,1 @@
+hg38	hg38	hg38GTF	${__HERE__}/ref.gtf
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gene_sets.loc.sample	Fri Mar 16 14:05:55 2018 -0400
@@ -0,0 +1,14 @@
+# This is a sample file distributed with featureCounts that enables it and other tools to use gene/exon annotations in the GFF/GTF format.
+#
+# The gene_sets.loc file syntax is:
+#<unique_build_id>	<dbkey>	<display_name>	<path>
+#
+# Please ensure that the above fields are tab separated.
+#
+# In case you have TWO or MORE providers PER dbkey, the one mentioned
+# first in the file, should have the "default" priority.
+#
+#Example:
+#
+#Homo_sapiens.GRCh37.74	hg19	GRCh37 (hg19) annotation from Ensembl, release 74	/depot/data2/galaxy/hg19/gene_sets/Homo_sapiens.GRCh37.74.gtf
+#Homo_sapiens.NCBI36.54	hg18	hg18 annotation from Ensembl, release 54	/depot/data2/galaxy/hg18/gene_sets/Homo_sapiens.NCBI36.54.gtf
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/featurecounts_anno.loc	Fri Mar 16 14:05:55 2018 -0400
@@ -0,0 +1,11 @@
+# This is a file distributed with featureCounts that enables use of featureCounts built-in RefSeq annotation.
+#
+# This .loc file syntax is 2 tab-separated columns:
+#
+# <unique_build_id>	<dbkey>
+#
+
+hg38	hg38
+hg19	hg19
+mm10	mm10
+mm9	mm9
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/gene_sets.loc	Fri Mar 16 14:05:55 2018 -0400
@@ -0,0 +1,1 @@
+hg38	hg38	hg38GTF	${__HERE__}/featureCounts_guide.gff
--- a/test-data/output_1_jcounts_with_header.tab	Sun Jan 14 09:23:49 2018 -0500
+++ b/test-data/output_1_jcounts_with_header.tab	Fri Mar 16 14:05:55 2018 -0400
@@ -1,1 +1,1 @@
-PrimaryGene	SecondaryGenes	Site1_chr	Site1_location	Site1_strand	Site2_chr	Site2_location	Site2_strand	featureCounts_input1.bam
\ No newline at end of file
+PrimaryGene	SecondaryGenes	Site1_chr	Site1_location	Site1_strand	Site2_chr	Site2_location	Site2_strand	featureCounts_input1.bam
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/featurecounts_anno.loc.sample	Fri Mar 16 14:05:55 2018 -0400
@@ -0,0 +1,11 @@
+# This is a file distributed with featureCounts that enables use of featureCounts built-in RefSeq annotation.
+#
+# This .loc file syntax is 2 tab-separated columns:
+#
+# <unique_build_id>	<dbkey>
+#
+
+hg38	hg38
+hg19	hg19
+mm10	mm10
+mm9	mm9
--- a/tool_data_table_conf.xml.sample	Sun Jan 14 09:23:49 2018 -0500
+++ b/tool_data_table_conf.xml.sample	Fri Mar 16 14:05:55 2018 -0400
@@ -4,4 +4,9 @@
         <columns>value, dbkey, name, path</columns>
         <file path="tool-data/gene_sets.loc" />
     </table>
+    <!-- Location of featureCounts built-in annotation keys -->
+    <table name="featurecounts_anno" comment_char="#">
+        <columns>value, dbkey</columns>
+        <file path="tool-data/featurecounts_anno.loc" />
+    </table>
 </tables>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.test	Fri Mar 16 14:05:55 2018 -0400
@@ -0,0 +1,12 @@
+<tables>
+    <!-- Gene/exon annotation files in GFF/GTF format; the tool filters rows by the dbkey column -->
+    <table name="gene_sets" comment_char="#">
+        <columns>value, dbkey, name, path</columns>
+        <file path="${__HERE__}/test-data/gene_sets.loc" />
+    </table>
+    <!-- Keys of the featureCounts built-in annotations; the tool filters rows by the dbkey column -->
+    <table name="featurecounts_anno" comment_char="#">
+        <columns>value, dbkey</columns>
+        <file path="${__HERE__}/test-data/featurecounts_anno.loc" />
+    </table>
+</tables>