Mercurial > repos > iuc > featurecounts
view featurecounts.xml @ 7:3ce1c701b0df draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/featurecounts commit a705dfd329f2e917d549215715385f5ef5001d17
author | iuc |
---|---|
date | Mon, 30 Oct 2017 14:49:13 -0400 |
parents | 9d60a36b5c6a |
children | 2a8bb8223a45 |
line wrap: on
line source
<tool id="featurecounts" name="featureCounts" version="1.5.3" profile="16.04">
    <!--
        Galaxy wrapper around featureCounts (Subread package).
        Counts reads/fragments of a single SAM/BAM alignment against the features
        of a GFF/GTF annotation and exposes the result as tabular datasets.
    -->
    <description>Measure gene expression in RNA-Seq experiments from SAM or BAM files.</description>
    <requirements>
        <requirement type="package" version="1.5.3">subread</requirement>
    </requirements>

    <!-- featureCounts prints its version on stderr; "grep ." drops the empty lines. -->
    <version_command>featureCounts -v 2&gt;&amp;1 | grep .</version_command>

    <command><![CDATA[
        ## Run featureCounts on the selected alignment; the raw table is written to
        ## "output" in the job working directory and post-processed further below.
        featureCounts
            ## Annotation: either a history dataset or a locally cached gene set
            #if $gtf_source.ref_source=="history":
                -a '$gtf_source.reference_gene_sets'
            #else:
                -a '$gtf_source.reference_gene_sets_builtin.fields.path'
            #end if

            -o "output"
            -T \${GALAXY_SLOTS:-2}

            -t '$extended_parameters.gff_feature_type'
            -g '$extended_parameters.gff_feature_attribute'
            $extended_parameters.summarization_level
            $extended_parameters.contribute_to_multiple_features
            -s $extended_parameters.strand_specificity

            $extended_parameters.multimapping_enabled.multimapping_counts
            #if str($extended_parameters.multimapping_enabled.multimapping_counts) == " -M"
                $extended_parameters.multimapping_enabled.fraction
            #end if

            $extended_parameters.exon_exon_junction_read_counting_enabled.count_exon_exon_junction_reads
            #if str($extended_parameters.exon_exon_junction_read_counting_enabled.count_exon_exon_junction_reads) == "-J"
                ## The reference FASTA is optional; only pass -G when one was selected
                #if $extended_parameters.exon_exon_junction_read_counting_enabled.genome
                    -G '$extended_parameters.exon_exon_junction_read_counting_enabled.genome'
                #end if
            #end if

            $extended_parameters.long_reads

            -Q $extended_parameters.mapping_quality
            $extended_parameters.largest_overlap
            --minOverlap $extended_parameters.min_overlap
            $extended_parameters.read_reduction
            $extended_parameters.primary
            $extended_parameters.ignore_dup
            ## Previously declared as an input but never passed to featureCounts
            $extended_parameters.count_split_alignments_only

            #if str($extended_parameters.read_extension_5p) != "0"
                --readExtension5 $extended_parameters.read_extension_5p
            #end if

            #if str($extended_parameters.read_extension_3p) != "0"
                --readExtension3 $extended_parameters.read_extension_3p
            #end if

            $pe_parameters.fragment_counting_enabled.fragment_counting
            #if str($pe_parameters.fragment_counting_enabled.fragment_counting) == " -p"
                $pe_parameters.fragment_counting_enabled.check_distance_enabled.check_distance
                #if str($pe_parameters.fragment_counting_enabled.check_distance_enabled.check_distance) == " -P"
                    -d $pe_parameters.fragment_counting_enabled.check_distance_enabled.minimum_fragment_length
                    -D $pe_parameters.fragment_counting_enabled.check_distance_enabled.maximum_fragment_length
                #end if
            #end if

            $pe_parameters.only_both_ends
            $pe_parameters.exclude_chimerics

            '${alignment}'

        ## Removal of comment and column-header line
        && grep -v "^#" "output" | tail -n+2 > body.txt

        ## Set the right columns for the tabular formats
        #if $format.value == "tabdel_medium"
            && cut -f 1,7 body.txt > expression_matrix.txt

            ## cut always emits columns in file order: -f 1,7,6 would return columns 1,6,7.
            ## Thus the gene length column (last output column) has to be added separately
            && cut -f 6 body.txt > gene_lengths.txt
            && paste expression_matrix.txt gene_lengths.txt > expression_matrix.txt.bak
            && mv -f expression_matrix.txt.bak '${output_medium}'
        #elif $format.value == "tabdel_short"
            && cut -f 1,7 body.txt > '${output_short}'
        #else
            && cp body.txt '${output_full}'
        #end if

        ## Optional two-column feature/length table
        #if str($include_feature_length_file) == "true"
            && cut -f 1,6 body.txt > '${output_feature_lengths}'
        #end if

        ## Junction counts only exist when -J was requested; drop their header line
        #if str($extended_parameters.exon_exon_junction_read_counting_enabled.count_exon_exon_junction_reads) == "-J"
            && tail -n+2 'output.jcounts' > '${output_jcounts}'
        #end if

        ## The summary is always produced; drop its header line (column names are set as metadata)
        && tail -n+2 'output.summary' > '${output_summary}'
    ]]></command>

    <inputs>
        <param name="alignment"
               type="data"
               multiple="false"
               format="bam,sam"
               label="Alignment file"
               help="The input alignment file in which the gene expression has to be counted. The file can be in SAM or BAM format." />

        <!-- Annotation source: a cached gene set from the data table or a history dataset. -->
        <conditional name="gtf_source">
            <param name="ref_source" type="select" label="Gene annotation file">
                <option value="cached">locally cached</option>
                <option value="history">in your history</option>
            </param>
            <when value="cached">
                <param name="reference_gene_sets_builtin" type="select" label="Using locally cached annotation" help="If the annotation file you require is not listed here, please contact the Galaxy administrator">
                    <options from_data_table="gene_sets">
                        <filter type="sort_by" column="1" />
                        <validator type="no_options" message="No annotations are available." />
                    </options>
                </param>
            </when>
            <when value="history">
                <param name="reference_gene_sets"
                       format="gff,gtf,gff3"
                       type="data"
                       label="Gene annotation file"
                       help="The program assumes that the provided annotation file is in GTF format. Make sure that the gene annotation file corresponds to the same reference genome as used for the alignment" />
            </when>
        </conditional>

        <!-- Selects which of the three count-table outputs is created (see the output filters). -->
        <param name="format"
               type="select"
               label="Output format"
               help="The output format will be tabular, select the preferred columns here">
            <option value="tabdel_short" selected="true">Gene-ID "\t" read-count (DESeq2 IUC wrapper compatible)</option>
            <option value="tabdel_medium">Gene-ID "\t" read-count "\t" gene-length</option>
            <option value="tabdel_full">featureCounts 1.4.0+ default (includes regions provided by the GTF file)</option>
        </param>

        <param name="include_feature_length_file"
               type="boolean"
               truevalue="true"
               falsevalue="false"
               checked="false"
               label="Create gene-length file"
               help="Creates a tabular file that contains the effective (nucleotides used for counting reads) length of the feature; might be useful for estimating FPKM/RPKM" />

        <section name="pe_parameters" title="Options for paired-end reads">
            <!-- Option values keep their leading space: the command template compares against them literally. -->
            <conditional name="fragment_counting_enabled">
                <param name="fragment_counting"
                       type="select"
                       argument="-p"
                       label="Count fragments instead of reads"
                       help="If specified, fragments (or templates) will be counted instead of reads.">
                    <option value="" selected="true">Disabled; all reads/mates will be counted individually</option>
                    <option value=" -p">Enabled; fragments (or templates) will be counted instead of reads</option>
                </param>
                <when value=" -p">
                    <conditional name="check_distance_enabled">
                        <param name="check_distance"
                               type="boolean"
                               truevalue=" -P"
                               falsevalue=""
                               argument="-P"
                               label="Check paired-end distance"
                               help="If specified, paired-end distance will be checked when assigning fragments to meta-features or features. This option is only applicable when -p (Count fragments instead of reads) is specified. The distance thresholds should be specified using -d and -D (minimum and maximum fragment/template length) options." />
                        <when value=" -P">
                            <param name="minimum_fragment_length"
                                   type="integer"
                                   value="50"
                                   argument="-d"
                                   label="Minimum fragment/template length." />
                            <param name="maximum_fragment_length"
                                   type="integer"
                                   value="600"
                                   argument="-D"
                                   label="Maximum fragment/template length." />
                        </when>
                        <when value="" />
                    </conditional>
                </when>
                <when value="" />
            </conditional>

            <param name="only_both_ends"
                   type="boolean"
                   truevalue=" -B"
                   falsevalue=""
                   argument="-B"
                   label="Only allow fragments with both reads aligned"
                   help="If specified, only fragments that have both ends successfully aligned will be considered for summarization. This option is only applicable for paired-end reads." />

            <param name="exclude_chimerics"
                   type="boolean"
                   truevalue=" -C"
                   falsevalue=""
                   argument="-C"
                   checked="true"
                   label="Exclude chimeric fragments"
                   help="If specified, the chimeric fragments (those fragments that have their two ends aligned to different chromosomes) will NOT be included for summarization. This option is only applicable for paired-end read data." />
        </section>

        <section name="extended_parameters" title="Advanced options">
            <param name="gff_feature_type"
                   type="text"
                   value="exon"
                   argument="-t"
                   label="GFF feature type filter"
                   help="Specify the feature type. Only rows which have the matched feature type in the provided GTF annotation file will be included for read counting. `exon' by default." />

            <param name="gff_feature_attribute"
                   type="text"
                   value="gene_id"
                   argument="-g"
                   label="GFF gene identifier"
                   help="Specify the attribute type used to group features (eg. exons) into meta-features (eg. genes), when GTF annotation is provided. `gene_id' by default. This attribute type is usually the gene identifier. This argument is useful for the meta-feature level summarization." />

            <param name="summarization_level"
                   type="boolean"
                   truevalue=" -f"
                   falsevalue=""
                   argument="-f"
                   label="On feature level"
                   help="If specified, read summarization will be performed at the feature level. By default (-f is not specified), the read summarization is performed at the meta-feature level." />

            <param name="contribute_to_multiple_features"
                   type="boolean"
                   truevalue=" -O"
                   falsevalue=""
                   argument="-O"
                   label="Allow read to contribute to multiple features"
                   help="If specified, reads (or fragments if -p is specified) will be allowed to be assigned to more than one matched meta-feature (or matched feature if -f is specified)" />

            <param name="strand_specificity"
                   type="select"
                   label="Strand specificity of the protocol"
                   argument="-s"
                   help="Indicate if strand-specific read counting should be performed.">
                <option value="0" selected="true">Unstranded</option>
                <option value="1">Stranded (forwards)</option>
                <option value="2">Stranded (reverse)</option>
            </param>

            <conditional name="multimapping_enabled">
                <param name="multimapping_counts"
                       type="select"
                       argument="-M"
                       label="Count multi-mapping reads/fragments"
                       help="If specified, multi-mapping reads/fragments will be counted (ie. a multi-mapping read will be counted up to N times if it has N reported mapping locations). The program uses the `NH' tag to find multi-mapping reads.">
                    <option value="" selected="true">Disabled; multi-mapping reads are excluded (default)</option>
                    <option value=" -M">Enabled; multi-mapping reads are included</option>
                </param>
                <when value=" -M">
                    <param name="fraction"
                           type="boolean"
                           truevalue="--fraction"
                           falsevalue=""
                           argument="--fraction"
                           label="Assign fractions to multimapping reads"
                           help="If specified, a fractional count 1/n will be generated for each multi-mapping read, where n is the number of alignments (indicated by 'NH' tag) reported for the read. This option must be used together with the '-M' option." />
                </when>
                <when value="" />
            </conditional>

            <param name="mapping_quality"
                   type="integer"
                   value="12"
                   argument="-Q"
                   label="Minimum mapping quality per read"
                   help="The minimum mapping quality score a read must satisfy in order to be counted. For paired-end reads, at least one end should satisfy this criteria. 12 by default." />

            <!-- Enabling this also creates the junction counts output (see the output_jcounts filter). -->
            <conditional name="exon_exon_junction_read_counting_enabled">
                <param name="count_exon_exon_junction_reads"
                       argument="-J"
                       type="boolean"
                       truevalue="-J"
                       falsevalue=""
                       label="Exon-exon junctions"
                       help="If specified, reads supporting each exon-exon junction will be counted" />
                <when value="-J">
                    <param name="genome"
                           argument="-G"
                           type="data"
                           format="fasta"
                           optional="true"
                           label="Reference sequence file"
                           help="The FASTA-format file that contains the reference sequences used in read mapping can be used to improve read counting for junctions" />
                </when>
                <when value="" />
            </conditional>

            <param name="long_reads"
                   argument="-L"
                   type="boolean"
                   truevalue="-L"
                   falsevalue=""
                   label="Long reads"
                   help="If specified, long reads such as Nanopore and PacBio reads will be counted. Long read counting can only run in one thread and only reads (not read-pairs) can be counted." />

            <param name="largest_overlap"
                   type="boolean"
                   truevalue=" --largestOverlap"
                   falsevalue=""
                   argument="--largestOverlap"
                   label="Largest overlap"
                   help="If specified, reads (or fragments) will be assigned to the target that has the largest number of overlapping bases" />

            <param name="min_overlap"
                   type="integer"
                   value="1"
                   argument="--minOverlap"
                   label="Minimum overlap"
                   help="Specify the minimum required number of overlapping bases between a read (or a fragment) and a feature. 1 by default. If a negative value is provided, the read will be extended from both ends." />

            <!-- The read extension flags are only passed to featureCounts when the value differs from 0. -->
            <param name="read_extension_5p"
                   type="integer"
                   value="0"
                   argument="--readExtension5"
                   label="Read 5' extension"
                   help="Reads are extended upstream by ... bases from their 5' end" />

            <param name="read_extension_3p"
                   type="integer"
                   value="0"
                   argument="--readExtension3"
                   label="Read 3' extension"
                   help="Reads are extended downstream by ... bases from their 3' end" />

            <param name="read_reduction"
                   type="select"
                   label="Reduce read to single position"
                   argument="--read2pos"
                   help="The read is reduced to its 5' most base or 3' most base. Read summarization is then performed based on the single base the read is reduced to.">
                <option value="" selected="true">Leave the read as it is</option>
                <option value="--read2pos 5">Reduce it to the 5' end</option>
                <option value="--read2pos 3">Reduce it to the 3' end</option>
            </param>

            <param name="primary"
                   type="boolean"
                   truevalue=" --primary"
                   falsevalue=""
                   argument="--primary"
                   label="Only count primary alignments"
                   help="If specified, only primary alignments will be counted. Primary and secondary alignments are identified using bit 0x100 in the FLAG field of SAM/BAM files. All primary alignments in a dataset will be counted regardless of whether they are from multi-mapping reads or not ('-M' is ignored)." />

            <param name="ignore_dup"
                   type="boolean"
                   truevalue=" --ignoreDup"
                   falsevalue=""
                   argument="--ignoreDup"
                   label="Ignore reads marked as duplicate"
                   help="If specified, reads that were marked as duplicates will be ignored. Bit 0x400 in the FLAG field of a SAM/BAM file is used for identifying duplicate reads. In paired end data, the entire read pair will be ignored if at least one end is found to be a duplicate read." />

            <param name="count_split_alignments_only"
                   type="boolean"
                   truevalue=" --countSplitAlignmentsOnly"
                   falsevalue=""
                   argument="--countSplitAlignmentsOnly"
                   label="Ignore unspliced alignments"
                   help="If specified, only split alignments (CIGAR strings containing the letter `N') will be counted. All the other alignments will be ignored. An example of split alignments are exon-spanning reads in RNA-seq data." />
        </section>
    </inputs>

    <!--
        Exactly one of output_medium / output_short / output_full is created, depending
        on the "format" select. Header lines are stripped by the command, so the column
        names are attached as dataset metadata instead.
    -->
    <outputs>
        <data format="tabular" name="output_medium" label="${tool.name} on ${on_string}">
            <filter>format == "tabdel_medium"</filter>
            <actions>
                <action name="column_names" type="metadata" default="Geneid,${alignment.name},Length" />
            </actions>
        </data>

        <data format="tabular" name="output_short" label="${tool.name} on ${on_string}">
            <filter>format == "tabdel_short"</filter>
            <actions>
                <action name="column_names" type="metadata" default="Geneid,${alignment.name}" />
            </actions>
        </data>

        <data format="tabular" name="output_full" label="${tool.name} on ${on_string}: count table">
            <filter>format == "tabdel_full"</filter>
            <actions>
                <action name="column_names" type="metadata" default="Geneid,Chr,Start,End,Strand,Length,${alignment.name}" />
            </actions>
        </data>

        <data format="tabular" name="output_summary" hidden="true" label="${tool.name} on ${on_string}: summary">
            <actions>
                <action name="column_names" type="metadata" default="Status,${alignment.name}" />
            </actions>
        </data>

        <data format="tabular" name="output_feature_lengths" label="${tool.name} on ${on_string}: feature lengths">
            <filter>include_feature_length_file</filter>
            <actions>
                <action name="column_names" type="metadata" default="Feature,Length" />
            </actions>
        </data>

        <data name="output_jcounts" format="tabular" label="${tool.name} on ${on_string}: junction counts">
            <filter>extended_parameters['exon_exon_junction_read_counting_enabled']['count_exon_exon_junction_reads']</filter>
            <actions>
                <action name="column_names" type="metadata" default="PrimaryGene,SecondaryGene,Site1_chr,Site1_location,Site1_strand,Site2_chr,Site2_location,Site2_strand,${alignment.name}" />
            </actions>
        </data>
    </outputs>

    <tests>
        <!-- Short format plus feature lengths and junction counts: 4 datasets in total. -->
        <test expect_num_outputs="4">
            <param name="alignment" value="featureCounts_input1.bam" ftype="bam" />
            <param name="reference_gene_sets" value="featureCounts_guide.gff" ftype="gff" />
            <param name="format" value="tabdel_short" />
            <param name="include_feature_length_file" value="true"/>
            <param name="ref_source" value="history" />
            <param name="count_exon_exon_junction_reads" value="-J"/>
            <output name="output_short" file="output_1_short.tab">
                <metadata name="column_names" value="Geneid,featureCounts_input1.bam"/>
            </output>
            <output name="output_summary" file="output_1_summary.tab">
                <metadata name="column_names" value="Status,featureCounts_input1.bam"/>
            </output>
            <output name="output_jcounts" file="output_1_jcounts.tab">
                <metadata name="column_names" value="PrimaryGene,SecondaryGene,Site1_chr,Site1_location,Site1_strand,Site2_chr,Site2_location,Site2_strand,featureCounts_input1.bam"/>
            </output>
        </test>

        <!-- Medium format (counts plus gene length) with the feature length file. -->
        <test expect_num_outputs="3">
            <param name="alignment" value="featureCounts_input1.bam" ftype="bam" />
            <param name="reference_gene_sets" value="featureCounts_guide.gff" ftype="gff" />
            <param name="format" value="tabdel_medium" />
            <param name="include_feature_length_file" value="true"/>
            <param name="ref_source" value="history" />
            <output name="output_medium" file="output_1_medium.tab">
                <metadata name="column_names" value="Geneid,featureCounts_input1.bam,Length"/>
            </output>
            <output name="output_summary" file="output_1_summary.tab">
                <metadata name="column_names" value="Status,featureCounts_input1.bam"/>
            </output>
        </test>

        <!-- Full featureCounts table with the feature length file. -->
        <test expect_num_outputs="3">
            <param name="alignment" value="featureCounts_input1.bam" ftype="bam" />
            <param name="reference_gene_sets" value="featureCounts_guide.gff" ftype="gff" />
            <param name="format" value="tabdel_full" />
            <param name="include_feature_length_file" value="true"/>
            <param name="ref_source" value="history" />
            <output name="output_full" file="output_1_full.tab">
                <metadata name="column_names" value="Geneid,Chr,Start,End,Strand,Length,featureCounts_input1.bam"/>
            </output>
            <output name="output_summary" file="output_1_summary.tab">
                <metadata name="column_names" value="Status,featureCounts_input1.bam"/>
            </output>
            <output name="output_feature_lengths" file="output_feature_lengths.tab">
                <metadata name="column_names" value="Feature,Length"/>
            </output>
        </test>
    </tests>

    <help><![CDATA[
featureCounts
#############

Overview
--------
FeatureCounts is a light-weight read counting program written entirely in the C programming language. It can be used to count both gDNA-seq and RNA-seq reads for genomic features in SAM/BAM files.

Input formats
-------------
Alignments should be provided in either:

- SAM format, http://samtools.sourceforge.net/samtools.shtml#5
- BAM format

Gene regions should be provided in the GFF/GTF format:

- http://genome.ucsc.edu/FAQ/FAQformat.html#format3
- http://www.ensembl.org/info/website/upload/gff.html

Output format
-------------
FeatureCounts produces a table containing counted reads, per gene, per row. Optionally the last column can be set to be the effective gene-length. These tables are compatible with the DESeq2 Galaxy wrapper by IUC. Column names are added as metadata object.
    ]]></help>

    <citations>
        <citation type="doi">10.1093/bioinformatics/btt656</citation>
    </citations>
</tool>