Mercurial > repos > iuc > deseq2
view deseq2.xml @ 15:9a616afdbda5 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit 83eb5b2665d87c02b270596f8175499e69061032
author | iuc |
---|---|
date | Sat, 19 May 2018 03:55:48 -0400 |
parents | d0c39b5e78cf |
children | a416957ee305 |
line wrap: on
line source
<tool id="deseq2" name="DESeq2" version="2.11.40.2">
    <description>Determines differentially expressed features from count tables</description>
    <requirements>
        <requirement type="package" version="1.18.1">bioconductor-deseq2</requirement>
        <requirement type="package" version="1.6.0">bioconductor-tximport</requirement>
        <requirement type="package" version="1.30.0">bioconductor-genomicfeatures</requirement>
        <requirement type="package" version="0.6.5">r-ggrepel</requirement>
        <requirement type="package" version="1.0.8">r-pheatmap</requirement>
    </requirements>
    <!-- Each pattern is checked against both stdout and stderr and is reported at level "fatal". -->
    <stdio>
        <regex match="Execution halted" source="both" level="fatal" description="Execution halted." />
        <regex match="Error in" source="both" level="fatal" description="An undefined error occurred, please check your input carefully and contact your administrator." />
        <regex match="Fatal error" source="both" level="fatal" description="An undefined error occurred, please check your input carefully and contact your administrator." />
    </stdio>
    <!-- Prints the R version line followed by the installed DESeq2 package version. -->
    <version_command><![CDATA[
echo $(R --version | grep version | grep -v GNU)", DESeq2 version" $(R --vanilla --slave -e "library(DESeq2); cat(sessionInfo()\$otherPkgs\$DESeq2\$Version)" 2> /dev/null | grep -v -i "WARNING: ")
    ]]></version_command>
    <!--
        Cheetah directives (#if, #set, #for, #import, ...) are line-oriented:
        keep every directive on its own line inside the CDATA section.
    -->
    <command><![CDATA[
## For tximport input, link the selected mapping file under a fixed name in the working directory.
#if $tximport.tximport_selector == 'tximport':
    #if $tximport.mapping_format.mapping_format_selector == 'gtf':
        ln -s '$tximport.mapping_format.gtf_file' mapping.gtf &&
    #else:
        ln -s '$tximport.mapping_format.tabular_file' mapping.txt &&
    #end if
#end if

#import json
#import os
Rscript '${__tool_directory__}/deseq2.R'
    -o '$deseq_out'
    #if $pdf:
        -p '$plots'
    #end if
    #if $normCounts:
        -n '$counts_out'
    #end if
    ## Build two structures that are handed to the R script as JSON:
    ##   temp_factor_names: [[factorName, [{factorLevel: [file paths]}, ...]], ...]
    ##       (the level list of each factor is reversed before being appended)
    ##   filename_to_element_identifiers: {basename of file path: element identifier}
    #set $filename_to_element_identifiers = {}
    #set $temp_factor_names = list()
    #for $factor in $rep_factorName:
        #set $temp_factor = list()
        #for $level in $factor.rep_factorLevel:
            #set $count_files = list()
            #for $file in $level.countsFile:
                $count_files.append(str($file))
                $filename_to_element_identifiers.__setitem__(os.path.basename(str($file)), $file.element_identifier)
            #end for
            $temp_factor.append( {str($level.factorLevel): $count_files} )
        #end for
        $temp_factor.reverse()
        $temp_factor_names.append([str($factor.factorName), $temp_factor])
    #end for
    $header
    -f '#echo json.dumps(temp_factor_names)#'
    -l '#echo json.dumps(filename_to_element_identifiers)#'
    -t $fit_type
    #if $outlier_replace_off:
        -a
    #end if
    #if $outlier_filter_off:
        -b
    #end if
    #if $auto_mean_filter_off:
        -c
    #end if
    #if $many_contrasts:
        -m
    #end if
    #if $tximport.tximport_selector == 'tximport':
        -i
        -y $tximport.txtype
        #if $tximport.mapping_format.mapping_format_selector == 'gtf':
            -x mapping.gtf
        #else:
            -x mapping.txt
        #end if
    #end if
    ]]></command>
    <inputs>
        <!-- One repeat per factor; each factor holds two or more levels, each with its own count files. -->
        <repeat name="rep_factorName" title="Factor" min="1">
            <param name="factorName" type="text" value="FactorName"
                label="Specify a factor name, e.g. effects_drug_x or cancer_markers"
                help="Only letters, numbers and underscores will be retained in this field">
                <sanitizer>
                    <valid initial="string.letters,string.digits"><add value="_" /></valid>
                </sanitizer>
            </param>
            <repeat name="rep_factorLevel" title="Factor level" min="2" default="2">
                <param name="factorLevel" type="text" value="FactorLevel"
                    label="Specify a factor level, typical values could be 'tumor', 'normal', 'treated' or 'control'"
                    help="Only letters, numbers and underscores will be retained in this field">
                    <sanitizer>
                        <valid initial="string.letters,string.digits"><add value="_" /></valid>
                    </sanitizer>
                </param>
                <param name="countsFile" type="data" format="tabular" multiple="true" label="Counts file(s)"/>
            </repeat>
        </repeat>
        <param name="header" type="boolean" truevalue="-H" falsevalue="" checked="true"
            label="Files have header?"
            help="If this option is set to Yes, the tool will assume that the count files have column headers in the first row. Default: Yes" />
        <conditional name="tximport">
            <param name="tximport_selector" type="select" label="Choice of Input data">
                <option value="count" selected="True">Count data (e.g. from HTSeq-count, featureCounts or StringTie)</option>
                <option value="tximport">TPM values (e.g. from kallisto, sailfish or salmon)</option>
            </param>
            <when value="tximport">
                <param name="txtype" type="select" label="Program used to generate TPMs">
                    <option value="kallisto">kallisto</option>
                    <option value="sailfish">Sailfish</option>
                    <option value="salmon">Salmon</option>
                </param>
                <conditional name="mapping_format">
                    <param name="mapping_format_selector" type="select" label="Gene mapping format">
                        <option value="gtf" selected="True">GTF</option>
                        <option value="tabular">Transcript-ID and Gene-ID mapping file</option>
                    </param>
                    <when value="gtf">
                        <param name="gtf_file" type="data" format="gtf,gff3" label="GTF/GFF3 file with Transcript - Gene mapping"/>
                    </when>
                    <when value="tabular">
                        <param name="tabular_file" type="data" format="tabular" label="Tabular file with Transcript - Gene mapping"/>
                    </when>
                </conditional>
            </when>
            <when value="count" />
        </conditional>
        <param name="pdf" type="boolean" truevalue="1" falsevalue="0" checked="true"
            label="Visualising the analysis results"
            help="output an additional PDF files" />
        <param name="normCounts" type="boolean" truevalue="1" falsevalue="0" checked="false"
            label="Output normalized counts table" />
        <!-- The help text here previously duplicated the independent-filtering help of auto_mean_filter_off. -->
        <param name="many_contrasts" type="boolean" truevalue="1" falsevalue="0" checked="false"
            label="Output all levels vs all levels of primary factor (use when you have >2 levels for primary factor)"
            help="If set to Yes, the single result file is replaced by a collection containing one result file per pairwise comparison of the primary factor levels" />
        <param name="fit_type" type="select" label="Fit type">
            <option value="1" selected="true">parametric</option>
            <option value="2">local</option>
            <option value="3">mean</option>
        </param>
        <param name="outlier_replace_off" type="boolean" truevalue="1" falsevalue="0" checked="false"
            label="Turn off outliers replacement (only affects with >6 replicates)"
            help="When there are more than 6 replicates for a given sample, the DESeq2 will automatically replace counts with large Cook’s distance with the trimmed mean over all samples, scaled up by the size factor or normalization factor for that sample" />
        <param name="outlier_filter_off" type="boolean" truevalue="1" falsevalue="0" checked="false"
            label="Turn off outliers filtering (only affects with >2 replicates)"
            help="When there are more than 2 replicates for a given sample, the DESeq2 will automatically filter genes which contain a Cook’s distance above a cutoff" />
        <param name="auto_mean_filter_off" type="boolean" truevalue="1" falsevalue="0" checked="false"
            label="Turn off independent filtering"
            help=" DESeq2 performs independent filtering by default using the mean of normalized counts as a filter statistic" />
    </inputs>
    <outputs>
        <!-- Single result table; only created when many_contrasts is off. -->
        <data format="tabular" name="deseq_out" label="DESeq2 result file on ${on_string}">
            <filter>many_contrasts is False</filter>
            <actions>
                <action name="column_names" type="metadata" default="GeneID,Base mean,log2(FC),StdErr,Wald-Stats,P-value,P-adj" />
            </actions>
        </data>
        <!--
            Per-contrast result files; only created when many_contrasts is on.
            The angle brackets of the named regex group must be escaped inside an attribute value.
        -->
        <collection name="split_output" type="list" label="DESeq2 result files on ${on_string}">
            <filter>many_contrasts is True</filter>
            <discover_datasets pattern="None.(?P&lt;designation&gt;.+_vs_.+)" format="tabular" directory="." visible="false"/>
        </collection>
        <data format="pdf" name="plots" label="DESeq2 plots on ${on_string}">
            <filter>pdf == True</filter>
        </data>
        <data format="tabular" name="counts_out" label="Normalized counts file on ${on_string}">
            <filter>normCounts == True</filter>
        </data>
    </outputs>
    <tests>
        <!--Ensure counts files with header works -->
        <test expect_num_outputs="2">
            <repeat name="rep_factorName">
                <param name="factorName" value="Treatment"/>
                <repeat name="rep_factorLevel">
                    <param name="factorLevel" value="Treated"/>
                    <param name="countsFile" value="GSM461179_treat_single.counts,GSM461180_treat_paired.counts,GSM461181_treat_paired.counts"/>
                </repeat>
                <repeat name="rep_factorLevel">
                    <param name="factorLevel" value="Untreated"/>
                    <param name="countsFile" value="GSM461176_untreat_single.counts,GSM461177_untreat_paired.counts,GSM461178_untreat_paired.counts,GSM461182_untreat_single.counts"/>
                </repeat>
            </repeat>
            <param name="pdf" value="False"/>
            <param name="normCounts" value="True"/>
            <output name="counts_out">
                <assert_contents>
                    <has_text_matching expression="untreat1\tuntreat2\tuntreat3\tuntreat4\ttreat1\ttreat2\ttreat3" />
                    <has_text_matching expression="FBgn0000003\t0\t0\t0\t0\t0\t0\t0" />
                </assert_contents>
            </output>
            <output name="deseq_out" >
                <assert_contents>
                    <has_text_matching expression="FBgn0003360\t1933.9504.*\t-2.8399.*\t0.1309.*-21.6851.*2.831.*8.024" />
                </assert_contents>
            </output>
        </test>
        <!--Ensure counts files without header works -->
        <test expect_num_outputs="2">
            <repeat name="rep_factorName">
                <param name="factorName" value="Treatment"/>
                <repeat name="rep_factorLevel">
                    <param name="factorLevel" value="Treated"/>
                    <param name="countsFile" value="GSM461179_treat_single.counts.noheader,GSM461180_treat_paired.counts.noheader,GSM461181_treat_paired.counts.noheader"/>
                </repeat>
                <repeat name="rep_factorLevel">
                    <param name="factorLevel" value="Untreated"/>
                    <param name="countsFile" value="GSM461176_untreat_single.counts.noheader,GSM461177_untreat_paired.counts.noheader,GSM461178_untreat_paired.counts.noheader,GSM461182_untreat_single.counts.noheader"/>
                </repeat>
            </repeat>
            <param name="header" value="False"/>
            <param name="pdf" value="False"/>
            <param name="normCounts" value="True"/>
            <output name="counts_out">
                <assert_contents>
                    <has_text_matching expression="GSM461176_untreat_single.counts.noheader\tGSM461177_untreat_paired.counts.noheader\tGSM461178_untreat_paired.counts.noheader\tGSM461182_untreat_single.counts.noheader\tGSM461179_treat_single.counts.noheader\tGSM461180_treat_paired.counts.noheader\tGSM461181_treat_paired.counts.noheader" />
                    <has_text_matching expression="FBgn0000003\t0\t0\t0\t0\t0\t0\t0" />
                </assert_contents>
            </output>
            <output name="deseq_out" >
                <assert_contents>
                    <has_text_matching expression="FBgn0003360\t1933.9504.*\t-2.8399.*\t0.1309.*-21.6851.*2.831.*8.024" />
                </assert_contents>
            </output>
        </test>
        <!--Ensure Sailfish/Salmon input with tx2gene table works-->
        <test expect_num_outputs="1">
            <repeat name="rep_factorName">
                <param name="factorName" value="Treatment"/>
                <repeat name="rep_factorLevel">
                    <param name="factorLevel" value="Treated"/>
                    <param name="countsFile" value="sailfish/sailfish_quant.sf1.tab,sailfish/sailfish_quant.sf2.tab,sailfish/sailfish_quant.sf3.tab"/>
                </repeat>
                <repeat name="rep_factorLevel">
                    <param name="factorLevel" value="Untreated"/>
                    <param name="countsFile" value="sailfish/sailfish_quant.sf4.tab,sailfish/sailfish_quant.sf5.tab,sailfish/sailfish_quant.sf6.tab"/>
                </repeat>
            </repeat>
            <param name="pdf" value="False"/>
            <param name="tximport_selector" value="tximport"/>
            <param name="txtype" value="sailfish"/>
            <param name="mapping_format_selector" value="tabular"/>
            <param name="tabular_file" value="tx2gene.tab"/>
            <output name="deseq_out" >
                <assert_contents>
                    <has_text_matching expression="MIR6859-2\t1.1858.*\t-1.5832.*\t1.2956.*\t-1.2219.*\t0.2217.*\t0.8868.*" />
                </assert_contents>
            </output>
        </test>
    </tests>
<!-- The help body is reStructuredText: blank lines, line breaks and table column alignment inside the CDATA are significant. -->
    <help><![CDATA[
.. class:: infomark

**What it does**

Estimate variance-mean dependence in count data from high-throughput sequencing assays and test for differential expression based on a model using the negative binomial distribution

-----

**Inputs**

**Count Files**

DESeq2_ takes count tables generated from **featureCounts**, **HTSeq-count** or **StringTie** as input. Count tables must be generated for each sample individually. One header row is assumed, but files with no header (e.g. from HTSeq) can be input with the *Files have header?* option set to No. DESeq2 is capable of handling multiple factors that affect your experiment. The first factor you input is considered as the primary factor that affects gene expressions. Optionally, you can input one or more secondary factors that might influence your experiment. But the final output will be changes in genes due to primary factor in presence of secondary factors. Each factor has at least two levels/states. You need to select appropriate count table from your history for each factor level.

The following table gives some examples of factors and their levels:

========= ============== ===============
Factor    Factor level 1 Factor level 2
--------- -------------- ---------------
Treatment Treated        Untreated
--------- -------------- ---------------
Condition Knockdown      Wildtype
--------- -------------- ---------------
TimePoint Day4           Day1
--------- -------------- ---------------
SeqType   SingleEnd      PairedEnd
--------- -------------- ---------------
Gender    Female         Male
========= ============== ===============

*Note*: Output log2 fold changes are based on primary factor level 1 vs. factor level 2. Here the order of factor levels is important. For example, for the factor 'Treatment' given in above table, DESeq2 computes fold changes of 'Treated' samples against 'Untreated', i.e. the values correspond to up or down regulations of genes in Treated samples.

DESeq2_ can also take transcript-level counts from quantification tools such as **kallisto**, **Salmon** and **Sailfish**, and this Galaxy wrapper incorporates the Bioconductor tximport_ package to process the transcript counts for DESeq2.

**Salmon or Sailfish Files**

Salmon or Sailfish ``quant.sf`` files can be imported by setting type to *Salmon* or *Sailfish* respectively above. Note: for previous versions of Salmon or Sailfish, in which the quant.sf files start with comment lines you will need to remove the comment lines before inputting here. An example of the format is shown below.

Example:

============ ========== =============== =========== ===========
Name         Length     EffectiveLength TPM         NumReads
------------ ---------- --------------- ----------- -----------
NR_001526    164        20.4518         0           0
NR_001526_1  164        20.4518         0           0
NR_001526_2  164        20.4518         0           0
NM_130786    1764       1956.04         2.47415     109.165
NR_015380    2129       2139.53         1.77331     85.5821
NM_001198818 9360       7796.58         2.38616e-07 4.19648e-05
NM_001198819 9527       7964.62         0           0
NM_001198820 9410       7855.78         0           0
NM_014576    9267       7714.88         0.0481114   8.37255
============ ========== =============== =========== ===========

**kallisto Files**

kallisto ``abundance.tsv`` files can be imported by setting type to *kallisto* above. An example of the format is shown below.

Example:

============ ========== =============== =========== ===========
target_id    length     eff_length      est_counts  tpm
------------ ---------- --------------- ----------- -----------
NR_001526    164        20.4518         0           0
NR_001526_1  164        20.4518         0           0
NR_001526_2  164        20.4518         0           0
NM_130786    1764       1956.04         109.165     2.47415
NR_015380    2129       2139.53         85.5821     1.77331
NM_001198818 9360       7796.58         4.19648e-05 2.38616e-07
NM_001198819 9527       7964.62         0           0
NM_001198820 9410       7855.78         0           0
NM_014576    9267       7714.88         8.37255     0.0481114
============ ========== =============== =========== ===========

-----

**Output**

DESeq2_ generates a tabular file containing the different columns and optional visualized results as PDF.

====== ==========================================================
Column Description
------ ----------------------------------------------------------
     1 Gene Identifiers
     2 mean normalised counts, averaged over all samples from both conditions
     3 the logarithm (to basis 2) of the fold change (See the note in inputs section)
     4 standard error estimate for the log2 fold change estimate
     5 Wald statistic
     6 p value for the statistical significance of this change
     7 p value adjusted for multiple testing with the Benjamini-Hochberg procedure which controls false discovery rate (FDR)
====== ==========================================================

.. _DESeq2: http://master.bioconductor.org/packages/release/bioc/html/DESeq2.html
.. _tximport: https://bioconductor.org/packages/devel/bioc/vignettes/tximport/inst/doc/tximport.html
    ]]></help>
    <citations>
        <citation type="doi">10.1186/s13059-014-0550-8</citation>
    </citations>
</tool>