Mercurial > repos > jjohnson > cummerbund
diff cuffdiff_wrapper.xml @ 2:fdf01b3c1841
Update to new cuffdiff wrapper, add cuffdb_info.txt to cummerbund html output
author | Jim Johnson <jj@umn.edu> |
---|---|
date | Fri, 08 Nov 2013 14:54:01 -0600 |
parents | da7241f92ecf |
children | f109453ecfa2 |
line wrap: on
line diff
--- a/cuffdiff_wrapper.xml Mon Feb 04 21:23:20 2013 -0600 +++ b/cuffdiff_wrapper.xml Fri Nov 08 14:54:01 2013 -0600 @@ -1,78 +1,30 @@ -<tool id="cuffdiff_cummerbund" name="Cuffdiff" version="0.0.6"> - <!-- Wrapper supports Cuffdiff versions v1.3.0-v2.0 --> +<tool id="cuffdiff_cummerbund" name="Cuffdiff for cummeRbund" version="0.0.7"> + <!-- Wrapper supports Cuffdiff versions 2.1.0-2.1.1 --> <description>find significant changes in transcript expression, splicing, and promoter use</description> <requirements> - <requirement type="package">cufflinks</requirement> + <requirement type="package" version="2.1.1">cufflinks</requirement> </requirements> - <command interpreter="python"> - #set sel_outputs = $output_sel.__str__.split(',') - cuffdiff_wrapper.py + <version_command>cuffdiff 2>&1 | head -n 1</version_command> + <command> + cuffdiff + --no-update-check --FDR=$fdr --num-threads="4" --min-alignment-count=$min_alignment_count - - #if 'cuffdata' in $sel_outputs or not $output_sel: - --cuffdatadir=$cuffdata.extra_files_path - #end if - #if 'cummeRbund_db' in $sel_outputs: - --cummeRbund_db=$cummeRbund_db - #end if + --library-norm-method=$library_norm_method + --dispersion-method=$dispersion_method - #if 'isoforms_fpkm_tracking' in $sel_outputs: - --isoforms_fpkm_tracking_output=$isoforms_fpkm_tracking - #end if - #if 'genes_fpkm_tracking' in $sel_outputs: - --genes_fpkm_tracking_output=$genes_fpkm_tracking - #end if - #if 'cds_fpkm_tracking' in $sel_outputs: - --cds_fpkm_tracking_output=$cds_fpkm_tracking - #end if - #if 'tss_groups_fpkm_tracking' in $sel_outputs: - --tss_groups_fpkm_tracking_output=$tss_groups_fpkm_tracking - #end if - #if 'isoforms_exp_diff' in $sel_outputs: - --isoforms_exp_output=$isoforms_exp_diff - #end if - #if 'genes_exp_diff' in $sel_outputs: - --genes_exp_output=$genes_exp_diff - #end if - #if 'tss_groups_exp_diff' in $sel_outputs: - --tss_groups_exp_output=$tss_groups_exp_diff - #end if - #if 'cds_exp_fpkm_tracking' in $sel_outputs: - --cds_exp_fpkm_tracking_output=$cds_exp_fpkm_tracking - #end if - #if 'splicing_diff' in $sel_outputs: - --splicing_diff_output=$splicing_diff - #end if - #if 'cds_diff' in $sel_outputs: - --cds_diff_output=$cds_diff - #end if - #if 'promoters_diff' in $sel_outputs: - --promoters_diff_output=$promoters_diff - #end if - #if 'cds_read_group_tracking' in $sel_outputs: - --cds_read_group_tracking=$cds_read_group_tracking - #end if - #if 'tss_groups_read_group_tracking' in $sel_outputs: - --tss_groups_read_group_tracking=$tss_groups_read_group_tracking - #end if - #if 'genes_read_group_tracking' in $sel_outputs: - --genes_read_group_tracking=$genes_read_group_tracking - #end if - #if 'isoforms_read_group_tracking' in $sel_outputs: - --isoforms_read_group_tracking=$isoforms_read_group_tracking - #end if - ## Set advanced data parameters? #if $additional.sAdditional == "Yes": - -m $additional.frag_mean_len - -s $additional.frag_len_std_dev - #end if - - ## Normalization? - #if str($do_normalization) == "Yes": - -N + #if $additional.frag_mean_len: + -m $additional.frag_mean_len + #end if + #if $additional.frag_len_std_dev: + -s $additional.frag_len_std_dev + #end if + #if $additional.max_bundle_frags: + --max-bundle-frags="$additional.max_bundle_frags" + #end if #end if ## Multi-read correct? @@ -82,67 +34,77 @@ ## Bias correction? #if $bias_correction.do_bias_correction == "Yes": - -b + -b #if $bias_correction.seq_source.index_source == "history": - --ref_file=$bias_correction.seq_source.ref_file + ## Custom genome from history. + $bias_correction.seq_source.ref_file #else: - --ref_file="None" + ## Built-in genome. + ${__get_data_table_entry__('sam_fa_indexes', 'value', $gtf_input.dbkey, 'path')} #end if - --dbkey=${gtf_input.metadata.dbkey} - --index_dir=${GALAXY_DATA_INDEX_DIR} - #end if - - ## Inputs. - --inputA=$gtf_input - #if $group_analysis.do_groups == "No": - --input1=$aligned_reads1 - --input2=$aligned_reads2 - #else: - ## Replicates. - --labels - #for $group in $group_analysis.groups - ${group.group} - #end for - --files - #for $group in $group_analysis.groups - #for $file in $group.files: - ${file.file} - #end for - , - #end for #end if + #set labels = '\'' + '\',\''.join( [ str( $condition.name ) for $condition in $conditions ] ) + '\'' + --labels $labels + + ## Inputs. + $gtf_input + #for $condition in $conditions: + #set samples = ','.join( [ str( $sample.sample ) for $sample in $condition.samples ] ) + $samples + #end for + + ## If build cummerbund db + #if $build_cummerbund_db: + && echo 'library(cummeRbund)' > cuffData.r + #if $bias_correction.do_bias_correction == "Yes": + #if $bias_correction.seq_source.index_source == "history": + ## Custom genome from history. + && echo 'cuff<-readCufflinks( dbFile = "cuffdata.db", gtfFile = "$gtf_input", genome = "$bias_correction.seq_source.ref_file", rebuild = T)' >> cuffData.r + #else: + ## Built-in genome. + ${__get_data_table_entry__('sam_fa_indexes', 'value', $gtf_input.dbkey, 'path')} + && echo 'cuff<-readCufflinks( dbFile = "cuffdata.db", gtfFile = "$gtf_input", genome = "${__get_data_table_entry__('sam_fa_indexes', 'value', $gtf_input.dbkey, 'path')}", rebuild = T)' >> cuffData.r + #end if + #else + && echo 'cuff<-readCufflinks( dbFile = "cuffdata.db", rebuild = T)' >> cuffData.r + #end if + && Rscript --vanilla cuffData.r + && cp cuffdata.db $cummerbund_db + #end if </command> <inputs> <param format="gtf,gff3" name="gtf_input" type="data" label="Transcripts" help="A transcript GFF3 or GTF file produced by cufflinks, cuffcompare, or other source."/> - <conditional name="group_analysis"> - <param name="do_groups" type="select" label="Perform replicate analysis" help="Perform cuffdiff with replicates in each group."> - <option value="No">No</option> - <option value="Yes">Yes</option> - </param> - <when value="Yes"> - <repeat name="groups" title="Group"> - <param name="group" title="Group name" type="text" label="Group name (no spaces or commas)"/> - <repeat name="files" title="Replicate"> - <param name="file" label="Add file" type="data" format="sam,bam"/> - </repeat> - </repeat> - </when> - <when value="No"> - <param format="sam,bam" name="aligned_reads1" type="data" label="SAM or BAM file of aligned RNA-Seq reads" help=""/> - <param format="sam,bam" name="aligned_reads2" type="data" label="SAM or BAM file of aligned RNA-Seq reads" help=""/> - </when> - </conditional> + + <repeat name="conditions" title="Condition" min="2"> + <param name="name" title="Condition name" type="text" label="Name"/> + <repeat name="samples" title="Replicate" min="1"> + <param name="sample" label="Add replicate" type="data" format="sam,bam"/> + </repeat> + </repeat> + <param name="time_series" type="boolean" checked="false" truevalue="--time-series" falsevalue="" optional="true" label="treat samples as a time-series"> + <help> + Instructs Cuffdiff to analyze the provided samples as a time series, rather than testing for differences between all pairs of samples. + Samples should be provided in increasing time order at the command line (e.g first time point SAM, second timepoint SAM, etc.) + </help> + </param> + + <param name="library_norm_method" type="select" label="Library normalization method"> + <option value="geometric" selected="True">geometric</option> + <option value="classic-fpkm">classic-fpkm</option> + <option value="quartile">quartile</option> + </param> + + <param name="dispersion_method" type="select" label="Dispersion estimation method" help="If using only one sample per condition, you must use 'blind.'"> + <option value="pooled" selected="True">pooled</option> + <option value="per-condition">per-condition</option> + <option value="blind">blind</option> + </param> <param name="fdr" type="float" value="0.05" label="False Discovery Rate" help="The allowed false discovery rate."/> <param name="min_alignment_count" type="integer" value="10" label="Min Alignment Count" help="The minimum number of alignments in a locus for needed to conduct significance testing on changes in that locus observed between samples."/> - <param name="do_normalization" type="select" label="Perform quartile normalization" help="Removes top 25% of genes from FPKM denominator to improve accuracy of differential expression calls for low abundance transcripts."> - <option value="No">No</option> - <option value="Yes">Yes</option> - </param> - <param name="multiread_correct" type="select" label="Use multi-read correct" help="Tells Cufflinks to do an initial estimation procedure to more accurately weight reads mapping to multiple locations in the genome."> <option value="No" selected="true">No</option> <option value="Yes">Yes</option> @@ -168,142 +130,95 @@ <when value="No"></when> </conditional> + <param name="include_read_group_files" type="select" label="Include Read Group Datasets" help="Read group datasets provide information on replicates."> + <option value="No" selected="true">No</option> + <option value="Yes">Yes</option> + </param> + <param name="build_cummerbund_db" type="boolean" truevalue="yes" falsevalue="no" checked="false" label="Build cummeRbund database"/> <conditional name="additional"> - <param name="sAdditional" type="select" label="Set Additional Parameters? (not recommended)"> + <param name="sAdditional" type="select" label="Set Additional Parameters? (not recommended for paired-end reads)"> <option value="No">No</option> <option value="Yes">Yes</option> </param> <when value="No"></when> <when value="Yes"> - <param name="frag_mean_len" type="integer" value="200" label="Average Fragment Length"/> - <param name="frag_len_std_dev" type="integer" value="80" label="Fragment Length Standard Deviation"/> + <param name="frag_mean_len" type="integer" value="" optional="true" label="Average Fragment Length Default: 200"> + <help> + Note: Cufflinks now learns the fragment length mean for each SAM file, + so using this option is no longer recommended with paired-end reads. + </help> + </param> + <param name="frag_len_std_dev" type="integer" value="" optional="true" label="Fragment Length Standard Deviation Default: 80"> + <help> + Note: Cufflinks now learns the fragment length mean for each SAM file, + so using this option is no longer recommended with paired-end reads. + </help> + </param> + <param name="max_bundle_frags" type="integer" value="" optional="true" label="--max-bundle-frags"> + <help> + Sets the maximum number of fragments a locus may have before being skipped. Skipped loci are listed in skipped.gtf. Default: 1000000 + </help> + <validator type="in_range" message="Value greater than 0" min="1"/> + </param> </when> </conditional> + </inputs> - <param name="output_sel" type="select" multiple="true" display="checkboxes" force_select="true" label="Select outputs for history datasets"> - <option value="cuffdata">cuffdata - html page with links to cuffdiff outputs</option> - <option value="cummeRbund_db">cummeRbund database</option> - <option value="run_info">run.info</option> - <option value="read_groups_info">read_groups.info</option> - <option value="splicing_diff">splicing.diff</option> - <option value="promoters_diff">promoters.diff</option> - <option value="genes_exp_diff">genes_exp.diff</option> - <option value="genes_fpkm_tracking">genes.fpkm_tracking</option> - <option value="genes_count_tracking">genes.count_tracking</option> - <option value="genes_read_group_tracking">genes.read_group_tracking</option> - <option value="isoforms_exp_diff">isoforms.exp_diff</option> - <option value="isoforms_fpkm_tracking">isoforms.fpkm_tracking</option> - <option value="isoforms_count_tracking">isoforms.count_tracking</option> - <option value="isoforms_read_group_tracking">isoforms.read_group_tracking</option> - <option value="cds_diff">cds.diff</option> - <option value="cds_exp_diff">cds_exp.diff</option> - <option value="cds_fpkm_tracking">cds.fpkm_tracking</option> - <option value="cds_count_tracking">cds.count_tracking</option> - <option value="cds_read_group_tracking">cds.read_group_tracking</option> - <option value="tss_groups_exp_diff">tss_groups_exp.diff</option> - <option value="tss_groups_fpkm_tracking">tss_groups.fpkm_tracking</option> - <option value="tss_groups_count_tracking">tss_groups.count_tracking</option> - <option value="tss_groups_read_group_tracking">tss_groups.read_group_tracking</option> - </param> - - </inputs> + <stdio> + <regex match="Error" source="both" level="fatal" description="Error"/> + <regex match=".*" source="both" level="log" description="tool progress"/> + </stdio> <outputs> - <data format="text" name="run_info" label="${tool.name} on ${on_string}: run.info"> - <filter>output_sel and 'run_info' in output_sel</filter> - </data> - <data format="tabular" name="read_groups_info" label="${tool.name} on ${on_string}: read_groups.info"> - <filter>output_sel and 'read_groups_info' in output_sel</filter> - </data> - <data format="tabular" name="splicing_diff" label="${tool.name} on ${on_string}: splicing differential expression testing"> - <filter>output_sel and 'splicing_diff' in output_sel</filter> - </data> - <data format="tabular" name="promoters_diff" label="${tool.name} on ${on_string}: promoters differential expression testing"> - <filter>output_sel and 'promoters_diff' in output_sel</filter> - </data> - <data format="tabular" name="cds_diff" label="${tool.name} on ${on_string}: CDS overloading diffential expression testing"> - <filter>output_sel and 'cds_diff' in output_sel</filter> - </data> - <data format="tabular" name="cds_exp_diff" label="${tool.name} on ${on_string}: CDS differential expression testing"> - <filter>output_sel and 'cds_exp_diff' in output_sel</filter> + <!-- Optional read group datasets. --> + <data format="cuffdatadb" name="cummerbund_db" label="${tool.name} on ${on_string}: cummeRbund sqlite Database" > + <filter>build_cummerbund_db</filter> </data> - <data format="tabular" name="cds_fpkm_tracking" label="${tool.name} on ${on_string}: CDS FPKM tracking"> - <filter>output_sel and 'cds_fpkm_tracking' in output_sel</filter> - </data> - <data format="tabular" name="cds_count_tracking" label="${tool.name} on ${on_string}: CDS counts"> - <filter>output_sel and 'cds_count_tracking' in output_sel</filter> - </data> - <data format="tabular" name="cds_read_group_tracking" label="${tool.name} on ${on_string}: CDS Read Group tracking"> - <filter>output_sel and 'cds_read_group_tracking' in output_sel</filter> + <data format="tabular" name="isoforms_read_group" label="${tool.name} on ${on_string}: isoforms read group tracking" from_work_dir="isoforms.read_group_tracking" > + <filter>(params['include_read_group_files'] == 'Yes'</filter> </data> - <data format="tabular" name="tss_groups_exp_diff" label="${tool.name} on ${on_string}: TSS groups differential expression testing"> - <filter>output_sel and 'tss_groups_exp_diff' in output_sel</filter> + <data format="tabular" name="genes_read_group" label="${tool.name} on ${on_string}: genes read group tracking" from_work_dir="genes.read_group_tracking" > + <filter>(params['include_read_group_files'] == 'Yes'</filter> </data> - <data format="tabular" name="tss_groups_fpkm_tracking" label="${tool.name} on ${on_string}: TSS groups FPKM tracking"> - <filter>output_sel and 'tss_groups_fpkm_tracking' in output_sel</filter> - </data> - <data format="tabular" name="tss_groups_count_tracking" label="${tool.name} on ${on_string}: TSS groups counts"> - <filter>output_sel and 'tss_groups_count_tracking' in output_sel</filter> + <data format="tabular" name="cds_read_group" label="${tool.name} on ${on_string}: CDs read group tracking" from_work_dir="cds.read_group_tracking" > + <filter>(params['include_read_group_files'] == 'Yes'</filter> </data> - <data format="tabular" name="tss_groups_read_group_tracking" label="${tool.name} on ${on_string}: TSS groups Read Group tracking"> - <filter>output_sel and 'tss_groups_read_group_tracking' in output_sel</filter> - </data> - <data format="tabular" name="isoforms_exp_diff" label="${tool.name} on ${on_string}: transcript differential expression testing"> - <filter>output_sel and 'isoforms_exp_diff' in output_sel</filter> - </data> - <data format="tabular" name="isoforms_fpkm_tracking" label="${tool.name} on ${on_string}: transcript FPKM tracking"> - <filter>output_sel and 'isoforms_fpkm_tracking' in output_sel</filter> - </data> - <data format="tabular" name="isoforms_count_tracking" label="${tool.name} on ${on_string}: transcript counts"> - <filter>output_sel and 'isoforms_count_tracking' in output_sel</filter> - </data> - <data format="tabular" name="isoforms_read_group_tracking" label="${tool.name} on ${on_string}: transcript Read Group tracking"> - <filter>output_sel and 'isoforms_read_group_tracking' in output_sel</filter> - </data> - <data format="tabular" name="genes_exp_diff" label="${tool.name} on ${on_string}: gene differential expression testing"> - <filter>output_sel and 'genes_exp_diff' in output_sel</filter> + <data format="tabular" name="tss_groups_read_group" label="${tool.name} on ${on_string}: TSS groups read group tracking" from_work_dir="tss_groups.read_group_tracking" > + <filter>(params['include_read_group_files'] == 'Yes'</filter> </data> - <data format="tabular" name="genes_fpkm_tracking" label="${tool.name} on ${on_string}: gene FPKM tracking"> - <filter>output_sel and 'genes_fpkm_tracking' in output_sel</filter> - </data> - <data format="tabular" name="genes_count_tracking" label="${tool.name} on ${on_string}: gene counts"> - <filter>output_sel and 'genes_count_tracking' in output_sel</filter> - </data> - <data format="tabular" name="genes_read_group_tracking" label="${tool.name} on ${on_string}: gene Read Group tracking"> - <filter>output_sel and 'genes_read_group_tracking' in output_sel</filter> - </data> - <data format="cuffdata" name="cuffdata" label="${tool.name} on ${on_string}: cuffdata" > - <filter>not output_sel or output_sel and 'cuffdata' in output_sel</filter> - </data> - <data format="cuffdatadb" name="cummeRbund_db" label="${tool.name} on ${on_string}: cummeRbund sqlite Database" > - <filter>output_sel and 'cummeRbund_db' in output_sel</filter> - </data> + + <!-- Standard datasets. --> + <data format="tabular" name="splicing_diff" label="${tool.name} on ${on_string}: splicing differential expression testing" from_work_dir="splicing.diff" /> + <data format="tabular" name="promoters_diff" label="${tool.name} on ${on_string}: promoters differential expression testing" from_work_dir="promoters.diff" /> + <data format="tabular" name="cds_diff" label="${tool.name} on ${on_string}: CDS overloading diffential expression testing" from_work_dir="cds.diff" /> + <data format="tabular" name="cds_exp_fpkm_tracking" label="${tool.name} on ${on_string}: CDS FPKM differential expression testing" from_work_dir="cds_exp.diff" /> + <data format="tabular" name="cds_fpkm_tracking" label="${tool.name} on ${on_string}: CDS FPKM tracking" from_work_dir="cds.fpkm_tracking" /> + <data format="tabular" name="tss_groups_exp" label="${tool.name} on ${on_string}: TSS groups differential expression testing" from_work_dir="tss_group_exp.diff" /> + <data format="tabular" name="tss_groups_fpkm_tracking" label="${tool.name} on ${on_string}: TSS groups FPKM tracking" from_work_dir="tss_groups.fpkm_tracking" /> + <data format="tabular" name="genes_exp" label="${tool.name} on ${on_string}: gene differential expression testing" from_work_dir="gene_exp.diff" /> + <data format="tabular" name="genes_fpkm_tracking" label="${tool.name} on ${on_string}: gene FPKM tracking" from_work_dir="genes.fpkm_tracking" /> + <data format="tabular" name="isoforms_exp" label="${tool.name} on ${on_string}: transcript differential expression testing" from_work_dir="isoform_exp.diff" /> + <data format="tabular" name="isoforms_fpkm_tracking" label="${tool.name} on ${on_string}: transcript FPKM tracking" from_work_dir="isoforms.fpkm_tracking" /> </outputs> - <stdio> - <exit_code range="1:" level="fatal" description="Cufflinks Err" /> - </stdio> - <tests> <test> <!-- cuffdiff cuffcompare_out5.gtf cuffdiff_in1.sam cuffdiff_in2.sam --> + <!-- + NOTE: as of version 0.0.6 of the wrapper, tests cannot be run because multiple inputs to a repeat + element are not supported. <param name="gtf_input" value="cuffcompare_out5.gtf" ftype="gtf" /> <param name="do_groups" value="No" /> <param name="aligned_reads1" value="cuffdiff_in1.sam" ftype="sam" /> <param name="aligned_reads2" value="cuffdiff_in2.sam" ftype="sam" /> - <!-- Defaults. --> <param name="fdr" value="0.05" /> <param name="min_alignment_count" value="0" /> <param name="do_bias_correction" value="No" /> <param name="do_normalization" value="No" /> <param name="multiread_correct" value="No"/> <param name="sAdditional" value="No"/> - <!-- - Line diffs are needed because cuffdiff does not produce deterministic output. - TODO: can we find datasets that lead to deterministic behavior? - --> <output name="splicing_diff" file="cuffdiff_out9.txt"/> <output name="promoters_diff" file="cuffdiff_out10.txt"/> <output name="cds_diff" file="cuffdiff_out11.txt"/> @@ -315,6 +230,7 @@ <output name="genes_fpkm_tracking" file="cuffdiff_out6.txt" lines_diff="200"/> <output name="isoforms_exp" file="cuffdiff_out1.txt" lines_diff="200"/> <output name="isoforms_fpkm_tracking" file="cuffdiff_out5.txt" lines_diff="200"/> + --> </test> </tests>