Mercurial > repos > iuc > nanocompore_sampcomp
view sampcomp.xml @ 3:b833eff63b10 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/nanocompore commit c2985b5c91a932e175cbfaf9b54a76e23beff9e9"
author | iuc |
---|---|
date | Thu, 09 Jul 2020 02:38:37 -0400 |
parents | 25abc2c72ff9 |
children | c0fb573c1e8d |
line wrap: on
line source
<?xml version="1.0"?>
<!--
    Galaxy wrapper for "nanocompore sampcomp".
    Shared tokens and macros (@TOOL_VERSION@, @WID@, @REFERENCES@, requirements,
    citations) are imported from macros.xml.
-->
<tool id="nanocompore_sampcomp" name="SampComp" version="@TOOL_VERSION@+galaxy0">
    <description>to compare Nanopolished datasets</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements"/>
    <version_command><![CDATA[nanocompore --version]]></version_command>
    <command detect_errors="exit_code"><![CDATA[
        ## initialize
        ## requires a minimum of 3 threads
        threads=\$((3 > \${GALAXY_SLOTS:-3} ? 3 : \${GALAXY_SLOTS:-3})) &&

        ## same name pattern required: link every data file next to its index
        ## as <name>.tsv / <name>.tsv.idx
        #for $i, $current in enumerate($file1_rep)
            ln -s '$current.file' 'sample_1_${i}.tsv' &&
            ln -s '$current.index' 'sample_1_${i}.tsv.idx' &&
        #end for
        #for $i, $current in enumerate($file2_rep)
            ln -s '$current.file' 'sample_2_${i}.tsv' &&
            ln -s '$current.index' 'sample_2_${i}.tsv.idx' &&
        #end for

        ## run
        nanocompore sampcomp
        ## required
        --label1 '$label1'
        #set files1 = ','.join(['sample_1_' + str(item) + '.tsv' for item in range(len($file1_rep))])
        --file_list1 '$files1'
        --label2 '$label2'
        #set files2 = ','.join(['sample_2_' + str(item) + '.tsv' for item in range(len($file2_rep))])
        --file_list2 '$files2'
        --fasta '$fasta'
        ## optional
        #if $ap.bed
            --bed '$ap.bed'
        #end if
        --max_invalid_kmers_freq $ap.max_invalid_kmers_freq
        --min_coverage $ap.min_coverage
        --min_ref_length $ap.min_ref_length
        --comparison_methods '$ap.comparison_methods'
        --sequence_context $ap.sequence_context
        --sequence_context_weights '$ap.sequence_context_weights'
        --pvalue_thr $ap.pvalue_thr
        $ap.logit
        $ap.allow_warnings
        --outpath 'results'
        --nthreads \$threads
        --log_level debug

        ## bundle the three database files into a single archive for the out_db output
        && tar -cf 'results/db.tar' 'results/out_SampComp.db.bak' 'results/out_SampComp.db.dir' 'results/out_SampComp.db.dat'
    ]]></command>
    <inputs>
        <!-- Condition 1: label plus one or more (data file, index file) pairs -->
        <param argument="--label1" type="text" value="Condition 1" label="Set label of first condition"/>
        <repeat name="file1_rep" min="1" title="First condition files">
            <param name="file" type="data" format="tabular" label="Select NanopolishComp file" help="(--file_list1)"/>
            <param name="index" type="data" format="tabular" label="Select index file"/>
        </repeat>
        <!-- Condition 2: label plus one or more (data file, index file) pairs -->
        <param argument="--label2" type="text" value="Condition 2" label="Set label of second condition"/>
        <repeat name="file2_rep" min="1" title="Second condition files">
            <param name="file" type="data" format="tabular" label="Select NanopolishComp file" help="(--file_list2)"/>
            <param name="index" type="data" format="tabular" label="Select index file"/>
        </repeat>
        <param argument="--fasta" type="data" format="fasta" label="Select reference mapping fasta"/>
        <section name="ap" title="Advanced parameters">
            <param argument="--bed" type="data" format="bed" optional="true" label="Select mapping file with annotation of transcriptome"/>
            <param argument="--max_invalid_kmers_freq" type="float" value="0.1" min="0.0" max="1.0" label="Set max frequency of invalid kmers"/>
            <param argument="--min_coverage" type="integer" value="30" min="0" label="Set minimum coverage required in each condition to do the comparison"/>
            <param argument="--min_ref_length" type="integer" value="100" min="0" label="Set minimum length of a reference transcript to include it in the analysis"/>
            <!-- optional="false": an empty selection would otherwise be rendered as the string 'None' on the command line -->
            <param argument="--comparison_methods" type="select" multiple="true" optional="false" label="Select comparison methods">
                <option value="GMM" selected="true">GMM</option>
                <option value="KS" selected="true">KS</option>
                <option value="TT">TT</option>
                <option value="MW">MW</option>
            </param>
            <param argument="--sequence_context" type="integer" value="0" min="0" max="4" label="Set sequence context for combining p-values"/>
            <param argument="--sequence_context_weights" type="select" label="Select type of weights to use for combining p-values">
                <option value="uniform" selected="true">Uniform</option>
                <option value="harmonic">Harmonic</option>
            </param>
            <param argument="--pvalue_thr" type="float" value="0.05" min="0.0" max="1.0" label="Set adjusted p-value threshold for reporting significant sites"/>
            <param argument="--logit" type="boolean" truevalue="--logit" falsevalue="" label="Use logistic regression testing also when all conditions have replicates?"/>
            <!-- Enabling this flag makes nanocompore tolerate runtime warnings instead of raising an error -->
            <param argument="--allow_warnings" type="boolean" truevalue="--allow_warnings" falsevalue="" label="Allow runtime warnings during the ANOVA tests instead of raising an error?"/>
            <!-- Controls which of the output datasets below are created (see the filters in the outputs section) -->
            <param name="out" type="select" multiple="true" optional="false" label="Select output file(s)">
                <option value="results" selected="true">Results</option>
                <option value="shift" selected="true">Shift stats</option>
                <option value="db" selected="true">Database (*.db.dir, *.db.bak, *.db.dat)</option>
                <option value="log">Log</option>
            </param>
        </section>
    </inputs>
    <outputs>
        <!-- Each output is collected from the 'results' directory and only created when selected in ap.out -->
        <data name="out_results" format="tabular" from_work_dir="results/out_nanocompore_results.tsv" label="${tool.name} on ${on_string}: Results">
            <filter>'results' in ap['out']</filter>
        </data>
        <data name="out_shift" format="tabular" from_work_dir="results/out_nanocompore_shift_stats.tsv" label="${tool.name} on ${on_string}: Shift stats">
            <filter>'shift' in ap['out']</filter>
        </data>
        <data name="out_db" format="tar" from_work_dir="results/db.tar" label="${tool.name} on ${on_string}: Database">
            <filter>'db' in ap['out']</filter>
        </data>
        <data name="out_log" format="txt" from_work_dir="results/out_SampComp.log" label="${tool.name} on ${on_string}: log">
            <filter>'log' in ap['out']</filter>
        </data>
    </outputs>
    <tests>
        <!--
            Column separators in the regular expressions are written as the regex
            escape \t: a literal tab inside an XML attribute value is normalized
            to a space by the parser and would never match tab-separated output.
        -->
        <!-- #1 -->
        <test expect_num_outputs="3">
            <repeat name="file1_rep">
                <param name="file" value="sample1.tsv"/>
                <param name="index" value="sample1.tsv.idx"/>
            </repeat>
            <repeat name="file2_rep">
                <param name="file" value="sample2.tsv"/>
                <param name="index" value="sample2.tsv.idx"/>
            </repeat>
            <param name="fasta" value="reference.fa"/>
            <output name="out_results">
                <assert_contents>
                    <has_n_lines n="3"/>
                    <has_text_matching expression="pos\tchr.+"/>
                    <has_text_matching expression="22102\tNA.+"/>
                </assert_contents>
            </output>
            <output name="out_shift">
                <assert_contents>
                    <has_n_lines n="3"/>
                    <has_text_matching expression="ref\_id\tpos.+"/>
                    <has_text_matching expression="chr\t22102.+"/>
                </assert_contents>
            </output>
            <output name="out_db">
                <assert_contents>
                    <has_size value="5416960" delta="10000"/>
                </assert_contents>
            </output>
        </test>
        <!-- #2 -->
        <test expect_num_outputs="4">
            <param name="label1" value="C1"/>
            <repeat name="file1_rep">
                <param name="file" value="sample1.tsv"/>
                <param name="index" value="sample1.tsv.idx"/>
            </repeat>
            <param name="label2" value="C2"/>
            <repeat name="file2_rep">
                <param name="file" value="sample2.tsv"/>
                <param name="index" value="sample2.tsv.idx"/>
            </repeat>
            <param name="fasta" value="reference.fa"/>
            <section name="ap">
                <param name="max_invalid_kmers_freq" value="0.2"/>
                <param name="min_coverage" value="31"/>
                <param name="min_ref_length" value="101"/>
                <param name="comparison_methods" value="GMM,KS,TT,MW"/>
                <param name="sequence_context" value="1"/>
                <param name="sequence_context_weights" value="harmonic"/>
                <param name="pvalue_thr" value="0.06"/>
                <param name="logit" value="true"/>
                <param name="allow_warnings" value="true"/>
                <param name="out" value="results,shift,db,log"/>
            </section>
            <output name="out_results">
                <assert_contents>
                    <has_n_lines n="3"/>
                    <has_text_matching expression="pos\tchr.+"/>
                    <has_text_matching expression="22102\tNA.+"/>
                </assert_contents>
            </output>
            <output name="out_shift">
                <assert_contents>
                    <has_n_lines n="3"/>
                    <has_text_matching expression="ref\_id\tpos.+"/>
                    <has_text_matching expression="chr\t22102.+"/>
                </assert_contents>
            </output>
            <output name="out_db">
                <assert_contents>
                    <has_size value="5416960" delta="10000"/>
                </assert_contents>
            </output>
            <output name="out_log">
                <assert_contents>
                    <has_n_lines n="31"/>
                    <has_text_matching expression=".+package\_name.+"/>
                </assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[
.. class:: infomark

**What it does**

@WID@

SampComp provides a very flexible analysis framework with a few mandatory options and many optional parameters. First, SampComp parses the sample eventalign collapse files and then the observed results are piled-up per reference at position level. In a second step, positions are compared using various statistical methods and the statistics are stored in a shelve DBM database containing the results for all positions with sufficient coverage.

**Input**

SampComp requires sample files obtained with NanopolishComp EventalignCollapse as explained before (see data preparation) for both the control and the experimental conditions. 2 conditions are expected and at least 2 replicates per condition are highly recommended.

A transcriptome FASTA reference file is required to extract kmer sequences during the analyses. The reference has to be the same as the one used at the mapping step.

Optionally, a BED file containing the genome annotations corresponding to the transcriptome fasta file can be provided. In that case Nanocompore will also convert the transcript coordinates into the genome space.

**Output**

The database object returned by Sampcomp is a Python GDBM object database indexed by reference id and can be used with SampCompDB.

.. class:: infomark

**References**

@REFERENCES@
    ]]></help>
    <expand macro="citations"/>
</tool>