Mercurial > repos > iuc > polypolish
diff polypolish.xml @ 0:aaa868913641 draft
planemo upload for repository https://github.com/mesocentre-clermont-auvergne/galaxy-tools/tree/master/tools/polypolish commit 95f351736787f04c65e830cd9daf9c9c8521893a
author | iuc |
---|---|
date | Thu, 22 Sep 2022 07:51:48 +0000 |
parents | |
children | bd2a15dbcea1 |
line wrap: on
line diff
<tool id="polypolish" name="Polypolish" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
    <!--
        Galaxy wrapper for Polypolish.

        The command stages the draft assembly and the alignments in the job
        working directory (BAM inputs are converted to SAM with samtools),
        optionally runs polypolish_insert_filter.py on paired alignments, and
        then runs polypolish once with the thresholds from the "Options" section.
    -->
    <description>Short-read polishing of long-read bacterial genome assemblies</description>
    <macros>
        <import>macro.xml</import>
    </macros>
    <expand macro="xrefs"/>
    <expand macro="requirements"/>
    <expand macro="version_command"/>
    <command detect_errors="aggressive"><![CDATA[
        ## Keep the exit status of polypolish when its log is piped through tee.
        set -o pipefail &&
        ln -s '$input.fasta_file' input_data &&

        ##======================================
        ## Stage the alignments.
        ## SAM datasets are symlinked; anything else accepted by the input
        ## (unsorted.bam and its subtypes) is converted to SAM with samtools.
        ## $sam_inputs collects the SAM arguments of the final polypolish call.
        ##======================================

        ##======================================
        ## Single SAM/BAM
        ##======================================
        #if $input.sam_data_type.sam_selector == 'single'
            #if $input.sam_data_type.single_sam.is_of_type('sam')
                ln -s '$input.sam_data_type.single_sam' input_sam &&
            #else
                samtools view -h '$input.sam_data_type.single_sam' > input_sam &&
            #end if
            #set $sam_inputs = 'input_sam'

        ##======================================
        ## Paired SAM/BAM
        ##======================================
        #elif $input.sam_data_type.sam_selector == 'paired'
            #if $input.sam_data_type.R1_sam.is_of_type('sam')
                ln -s '$input.sam_data_type.R1_sam' sample_R1.sam &&
            #else
                samtools view -h '$input.sam_data_type.R1_sam' > sample_R1.sam &&
            #end if
            #if $input.sam_data_type.R2_sam.is_of_type('sam')
                ln -s '$input.sam_data_type.R2_sam' sample_R2.sam &&
            #else
                samtools view -h '$input.sam_data_type.R2_sam' > sample_R2.sam &&
            #end if
            #if $input.sam_data_type.insert_filter.filter_select == 'filter'
                polypolish_insert_filter.py
                    --low '$input.sam_data_type.insert_filter.low'
                    --high '$input.sam_data_type.insert_filter.high'
                    --in1 sample_R1.sam
                    --in2 sample_R2.sam
                    --out1 'filtered_1.sam'
                    --out2 'filtered_2.sam' &&
                #set $sam_inputs = 'filtered_1.sam filtered_2.sam'
            #else
                #set $sam_inputs = 'sample_R1.sam sample_R2.sam'
            #end if

        ##======================================
        ## Collection of single-end SAM/BAM
        ## Staged files are named by position, not by element identifier, so
        ## identifiers with spaces, quotes or slashes cannot break the command.
        ##======================================
        #elif $input.sam_data_type.sam_selector == 'multiple_single'
            mkdir single_collection &&
            #for $idx, $single_sam in enumerate($input.sam_data_type.single_collection)
                #if $single_sam.is_of_type('sam')
                    ln -s '$single_sam' 'single_collection/input_${idx}.sam' &&
                #else
                    samtools view -h '$single_sam' > 'single_collection/input_${idx}.sam' &&
                #end if
            #end for
            #set $sam_inputs = 'single_collection/*.sam'

        ##======================================
        ## Collection of paired SAM/BAM
        ##======================================
        #elif $input.sam_data_type.sam_selector == 'multiple_paired'
            mkdir paired_collection &&
            #for $idx, $paired_sam in enumerate($input.sam_data_type.paired_collection)
                #if $paired_sam.forward.is_of_type('sam')
                    ln -s '$paired_sam.forward' 'paired_collection/forward_input${idx}.sam' &&
                #else
                    samtools view -h '$paired_sam.forward' > 'paired_collection/forward_input${idx}.sam' &&
                #end if
                #if $paired_sam.reverse.is_of_type('sam')
                    ln -s '$paired_sam.reverse' 'paired_collection/reverse_input${idx}.sam' &&
                #else
                    samtools view -h '$paired_sam.reverse' > 'paired_collection/reverse_input${idx}.sam' &&
                #end if
                #if $input.sam_data_type.insert_filter.filter_select == 'filter'
                    polypolish_insert_filter.py
                        --low '$input.sam_data_type.insert_filter.low'
                        --high '$input.sam_data_type.insert_filter.high'
                        --in1 'paired_collection/forward_input${idx}.sam'
                        --in2 'paired_collection/reverse_input${idx}.sam'
                        --out1 'paired_collection/forward_input${idx}_filtered.sam'
                        --out2 'paired_collection/reverse_input${idx}_filtered.sam' &&
                #end if
            #end for
            ## With filtering only the *_filtered.sam files are polished with;
            ## otherwise every staged SAM file is used.
            #if $input.sam_data_type.insert_filter.filter_select == 'filter'
                #set $sam_inputs = 'paired_collection/*_filtered.sam'
            #else
                #set $sam_inputs = 'paired_collection/*.sam'
            #end if
        #end if

        ##======================================
        ## Polishing
        ##======================================
        polypolish
            --min_depth $options.min_depth
            --fraction_invalid $options.fraction_invalid
            --max_errors $options.max_errors
            --fraction_valid $options.fraction_valid
            #if $options.debug
                --debug '$debug_file'
            #end if
            input_data $sam_inputs
        #if $options.keep_logfile
            ## The polished assembly goes to stdout, so the log is taken from
            ## stderr: stderr is sent into the pipe first, then stdout is
            ## redirected to the output dataset. tee copies the log back to
            ## stderr so Galaxy still sees it.
            2>&1 > '$polished_fasta' | tee '$logfile' >&2
        #else
            > '$polished_fasta'
        #end if
    ]]></command>
    <inputs>
        <section name="input" title="Input sequences" expanded="True">
            <param name="fasta_file" type="data" format="fasta" label="Select a draft genome for polishing"
                   help="Fasta sequence to be cleaned using short-reads data"/>
            <conditional name="sam_data_type">
                <param name="sam_selector" type="select" label="Select aligned data to polish"
                       help="Choose number of aligned sam/bam files. Need aligned file with all possible locations in aligner option">
                    <option value="single">Single SAM/BAM file</option>
                    <option value="paired">Paired SAM/BAM files</option>
                    <option value="multiple_single">Multiple single SAM/BAM files</option>
                    <option value="multiple_paired">Multiple paired SAM/BAM files</option>
                </param>
                <when value="single">
                    <param name="single_sam" type="data" format="sam,unsorted.bam" label="Select a SAM/BAM file"
                           help="Specify dataset with only one SAM/BAM file"/>
                </when>
                <when value="paired">
                    <param name="R1_sam" type="data" format="sam,unsorted.bam" label="Select forward SAM/BAM file"
                           help="Specify the forward SAM/BAM files"/>
                    <param name="R2_sam" type="data" format="sam,unsorted.bam" label="Select reverse SAM/BAM file"
                           help="Specify the reverse SAM/BAM files"/>
                    <expand macro="filter_option"/>
                </when>
                <when value="multiple_single">
                    <param name="single_collection" format="sam,unsorted.bam" type="data_collection" collection_type="list"
                           label="Single-end collection" help="Specify a list of single-end dataset"/>
                </when>
                <when value="multiple_paired">
                    <param name="paired_collection" format="sam,unsorted.bam" type="data_collection" collection_type="list:paired"
                           label="Paired-end collection" help="Specify a list of paired-end datasets"/>
                    <expand macro="filter_option"/>
                </when>
            </conditional>
        </section>
        <section name="options" title="Options" expanded="False">
            <param name="min_depth" argument="--min_depth" type="integer" min="0" value="5" label="Minimal depth"
                   help="A base must occur at least this many times in the pileup to be considered valid [default: 5]"/>
            <param name="fraction_invalid" argument="--fraction_invalid" type="float" min="0" value="0.2" max="1" label="Minimal invalid fraction"
                   help="A base must make up less than this fraction of the read depth to be considered invalid [default: 0.2]"/>
            <param name="max_errors" argument="--max_errors" type="integer" min="0" value="10" label="Number of mismatch/indels to ignore alignments"
                   help="Ignore alignments with more than this many mismatches and indels [default: 10]"/>
            <param name="fraction_valid" argument="--fraction_valid" type="float" min="0" value="0.5" max="1" label="Minimal valid fraction"
                   help="A base must make up at least this fraction of the read depth to be considered valid [default: 0.5]"/>
            <param name="keep_logfile" type="boolean" truevalue="true" falsevalue="false" label="Keep log file"/>
            <param name="debug" argument="--debug" type="boolean" truevalue="true" falsevalue="false" label="Keep per base information file"/>
        </section>
    </inputs>
    <outputs>
        <data name="polished_fasta" format="fasta" label="${tool.name} on ${on_string}: polished fasta"/>
        <data name="debug_file" format="tabular" label="${tool.name} on ${on_string}: Per base informations">
            <filter> options['debug'] == True </filter>
        </data>
        <!-- The command writes the log straight to this dataset, so no from_work_dir is needed. -->
        <data name="logfile" format="txt" label="${tool.name} on ${on_string}: log report">
            <filter> options['keep_logfile'] == True </filter>
        </data>
    </outputs>
    <tests>
        <!-- Test_1 with default values and single SAM -->
        <test expect_num_outputs="1">
            <section name="input">
                <param name="fasta_file" value="contigs.fa"/>
                <conditional name="sam_data_type">
                    <param name="sam_selector" value="single"/>
                    <param name="single_sam" value="aligned_test_file/alignement_R1.sam"/>
                </conditional>
            </section>
            <output name="polished_fasta" value="polished.fasta"/>
        </test>
        <!-- Test_2 with default values and paired SAM -->
        <test expect_num_outputs="2">
            <section name="input">
                <param name="fasta_file" value="contigs.fa"/>
                <conditional name="sam_data_type">
                    <param name="sam_selector" value="paired"/>
                    <param name="R1_sam" value="aligned_test_file/alignement_R1.sam"/>
                    <param name="R2_sam" value="aligned_test_file/alignement_R2.sam"/>
                </conditional>
            </section>
            <section name="options">
                <param name="debug" value="true"/>
                <param name="keep_logfile" value="false"/>
            </section>
            <output name="polished_fasta" value="polished.fasta"/>
            <output name="debug_file" value="debug_file_test_2.tsv"/>
        </test>
        <!-- Test_3 with default values and single-end multiple SAM -->
        <test expect_num_outputs="2">
            <section name="input">
                <param name="fasta_file" value="contigs.fa"/>
                <conditional name="sam_data_type">
                    <param name="sam_selector" value="multiple_single"/>
                    <param name="single_collection">
                        <collection type="list">
                            <element name="R1_sam" value="aligned_test_file/alignement_R1.sam" ftype="sam"/>
                            <element name="R1_bis_sam" value="aligned_test_file/alignement_R1_bis.sam" ftype="sam"/>
                            <element name="R1_ter_sam" value="aligned_test_file/alignement_R1_ter.sam" ftype="sam"/>
                        </collection>
                    </param>
                </conditional>
            </section>
            <section name="options">
                <param name="debug" value="false"/>
                <param name="keep_logfile" value="true"/>
            </section>
            <output name="polished_fasta" value="polished.fasta"/>
            <output name="logfile" value="logfile_test_3.log" lines_diff="15"/>
        </test>
        <!-- Test_4 with default values and paired collection SAM -->
        <test expect_num_outputs="1">
            <section name="input">
                <param name="fasta_file" value="contigs.fa"/>
                <conditional name="sam_data_type">
                    <param name="sam_selector" value="multiple_paired"/>
                    <param name="paired_collection">
                        <collection type="list:paired">
                            <element name="paired_1">
                                <collection type="paired">
                                    <element name="forward" value="aligned_test_file/alignement_R1.sam" ftype="sam"/>
                                    <element name="reverse" value="aligned_test_file/alignement_R2.sam" ftype="sam"/>
                                </collection>
                            </element>
                            <element name="paired_2">
                                <collection type="paired">
                                    <element name="forward" value="aligned_test_file/alignement_R1_bis.sam" ftype="sam"/>
                                    <element name="reverse" value="aligned_test_file/alignement_R2_bis.sam" ftype="sam"/>
                                </collection>
                            </element>
                            <element name="paired_3">
                                <collection type="paired">
                                    <element name="forward" value="aligned_test_file/alignement_R1_ter.sam" ftype="sam"/>
                                    <element name="reverse" value="aligned_test_file/alignement_R2_ter.sam" ftype="sam"/>
                                </collection>
                            </element>
                        </collection>
                    </param>
                </conditional>
            </section>
            <output name="polished_fasta" value="polished.fasta"/>
        </test>
        <!-- Test_5 paired-end without filtering and without log file -->
        <test expect_num_outputs="1">
            <section name="input">
                <param name="fasta_file" value="contigs.fa"/>
                <conditional name="sam_data_type">
                    <param name="sam_selector" value="paired"/>
                    <param name="R1_sam" value="aligned_test_file/alignement_R1.sam"/>
                    <param name="R2_sam" value="aligned_test_file/alignement_R2.sam"/>
                    <!-- insert_filter belongs to the sam_data_type conditional, not to the options section -->
                    <conditional name="insert_filter">
                        <param name="filter_select" value="non_filter"/>
                    </conditional>
                </conditional>
            </section>
            <section name="options">
                <param name="debug" value="false"/>
                <param name="keep_logfile" value="false"/>
            </section>
            <output name="polished_fasta" value="polished.fasta"/>
        </test>
        <!-- Test_6 paired-end with filter, user defined values and without log file -->
        <test expect_num_outputs="1">
            <section name="input">
                <param name="fasta_file" value="contigs.fa"/>
                <conditional name="sam_data_type">
                    <param name="sam_selector" value="paired"/>
                    <param name="R1_sam" value="aligned_test_file/alignement_R1.sam"/>
                    <param name="R2_sam" value="aligned_test_file/alignement_R2.sam"/>
                    <conditional name="insert_filter">
                        <param name="filter_select" value="filter"/>
                        <param name="low" value="1"/>
                        <param name="high" value="98.7"/>
                    </conditional>
                </conditional>
            </section>
            <section name="options">
                <param name="debug" value="false"/>
                <param name="keep_logfile" value="false"/>
            </section>
            <output name="polished_fasta" value="polished.fasta"/>
        </test>
        <!-- Test_7 paired-end with all customized filters -->
        <!-- NOTE(review): the option values below are now passed to polypolish; confirm that
             polished.fasta is still the expected result and regenerate it if it is not. -->
        <test expect_num_outputs="1">
            <section name="input">
                <param name="fasta_file" value="contigs.fa"/>
                <conditional name="sam_data_type">
                    <param name="sam_selector" value="paired"/>
                    <param name="R1_sam" value="aligned_test_file/alignement_R1.sam"/>
                    <param name="R2_sam" value="aligned_test_file/alignement_R2.sam"/>
                    <conditional name="insert_filter">
                        <param name="filter_select" value="filter"/>
                        <param name="low" value="1.4"/>
                        <param name="high" value="96.6"/>
                    </conditional>
                </conditional>
            </section>
            <section name="options">
                <param name="min_depth" value="10"/>
                <param name="fraction_invalid" value="0.5"/>
                <param name="max_errors" value="8"/>
                <param name="fraction_valid" value="0.6"/>
                <param name="debug" value="false"/>
                <param name="keep_logfile" value="false"/>
            </section>
            <output name="polished_fasta" value="polished.fasta"/>
        </test>
        <!-- Test_8 single with bam input -->
        <test expect_num_outputs="1">
            <section name="input">
                <param name="fasta_file" value="contigs.fa"/>
                <conditional name="sam_data_type">
                    <param name="sam_selector" value="single"/>
                    <param name="single_sam" value="aligned_test_file/alignement_R1.bam" ftype="unsorted.bam"/>
                </conditional>
            </section>
            <section name="options">
                <param name="debug" value="false"/>
                <param name="keep_logfile" value="false"/>
            </section>
            <output name="polished_fasta" value="polished.fasta"/>
        </test>
        <!-- Test_9 paired-end with bam input -->
        <test expect_num_outputs="1">
            <section name="input">
                <param name="fasta_file" value="contigs.fa"/>
                <conditional name="sam_data_type">
                    <param name="sam_selector" value="paired"/>
                    <param name="R1_sam" value="aligned_test_file/alignement_R1.bam" ftype="unsorted.bam"/>
                    <param name="R2_sam" value="aligned_test_file/alignement_R2.bam" ftype="unsorted.bam"/>
                </conditional>
            </section>
            <section name="options">
                <param name="debug" value="false"/>
                <param name="keep_logfile" value="false"/>
            </section>
            <output name="polished_fasta" value="polished.fasta"/>
        </test>
        <!-- Test_10 single collection with bam input -->
        <test expect_num_outputs="1">
            <section name="input">
                <param name="fasta_file" value="contigs.fa"/>
                <conditional name="sam_data_type">
                    <param name="sam_selector" value="multiple_single"/>
                    <param name="single_collection">
                        <collection type="list">
                            <element name="R1_sam" value="aligned_test_file/alignement_R1.bam" ftype="unsorted.bam"/>
                            <element name="R1_bis_sam" value="aligned_test_file/alignement_R1_bis.bam" ftype="unsorted.bam"/>
                            <element name="R1_ter_sam" value="aligned_test_file/alignement_R1_ter.bam" ftype="unsorted.bam"/>
                        </collection>
                    </param>
                </conditional>
            </section>
            <section name="options">
                <param name="debug" value="false"/>
                <param name="keep_logfile" value="false"/>
            </section>
            <output name="polished_fasta" value="polished.fasta"/>
        </test>
        <!-- Test_11 paired collection with bam input -->
        <test expect_num_outputs="1">
            <section name="input">
                <param name="fasta_file" value="contigs.fa"/>
                <conditional name="sam_data_type">
                    <param name="sam_selector" value="multiple_paired"/>
                    <param name="paired_collection">
                        <collection type="list:paired">
                            <element name="paired_1">
                                <collection type="paired">
                                    <element name="forward" value="aligned_test_file/alignement_R1.bam" ftype="unsorted.bam"/>
                                    <element name="reverse" value="aligned_test_file/alignement_R2.bam" ftype="unsorted.bam"/>
                                </collection>
                            </element>
                            <element name="paired_2">
                                <collection type="paired">
                                    <element name="forward" value="aligned_test_file/alignement_R1_bis.bam" ftype="unsorted.bam"/>
                                    <element name="reverse" value="aligned_test_file/alignement_R2_bis.bam" ftype="unsorted.bam"/>
                                </collection>
                            </element>
                            <element name="paired_3">
                                <collection type="paired">
                                    <element name="forward" value="aligned_test_file/alignement_R1_ter.bam" ftype="unsorted.bam"/>
                                    <element name="reverse" value="aligned_test_file/alignement_R2_ter.bam" ftype="unsorted.bam"/>
                                </collection>
                            </element>
                        </collection>
                    </param>
                </conditional>
            </section>
            <section name="options">
                <param name="debug" value="false"/>
                <param name="keep_logfile" value="false"/>
            </section>
            <output name="polished_fasta" value="polished.fasta"/>
        </test>
    </tests>
    <help><![CDATA[
**What it does**

Polypolish is a tool for polishing genome assemblies with short reads.
Polypolish uses SAM/BAM files where each read has been aligned to all possible locations (not just a single best location).
This allows it to repair errors in repeat regions that other alignment-based polishers cannot fix.

**Polypolish pipeline steps**

1. [Optional] Filter aligned reads

   - Exclude some alignments based on their insert size
   - This should reduce the number of excessive alignments, particularly near the edges of repeat sequences, improving Polypolish's ability to fix errors in those regions.

2. Clean assembly with filtered reads

**Inputs**

Polypolish needs SAM/BAM input obtained from an aligner run with the option to keep all possible locations.
Polypolish takes one or more assemblies as input FASTA.
It also needs the raw reads aligned in single or paired-end SAM/BAM format.
You can use multiple aligned datasets to polish the same assembly.

**WARNING: it can only work if multiple location information is available in the SAM/BAM files**

For example, when using bwa mem to align the raw data beforehand, you need:

1. To align each read dataset independently (also for paired data)
2. To set the option "Output all alignments for single-ends or unpaired paired-ends" in Select analysis mode > Set input/output options

   - This allows the multiple alignment output needed to use Polypolish
    ]]></help>
    <expand macro="citations"/>
</tool>