Mercurial > repos > menegidio > novoplasty
changeset 2:832bcbdc48b1 draft
Uploaded
author | menegidio |
---|---|
date | Thu, 17 Jun 2021 21:20:08 +0000 |
parents | 2d2d2f6017fe |
children | 4715df9a6358 |
files | novoplasty/.shed.yml novoplasty/novoplasty.xml |
diffstat | 2 files changed, 699 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/novoplasty/.shed.yml Thu Jun 17 21:20:08 2021 +0000 @@ -0,0 +1,10 @@ +name: novoplasty +owner: menegidio +remote_repository_url: "github.com/menegidio-lab/mitogalaxy/tree/master/tools-shed/novoplasty" +homepage_url: "https://github.com/ndierckx/NOVOPlasty" +type: unrestricted +description: NOVOPlasty is a de novo assembler and heteroplasmy/variance caller for short circular genomes. +long_description: | + NOVOPlasty is a de novo assembler and heteroplasmy/variance caller for short circular genomes. +categories: +- "Assembly"
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/novoplasty/novoplasty.xml Thu Jun 17 21:20:08 2021 +0000 @@ -0,0 +1,689 @@ +<?xml version="1.0"?> +<tool id="novoplasty" name="NOVOplasty" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="18.01"> + <description>de novo assembler for short circular genomes</description> + <macros> + <token name="@TOOL_VERSION@">4.2</token> + <token name="@VERSION_SUFFIX@">0</token> + <xml name="content" token_min="" token_max=""> + <param name="genome_range_min" type="text" value="@MIN@" label="Set lower limit for the expected genome size range" help="(Genome Range)"/> + <param name="genome_range_max" type="text" value="@MAX@" label="Set upper limit for the expected genome size range" help="(Genome Range)"/> + </xml> + </macros> + <requirements> + <requirement type="package" version="@TOOL_VERSION@">novoplasty</requirement> + </requirements> + <version_command><![CDATA[NOVOPlasty.pl 2>&1 | grep 'Version' | awk -F' ' '{print $2}']]></version_command> + <command detect_errors="exit_code"><![CDATA[ +## initialize +## samples require extension .fasta or .fasta.gz +#if $reads_cond.reads_sel == 'combined' + ln -s '$reads_cond.combined_reads' 'combined.${reads_cond.combined_reads.datatype.file_ext}' && +#else if $reads_cond.reads_sel == 'separate' + ln -s '$reads_cond.forward_reads' 'forward.${reads_cond.forward_reads.datatype.file_ext}' && + ln -s '$reads_cond.reverse_reads' 'reverse.${reads_cond.reverse_reads.datatype.file_ext}' && +#end if + +## run +NOVOPlasty.pl -c '$config' + +## postprocessing +&& cp '$config' 'config.txt' + ]]></command> + <configfiles> + <configfile name="config"><![CDATA[ +Project: +----------------------- +Project name = result +Type = ${type_cond.type_sel} +Genome Range = ${type_cond.genome_range_min}-${type_cond.genome_range_max} +K-mer = ${kmer} +Max memory = +Extended log = 1 +Save assembled reads = ${save_assembled_reads} +Seed Input = ${seed_input} +Extend seed directly = ${extend_seed_directly} +Reference sequence = #if $reference then $reference else ''# +Variance detection = #if $platform_cond.platform_sel == 'illumina' and $platform_cond.mode_cond.mode_sel == 'variance' then 'yes' else ''# +Heteroplasmy = +MAF = #if $platform_cond.platform_sel == 'illumina' and $platform_cond.mode_cond.mode_sel == 'heteroplasmy' then $platform_cond.mode_cond.maf else ''# +HP exclude list = +Chloroplast sequence = #if $type_cond.type_sel == 'mito_plant' and $type_cond.chloroplast_sequence then $type_cond.chloroplast_sequence else ''# + +Dataset 1: +----------------------- +Read Length = ${read_length} +Insert size = ${insert_size} +Platform = ${$platform_cond.platform_sel} +Single/Paired = ${single_paired} +Combined reads = #if $reads_cond.reads_sel == 'combined' then 'combined.'+$reads_cond.combined_reads.datatype.file_ext else ''# +Forward reads = #if $reads_cond.reads_sel == 'separate' then 'forward.'+$reads_cond.forward_reads.datatype.file_ext else ''# +Reverse reads = #if $reads_cond.reads_sel == 'separate' then 'reverse.'+$reads_cond.reverse_reads.datatype.file_ext else ''# + +Optional: +----------------------- +Insert size auto = ${insert_size_auto} +Use Quality Scores = ${use_quality_scores} + ]]></configfile> + </configfiles> + <inputs> + <conditional name="reads_cond"> + <param name="reads_sel" type="select" label="Select read file type" help="(Combined reads/Forward reads/Reverse reads)"> + <option value="combined">Combined reads</option> + <option value="separate">Separate reads</option> + </param> + <when value="combined"> + <param name="combined_reads" type="data" format="fasta,fasta.gz,fastq,fastq.gz" label="Select file with combined reads" help="(Combined reads)"/> + </when> + <when value="separate"> + <param name="forward_reads" type="data" format="fasta,fasta.gz,fastq,fastq.gz" label="Select file with forward reads" help="(Forward reads)"/> + <param name="reverse_reads" type="data" format="fasta,fasta.gz,fastq,fastq.gz" label="Select file with reverse reads" help="(Reverse reads)"/> + </when> + <section name="advanced" title="Advanced options"> + </conditional> + <!-- seed_input doesn't support fasta.gz, throws error INVALID SEED, PLEASE TRY AGAIN WITH A NEW ONE --> + <param name="seed_input" type="data" format="fasta" label="Select file with seed sequence" help="(Seed Input)"/> + <param name="reference" type="data" format="fasta,fasta.gz" optional="true" label="Select reference file" help="(Reference sequence)"/> + <conditional name="platform_cond"> + <param name="platform_sel" type="select" label="Select platform" help="(Platform)"> + <option value="illumina" selected="true">Illumina</option> + <option value="ion">ION</option> + </param> + <when value="illumina"> + <conditional name="mode_cond"> + <param name="mode_sel" type="select" label="Select mode" help="(Variance detection, Heteroplasmy)"> + <option value="none" selected="true">None</option> + <option value="variance">Variance detection</option> + <option value="heteroplasmy">Heteroplasmy</option> + </param> + <when value="none"/> + <when value="variance"/> + <when value="heteroplasmy"> + <param name="maf" type="float" value="0.007" min="0.007" max="0.49" label="Set minimum minor allele frequency" help="If you want to detect heteroplasmy, first assemble the genome without this option. Then give the resulting sequence as a reference and as a seed input. (Heteroplasmy)"/> + </when> + </conditional> + </when> + <when value="ion"/> + </conditional> + <conditional name="type_cond"> + <param name="type_sel" type="select" label="Select assembly type" help="(Type)"> + <option value="chloro" selected="true">Chloroplast assembly (chloro)</option> + <option value="mito">Mitochondrial assembly (mito)</option> + <option value="mito_plant">Mitochondrial assembly in plants (mito_plant)</option> + </param> + <when value="chloro"> + <expand macro="content" min="120000" max="200000"/> + </when> + <when value="mito"> + <expand macro="content" min="12000" max="20000"/> + </when> + <when value="mito_plant"> + <expand macro="content" min="12000" max="20000"/> + <param name="chloroplast_sequence" type="data" format="fasta,fasta.gz" optional="true" label="Select chloroplast sequence" help="Chloroplast needs to be assembled before mitochondria. (Chloroplast sequence)"/> + </when> + </conditional> + <param name="read_length" type="integer" value="151" label="Set read length" help="(Read Length)"/> + <param name="insert_size" type="integer" value="300" label="Set total insert size of your paired end reads" help="It doesn't have to be accurate but should be close enough. (Insert size)"/> + <param name="single_paired" type="select" label="Select read type" help="(Single/Paired)"> + <option value="PE" selected="true">Paired</option> + <option value="SE">Single</option> + </param> + <param name="extend_seed_directly" type="boolean" truevalue="yes" falsevalue="no" checked="true" label="Extend seeds instead of finding matching reads" help="Only use this when your seed originates from the same sample and there are no possible mismatches. (Extend seed directly)"/> + <param name="kmer" type="integer" value="39" min="1" label="Set length of overlap between matching reads" help="If reads are shorter then 90 bp or you have low coverage data, this value should be decreased down to 23. For reads longer then 101 bp, this value can be increased, but this is not necessary."/> + <!-- Assembled reads are saved anyway. If this option is selected, the parameter is set to "2", otherwise "yes" --> + <param name="save_assembled_reads" type="boolean" truevalue="2" falsevalue="yes" label="Retain original IDs when saving assemled reads?"/> + <param name="insert_size_auto" type="boolean" truevalue="yes" falsevalue="no" checked="true" label="Finetune your insert size automatically?" help="(Insert size auto)"/> + <param name="use_quality_scores" type="boolean" truevalue="yes" falsevalue="no" label="Take quality scores into account?" help="Only use this when reads have low quality, like with the 300 bp reads of Illumina. (Use Quality Scores)"/> + </section> + <param name="out" type="select" multiple="true" optional="false" label="Select output file(s)" help="Only shown in history if selected here and generated by the specific NOVOplasty run."> + <!-- Assembly --> + <option value="ar" selected="true">Assembled reads</option> + <option value="c" selected="true">Contigs</option> + <option value="ca" selected="true">Circularized assembly</option> + <option value="cc" selected="true">Contig combinations</option> + <option value="ct" selected="true">Contigs Tmp</option> + <option value="mc" selected="true">Merged contigs</option> + <option value="ua" selected="true">Uncircularized assemblies</option> + <!-- Variance --> + <option value="v" selected="true">Variance</option> + <!-- Heteroplasmy --> + <option value="h" selected="true">Heteroplasmy</option> + <option value="ha" selected="true">Heteroplasmy assemblies</option> + <option value="lth" selected="true">Linkage table heteroplasmy</option> + <option value="pn" selected="true">Possible NUMTs</option> + <option value="pna" selected="true">Possible NUMTs assemblies</option> + <option value="ltn" selected="true">Linkage table NUMTs</option> + <option value="cm" selected="true">Circos mutations</option> + <option value="cl" selected="true">Circos links</option> + <!-- Others --> + <option value="config">Config file</option> + <option value="el">Extended log</option> + <option value="l">Log</option> + </param> + </inputs> + <outputs> + <!-- Assembly --> + <data name="out_c" format="fasta" from_work_dir="Contigs_1_result.fasta" label="${tool.name} on ${on_string}: Contigs"> + <filter>'c' in out</filter> + </data> + <data name="out_ct" format="txt" from_work_dir="contigs_tmp_result.txt" label="${tool.name} on ${on_string}: Contigs Tmp"> + <filter>'ct' in out</filter> + </data> + <data name="out_mc" format="txt" from_work_dir="Merged_contigs_result.txt" label="${tool.name} on ${on_string}: Merged Contigs"> + <filter>'mc' in out</filter> + </data> + <data name="out_cc" format="txt"> + <discover_datasets pattern="Option\_(?P<designation>.+)\_result\.txt" format="tabular" directory="" visible="true"/> + <filter>'cc' in out</filter> + </data> + <data name="out_ar1" format="fasta" from_work_dir="Assembled_reads_result_R1.fasta" label="${tool.name} on ${on_string}: Assembled reads R1"> + <filter>'ar' in out</filter> + </data> + <data name="out_ar2" format="fasta" from_work_dir="Assembled_reads_result_R2.fasta" label="${tool.name} on ${on_string}: Assembled reads R2"> + <filter>'ar' in out</filter> + </data> + <data name="out_ua" format="fasta" from_work_dir="Uncircularized_assemblies_1_result.fasta" label="${tool.name} on ${on_string}: Uncircularized assemblies"> + <filter>'ua' in out</filter> + </data> + <data name="out_ca" format="fasta" from_work_dir="Circularized_assembly_1_result.fasta" label="${tool.name} on ${on_string}: Circularized assembly"> + <filter>'ca' in out</filter> + </data> + <!-- Variance --> + <data name="out_v" format="vcf" from_work_dir="Variance_result.vcf" label="${tool.name} on ${on_string}: Variance"> + <filter>'illumina' == platform_cond['platform_sel'] and 'variance' == platform_cond['mode_cond']['mode_sel'] and 'v' in out</filter> + </data> + <!-- Heteroplasmy --> + <data name="out_h" format="vcf" from_work_dir="Heteroplasmy_result.vcf" label="${tool.name} on ${on_string}: Heteroplasmy"> + <filter>'heteroplasmy' == platform_cond['mode_cond']['mode_sel'] and 'h' in out</filter> + </data> + <data name="out_ha" format="fasta" from_work_dir="Heteroplasmy_assemblies_result.fasta" label="${tool.name} on ${on_string}: Heteroplasmy assemblies"> + <filter>'heteroplasmy' == platform_cond['mode_cond']['mode_sel'] and 'ha' in out</filter> + </data> + <data name="out_lth" format="tabular" from_work_dir="Linkage_table_heteroplasmy_result.txt" label="${tool.name} on ${on_string}: Linkage table heteroplasmy"> + <filter>'heteroplasmy' == platform_cond['mode_cond']['mode_sel'] and 'lth' in out</filter> + </data> + <data name="out_pn" format="vcf" from_work_dir="Possible_NUMTs_result.vcf" label="${tool.name} on ${on_string}: Possible NUMTs"> + <filter>'heteroplasmy' == platform_cond['mode_cond']['mode_sel'] and 'pn' in out</filter> + </data> + <data name="out_pna" format="fasta" from_work_dir="Possible_NUMTs_assemblies_result.fasta" label="${tool.name} on ${on_string}: Possible_NUMTs_assemblies_project.fasta"> + <filter>'heteroplasmy' == platform_cond['mode_cond']['mode_sel'] and 'pna' in out</filter> + </data> + <data name="out_ltn" format="tabular" from_work_dir="Linkage_table_NUMTs_result.txt" label="${tool.name} on ${on_string}: Linkage table NUMTs"> + <filter>'heteroplasmy' == platform_cond['mode_cond']['mode_sel'] and 'ltn' in out</filter> + </data> + <data name="out_cm" format="txt" from_work_dir="Circos_mutations_result.txt" label="${tool.name} on ${on_string}: Circos mutations"> + <filter>'heteroplasmy' == platform_cond['mode_cond']['mode_sel'] and 'cm' in out</filter> + </data> + <data name="out_cl" format="txt" from_work_dir="Circos_links_result.txt" label="${tool.name} on ${on_string}: Circos links"> + <filter>'heteroplasmy' == platform_cond['mode_cond']['mode_sel'] and 'cl' in out</filter> + </data> + <!-- Others --> + <data name="out_l" format="txt" from_work_dir="log_result.txt" label="${tool.name} on ${on_string}: Log"> + <filter>'l' in out</filter> + </data> + <data name="out_el" format="txt" from_work_dir="log_extended_result.txt" label="${tool.name} on ${on_string}: Extended log"> + <filter>'el' in out</filter> + </data> + <data name="out_config" format="txt" from_work_dir="config.txt" label="${tool.name} on ${on_string}: Config file"> + <filter>'config' in out</filter> + </data> + </outputs> + <tests> + <!-- #1 mito, default, separate, compressed inputs --> + <test expect_num_outputs="8"> + <conditional name="reads_cond"> + <param name="reads_sel" value="separate"/> + <param name="forward_reads" value="forward.fasta.gz"/> + <param name="reverse_reads" value="reverse.fasta.gz"/> + </conditional> + <conditional name="type_cond"> + <param name="type_sel" value="mito"/> + </conditional> + <!-- fasta.gz not supported --> + <param name="seed_input" value="seed.fasta"/> + <!-- Assembly --> + <output name="out_c"> + <assert_contents> + <has_n_lines n="110"/> + <has_line line=">Contig01+seed"/> + </assert_contents> + </output> + <output name="out_ct"> + <assert_contents> + <has_n_lines n="2"/> + <has_line line=">seed"/> + </assert_contents> + </output> + <output name="out_ar1"> + <assert_contents> + <has_n_lines n="10"/> + <has_line line=">676/1"/> + </assert_contents> + </output> + <output name="out_ar2"> + <assert_contents> + <has_n_lines n="10"/> + <has_line line=">676/2"/> + </assert_contents> + </output> + <output name="out_ua"> + <assert_contents> + <has_n_lines n="0"/> + </assert_contents> + </output> + </test> + <!-- #2 mito, combined, compressed inputs --> + <test expect_num_outputs="11"> + <conditional name="reads_cond"> + <param name="reads_sel" value="combined"/> + <param name="combined_reads" value="combined.fasta.gz"/> + </conditional> + <param name="seed_input" value="seed.fasta"/> + <param name="reference" value="reference.fasta.gz"/> + <conditional name="type_cond"> + <param name="type_sel" value="mito"/> + <param name="genome_range_min" value="12001"/> + <param name="genome_range_max" value="20001"/> + </conditional> + <param name="read_length" value="150"/> + <param name="insert_size" value="301"/> + <param name="extend_seed_directly" value="no"/> + <param name="kmer" value="40"/> + <param name="save_assembled_reads" value="2"/> + <param name="insert_size_auto" value="yes"/> + <param name="use_quality_scores" value="yes"/> + <param name="out" value="c,ca,ua,mc,cc,ct,ar,l,el,config"/> + <!-- Assembly --> + <output name="out_c"> + <assert_contents> + <has_n_lines n="0"/> + </assert_contents> + </output> + <output name="out_ct"> + <assert_contents> + <has_n_lines n="2"/> + <has_line line=">4191"/> + </assert_contents> + </output> + <output name="out_ar1"> + <assert_contents> + <has_n_lines n="142"/> + <has_line line=">38326//1"/> + </assert_contents> + </output> + <output name="out_ar2"> + <assert_contents> + <has_n_lines n="142"/> + <has_line line=">38326//2"/> + </assert_contents> + </output> + <output name="out_ua"> + <assert_contents> + <has_n_lines n="13"/> + <has_text_matching expression=">Contig01.+"/> + </assert_contents> + </output> + <!-- Others --> + <output name="out_l"> + <assert_contents> + <has_text_matching expression=".+Assembly 1 finished.+"/> + </assert_contents> + </output> + <output name="out_el"> + <assert_contents> + <has_text_matching expression=".+Assembly 1 finished.+"/> + </assert_contents> + </output> + <output name="out_config"> + <assert_contents> + <has_n_lines n="34"/> + <has_line line="Project:"/> + <has_text_matching expression=".+"/> + </assert_contents> + </output> + </test> + <!-- #3 mito, variance, decompressed inputs --> + <test expect_num_outputs="12"> + <conditional name="reads_cond"> + <param name="reads_sel" value="combined"/> + <param name="combined_reads" value="combined.fasta"/> + </conditional> + <param name="seed_input" value="seed.fasta"/> + <param name="reference" value="reference.fasta"/> + <conditional name="platform_cond"> + <param name="platform_sel" value="illumina"/> + <conditional name="mode_cond"> + <param name="mode_sel" value="variance"/> + </conditional> + </conditional> + <conditional name="type_cond"> + <param name="type_sel" value="mito"/> + </conditional> + <param name="out" value="c,ca,ua,mc,cc,ct,ar,v,l,el,config"/> + <!-- Assembly --> + <output name="out_c"> + <assert_contents> + <has_n_lines n="0"/> + </assert_contents> + </output> + <output name="out_ct"> + <assert_contents> + <has_n_lines n="2"/> + <has_line line=">seed"/> + </assert_contents> + </output> + <output name="out_ar1"> + <assert_contents> + <has_n_lines n="10"/> + <has_line line=">20/1"/> + </assert_contents> + </output> + <output name="out_ar2"> + <assert_contents> + <has_n_lines n="10"/> + <has_line line=">20/2"/> + </assert_contents> + </output> + <output name="out_ua"> + <assert_contents> + <has_n_lines n="110"/> + <has_text_matching expression=">Contig01.+"/> + </assert_contents> + </output> + <!-- Variance --> + <output name="out_v"> + <assert_contents> + <has_n_lines n="6"/> + <has_line line="##fileformat=VCFv4.0"/> + <has_text_matching expression="#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO"/> + </assert_contents> + </output> + <!-- Others --> + <output name="out_l"> + <assert_contents> + <has_text_matching expression=".+Assembly 1 finished.+"/> + </assert_contents> + </output> + <output name="out_el"> + <assert_contents> + <has_text_matching expression=".+Assembly 1 finished.+"/> + </assert_contents> + </output> + <output name="out_config"> + <assert_contents> + <has_n_lines n="34"/> + <has_line line="Project:"/> + <has_text_matching expression=".+"/> + </assert_contents> + </output> + </test> + <!-- #4 mito, untested parameters, compressed inputs --> + <test expect_num_outputs="16"> + <conditional name="reads_cond"> + <param name="reads_sel" value="separate"/> + <param name="forward_reads" value="forward.fasta.gz"/> + <param name="reverse_reads" value="reverse.fasta.gz"/> + </conditional> + <param name="seed_input" value="seed.fasta"/> + <conditional name="platform_cond"> + <param name="platform_sel" value="ion"/> + </conditional> + <conditional name="type_cond"> + <param name="type_sel" value="mito"/> + </conditional> + <param name="single_paired" value="SE"/> + <!-- Assembly --> + <output name="out_c"> + <assert_contents> + <has_n_lines n="110"/> + <has_line line=">Contig01+seed"/> + </assert_contents> + </output> + <output name="out_ct"> + <assert_contents> + <has_n_lines n="2"/> + <has_line line=">seed"/> + </assert_contents> + </output> + <output name="out_ar1"> + <assert_contents> + <has_n_lines n="10"/> + <has_line line=">676/1"/> + </assert_contents> + </output> + <output name="out_ar2"> + <assert_contents> + <has_n_lines n="10"/> + <has_line line=">676/2"/> + </assert_contents> + </output> + <output name="out_ua"> + <assert_contents> + <has_n_lines n="0"/> + </assert_contents> + </output> + </test> + <!-- #5 mito, heteroplasmy --> + <test expect_num_outputs="16"> + <conditional name="reads_cond"> + <param name="reads_sel" value="combined"/> + <param name="combined_reads" value="combined.fasta"/> + </conditional> + <param name="seed_input" value="seed.fasta"/> + <param name="reference" value="reference.fasta"/> + <conditional name="platform_cond"> + <param name="platform_sel" value="illumina"/> + <conditional name="mode_cond"> + <param name="mode_sel" value="heteroplasmy"/> + <param name="maf" value="0.008"/> + </conditional> + </conditional> + <conditional name="type_cond"> + <param name="type_sel" value="mito"/> + </conditional> + <!-- Assembly --> + <output name="out_c"> + <assert_contents> + <has_n_lines n="0"/> + </assert_contents> + </output> + <output name="out_ct"> + <assert_contents> + <has_n_lines n="0"/> + </assert_contents> + </output> + <output name="out_ar1"> + <assert_contents> + <has_n_lines n="2"/> + <has_line line=">332/1"/> + </assert_contents> + </output> + <output name="out_ar2"> + <assert_contents> + <has_n_lines n="2"/> + <has_line line=">332/2"/> + </assert_contents> + </output> + <output name="out_ua"> + <assert_contents> + <has_n_lines n="0"/> + </assert_contents> + </output> + <!-- Heteroplasmy --> + <output name="out_h"> + <assert_contents> + <has_n_lines n="8"/> + <has_text_matching expression="#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO"/> + </assert_contents> + </output> + <output name="out_ha"> + <assert_contents> + <has_n_lines n="0"/> + </assert_contents> + </output> + <output name="out_lth"> + <assert_contents> + <has_n_lines n="0"/> + </assert_contents> + </output> + <output name="out_pn"> + <assert_contents> + <has_n_lines n="0"/> + </assert_contents> + </output> + <output name="out_pna"> + <assert_contents> + <has_n_lines n="0"/> + </assert_contents> + </output> + <output name="out_ltn"> + <assert_contents> + <has_n_lines n="0"/> + </assert_contents> + </output> + <output name="out_cm"> + <assert_contents> + <has_n_lines n="0"/> + </assert_contents> + </output> + <output name="out_cl"> + <assert_contents> + <has_n_lines n="0"/> + </assert_contents> + </output> + </test> + <!-- #6 chloro, default --> + <test expect_num_outputs="8"> + <conditional name="reads_cond"> + <param name="reads_sel" value="combined"/> + <param name="combined_reads" value="combined.fasta"/> + </conditional> + <param name="seed_input" value="seed.fasta"/> + <conditional name="type_cond"> + <param name="type_sel" value="chloro"/> + </conditional> + <!-- Assembly --> + <output name="out_c"> + <assert_contents> + <has_n_lines n="110"/> + <has_line line=">Contig01+seed"/> + </assert_contents> + </output> + <output name="out_ct"> + <assert_contents> + <has_n_lines n="2"/> + <has_line line=">seed"/> + </assert_contents> + </output> + <output name="out_ar1"> + <assert_contents> + <has_n_lines n="10"/> + <has_line line=">20/1"/> + </assert_contents> + </output> + <output name="out_ar2"> + <assert_contents> + <has_n_lines n="10"/> + <has_line line=">20/2"/> + </assert_contents> + </output> + <output name="out_ua"> + <assert_contents> + <has_n_lines n="0"/> + </assert_contents> + </output> + </test> + <!-- #7 mito_plant, default --> + <test expect_num_outputs="8"> + <conditional name="reads_cond"> + <param name="reads_sel" value="combined"/> + <param name="combined_reads" value="combined.fasta"/> + </conditional> + <param name="seed_input" value="seed.fasta"/> + <param name="reference" value="reference.fasta"/> + <conditional name="type_cond"> + <param name="type_sel" value="mito_plant"/> + <param name="chloroplast_sequence" value="chloroplast.fasta"/> + </conditional> + <!-- Assembly --> + <output name="out_c"> + <assert_contents> + <has_n_lines n="0"/> + </assert_contents> + </output> + <output name="out_ct"> + <assert_contents> + <has_n_lines n="2"/> + </assert_contents> + </output> + <output name="out_ar1"> + <assert_contents> + <has_n_lines n="10"/> + <has_line line=">20/1"/> + </assert_contents> + </output> + <output name="out_ar2"> + <assert_contents> + <has_n_lines n="10"/> + <has_line line=">20/2"/> + </assert_contents> + </output> + <output name="out_ua"> + <assert_contents> + <has_text_matching expression=">Contig01.+"/> + <has_n_lines n="110"/> + </assert_contents> + </output> + </test> + </tests> + <help><![CDATA[ +.. class:: infomark + +**What it does** + +*NOVOPlasty* is a de novo assembler and heteroplasmy/variance caller for short circular genomes. + +**Input** + +- suitable seed: There are different types of seed possible: A single read from the dataset that originates from the organelle genome. A organelle sequence derived from the same or a related species. A complete organelle sequence of a more distant species (recommended when there is no close related sequence available) +- input reads: Either two separate files(forward and reverse) or a merged fastq/fasta file. Multiple libraries as input is not yet supported. There is also an Ion Torrent option, but it does not produce the best results. +- reference: The assembly will still be de novo, but references of the same genus can be used as a guide to resolve duplicated regions in the plant mitochondria or the inverted repeat in the chloroplast. + +**Output** + +*Assembly* + +- Contigs: This file contains all the assembled contigs. +- Circularized assembly: When NOVOPlasty is able to circularize one contig, without any additional contigs being produced, it will just output this circularized fasta file. +- Uncircularized assembly +- Merged contigs: When there are multiple contigs, NOVOPlasty will try to combine all contigs in to a complete circular genome, all the different possibilities can be found in this file. +- Contigs Tmp: If non of the above files are outputted or are empty, you can retrieve some contigs from this file. + +*Variance* + +- Variance: When the variance detection option is selected, an additional VCF file will be outputted. + +*Heteroplasmy* + +- Heteroplasmy: This VCF file contains all the detected heteroplasmy positions. +- Heteroplasmy assemblies: NOVOPlasty will assemble around each position of the above VCF output. The resulting assemblies will be outputted in this fasta file. +- Linkage table heteroplasmy: This file contains all the polymorphisms that are fully/partially/not linked with each detected heteroplasmic position. +- Possible NUMTs / Possible NUMTs assemblies / Linkage table NUMTs: These are the same files as the above three, but then for all the polymorphisms that were identified as NUMT origin. +- Circos mutations / Circos links: These files can be used to draw figures with Circos. + +*Others* + +- Log: This is a basic log file with the information that shows up on your terminal. +- Extended log: An elaborate log file. +- Config file + +.. class:: infomark + +**References** + +More information are available on `GitHub <https://github.com/ndierckx/NOVOPlasty/>`_. + ]]></help> + <citations> + <citation type="doi">10.1093/nar/gkw955</citation> + <citation type="doi">10.1093/nargab/lqz011</citation> + </citations> +</tool>