comparison edena_ass_wrapper.xml @ 2:b8c6a38530eb draft default tip

Support Edena v. 3.131028 (new <version_command>, official overlapping log file, covStats output file removed, -lph and -sph options instead of -peHorizon). Use $GALAXY_SLOTS instead of $EDENA_SITE_OPTIONS. Directly call edena, remove edena_ovl_wrapper.py and edena_ass_wrapper.py. Discard stderr instead of redirecting to stdout. Do not redirect stdout to logfile. Add readme.rst.
author crs4
date Fri, 31 Jan 2014 12:08:21 -0500
parents cd6cc6d76708
children
comparison
equal deleted inserted replaced
1:cd6cc6d76708 2:b8c6a38530eb
1 <tool id="edena_ass_wrapper" name="Edena (assembling)" version="0.2.1"> 1 <tool id="edena_ass_wrapper" name="Edena (assembling)" version="0.3">
2 <description></description> 2 <description></description>
3 <requirements> 3 <requirements>
4 <requirement type="package" version="3.130110">edena</requirement> 4 <requirement type="package" version="3.131028">edena</requirement>
5 </requirements> 5 </requirements>
6 <version_command>edena -v</version_command> 6 <version_command>edena | head -n 1</version_command>
7 <command interpreter="python"> 7 <command>
8 edena_ass_wrapper.py --ovl_input=$ovl_input 8 edena -e $ovl_input
9 #if str($overlapCutoff) 9 #if str($overlapCutoff)
10 --overlapCutoff=$overlapCutoff 10 -m $overlapCutoff
11 #end if 11 #end if
12 #if $cc 12 #if $cc
13 --cc 13 -cc yes
14 #else
15 -cc no
14 #end if 16 #end if
15 #if $discardNonUsable 17 #if $discardNonUsable
16 --discardNonUsable 18 -discardNonUsable yes
19 #else
20 -discardNonUsable no
17 #end if 21 #end if
18 #if str($minContigSize) 22 #if str($minContigSize)
19 --minContigSize=$minContigSize 23 -c $minContigSize
20 #end if 24 #end if
21 #if str($minCoverage) 25 #if str($minCoverage)
22 --minCoverage=$minCoverage 26 -minCoverage $minCoverage
23 #end if 27 #end if
24 #if str($trim) 28 #if str($trim)
25 --trim=$trim 29 -trim $trim
26 #end if 30 #end if
27 #if str($peHorizon) 31 #if str($sph)
28 --peHorizon=$peHorizon 32 -sph $sph
29 #end if 33 #end if
30 --covStats=$covStats --out_contigs_cov=$out_contigs_cov --out_contigs_fasta=$out_contigs_fasta --out_contigs_lay=$out_contigs_lay --out_log_txt=$out_log_txt --out_nodesInfo=$out_nodesInfo --out_nodesPosition=$out_nodesPosition 34 #if str($lph)
31 --logfile=$logfile 35 -lph $lph
36 #end if
37 2&gt;/dev/null ## need to discard stderr because edena writes some progress info there (e.g. "Condensing overlaps graph...")
32 </command> 38 </command>
33 39
34 <inputs> 40 <inputs>
35 <param name="ovl_input" type="data" format="ovl" label="Edena .ovl file (-e)" help="Specify here the Edena “.ovl” file obtained from the overlapping step" /> 41 <param name="ovl_input" type="data" format="ovl" label="Edena overlap (.ovl) file (-e)" help="Specify here the Edena “.ovl” file obtained from the overlapping step" />
36 42
37 <param name="overlapCutoff" type="integer" value="" optional="true" label="Overlap cutoff (-m)" help="The overlap cutoff is by default set to half of the reads length L (see the log output by the overlapping step to identify it). It is however still worth trying to increase this setting since it can greatly simplify highly connected overlaps graphs, and thus speed up the assembly. If one step during the assembly hangs, increasing the overlap cutoff is the first thing to do." /> 43 <param name="overlapCutoff" type="integer" value="" optional="true" label="Overlap cutoff (-m)" help="The overlap cutoff is by default set to half of the reads length L (see the log output by the overlapping step to identify it). It is however still worth trying to increase this setting since it can greatly simplify highly connected overlaps graphs, and thus speed up the assembly. If one step during the assembly hangs, increasing the overlap cutoff is the first thing to do." />
38 44
39 <param name="cc" type="boolean" checked="true" label="Contextual cleaning (-cc)" help="This option is enabled by default. Contextual cleaning is a procedure that efficiently identifies and removes false positive edges, improving thus the assembly. This procedure can be seen as a dynamic overlap cutoff on the overlaps graph. It is possible however for this step to be slow on ultra-high covered sequencing data. In such cases, try to increase the overlap cutoff value, or to simply disable this option." /> 45 <param name="cc" type="boolean" checked="true" label="Contextual cleaning of spurious edges (-cc)" help="Contextual cleaning is a procedure that efficiently identifies and removes false positive edges, improving thus the assembly. This procedure can be seen as a dynamic overlap cutoff on the overlaps graph. It is possible however for this step to be slow on ultra-high covered sequencing data. In such cases, try to increase the overlap cutoff value, or to simply disable this option." />
40 46
41 <param name="discardNonUsable" type="boolean" checked="true" label="Discard non usable nodes (-discardNonUsable)" help="Enabled by default, this procedure discards nodes smaller than 1.5*readLength and that are not connected to any other nodes." /> 47 <param name="discardNonUsable" type="boolean" checked="true" label="Discard non usable nodes (-discardNonUsable)" help="This procedure discards orphan nodes smaller than 1.5*readLength." />
42 48
43 <param name="minContigSize" type="integer" value="" optional="true" label="Minimum size of the contigs to output (-c)" help="If not specified, this value is set to 1.5*readLength." /> 49 <param name="minContigSize" type="integer" value="" optional="true" label="Minimum size of the contigs to output (-c)" help="If not specified, this value is set to 1.5*readLength." />
44 50
45 <param name="minCoverage" type="float" value="" optional="true" label="Minimum required coverage for the contigs (-minCoverage)" help="If not specified, this value is automatically determined from the nodes coverage distribution. This estimation however supposes a uniform coverage. It could be worth overriding this parameter in some cases, i.e. with transcriptome data, or a mix of PCR product assemblies." /> 51 <param name="minCoverage" type="float" value="" optional="true" label="Minimum required coverage for the contigs (-minCoverage)" help="If not specified, this value is automatically determined from the nodes coverage distribution. This estimation however supposes a uniform coverage. It could be worth overriding this parameter in some cases, i.e. with transcriptome data, or a mix of PCR product assemblies." />
46 52
47 <param name="trim" type="integer" value="4" optional="true" label="Coverage cutoff for contigs ends (-trim)" help="Contig interruptions are caused either because of a non-resolved ambiguity, or because of a lack of overlapping reads. In the latter case, the contig end may be inaccurate. This option will trim such ends until a minimum coverage is reached. By default, this value is set to 4. To disable contigs ends trimming, set this value to 1." /> 53 <param name="trim" type="integer" value="4" optional="true" label="Coverage cutoff for contigs ends (-trim)" help="Contig interruptions are caused either because of a non-resolved ambiguity, or because of a lack of overlapping reads. In the latter case, the contig end may be inaccurate. This option will trim a few bases from these ends until a minimum coverage is reached. By default, this value is set to 4. To disable contigs ends trimming, set this value to 1." />
48 54 <param name="sph" type="integer" value="1000" optional="true" label="Maximum search distance for paired-end (forward-reverse) sampling (-sph)" help="Edena samples the overlaps graph to accurately determine the paired distance distribution. This parameter specifies the maximum distance that is searched during this sampling. This value has to be set to at least 2X the expected size of the longest paired-end library." />
49 <param name="peHorizon" type="integer" value="" optional="true" label="Maximum search distance for paired-end reads connection (-peHorizon)" help="Edena samples the overlaps graph to accurately determine the paired distance distribution. This parameter specifies the maximum distance that is searched during this sampling. By default, this value is set to 1000 if solely direct-reverse mates are used and 10000 if reverse-direct mates are also used. This value has to be set to at least 2X the expected size of the longest mate library." /> 55 <param name="lph" type="integer" value="15000" optional="true" label="Maximum search distance for mate-pair (reverse-forward) sampling (-lph)" help="Edena samples the overlaps graph to accurately determine the paired distance distribution. This parameter specifies the maximum distance that is searched during this sampling. This value has to be set to at least 2X the expected size of the longest mate-pair library." />
50 </inputs> 56 </inputs>
51 57
52 <outputs> 58 <outputs>
53 <data name="covStats" format="tabular" label="${tool.name} on ${on_string}: CovStats" /> 59 <data name="out_contigs_cov" format="txt" label="${tool.name} on ${on_string}: ContigsCov" from_work_dir="out_contigs.cov" />
54 <data name="out_contigs_cov" format="txt" label="${tool.name} on ${on_string}: ContigsCov" /> 60 <data name="out_contigs_fasta" format="fasta" label="${tool.name} on ${on_string}: ContigsFasta" from_work_dir="out_contigs.fasta" />
55 <data name="out_contigs_fasta" format="fasta" label="${tool.name} on ${on_string}: ContigsFasta" /> 61 <data name="out_contigs_lay" format="txt" label="${tool.name} on ${on_string}: ContigsLay" from_work_dir="out_contigs.lay" />
56 <data name="out_contigs_lay" format="txt" label="${tool.name} on ${on_string}: ContigsLay" /> 62 <data name="out_log_txt" format="txt" label="${tool.name} on ${on_string}: log" from_work_dir="out_assembling.log" />
57 <data name="out_log_txt" format="txt" label="${tool.name} on ${on_string}: log" /> 63 <data name="out_nodesInfo" format="txt" label="${tool.name} on ${on_string}: nodes info" from_work_dir="out_nodesInfo" />
58 <data name="out_nodesInfo" format="txt" label="${tool.name} on ${on_string}: nodes info" /> 64 <data name="out_nodesPosition" format="txt" label="${tool.name} on ${on_string}: nodes position" from_work_dir="out_nodesPosition" />
59 <data name="out_nodesPosition" format="txt" label="${tool.name} on ${on_string}: nodes position" />
60 <data name="logfile" format="txt" label="${tool.name} on ${on_string}: log (terminal)" />
61 </outputs> 65 </outputs>
62 66
63 <tests> 67 <tests>
64 68
65 </tests> 69 </tests>