comparison sculpt_sequences.xml @ 1:ab59c68b6985 draft default tip

planemo upload for repository https://github.com/Edinburgh-Genome-Foundry/Examples/blob/master/templates/template1.ipynb commit db4ac861e1d03fcdfe94321d858839124e493930-dirty
author tduigou
date Wed, 23 Jul 2025 09:47:44 +0000
parents a05746a5560f
children
comparison
equal deleted inserted replaced
0:a05746a5560f 1:ab59c68b6985
1 <tool id="sculpt_sequences" name="Sculpt Sequences" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="21.09"> 1 <tool id="sculpt_sequences" name="Sculpt Sequences" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="21.09">
2 <description>Optimize DNA sequences</description> 2 <description>Optimize DNA sequences</description>
3 <macros> 3 <macros>
4 <token name="@VERSION_SUFFIX@">0</token> 4 <token name="@VERSION_SUFFIX@">1</token>
5 <token name="@TOOL_VERSION@">0.1.0</token> 5 <token name="@TOOL_VERSION@">0.2.0</token>
6 </macros> 6 </macros>
7 <requirements> 7 <requirements>
8 <requirement type="package" version="0.1.11">flametree</requirement> 8 <requirement type="package" version="0.1.11">flametree</requirement>
9 <requirement type="package" version="1.85"> biopython </requirement> 9 <requirement type="package" version="1.85"> biopython </requirement>
10 <requirement type="package" version="0.1.10">proglog</requirement> 10 <requirement type="package" version="0.1.10">proglog</requirement>
16 <requirement type="package" version="0.3.9">pdf-reports</requirement> 16 <requirement type="package" version="0.3.9">pdf-reports</requirement>
17 <requirement type="package" version="0.1.8">sequenticon</requirement> 17 <requirement type="package" version="0.1.8">sequenticon</requirement>
18 <requirement type="package" version="3.1.5">dna_features_viewer</requirement> 18 <requirement type="package" version="3.1.5">dna_features_viewer</requirement>
19 </requirements> 19 </requirements>
20 <command detect_errors="exit_code"><![CDATA[ 20 <command detect_errors="exit_code"><![CDATA[
21 #set avoid_patterns_list = [] 21 #if str($json_use.use_json_param) == "false":
22 #for $p in $rep_avoid_pattern 22 #set avoid_list = [line.strip() for line in str($avoid_patterns).strip().split('\n') if line.strip()]
23 #silent avoid_patterns_list.append(str($p.avoid_pattern)) 23 #set avoid_patterns = ','.join($avoid_list)
24 #end for 24
25 #set avoid_patterns = ','.join($avoid_patterns_list) 25 #set hairpin_lines = [line.strip() for line in str($json_use.hairpin_constraints).strip().split('\n') if line.strip()]
26 #set gc_constraints_list = [] 26 #set hairpin_constraints = '__cn__'.join($hairpin_lines)
27 #for $gc in $adv.rep_gc_constraints 27
28 #silent gc_constraints_list.append(str($gc.gc_min) + ';' + str($gc.gc_max) + ';' + str($gc.gc_window)) 28 #set gc_lines = [line.strip() for line in str($json_use.gc_constraints).strip().split('\n') if line.strip()]
29 #end for 29 #set gc_constraints = '__cn__'.join($gc_lines)
30 #set enforce_gc_content = ' '.join($gc_constraints_list) 30
31 #set hairpin_constraints_list = [] 31 #set kmer_size = $json_use.kmer_size
32 #for $h in $adv.rep_avoid_hairpins 32
33 #silent hairpin_constraints_list.append(str($h.hairpin_stem_size) + ';' + str($h.hairpin_window)) 33 #else:
34 #end for 34 #set avoid_patterns = ''
35 #set hairpin_constraints = ' '.join($hairpin_constraints_list) 35 #set hairpin_constraints = ''
36 #set gc_constraints = ''
37 #set kmer_size = ''
38 #end if
39
36 #set genbank_file_paths = ','.join([str(f) for f in $genbank_files]) 40 #set genbank_file_paths = ','.join([str(f) for f in $genbank_files])
37 #set $file_name_mapping = ",".join(["%s:%s" % (file.file_name, file.name) for file in $genbank_files]) 41 #set $file_name_mapping = ",".join(["%s:%s" % (file.file_name, file.name) for file in $genbank_files])
42
38 mkdir 'outdir_scul' && 43 mkdir 'outdir_scul' &&
39 mkdir 'outdir_unscul' && 44 mkdir 'outdir_unscul' &&
40 python '$__tool_directory__/sculpt_sequences.py' 45 python '$__tool_directory__/sculpt_sequences.py'
46 --use_json_param '$json_use.use_json_param'
41 --files_to_sculpt '$genbank_file_paths' 47 --files_to_sculpt '$genbank_file_paths'
42 --file_name_mapping '$file_name_mapping' 48 --file_name_mapping '$file_name_mapping'
43 --outdir_scul 'outdir_scul' 49 --outdir_scul 'outdir_scul'
44 --outdir_unscul 'outdir_unscul' 50 --outdir_unscul 'outdir_unscul'
45 --use_file_names_as_id '$adv.use_file_names_as_ids' 51 --use_file_names_as_id '$use_file_names_as_ids'
46 --avoid_patterns '$avoid_patterns' 52 --avoid_patterns '$avoid_patterns'
47 --enforce_gc_content '$enforce_gc_content' 53 --gc_constraints '$gc_constraints'
48 --DnaOptimizationProblemClass '$DnaOptimizationProblemClass' 54 --kmer_size '$kmer_size'
49 --kmer_size '$adv.kmer_size'
50 --hairpin_constraints '$hairpin_constraints' 55 --hairpin_constraints '$hairpin_constraints'
56 #if $json_use.use_json_param:
57 --json_params '$json_use.json_params'
58 #else:
59 --json_params ''
60 --DnaOptimizationProblemClass '$json_use.DnaOptimizationProblemClass'
61 #end if
51 ]]></command> 62 ]]></command>
52 <inputs> 63 <inputs>
53 <param name="genbank_files" type="data_collection" collection_type="list" format="genbank" label="GenBank File(s)"/> 64 <param name="genbank_files" type="data_collection" collection_type="list" format="genbank" label="GenBank File(s)"/>
54 <param name="DnaOptimizationProblemClass" type="select" label="DnaOptimizationProblem Calss" help="select the assambly class"> 65 <conditional name='json_use'>
55 <option value="DnaOptimizationProblem" selected="True">DnaOptimizationProblem</option> 66 <param name="use_json_param" type="boolean" checked="false" label="Use parameter from a JSON file" />
56 <option value="CircularDnaOptimizationProblem">CircularDnaOptimizationProblem</option> 67 <when value="false">
57 </param> 68 <param name="DnaOptimizationProblemClass" type="select" label="DnaOptimizationProblem Calss" help="select the assambly class">
58 <repeat name="rep_avoid_pattern" title="Avoid Pattern Constraints"> 69 <option value="DnaOptimizationProblem" selected="True">DnaOptimizationProblem</option>
59 <param name="avoid_pattern" type="text" label="Pattern to Avoid (e.g., BsaI_site and/or 8x1mer)" /> 70 <option value="CircularDnaOptimizationProblem">CircularDnaOptimizationProblem</option>
60 </repeat> 71 </param>
61 <section name="adv" title="Advanced Options" expanded="false"> 72 <param name="avoid_patterns" type="text" area="true" label="Avoid Pattern Constraints" help="Each pattern on a line" />
62 <repeat name="rep_gc_constraints" title="Enforce GC Content Constraints"> 73 <param name="hairpin_constraints" type="text" area="true" label="Hairpins Constraints" optional="true" help="e.g. (you can add others Hairpins Constraints on a new line): stem_size=20, hairpin_window=200"/>
63 <param name="gc_min" type="float" label="Minimum GC Content" value="0.1" optional="true"/> 74 <param name="gc_constraints" type="text" area="true" label="Enforce GC Content Constraints" optional="true" help="e.g. (you can add others Enforce GC Content Constraints on a new line): mini=0.3, maxi=0.7, window=100"/>
64 <param name="gc_max" type="float" label="Maximum GC Content" value="0.9" optional="true"/> 75 <param name="kmer_size" type="integer" label="K-mer Uniqueness Size" value="" optional="true" help="e.g.: 15"/>
65 <param name="gc_window" type="integer" label="GC Content Window Size" value="50" optional="true"/> 76 </when>
66 </repeat> 77 <when value="true">
67 <param name="kmer_size" type="integer" label="K-mer Uniqueness Size" value="15" optional="true"/> 78 <param name="json_params" type="data" format="json" optional="true" label="JSON parameters file" help="Contains tool's parameters" />
68 <repeat name="rep_avoid_hairpins" title="Avoid Hairpins"> 79 </when>
69 <param name="hairpin_stem_size" type="integer" label="Stem Size" value="20" optional="true"/> 80 </conditional>
70 <param name="hairpin_window" type="integer" label="Window Size" value="200" optional="true"/> 81 <param name="use_file_names_as_ids" type="boolean" checked="True" label="Use File Names As Sequence IDs" />
71 </repeat>
72 <param name="use_file_names_as_ids" type="boolean" checked="True" label="Use File Names As Sequence IDs" />
73 </section>
74 </inputs> 82 </inputs>
75 <outputs> 83 <outputs>
76 <collection name="scul" type="list" label="scul group" > 84 <collection name="scul" type="list" label="scul group" >
77 <discover_datasets pattern="(?P&lt;name&gt;.*).zip" format="zip" directory="outdir_scul" /> 85 <discover_datasets pattern="(?P&lt;name&gt;.*).zip" format="zip" directory="outdir_scul" />
78 </collection> 86 </collection>
80 <discover_datasets pattern="(?P&lt;name&gt;.*).gb" format="genbank" directory="outdir_unscul" /> 88 <discover_datasets pattern="(?P&lt;name&gt;.*).gb" format="genbank" directory="outdir_unscul" />
81 </collection> 89 </collection>
82 </outputs> 90 </outputs>
83 <tests> 91 <tests>
84 <test> 92 <test>
85 <!-- test for DnaOptimizationProblem --> 93 <!-- test for DnaOptimizationProblem -->
86 <param name="genbank_files"> 94 <param name="genbank_files">
87 <collection type="list"> 95 <collection type="list">
88 <element name="p15_PuroR" value="10_emma_genbanks/p15_PuroR.gb" /> 96 <element name="p15_PuroR" value="10_emma_genbanks/p15_PuroR.gb" />
89 <element name="p9_PuroR" value="10_emma_genbanks/p9_PuroR.gb" /> 97 <element name="p9_PuroR" value="10_emma_genbanks/p9_PuroR.gb" />
90 <element name="p15_Pup9_mTagBFP2roR" value="10_emma_genbanks/p9_mTagBFP2.gb" /> 98 <element name="p15_Pup9_mTagBFP2roR" value="10_emma_genbanks/p9_mTagBFP2.gb" />
95 <element name="p6_Kozak-ATG" value="10_emma_genbanks/p6_Kozak-ATG.gb" /> 103 <element name="p6_Kozak-ATG" value="10_emma_genbanks/p6_Kozak-ATG.gb" />
96 <element name="p4_Kt-L7Ae-Weiss" value="10_emma_genbanks/p4_Kt-L7Ae-Weiss.gb" /> 104 <element name="p4_Kt-L7Ae-Weiss" value="10_emma_genbanks/p4_Kt-L7Ae-Weiss.gb" />
97 <element name="HC_Amp_ccdB" value="10_emma_genbanks/HC_Amp_ccdB.gb" /> 105 <element name="HC_Amp_ccdB" value="10_emma_genbanks/HC_Amp_ccdB.gb" />
98 </collection> 106 </collection>
99 </param> 107 </param>
100 <param name="DnaOptimizationProblemClass" value="DnaOptimizationProblem" /> 108 <conditional name="json_use">
101 <param name="adv|use_file_names_as_ids" value="True" /> 109 <param name="use_json_param" value="false" />
102 <!-- AvoidPatterns --> 110 <param name="DnaOptimizationProblemClass" value="DnaOptimizationProblem" />
103 <repeat name="rep_avoid_pattern"> 111 <!-- AvoidPatterns -->
104 <param name="avoid_pattern" value="BsaI_site" /> 112 <param name="avoid_patterns" value="BsaI_site
105 </repeat> 113 NotI_site
106 <repeat name="rep_avoid_pattern"> 114 XbaI_site
107 <param name="avoid_pattern" value="NotI_site" /> 115 ClaI_site
108 </repeat> 116 8x1mer" />
109 <repeat name="rep_avoid_pattern"> 117 <!-- EnforceGCContent -->
110 <param name="avoid_pattern" value="XbaI_site" /> 118 <param name="gc_constraints" value="mini=0.1, maxi=0.9, window=50"/>
111 </repeat> 119 </conditional>
112 <repeat name="rep_avoid_pattern"> 120 <param name="use_file_names_as_ids" value="True" />
113 <param name="avoid_pattern" value="ClaI_site" />
114 </repeat>
115 <repeat name="rep_avoid_pattern">
116 <param name="avoid_pattern" value="8x1mer" />
117 </repeat>
118 <!-- EnforceGCContent -->
119 <repeat name="adv|rep_gc_constraints">
120 <param name="gc_min" value="0.1" />
121 <param name="gc_max" value="0.9" />
122 </repeat>
123 <output_collection name="scul" count="10"> 121 <output_collection name="scul" count="10">
124 </output_collection> 122 </output_collection>
125 <output_collection name="unscul" count="10"> 123 <output_collection name="unscul" count="10">
126 </output_collection> 124 </output_collection>
127 </test> 125 </test>
128 <test> 126 <test>
129 <!-- test for CircularDnaOptimizationProblem --> 127 <!-- test for CircularDnaOptimizationProblem -->
130 <param name="genbank_files"> 128 <param name="genbank_files">
131 <collection type="list"> 129 <collection type="list">
132 <element name="p15_PuroR" value="10_emma_genbanks/p15_PuroR.gb" /> 130 <element name="p15_PuroR" value="10_emma_genbanks/p15_PuroR.gb" />
133 <element name="p9_PuroR" value="10_emma_genbanks/p9_PuroR.gb" /> 131 <element name="p9_PuroR" value="10_emma_genbanks/p9_PuroR.gb" />
134 <element name="p15_Pup9_mTagBFP2roR" value="10_emma_genbanks/p9_mTagBFP2.gb" /> 132 <element name="p15_Pup9_mTagBFP2roR" value="10_emma_genbanks/p9_mTagBFP2.gb" />
139 <element name="p6_Kozak-ATG" value="10_emma_genbanks/p6_Kozak-ATG.gb" /> 137 <element name="p6_Kozak-ATG" value="10_emma_genbanks/p6_Kozak-ATG.gb" />
140 <element name="p4_Kt-L7Ae-Weiss" value="10_emma_genbanks/p4_Kt-L7Ae-Weiss.gb" /> 138 <element name="p4_Kt-L7Ae-Weiss" value="10_emma_genbanks/p4_Kt-L7Ae-Weiss.gb" />
141 <element name="HC_Amp_ccdB" value="10_emma_genbanks/HC_Amp_ccdB.gb" /> 139 <element name="HC_Amp_ccdB" value="10_emma_genbanks/HC_Amp_ccdB.gb" />
142 </collection> 140 </collection>
143 </param> 141 </param>
144 <param name="DnaOptimizationProblemClass" value="CircularDnaOptimizationProblem" /> 142 <conditional name="json_use">
145 <param name="adv|use_file_names_as_ids" value="True" /> 143 <param name="use_json_param" value="false" />
146 <!-- AvoidPatterns --> 144 <param name="DnaOptimizationProblemClass" value="CircularDnaOptimizationProblem" />
147 <repeat name="rep_avoid_pattern"> 145 <!-- AvoidPatterns -->
148 <param name="avoid_pattern" value="BsaI_site" /> 146 <param name="avoid_patterns" value="BsaI_site
149 </repeat> 147 NotI_site
150 <repeat name="rep_avoid_pattern"> 148 XbaI_site
151 <param name="avoid_pattern" value="NotI_site" /> 149 ClaI_site
152 </repeat> 150 8x1mer" />
153 <repeat name="rep_avoid_pattern"> 151 <!-- EnforceGCContent -->
154 <param name="avoid_pattern" value="XbaI_site" /> 152 <param name="gc_constraints" value="mini=0.1, maxi=0.9
155 </repeat> 153 mini=0.3, maxi=0.7, window=50"/>
156 <repeat name="rep_avoid_pattern"> 154 <param name="hairpin_constraints" value="stem_size=20, hairpin_window=200
157 <param name="avoid_pattern" value="ClaI_site" /> 155 stem_size=10, hairpin_window=100"/>
158 </repeat> 156 </conditional>
159 <repeat name="rep_avoid_pattern"> 157 <param name="use_file_names_as_ids" value="True" />
160 <param name="avoid_pattern" value="8x1mer" /> 158 <output_collection name="scul" count="10">
161 </repeat> 159 </output_collection>
162 <!-- EnforceGCContent --> 160 <output_collection name="unscul" count="10">
163 <repeat name="adv|rep_gc_constraints"> 161 </output_collection>
164 <param name="gc_min" value="0.1" /> 162 </test>
165 <param name="gc_max" value="0.9" /> 163 <test>
166 </repeat> 164 <!-- test json params -->
165 <param name="genbank_files">
166 <collection type="list">
167 <element name="p15_PuroR" value="10_emma_genbanks/p15_PuroR.gb" />
168 <element name="p9_PuroR" value="10_emma_genbanks/p9_PuroR.gb" />
169 <element name="p15_Pup9_mTagBFP2roR" value="10_emma_genbanks/p9_mTagBFP2.gb" />
170 <element name="p15_p9_BSDRPuroR" value="10_emma_genbanks/p9_BSDR.gb" />
171 <element name="p8_Linker1" value="10_emma_genbanks/p8_Linker1.gb" />
172 <element name="p7_L7Ae-Weiss" value="10_emma_genbanks/p7_L7Ae-Weiss.gb" />
173 <element name="p6_Nt-IgKLsequence" value="10_emma_genbanks/p6_Nt-IgKLsequence.gb" />
174 <element name="p6_Kozak-ATG" value="10_emma_genbanks/p6_Kozak-ATG.gb" />
175 <element name="p4_Kt-L7Ae-Weiss" value="10_emma_genbanks/p4_Kt-L7Ae-Weiss.gb" />
176 <element name="HC_Amp_ccdB" value="10_emma_genbanks/HC_Amp_ccdB.gb" />
177 </collection>
178 </param>
179 <conditional name="json_use">
180 <param name="use_json_param" value="true" />
181 <param name="json_params" value="test_json_workflow2.json" />
182 </conditional>
183 <param name="use_file_names_as_ids" value="True" />
167 <output_collection name="scul" count="10"> 184 <output_collection name="scul" count="10">
168 </output_collection> 185 </output_collection>
169 <output_collection name="unscul" count="10"> 186 <output_collection name="unscul" count="10">
170 </output_collection> 187 </output_collection>
171 </test> 188 </test>
177 Sculpt Sequences is a Python library from the EGF Biofoundry for problem detection and sequence optimization using `dnachisel <https://github.com/Edinburgh-Genome-Foundry/DnaChisel/tree/master/dnachisel>`_ (Complete documentation available `here <https://edinburgh-genome-foundry.github.io/DnaChisel/>`_) 194 Sculpt Sequences is a Python library from the EGF Biofoundry for problem detection and sequence optimization using `dnachisel <https://github.com/Edinburgh-Genome-Foundry/DnaChisel/tree/master/dnachisel>`_ (Complete documentation available `here <https://edinburgh-genome-foundry.github.io/DnaChisel/>`_)
178 195
179 **Parameters**: 196 **Parameters**:
180 --------------- 197 ---------------
181 * **GenBank File(s)**: List of GenBank files to be processed. 198 * **GenBank File(s)**: List of GenBank files to be processed.
199 * **Use parameter from a JSON file**:
200 Yes/No parameter indicating whether the user wants to set the parameters manually or load them from a JSON file.
201 If Yes, the user should provide a JSON file containing all the parameters.
182 * **DnaOptimizationProblem Class**: 202 * **DnaOptimizationProblem Class**:
183 - "DnaOptimizationProblem": is the class to define and solve an optimization problems. Its methods implement all the solver logics. 203 - "DnaOptimizationProblem": is the class to define and solve an optimization problems. Its methods implement all the solver logics.
184 - "CircularDnaOptimizationProblem": is a variant of DnaOptimizationProblem whose optimization algorithm assumes that the sequence is circular. 204 - "CircularDnaOptimizationProblem": is a variant of DnaOptimizationProblem whose optimization algorithm assumes that the sequence is circular.
185 * **Avoid Pattern Constraints**: is a sequence design rules that can be used as constraints. It define pattern(s) to avoid during problem optimisation. 205 * **Avoid Pattern Constraints**: is a sequence design rules that can be used as constraints. It define pattern(s) to avoid during problem optimisation.
186 This can include enzyme sites like "BsaI_site", "NotI_site", "XbaI_site"... `enzyme dict <https://github.com/biopython/biopython/blob/master/Bio/Restriction/Restriction_Dictionary.py>`_ . Custom patterns are also supported, such as "5x3mer" means "any 5 consecutive 3-nucleotide sequences — typically 5 unique 3-mers in a row. 206 This can include enzyme sites like "BsaI_site", "NotI_site", "XbaI_site"... `enzyme dict <https://github.com/biopython/biopython/blob/master/Bio/Restriction/Restriction_Dictionary.py>`_ . Custom patterns are also supported, such as "5x3mer" means "any 5 consecutive 3-nucleotide sequences — typically 5 unique 3-mers in a row.
187 * **Enforce GC Content Constraints**: Define acceptable GC content ranges. For example min: 0.4, max: 0.6, window: 50 represents a 40–60% GC content requirement within a 50-base window. 207 * **Enforce GC Content Constraints**:
208 Define acceptable GC content ranges. For example min: 0.4, max: 0.6, window: 50 represents a 40–60% GC content requirement within a 50-base window.
209 (Parameters: `EnforceGCContent_params <https://edinburgh-genome-foundry.github.io/DnaChisel/ref/builtin_specifications#enforcegccontent>`_ )
188 * **Avoid Hairpins**: Avoid Hairpin patterns as defined by the IDT guidelines. 210 * **Avoid Hairpins**: Avoid Hairpin patterns as defined by the IDT guidelines.
189 A hairpin is defined by a sequence segment which has a reverse complement “nearby” in a given window. 211 A hairpin is defined by a sequence segment which has a reverse complement “nearby” in a given window.
212 (Parameters: `AvoidHairpins_params <https://edinburgh-genome-foundry.github.io/DnaChisel/ref/builtin_specifications#avoidhairpins>`_ ).
190 * **K-mer Uniqueness Size**: Avoid sub-sequence of length k with homologies elsewhere. 213 * **K-mer Uniqueness Size**: Avoid sub-sequence of length k with homologies elsewhere.
191 * **Use File Names As Sequence IDs**: Recommended if the GenBank file names represent the fragment names. 214 * **Use File Names As Sequence IDs**: Recommended if the GenBank file names represent the fragment names.
192 ]]></help> 215 ]]></help>
193 <citations> 216 <citations>
194 <citation type="bibtex"> 217 <citation type="bibtex">