Mercurial > repos > tduigou > evaluate_manufacturability
changeset 2:8cafb888ff89 draft default tip
planemo upload for repository https://github.com/Edinburgh-Genome-Foundry/Examples/blob/master/templates/template1.ipynb commit 7f5d8b62d749a0c41110cd9c04e0254e4fd44893-dirty
| author | tduigou |
|---|---|
| date | Wed, 15 Oct 2025 12:33:00 +0000 |
| parents | 7eec39ef47ed |
| children | |
| files | evaluate_manufacturability.py evaluate_manufacturability.xml test-data/DB_genbank_simulation/p16_bGHpolyA.gb test-data/DB_genbank_simulation/p19_mNeoGreen.gb test-data/DB_genbank_simulation/p3_2_CAG.gb test-data/NO_missing.txt test-data/manufacturability_report.tsv test-data/missing.txt |
| diffstat | 7 files changed, 621 insertions(+), 53 deletions(-) [+] |
line wrap: on
line diff
--- a/evaluate_manufacturability.py Wed Jul 23 09:45:19 2025 +0000 +++ b/evaluate_manufacturability.py Wed Oct 15 12:33:00 2025 +0000 @@ -44,14 +44,14 @@ folder=None, use_file_names_as_ids=use_file_names_as_id ) - # #try: - # #if not records_to_evaluate: - # #print('records to evaluate: is empty') - # #else: - # #for record in records_to_evaluate: - # #print(f'records to evaluate: {record}') - # #except Exception as e: - # #print(f'An error occurred: {e}') + # try: + # if not records_to_evaluate: + # print('records to evaluate: is empty') + # else: + # for record in records_to_evaluate: + # print(f'records to evaluate: {record}') + # except Exception as e: + # print(f'An error occurred: {e}') length_cutoff = 100 for part in records_to_evaluate: @@ -117,7 +117,7 @@ except ValueError: print(f"Skipping invalid k-mer size: {k_size}") - print(f'constraint_list is{constraint_list}') + # print(f'constraint_list is{constraint_list}') # constraint_list apply dataframe = cr.constraints_breaches_dataframe(constraint_list, records_to_evaluate) @@ -190,10 +190,12 @@ def parse_command_line_args(): parser = argparse.ArgumentParser(description="Evaluate manufacturability of DNA sequences.") - parser.add_argument("--files_to_evaluate", required=True, + parser.add_argument("--files_to_evaluate", required=False, help="List of GenBank files (Comma-separated)") parser.add_argument('--file_name_mapping', type=str, help='Mapping of Galaxy filenames to original filenames') + parser.add_argument('--DB_file_name_mapping', type=str, + help='Mapping of Galaxy filenames to original DB filenames') parser.add_argument("--output_tsv", required=True, help="Excel file name") parser.add_argument("--output_pdf", required=True, help="PDF file name") parser.add_argument("--outdir_gb", required=True, help="DIR for annotated GenBank files") @@ -211,6 +213,13 @@ help="JSON params for the tool") parser.add_argument("--use_json_param", required=True, help="If use JSON as param source") + 
parser.add_argument("--mode", required=True, + help="mode d'utilisation: standard ou workflow") + parser.add_argument("--DB_report", required=False, + help="In wkf mode") + parser.add_argument("--DB_genbank_files", required=False, + help="IN wkf mode") + return parser.parse_args() @@ -268,6 +277,67 @@ args = parse_command_line_args() + ### + if "--mode" in sys.argv: + mode_index = sys.argv.index("--mode") + 1 + mode = sys.argv[mode_index].strip() + + skip_evaluation = False + use_DB_files = False + DB_genbank_files = [] + + if mode == "wkf": + + if "--DB_report" not in sys.argv: + print("ERROR: --DB_report is required in wkf mode.") + sys.exit(1) + db_index = sys.argv.index("--DB_report") + 1 + db_report_path = sys.argv[db_index] + + if "--DB_genbank_files" in sys.argv: + db_gb_index = sys.argv.index("--DB_genbank_files") + 1 + DB_genbank_files = sys.argv[db_gb_index].split(",") + else: + DB_genbank_files = [] + + if not os.path.isfile(db_report_path): + print(f"ERROR: DB report file not found at {db_report_path}") + sys.exit(1) + + with open(db_report_path, 'r') as f: + lines = [line.strip() for line in f.readlines() if line.strip()] + + if not lines: + skip_evaluation = True + + else: + + missing_fragments = lines[0:] + + # Parse file_name_mapping + if isinstance(args.file_name_mapping, str): + mapping_dict = dict(item.split(":") for item in args.file_name_mapping.split(",")) + else: + mapping_dict = {} + + # Logical names + provided_filenames = [os.path.splitext(v)[0] for v in mapping_dict.values()] + + # print(f'provided_filenames is : {provided_filenames}') + + unmatched = [ + frag for frag in missing_fragments + if os.path.splitext(frag)[0] not in provided_filenames + ] + + if unmatched: + print(f"ERROR: The following missing fragment(s) must be provided as .gb files: {', '.join(unmatched)}") + sys.exit(1) + else: + use_DB_files = True # Append after evaluation + + ### + # Default values from command-line avoid_patterns, hairpin_constraints, gc_constraints, 
kmer_size = extract_constraints_from_args(args) @@ -304,10 +374,47 @@ "kmer_size": kmer_size } - evaluate_manufacturability( - params["files_to_evaluate"], params["file_name_mapping"], - params["output_tsv"], params["output_pdf"], params["outdir_gb"], - params["use_file_names_as_id"], params["avoid_patterns"], - params["hairpin_constraints"], params["gc_constraints"], - params["kmer_size"] - ) + if not skip_evaluation: + evaluate_manufacturability( + params["files_to_evaluate"], params["file_name_mapping"], + params["output_tsv"], params["output_pdf"], params["outdir_gb"], + params["use_file_names_as_id"], params["avoid_patterns"], + params["hairpin_constraints"], params["gc_constraints"], + params["kmer_size"] + ) + + if mode == "wkf" and (skip_evaluation or use_DB_files): + if DB_genbank_files: + print(f"DB_genbank_files is: {DB_genbank_files}") + print("Adding DB GenBank files to output collection using DB_file_name_mapping...") + + os.makedirs(params["outdir_gb"], exist_ok=True) + + # mapping real DB gb file name + if isinstance(args.DB_file_name_mapping, str): + print (f'DB_file_name_mapping is: {args.DB_file_name_mapping}') + DB_mapping_dict = dict(item.split(":") for item in args.DB_file_name_mapping.split(",")) + else: + DB_mapping_dict={} + + for path in DB_genbank_files: + basename = os.path.basename(path) + logical_name = DB_mapping_dict.get(path) or DB_mapping_dict.get(basename) + + if not logical_name: + print(f"WARNING: No mapping found for DB GenBank file: {path}. Skipping.") + continue + + output_filename = os.path.splitext(logical_name)[0] + ".gb" + dest_path = os.path.join(params["outdir_gb"], output_filename) + + try: + with open(path, 'r') as src, open(dest_path, 'w') as dst: + dst.write(src.read()) + print(f"Copied and renamed: {path} → {dest_path}") + except Exception as e: + print(f"ERROR: Failed to copy {path} → {dest_path}: {e}") + else: + print("No DB GenBank files to append, continuing without error.") + +
--- a/evaluate_manufacturability.xml Wed Jul 23 09:45:19 2025 +0000 +++ b/evaluate_manufacturability.xml Wed Oct 15 12:33:00 2025 +0000 @@ -1,8 +1,8 @@ <tool id="evaluate_manufacturability" name="Evaluate Manufacturability" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="21.09"> <description>Evaluate DNA manufacturability with customizable constraints</description> <macros> - <token name="@VERSION_SUFFIX@">1</token> - <token name="@TOOL_VERSION@">0.2.0</token> + <token name="@VERSION_SUFFIX@">2</token> + <token name="@TOOL_VERSION@">0.3.0</token> </macros> <requirements> <requirement type="package" version="0.1.11">flametree</requirement> @@ -16,8 +16,22 @@ <requirement type="package" version="3.1.5">openpyxl</requirement> </requirements> <command detect_errors="exit_code"><![CDATA[ - #set genbank_file_paths = ','.join([str(f) for f in $genbank_files]) - #set file_name_mapping = ",".join(["%s:%s" % (file.file_name, file.name) for file in $genbank_files]) + #if $setting.mode == 'wkf': + #if $setting.genbank_files: + #set genbank_file_paths = ','.join([str(f) for f in $setting.genbank_files]) + #set file_name_mapping = ",".join(["%s:%s" % (file.file_name, file.name) for file in $setting.genbank_files]) + #else: + #set genbank_file_paths = '' + #set file_name_mapping = '' + #end if + #set DB_genbank_file_paths = ','.join([str(file) for file in $setting.DB_genbank_files]) + #set DB_file_name_mapping = ",".join(["%s:%s" % (file.file_name, file.name) for file in $setting.DB_genbank_files]) + #else: + #set genbank_file_paths = ','.join([str(f) for f in $setting.genbank_files]) + #set file_name_mapping = ",".join(["%s:%s" % (file.file_name, file.name) for file in $setting.genbank_files]) + #set DB_genbank_file_paths = '' + #set DB_file_name_mapping = '' + #end if #if str($json_use.use_json_param) == "false": #set avoid_list = [line.strip() for line in str($json_use.avoid_patterns).strip().split('\n') if line.strip()] @@ -42,6 +56,16 @@ --use_json_param 
'$json_use.use_json_param' --files_to_evaluate '$genbank_file_paths' --file_name_mapping '$file_name_mapping' + --mode '$mode' + #if $setting.mode=='wkf': + --DB_report '$setting.DB_report' + --DB_genbank_files '$DB_genbank_file_paths' + --DB_file_name_mapping '$DB_file_name_mapping' + #else: + --DB_report '' + --DB_genbank_files '' + --DB_file_name_mapping '' + #end if --output_pdf '$report_pdf' --output_tsv '$report_tsv' --outdir_gb 'outdir_dir' @@ -54,11 +78,24 @@ --json_params '$json_use.json_params' #else: --json_params '' - #end if + #end if && echo 'DEBUG' && "$DB_genbank_files" ]]></command> <inputs> - <param name="genbank_files" type="data_collection" collection_type="list" format="genbank,fasta" label="GenBank File(s)"/> + <conditional name='setting'> + <param name="mode" type="select" label="Mode"> + <option value="std" selected="true">Standard</option> + <option value="wkf">Workflow</option> + </param> + <when value="std"> + <param name="genbank_files" type="data_collection" collection_type="list" format="genbank,fasta" label="GenBank File(s)"/> + </when> + <when value="wkf"> + <param name="genbank_files" type="data_collection" collection_type="list" format="genbank,fasta" label="GenBank File(s)" optional="true"/> + <param name="DB_genbank_files" type="data_collection" collection_type="list" format="genbank,fasta" label="DB GenBank File(s)"/> + <param name="DB_report" type="data" format="txt" label="Missing Fragments Report" help="text file report" /> + </when> + </conditional> <conditional name='json_use'> <param name="use_json_param" type="boolean" checked="false" label="Use parameter from a JSON file" /> <when value="false"> @@ -85,20 +122,7 @@ impossible to run it in conda env with this comand because there is a step to extract the the file name in galaxy (.dat) in line 83 of the python code--> <tests> <test> - <param name="genbank_files"> - <collection type="list"> - <element name="p15_PuroR" value="10_emma_genbanks/p15_PuroR.gb" /> - <element 
name="p9_PuroR" value="10_emma_genbanks/p9_PuroR.gb" /> - <element name="p9_mTagBFP2" value="10_emma_genbanks/p9_mTagBFP2.gb" /> - <element name="p9_BSDR" value="10_emma_genbanks/p9_BSDR.gb" /> - <element name="p8_Linker1" value="10_emma_genbanks/p8_Linker1.gb" /> - <element name="p7_L7Ae-Weiss" value="10_emma_genbanks/p7_L7Ae-Weiss.gb" /> - <element name="p6_Nt-IgKLsequence" value="10_emma_genbanks/p6_Nt-IgKLsequence.gb" /> - <element name="p6_Kozak-ATG" value="10_emma_genbanks/p6_Kozak-ATG.gb" /> - <element name="p4_Kt-L7Ae-Weiss" value="10_emma_genbanks/p4_Kt-L7Ae-Weiss.gb" /> - <element name="HC_Amp_ccdB" value="10_emma_genbanks/HC_Amp_ccdB.gb" /> - </collection> - </param> + <param name="use_file_names_as_ids" value="True" /> <conditional name='json_use'> <param name="use_json_param" value="false" /> @@ -118,6 +142,23 @@ <!-- UniquifyAllKmers --> <param name="kmer_size" value="15" /> </conditional> + <conditional name='setting'> + <param name="mode" value="std"/> + <param name="genbank_files"> + <collection type="list"> + <element name="p8_Linker1" value="10_emma_genbanks/p8_Linker1.gb" /> + <element name="p9_PuroR" value="10_emma_genbanks/p9_PuroR.gb" /> + <element name="p9_mTagBFP2" value="10_emma_genbanks/p9_mTagBFP2.gb" /> + <element name="p4_Kt-L7Ae-Weiss" value="10_emma_genbanks/p4_Kt-L7Ae-Weiss.gb" /> + <element name="p6_Nt-IgKLsequence" value="10_emma_genbanks/p6_Nt-IgKLsequence.gb" /> + <element name="HC_Amp_ccdB" value="10_emma_genbanks/HC_Amp_ccdB.gb" /> + <element name="p15_PuroR" value="10_emma_genbanks/p15_PuroR.gb" /> + <element name="p9_BSDR" value="10_emma_genbanks/p9_BSDR.gb" /> + <element name="p6_Kozak-ATG" value="10_emma_genbanks/p6_Kozak-ATG.gb" /> + <element name="p7_L7Ae-Weiss" value="10_emma_genbanks/p7_L7Ae-Weiss.gb" /> + </collection> + </param> + </conditional> <!-- Expecting pdf output --> <output name="report_pdf" ftype="pdf"> <assert_contents> @@ -125,7 +166,7 @@ </assert_contents> </output> <!-- Expecting tsv output --> - 
<output name="report_tsv" ftype="tsv"> + <output name="report_tsv" file='manufacturability_report.tsv' ftype="tsv"> <assert_contents> <has_n_lines n="11" /> <has_n_columns n="12" /> @@ -186,26 +227,29 @@ </test> <!-- parameters from json --> <test> - <param name="genbank_files"> - <collection type="list"> - <element name="p15_PuroR" value="10_emma_genbanks/p15_PuroR.gb" /> - <element name="p9_PuroR" value="10_emma_genbanks/p9_PuroR.gb" /> - <element name="p9_mTagBFP2" value="10_emma_genbanks/p9_mTagBFP2.gb" /> - <element name="p9_BSDR" value="10_emma_genbanks/p9_BSDR.gb" /> - <element name="p8_Linker1" value="10_emma_genbanks/p8_Linker1.gb" /> - <element name="p7_L7Ae-Weiss" value="10_emma_genbanks/p7_L7Ae-Weiss.gb" /> - <element name="p6_Nt-IgKLsequence" value="10_emma_genbanks/p6_Nt-IgKLsequence.gb" /> - <element name="p6_Kozak-ATG" value="10_emma_genbanks/p6_Kozak-ATG.gb" /> - <element name="p4_Kt-L7Ae-Weiss" value="10_emma_genbanks/p4_Kt-L7Ae-Weiss.gb" /> - <element name="HC_Amp_ccdB" value="10_emma_genbanks/HC_Amp_ccdB.gb" /> - </collection> - </param> <param name="use_file_names_as_ids" value="True" /> <conditional name='json_use'> <param name="use_json_param" value="true" /> <!-- JSON --> <param name="json_params" value="test_json.json" /> </conditional> + <conditional name='setting'> + <param name="mode" value="std"/> + <param name="genbank_files"> + <collection type="list"> + <element name="p8_Linker1" value="10_emma_genbanks/p8_Linker1.gb" /> + <element name="p9_PuroR" value="10_emma_genbanks/p9_PuroR.gb" /> + <element name="p9_mTagBFP2" value="10_emma_genbanks/p9_mTagBFP2.gb" /> + <element name="p4_Kt-L7Ae-Weiss" value="10_emma_genbanks/p4_Kt-L7Ae-Weiss.gb" /> + <element name="p6_Nt-IgKLsequence" value="10_emma_genbanks/p6_Nt-IgKLsequence.gb" /> + <element name="HC_Amp_ccdB" value="10_emma_genbanks/HC_Amp_ccdB.gb" /> + <element name="p15_PuroR" value="10_emma_genbanks/p15_PuroR.gb" /> + <element name="p9_BSDR" value="10_emma_genbanks/p9_BSDR.gb" /> + 
<element name="p6_Kozak-ATG" value="10_emma_genbanks/p6_Kozak-ATG.gb" /> + <element name="p7_L7Ae-Weiss" value="10_emma_genbanks/p7_L7Ae-Weiss.gb" /> + </collection> + </param> + </conditional> <!-- Expecting pdf output --> <output name="report_pdf" ftype="pdf"> <assert_contents> @@ -213,7 +257,7 @@ </assert_contents> </output> <!-- Expecting tsv output --> - <output name="report_tsv" ftype="tsv"> + <output name="report_tsv" file='manufacturability_report.tsv' ftype="tsv"> <assert_contents> <has_n_lines n="11" /> <has_n_columns n="12" /> @@ -272,6 +316,289 @@ </element> </output_collection> </test> + <!--test wkf mode simulation--> + <!--NO missing--> + <test> + <param name="use_file_names_as_ids" value="True" /> + <conditional name='json_use'> + <param name="use_json_param" value="false" /> + <!-- AvoidPatterns --> + <param name="avoid_patterns" value="BsaI_site + BsmBI_site + BbsI_site + SapI_site + 8x1mer + 5x3mer + 9x2mer" /> + <!-- AvoidHairpins --> + <param name="hairpin_constraints" value='stem_size=20, hairpin_window=200'/> + <!-- EnforceGCContent --> + <param name="gc_constraints" value="mini=0.3, maxi=0.7, window=100 + mini=0.1, maxi=0.9, window=100"/> + <!-- UniquifyAllKmers --> + <param name="kmer_size" value="15" /> + </conditional> + <conditional name='setting'> + <param name="mode" value="wkf"/> + <param name="DB_genbank_files"> + <collection type="list"> + <element name="p3_2_CAG" value="DB_genbank_simulation/p3_2_CAG.gb" /> + <element name="p16_bGHpolyA" value="DB_genbank_simulation/p16_bGHpolyA.gb" /> + <element name="p19_mNeoGreen" value="DB_genbank_simulation/p19_mNeoGreen.gb" /> + </collection> + </param> + <param name="DB_report" value="NO_missing.txt" /> + </conditional> + <output_collection name="annotated_gb" type="list" count="3"> + <element name="p16_bGHpolyA"> + <assert_contents> + <has_n_lines n="24" /> + </assert_contents> + </element> + <element name="p19_mNeoGreen"> + <assert_contents> + <has_n_lines n="44" /> + </assert_contents> + 
</element> + <element name="p3_2_CAG"> + <assert_contents> + <has_n_lines n="33" /> + </assert_contents> + </element> + </output_collection> + </test> + <!--missing with no providing gb fragment files--> + <!--should return error (faild)--> + <test> + <param name="use_file_names_as_ids" value="True" /> + <conditional name='json_use'> + <param name="use_json_param" value="false" /> + <!-- AvoidPatterns --> + <param name="avoid_patterns" value="BsaI_site + BsmBI_site + BbsI_site + SapI_site + 8x1mer + 5x3mer + 9x2mer" /> + <!-- AvoidHairpins --> + <param name="hairpin_constraints" value='stem_size=20, hairpin_window=200'/> + <!-- EnforceGCContent --> + <param name="gc_constraints" value="mini=0.3, maxi=0.7, window=100 + mini=0.1, maxi=0.9, window=100"/> + <!-- UniquifyAllKmers --> + <param name="kmer_size" value="15" /> + </conditional> + <conditional name='setting'> + <param name="mode" value="wkf"/> + <param name="genbank_files"> + <collection type="list"> + <element name="p15_PuroR" value="10_emma_genbanks/p15_PuroR.gb" /> + <element name="p9_PuroR" value="10_emma_genbanks/p9_PuroR.gb" /> + <element name="p9_mTagBFP2" value="10_emma_genbanks/p9_mTagBFP2.gb" /> + <element name="p9_BSDR" value="10_emma_genbanks/p9_BSDR.gb" /> + <element name="HC_Amp_ccdB" value="10_emma_genbanks/HC_Amp_ccdB.gb" /> + <element name="p8_Linker1" value="10_emma_genbanks/p8_Linker1.gb" /> + <element name="p7_L7Ae-Weiss" value="10_emma_genbanks/p7_L7Ae-Weiss.gb" /> + </collection> + </param> + <param name="DB_genbank_files"> + <collection type="list"> + <element name="p3_2_CAG" value="DB_genbank_simulation/p3_2_CAG.gb" /> + <element name="p16_bGHpolyA" value="DB_genbank_simulation/p16_bGHpolyA.gb" /> + <element name="p19_mNeoGreen" value="DB_genbank_simulation/p19_mNeoGreen.gb" /> + </collection> + </param> + <param name="DB_report" value="missing.txt" /> + </conditional> + <!-- Expecting pdf output --> + <output name="report_pdf" ftype="pdf"> + <assert_contents> + <has_size min="86"/> + 
</assert_contents> + </output> + <!-- Expecting tsv output --> + <output name="report_tsv" ftype="tsv"> + <assert_contents> + <has_n_lines n="11" /> + <has_n_columns n="12" /> + </assert_contents> + </output> + <output_collection name="annotated_gb" type="list" count="10"> + <element name="HC_Amp_ccdB"> + <assert_contents> + <has_n_lines n="150" /> + </assert_contents> + </element> + <element name="p15_PuroR"> + <assert_contents> + <has_n_lines n="150" /> + </assert_contents> + </element> + <element name="p4_Kt-L7Ae-Weiss"> + <assert_contents> + <has_n_lines n="120" /> + </assert_contents> + </element> + <element name="p6_Kozak-ATG"> + <assert_contents> + <has_n_lines n="124" /> + </assert_contents> + </element> + <element name="p6_Nt-IgKLsequence"> + <assert_contents> + <has_n_lines n="131" /> + </assert_contents> + </element> + <element name="p7_L7Ae-Weiss"> + <assert_contents> + <has_n_lines n="130" /> + </assert_contents> + </element> + <element name="p3_2_CAG"> + <assert_contents> + <has_n_lines n="138" /> + </assert_contents> + </element> + <element name="p16_bGHpolyA"> + <assert_contents> + <has_n_lines n="138" /> + </assert_contents> + </element> + <element name="p19_mNeoGreen"> + <assert_contents> + <has_n_lines n="138" /> + </assert_contents> + </element> + </output_collection> + </test> + <!--missing with providing gb fragment files--> + <test> + <param name="use_file_names_as_ids" value="True" /> + <conditional name='json_use'> + <param name="use_json_param" value="false" /> + <!-- AvoidPatterns --> + <param name="avoid_patterns" value="BsaI_site + BsmBI_site + BbsI_site + SapI_site + 8x1mer + 5x3mer + 9x2mer" /> + <!-- AvoidHairpins --> + <param name="hairpin_constraints" value='stem_size=20, hairpin_window=200'/> + <!-- EnforceGCContent --> + <param name="gc_constraints" value="mini=0.3, maxi=0.7, window=100 + mini=0.1, maxi=0.9, window=100"/> + <!-- UniquifyAllKmers --> + <param name="kmer_size" value="15" /> + </conditional> + <conditional 
name='setting'> + <param name="mode" value="wkf"/> + <param name="genbank_files"> + <collection type="list"> + <element name="p8_Linker1" value="10_emma_genbanks/p8_Linker1.gb" /> + <element name="p9_PuroR" value="10_emma_genbanks/p9_PuroR.gb" /> + <element name="p9_mTagBFP2" value="10_emma_genbanks/p9_mTagBFP2.gb" /> + <element name="p4_Kt-L7Ae-Weiss" value="10_emma_genbanks/p4_Kt-L7Ae-Weiss.gb" /> + <element name="p6_Nt-IgKLsequence" value="10_emma_genbanks/p6_Nt-IgKLsequence.gb" /> + <element name="HC_Amp_ccdB" value="10_emma_genbanks/HC_Amp_ccdB.gb" /> + <element name="p15_PuroR" value="10_emma_genbanks/p15_PuroR.gb" /> + <element name="p9_BSDR" value="10_emma_genbanks/p9_BSDR.gb" /> + <element name="p6_Kozak-ATG" value="10_emma_genbanks/p6_Kozak-ATG.gb" /> + <element name="p7_L7Ae-Weiss" value="10_emma_genbanks/p7_L7Ae-Weiss.gb" /> + </collection> + </param> + <param name="DB_genbank_files"> + <collection type="list"> + <element name="p3_2_CAG" value="DB_genbank_simulation/p3_2_CAG.gb" /> + <element name="p16_bGHpolyA" value="DB_genbank_simulation/p16_bGHpolyA.gb" /> + <element name="p19_mNeoGreen" value="DB_genbank_simulation/p19_mNeoGreen.gb" /> + </collection> + </param> + <param name="DB_report" value="missing.txt" /> + </conditional> + <!-- Expecting pdf output --> + <output name="report_pdf" ftype="pdf"> + <assert_contents> + <has_size min="86"/> + </assert_contents> + </output> + <!-- Expecting tsv output --> + <output name="report_tsv" file='manufacturability_report.tsv' ftype="tsv"> + <assert_contents> + <has_n_lines n="11" /> + <has_n_columns n="12" /> + </assert_contents> + </output> + <output_collection name="annotated_gb" type="list" count="13"> + <element name="HC_Amp_ccdB"> + <assert_contents> + <has_n_lines n="150" /> + </assert_contents> + </element> + <element name="p15_PuroR"> + <assert_contents> + <has_n_lines n="150" /> + </assert_contents> + </element> + <element name="p4_Kt-L7Ae-Weiss"> + <assert_contents> + <has_n_lines n="120" /> + 
</assert_contents> + </element> + <element name="p6_Kozak-ATG"> + <assert_contents> + <has_n_lines n="124" /> + </assert_contents> + </element> + <element name="p6_Nt-IgKLsequence"> + <assert_contents> + <has_n_lines n="131" /> + </assert_contents> + </element> + <element name="p7_L7Ae-Weiss"> + <assert_contents> + <has_n_lines n="130" /> + </assert_contents> + </element> + <element name="p8_Linker1"> + <assert_contents> + <has_n_lines n="117" /> + </assert_contents> + </element> + <element name="p9_BSDR"> + <assert_contents> + <has_n_lines n="136" /> + </assert_contents> + </element> + <element name="p9_mTagBFP2"> + <assert_contents> + <has_n_lines n="147" /> + </assert_contents> + </element> + <element name="p9_PuroR"> + <assert_contents> + <has_n_lines n="138" /> + </assert_contents> + </element> + <element name="p3_2_CAG"> + <assert_contents> + <has_n_lines n="33" /> + </assert_contents> + </element> + <element name="p16_bGHpolyA"> + <assert_contents> + <has_n_lines n="24" /> + </assert_contents> + </element> + <element name="p19_mNeoGreen"> + <assert_contents> + <has_n_lines n="44" /> + </assert_contents> + </element> + </output_collection> + </test> </tests> <help><![CDATA[ Evaluate Manufacturability @@ -281,6 +608,10 @@ **Parameters**: --------------- +* **Mode**: + This option allows you to choose the mode in which the tool will be executed: + - Standard: Runs the tool as a standalone application. + - Workflow: Adds additional options to make the tool more flexible and interactive within a workflow. * **GenBank File(s)**: List of GenBank files to be processed. * **Use parameter from a JSON file**: Yes/No parameter to indicate if user want to set parameter manually or using a json file @@ -300,6 +631,20 @@ In case on using more than one combination, each one msut be on a ligne. * **K-mer Uniqueness Size**: Avoid sub-sequence of length k with homologies elsewhere. 
* **Use File Names As Sequence IDs**: Recommended if the GenBank file names represent the fragment names. + +**DEMO** +---------- + + Executes the tool in standard mode using default parameters tailored to the test dataset. + + To start a demo, you need to download the test dataset and the adapted default parameters: + + - **GenBank File(s):** `Download GenBank files <https://files.osf.io/v1/resources/235de/providers/osfstorage/6894678c16b49a3aaeb067c9/?zip=>`_ + + **Note:** Provide the GenBank files as a list collection. + `How to make a collection <https://training.galaxyproject.org/training-material/topics/galaxy-interface/tutorials/collections/tutorial.html>`_ + + - **Default Parameters:** They are provided `here <https://osf.io/download/689467ddc9ee0bb750f62d86/>`_ as a JSON file (enable the "Use parameter from a JSON file" option) ]]></help> <citations> <citation type="bibtex">
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/DB_genbank_simulation/p16_bGHpolyA.gb Wed Oct 15 12:33:00 2025 +0000 @@ -0,0 +1,24 @@ +LOCUS . 257 bp DNA UNK 01-JAN-1980 +DEFINITION .. +ACCESSION <unknown id> +VERSION <unknown id> +KEYWORDS . +SOURCE . + ORGANISM . + . +FEATURES Location/Qualifiers + polyA_signal 33..257 + /note="bGH poly(A) signal" + /note="bovine growth hormone polyadenylation signal" + source 1..257 + /source="Exported" + misc_feature 1..257 + /source="p16_bGHpolyA" + /note="From p16_bGHpolyA" +ORIGIN + 1 ctctggggtt cgaaatgacc gaccaagcga cgctgtgcct tctagttgcc agccatctgt + 61 tgtttgcccc tcccccgtgc cttccttgac cctggaaggt gccactccca ctgtcctttc + 121 ctaataaaat gaggaaattg catcgcattg tctgagtagg tgtcattcta ttctgggggg + 181 tggggtgggg caggacagca agggggagga ttgggaagac aatagcaggc atgctgggga + 241 tgcggtgggc tctatgg +//
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/DB_genbank_simulation/p19_mNeoGreen.gb Wed Oct 15 12:33:00 2025 +0000 @@ -0,0 +1,44 @@ +LOCUS . 716 bp DNA UNK 01-JAN-1980 +DEFINITION .. +ACCESSION <unknown id> +VERSION <unknown id> +KEYWORDS . +SOURCE . + ORGANISM . + . +FEATURES Location/Qualifiers + misc_feature 1..11 + /note="KozaK" + CDS 9..11 + /codon_start=1 + /note="ATG" + /translation="M" + CDS 12..716 + /codon_start=1 + /note="mNeonGreen" + /product="bright monomeric yellow-green fluorescent protein + derived from LanYFP (Shaner et al., 2013)" + /translation="VSKGEEDNMASLPATHELHIFGSINGVDFDMVGQGTGNPNDGYEE + LNLKSTKGDLQFSPWILVPHIGYGFHQYLPYPDGMSPFQAAMVDGSGYQVHRTMQFEDG + ASLTVNYRYTYEGSHIKGEAQVKGTGFPADGPVMTNSLTAADWCRSKKTYPNDKTIIST + FKWSYTTGNGKRYRSTARTTYTFAKPMAANYLKNQPMYVFRKTELKHSKTELNFKEWQK + AFTDVMGMDELYK" + source 1..716 + /source="Exported" + misc_feature 1..716 + /source="p19_mNeoGreen" + /note="From p19_mNeoGreen" +ORIGIN + 1 ccgccaccat ggtgagcaag ggcgaggagg ataacatggc ctctctccca gcgacacatg + 61 agttacacat ctttggctcc atcaacggtg tggactttga catggtgggt cagggcaccg + 121 gcaatccaaa tgatggttat gaggagttaa acctgaagtc caccaagggt gacctccagt + 181 tctccccctg gattctggtc cctcatatcg ggtatggctt ccatcagtac ctgccctacc + 241 ctgacgggat gtcgcctttc caggccgcca tggtagatgg ctccggatac caagtccatc + 301 gcacaatgca gtttgaagat ggtgcctccc ttactgttaa ctaccgctac acctacgagg + 361 gaagccacat caaaggagag gcccaggtga aggggactgg tttccctgct gacggtcctg + 421 tgatgaccaa ctcgctgacc gctgcggact ggtgcaggtc gaagaagact taccccaacg + 481 acaaaaccat catcagtacc tttaagtgga gttacaccac tggaaatggc aagcgctacc + 541 ggagcactgc gcggaccacc tacacctttg ccaagccaat ggcggctaac tatctgaaga + 601 accagccgat gtacgtgttc cgtaagacgg agctcaagca ctccaagacc gagctcaact + 661 tcaaggagtg gcaaaaggcc tttaccgatg tgatgggcat ggacgagctg tacaag +//
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/DB_genbank_simulation/p3_2_CAG.gb Wed Oct 15 12:33:00 2025 +0000 @@ -0,0 +1,33 @@ +LOCUS . 690 bp DNA UNK 01-JAN-1980 +DEFINITION .. +ACCESSION <unknown id> +VERSION <unknown id> +KEYWORDS . +SOURCE . + ORGANISM . + . +FEATURES Location/Qualifiers + enhancer 19..398 + /note="CMV enhancer" + /note="human cytomegalovirus immediate early enhancer" + promoter 400..675 + /note="chicken beta-actin promoter" + source 1..690 + /source="Exported" + misc_feature 1..690 + /source="p3_2_CAG" + /note="From p3_2_CAG" +ORIGIN + 1 ggatctgata tcatcgtcga cattgattat tgactagtta ttaatagtaa tcaattacgg + 61 ggtcattagt tcatagccca tatatggagt tccgcgttac ataacttacg gtaaatggcc + 121 cgcctggctg accgcccaac gacccccgcc cattgacgtc aataatgacg tatgttccca + 181 tagtaacgcc aatagggact ttccattgac gtcaatgggt ggactattta cggtaaactg + 241 cccacttggc agtacatcaa gtgtatcata tgccaagtac gccccctatt gacgtcaatg + 301 acggtaaatg gcccgcctgg cattatgccc agtacatgac cttatgggac tttcctactt + 361 ggcagtacat ctacgtatta gtcatcgcta ttaccatggt cgaggtgagc cccacgttct + 421 gcttcactct ccccatctcc cccccctccc cacccccaat tttgtattta tttatttttt + 481 aattattttg tgcagcgatg ggggcggggg gggggggggc gcgcgccagg cggggcgggg + 541 cggggcgagg ggcggggcgg ggcgaggcgg agaggtgcgg cggcagccaa tcagagcggc + 601 gcgctccgaa agtttccttt tatggcgagg cggcggcggc ggcggcccta taaaaagcga + 661 agcgcgcggc gggcgggagt cgctgcgttg +//
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/manufacturability_report.tsv Wed Oct 15 12:33:00 2025 +0000 @@ -0,0 +1,11 @@ +sequence BsaI BsmBI BbsI SapI 8-repeats 1-mers 5-repeats 3-mers 9-repeats 2-mers 20-160-20bp hairpin GC outside 30-70%/100bp GC outside 10-90%/100bp 15bp homologies +p8_Linker1 5-11(-), 1155-1161(-), 1802-1808(+) 97-112, 738-753, 1704-1720, 1722-1738 +p9_PuroR 5-11(-), 1155-1161(-), 1802-1808(+) 1789-2405 97-112, 738-753, 1704-1720, 1722-1738 +p9_mTagBFP2 5-11(-), 1155-1161(-), 1802-1808(+) 2112-2118(+) 1827-1834(-) 97-112, 738-753, 1704-1720, 1722-1738 +p4_Kt-L7Ae-Weiss 1142-1148(+), 1190-1196(-), 1838-1844(+) 1195-1201(+) 401-416, 1042-1057, 1261-1277, 1279-1295 +p6_Nt-IgKLsequence 1142-1148(+), 1231-1237(-), 1879-1885(+) 1236-1242(+) 401-416, 1042-1057, 1302-1318, 1320-1336 +HC_Amp_ccdB 677-683(+) 72-78(-), 942-948(+) 891-925, 1022-1037, 1038-1053, 1674-1706 +p15_PuroR 1142-1148(+), 1769-1775(-), 2417-2423(+) 1774-1780(+) 1136-1754 401-416, 1042-1057, 1840-1856, 1858-1874 +p9_BSDR 5-11(-), 1155-1161(-), 1802-1808(+) 1890-1896(+) 2060-2208 97-112, 738-753, 1704-1720, 1722-1738 +p6_Kozak-ATG 1142-1148(+), 1201-1207(-), 1849-1855(+) 1206-1212(+) 401-416, 1042-1057, 1272-1288, 1290-1306 +p7_L7Ae-Weiss 1142-1148(+), 1514-1520(-), 2162-2168(+) 1519-1525(+) 401-416, 1042-1057, 1585-1601, 1603-1619
