changeset 2:11a3752feb0a draft default tip

planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 7f5d8b62d749a0c41110cd9c04e0254e4fd44893-dirty
author tduigou
date Wed, 15 Oct 2025 12:33:41 +0000
parents 7680420caf9f
children
files get_db_info.py seq_form_db.xml test-data/test_missing_input.csv
diffstat 3 files changed, 129 insertions(+), 22 deletions(-) [+]
line wrap: on
line diff
--- a/get_db_info.py	Wed Jul 23 09:44:50 2025 +0000
+++ b/get_db_info.py	Wed Oct 15 12:33:41 2025 +0000
@@ -93,7 +93,7 @@
     raise Exception("Database connection failed after timeout.")
 
 
-def fetch_annotations(csv_file, sequence_column, annotation_columns, db_uri, table_name, fragment_column_name, output):
+def fetch_annotations(csv_file, sequence_column, annotation_columns, db_uri, table_name, fragment_column_name, output, output_report):
     """Fetch annotations from the database and save the result as GenBank files."""
     db_uri = fix_db_uri(db_uri)
     df = pd.read_csv(csv_file, sep=',', header=None)
@@ -130,10 +130,14 @@
 
             db_fragments = set(fragment_map.keys())
             missing_fragments = sorted(list(csv_fragments - db_fragments))
-            if missing_fragments:
-                raise ValueError(
-                    f" Missing fragments in DB: {', '.join(missing_fragments)}"
-                )
+
+            # Write report: one missing fragment ID per line (empty file if none)
+            with open(output_report, "w") as report_file:
+                if missing_fragments:
+                    for frag in missing_fragments:
+                        report_file.write(f"{frag}\n")
+                else:
+                    report_file.write("")
 
             # === CONTINUE WITH GB FILE CREATION ===
             for _, row in df.iterrows():
@@ -164,8 +168,14 @@
     try:
         for annotated_row in annotated_data:
             backbone_id = annotated_row["Backbone"]
+
             for fragment in annotated_row["Fragments"]:
                 fragment_id = fragment["id"]
+
+                # Skip GenBank file generation for fragments not found in the DB
+                if fragment_id in missing_fragments:
+                    continue
+
                 sequence = fragment.get(sequence_column, "")
                 annotation = fragment.get(annotation_columns, "")
 
@@ -182,7 +192,7 @@
                     k: str(fragment[k]) for k in annotation_columns if k in fragment
                 }
 
-                # LOCUS line extraction from annotation (copy-paste the LOCUS from annotation)
+                # LOCUS line extraction from annotation
                 locus_line_match = re.search(r"LOCUS\s+.+", annotation)
                 if locus_line_match:
                     locus_line = locus_line_match.group()
@@ -190,41 +200,36 @@
                     print(f"LOCUS info missing for fragment {fragment_id}")
                     locus_line = f"LOCUS       {fragment_id: <20} {len(sequence)} bp    DNA     linear   UNK 01-JAN-2025"
 
-                # Format sequence as per GenBank standards (with ORIGIN and line breaks)
+                # Format sequence
                 if "ORIGIN" in sequence:
                     origin_block = sequence.strip()
                 else:
-                    # Format sequence as per GenBank standards (with ORIGIN and line breaks)
                     formatted_sequence = "ORIGIN\n"
                     seq_str = str(record.seq)
-                    for i in range(0, len(seq_str), 60):  # 60 bases per line
+                    for i in range(0, len(seq_str), 60):
                         line_seq = seq_str[i:i + 60]
                         formatted_sequence += f"{str(i + 1).rjust(9)} { ' '.join([line_seq[j:j+10] for j in range(0, len(line_seq), 10)]) }\n"
                     origin_block = formatted_sequence.strip()
 
-                # Find and copy the FEATURES section directly from annotation
+                # Extract FEATURES section
                 features_section = ""
                 features_start = annotation.find("FEATURES")
                 if features_start != -1:
                     features_section = annotation[features_start:]
 
-                # Writing the GenBank file
+                # Write GenBank file
                 if not os.path.exists(output):
                     os.makedirs(output)
 
                 gb_filename = os.path.join(output, f"{fragment_id}.gb")
                 with open(gb_filename, "w") as f:
-                    # Write the LOCUS line
                     f.write(locus_line + "\n")
-                    # Write DEFINITION, ACCESSION, and other annotations
                     f.write(f"DEFINITION  {record.description}\n")
                     f.write(f"ACCESSION   {record.id}\n")
                     f.write(f"VERSION     DB\n")
                     f.write(f"KEYWORDS    .\n")
                     f.write(f"SOURCE      .\n")
-                    # Write the FEATURES section directly from annotation
                     f.write(features_section)
-                    # Write the ORIGIN section
                     f.write(origin_block + "\n")
                     f.write("//\n")
 
@@ -244,6 +249,7 @@
     parser.add_argument("--fragment_column", required=False, help="Fragment column name in the database")
     parser.add_argument("--output", required=True, help="Output dir for gb files")
     parser.add_argument("--json_conf", required=False, help="JSON config file with DB parameters")
+    parser.add_argument("--report", required=True, help="Output report for fragments checking in DB")
     args = parser.parse_args()
     
     # get param and chek for json
@@ -287,7 +293,7 @@
                 time.sleep(2)
 
     # Fetch annotations from the database and save as gb
-    fetch_annotations(args.input, sequence_column, annotation_column, db_uri, table, fragment_column, args.output)
+    fetch_annotations(args.input, sequence_column, annotation_column, db_uri, table, fragment_column, args.output, args.report)
 
 if __name__ == "__main__":
     main()
--- a/seq_form_db.xml	Wed Jul 23 09:44:50 2025 +0000
+++ b/seq_form_db.xml	Wed Oct 15 12:33:41 2025 +0000
@@ -1,8 +1,8 @@
 <tool id="seq_form_db" name="Get sequences Data From DB" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="21.09">
     <description>Import fragment's data from an accessible DB and export it as .gb files</description>
     <macros>
-        <token name="@VERSION_SUFFIX@">1</token>
-        <token name="@TOOL_VERSION@">0.2.0</token>
+        <token name="@VERSION_SUFFIX@">2</token>
+        <token name="@TOOL_VERSION@">0.3.0</token>
     </macros>
     <requirements>
         <requirement type="package" version="2.2.3">pandas</requirement>
@@ -25,6 +25,7 @@
                 --json_conf '$json_use.json_conf'
             #end if
             --output 'outdir'
+            --report '$report'
     ]]></command>
     <inputs> 
         <param name="input" type="data" format="csv" label="Input CSV File" />
@@ -46,6 +47,7 @@
         <collection name="output_gb" type="list" label="GenBank Files collection" >
             <discover_datasets pattern="(?P&lt;name&gt;.*).gb" format="genbank" directory="outdir" />
         </collection>
+        <data name='report' format='txt' label='missing fragments' />
     </outputs>
     <tests>
     <!--manual parameters-->
@@ -121,6 +123,11 @@
                     </assert_contents>
                 </element>
             </output_collection>
+            <output name='report'>
+                <assert_contents>
+                    <has_n_lines n="0" />
+                </assert_contents>
+            </output>
         </test>
         <!--JSON parameters-->
         <test> 
@@ -191,6 +198,96 @@
                     </assert_contents>
                 </element>
             </output_collection>
+            <output name='report'>
+                <assert_contents>
+                    <has_n_lines n="0" />
+                </assert_contents>
+            </output>
+        </test>
+        <!--test missing fragments-->
+        <test> 
+            <param name="input" value="test_missing_input.csv" />
+            <conditional name="json_use">
+                <param name='use_json_paramers' value='false' />
+                <param name="table" value="sample" />
+                <param name="sequence_column" value="sequence" />
+                <param name="annotation_columns" value="annotation" />
+                <param name="fragment_column" value="fragment" />
+                <param name="db_uri" value="postgresql://postgres:RK17@localhost:5432/test_fragments_db" />
+            </conditional>
+            <output_collection name="output_gb" type="list" count="12">
+                <element name="part_A">
+                    <assert_contents>
+                     <has_n_lines min="10" />
+                    </assert_contents>
+                </element>
+                <element name="part_B">
+                    <assert_contents>
+                     <has_n_lines min="10" />
+                    </assert_contents>
+                </element>
+                <element name="part_C">
+                    <assert_contents>
+                     <has_n_lines min="10" />
+                    </assert_contents>
+                </element>
+                <element name="part_D">
+                    <assert_contents>
+                     <has_n_lines min="10" />
+                    </assert_contents>
+                </element>
+                <element name="part_E">
+                    <assert_contents>
+                     <has_n_lines min="10" />
+                    </assert_contents>
+                </element>
+                <element name="part_F">
+                    <assert_contents>
+                     <has_n_lines min="10" />
+                    </assert_contents>
+                </element>
+                <element name="part_G">
+                    <assert_contents>
+                     <has_n_lines min="10" />
+                    </assert_contents>
+                </element>
+                <element name="part_H">
+                    <assert_contents>
+                     <has_n_lines min="10" />
+                    </assert_contents>
+                </element>
+                <element name="part_I">
+                    <assert_contents>
+                     <has_n_lines min="10" />
+                    </assert_contents>
+                </element>
+                <element name="part_J">
+                    <assert_contents>
+                     <has_n_lines min="10" />
+                    </assert_contents>
+                </element>
+                <element name="part_K">
+                    <assert_contents>
+                     <has_n_lines min="10" />
+                    </assert_contents>
+                </element>
+                <element name="part_L">
+                    <assert_contents>
+                     <has_n_lines min="10" />
+                    </assert_contents>
+                </element>
+            </output_collection>
+            <output name='report'>
+                <assert_contents>
+                    <has_n_lines n="6" />
+                    <has_line_matching expression="ACP10001AaCbbBS" />
+                    <has_line_matching expression="NEW20001BbDccKT" />
+                    <has_line_matching expression="XYZ10003AaCbbBS" />
+                    <has_line_matching expression="CFP10002AaCbbBS" />
+                    <has_line_matching expression="ALT30005CcEddLM" />
+                    <has_line_matching expression="QWE10004AaCbbBS" />
+                </assert_contents>
+            </output>
         </test>
     </tests>
     
--- a/test-data/test_missing_input.csv	Wed Jul 23 09:44:50 2025 +0000
+++ b/test-data/test_missing_input.csv	Wed Oct 15 12:33:41 2025 +0000
@@ -1,4 +1,8 @@
-Sample-1,ACP10001AaCbbBS,NEW20001BbDccKT,XYZ10003AaCbbBS
-Sample-2,CFP10002AaCbbBS,ACP10001AaCbbBS,ALT30005CcEddLM
-Sample-3,XYZ10003AaCbbBS,ALT30005CcEddLM,ACP10001AaCbbBS
-Sample-4,QWE10004AaCbbBS,NEW20001BbDccKT,CFP10002AaCbbBS
+Sample-1,ACP10001AaCbbBS,NEW20001BbDccKT,XYZ10003AaCbbBS,,,
+Sample-2,CFP10002AaCbbBS,ACP10001AaCbbBS,ALT30005CcEddLM,,,
+Sample-3,XYZ10003AaCbbBS,ALT30005CcEddLM,ACP10001AaCbbBS,,,
+Sample-4,QWE10004AaCbbBS,NEW20001BbDccKT,CFP10002AaCbbBS,,,
+construct_3,construct_1,part_L,part_J,part_K,,
+construct_4,construct_2,part_L,part_J,part_K,,
+construct_1,part_A,part_B,part_C,part_D,part_E,part_F
+construct_2,part_A,part_G,part_H,part_I,part_F,