Mercurial > repos > iuc > fastq_groupmerge

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/fastq_groupmerge.xml	Sun Nov 09 22:44:31 2025 +0000
@@ -0,0 +1,229 @@
+<tool id="fastq_groupmerge" name="Fastq groupmerge" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
+    <macros> <!-- tokens substituted into the tool version/profile attributes and the package requirement -->
+        <token name="@PROFILE@">25.0</token>
+        <token name="@TOOL_VERSION@">1.0.1</token>
+        <token name="@VERSION_SUFFIX@">0</token>
+    </macros>
+    <requirements> <!-- single package dependency; its version is tied to the wrapper version through @TOOL_VERSION@ -->
+        <requirement type="package" version="@TOOL_VERSION@">fastq-groupmerge</requirement>
+    </requirements>
+    <command detect_errors="exit_code">
+        <![CDATA[
+            ## Stage every input as a symlink in 'samples', named after its element identifier and datatype extension.
+            mkdir 'output' 'samples' &&
+            ## NOTE(review): the link extension is the Galaxy datatype extension - confirm the script also accepts fastq subtypes.
+            #if $input.is_select == 'pair':
+                #for $sample in $input.samples:
+                    ln -s '$sample.forward' 'samples/${sample.element_identifier}_forward.${sample.forward.ext}' &&
+                    ln -s '$sample.reverse' 'samples/${sample.element_identifier}_reverse.${sample.reverse.ext}' &&
+                #end for
+            #else:
+                #for $sample in $input.samples:
+                    ln -s '$sample' 'samples/${sample.element_identifier}.${sample.ext}' &&
+                #end for
+            #end if
+            ## Run the merge on the staged links; 'output' is the directory the output collections are discovered from.
+            fastq_groupmerge.py
+            'samples'
+            'output'
+            #if $metadata:
+                --metadata '$metadata'
+                --group_col '$group_col'
+                #if $metadata.ext == 'csv':
+                    --sep ","
+                #else:
+                    --sep "\t"
+                #end if
+            #end if
+            #if $input.is_select == 'pair':
+                --forward_suffix '_forward'
+                --reverse_suffix '_reverse'
+            #else:
+                --single_reads
+            #end if
+
+        ]]>
+    </command>
+    <inputs>
+        <conditional name="input"> <!-- the selected library layout decides which collection type is requested -->
+            <param name="is_select" type="select" label="Check type of fastq read library">
+                <option value="single">Single reads</option>
+                <option value="pair" selected="true">Paired reads</option>
+            </param>
+            <when value="single"> <!-- flat list: one dataset per element -->
+                <param name="samples" type="data_collection" format="fastq,fastq.gz" collection_type="list" label="Input single sample(s) read(s)"/>
+            </when>
+            <when value="pair"> <!-- list of forward/reverse pairs -->
+                <param name="samples" type="data_collection" format="fastq,fastq.gz" collection_type="list:paired" label="Input paired sample(s) read(s) collection"/>
+            </when>
+        </conditional>
+        <param argument="--metadata" type="data" format="tabular,csv,tsv" optional="true" multiple="false" label="Metadata table file" help="Metadata file with first column sample name and another column with group ID. Multiple grouping is allowed, see the help section. If no metadata table is provided, this tool will merge all samples!"/>
+        <param argument="--group_col" type="text" value="group" label="Input the column name of the `group` column" help="The metadata file should contain two columns, one with the sample names and one with sample group ID. Use the same ID for samples that should be grouped. Look at the help section for more information!"/>
+    </inputs>
+    <outputs> <!-- files are collected from 'output'; group names may contain '_' because only the final _forward/_reverse suffix (set in the command) selects the pair member -->
+        <collection name="merged_samples_pairs" type="list:paired" label="${tool.name} on ${on_string}: Merged samples (pairs)">
+            <discover_datasets pattern="(?P&lt;identifier_0&gt;.+)_(?P&lt;identifier_1&gt;forward|reverse)\.fastq\.gz" ext="fastq.gz" directory="output"/>
+            <filter>input['is_select'] == 'pair'</filter>
+        </collection>
+        <collection name="merged_samples_single" type="list" label="${tool.name} on ${on_string}: Merged samples (single)">
+            <discover_datasets pattern="(?P&lt;identifier_0&gt;.+)\.fastq\.gz" ext="fastq.gz" directory="output"/>
+            <filter>input['is_select'] == 'single'</filter>
+        </collection>
+    </outputs>
+    <tests>
+        <test expect_num_outputs="1"> <!-- paired input, tab-separated metadata, custom group column; A1 is listed in two groups -->
+            <conditional name="input">
+                <param name="is_select" value="pair"/>
+                <param name="samples">
+                    <collection type="list:paired">
+                        <element name="A1">
+                            <collection type="paired">
+                                <element name="forward" ftype="fastq.gz" value="A1_forward.fastq.gz"/>
+                                <element name="reverse" ftype="fastq.gz" value="A1_reverse.fastq.gz"/>
+                            </collection>
+                        </element>
+                        <element name="B1">
+                            <collection type="paired">
+                                <element name="forward" ftype="fastq" value="B1_forward.fastq"/>
+                                <element name="reverse" ftype="fastq" value="B1_reverse.fastq"/>
+                            </collection>
+                        </element>
+                    </collection>
+                </param>
+            </conditional>
+            <param name="metadata" ftype="tabular" value="metadata_1.csv"/>
+            <param name="group_col" value="TEST_COLUMN"/>
+            <output_collection name="merged_samples_pairs" type="list:paired" count="2">
+                <element name="control">
+                    <element name="forward" ftype="fastq.gz" value="control_forward.fastq.gz" compare="sim_size"/>
+                    <element name="reverse" ftype="fastq.gz" value="control_reverse.fastq.gz" compare="sim_size"/>
+                </element>
+                <element name="single">
+                    <element name="forward" ftype="fastq.gz" value="single_forward.fastq.gz" compare="sim_size"/>
+                    <element name="reverse" ftype="fastq.gz" value="single_reverse.fastq.gz" compare="sim_size"/>
+                </element>
+            </output_collection>
+        </test>
+        <test expect_num_outputs="1"> <!-- paired input, comma-separated metadata, default group column name -->
+            <conditional name="input">
+                <param name="is_select" value="pair"/>
+                <param name="samples">
+                    <collection type="list:paired">
+                        <element name="A2">
+                            <collection type="paired">
+                                <element name="forward" ftype="fastq" value="A2_R1.fastq"/>
+                                <element name="reverse" ftype="fastq" value="A2_R2.fastq"/>
+                            </collection>
+                        </element>
+                        <element name="B2">
+                            <collection type="paired">
+                                <element name="forward" ftype="fastq" value="B2_R1.fastq"/>
+                                <element name="reverse" ftype="fastq" value="B2_R2.fastq"/>
+                            </collection>
+                        </element>
+                    </collection>
+                </param>
+            </conditional>
+            <param name="metadata" ftype="csv" value="metadata_2.csv"/>
+            <output_collection name="merged_samples_pairs" type="list:paired" count="1">
+                <element name="treatment">
+                    <element name="forward" ftype="fastq.gz" value="treatment_forward.fastq.gz" compare="sim_size"/>
+                    <element name="reverse" ftype="fastq.gz" value="treatment_reverse.fastq.gz" compare="sim_size"/>
+                </element>
+            </output_collection>
+        </test>
+        <test expect_num_outputs="1"> <!-- single-read list; the metadata assigns all four elements to the group 'Test' -->
+            <conditional name="input">
+                <param name="is_select" value="single"/>
+                <param name="samples">
+                    <collection type="list">
+                        <element name="A1_forward" ftype="fastq.gz" value="A1_forward.fastq.gz"/>
+                        <element name="A1_reverse" ftype="fastq.gz" value="A1_reverse.fastq.gz"/>
+                        <element name="B1_forward" ftype="fastq" value="B1_forward.fastq"/>
+                        <element name="B1_reverse" ftype="fastq" value="B1_reverse.fastq"/>
+                    </collection>
+                </param>
+            </conditional>
+            <param name="metadata" ftype="csv" value="metadata_single.csv"/>
+            <output_collection name="merged_samples_single" type="list" count="1">
+                <element name="Test" ftype="fastq.gz" value="Test.fastq.gz" compare="sim_size"/>
+            </output_collection>
+        </test>
+        <test expect_num_outputs="1"> <!-- paired input without a metadata file; a single pair named 'merged' is expected -->
+            <conditional name="input">
+                <param name="is_select" value="pair"/>
+                <param name="samples">
+                    <collection type="list:paired">
+                        <element name="A1">
+                            <collection type="paired">
+                                <element name="forward" ftype="fastq.gz" value="A1_forward.fastq.gz"/>
+                                <element name="reverse" ftype="fastq.gz" value="A1_reverse.fastq.gz"/>
+                            </collection>
+                        </element>
+                        <element name="B1">
+                            <collection type="paired">
+                                <element name="forward" ftype="fastq" value="B1_forward.fastq"/>
+                                <element name="reverse" ftype="fastq" value="B1_reverse.fastq"/>
+                            </collection>
+                        </element>
+                    </collection>
+                </param>
+            </conditional>
+            <output_collection name="merged_samples_pairs" type="list:paired" count="1">
+                <element name="merged">
+                    <element name="forward" ftype="fastq.gz" value="merged_forward.fastq.gz" compare="sim_size"/>
+                    <element name="reverse" ftype="fastq.gz" value="merged_reverse.fastq.gz" compare="sim_size"/>
+                </element>
+            </output_collection>
+        </test>
+    </tests>
+    <help>
+        <![CDATA[
+
+            **What this tool does**
+
+            This tool is designed to group sample fastq reads together based on a grouping defined in a metadata file.
+            This tool can be used to support grouped assembly. In some cases you may want to group samples in multiple ways, e.g. merge technical replicates but also merge samples from similar sources (e.g. all from the gut). To this end you can provide multiple groupings by listing a sample on several lines with different group IDs (see the example below).
+
+            **Input**
+
+            - A collection of paired reads (or a list of single reads), which can be in fastq or fastq.gz format
+            - OPTIONAL BUT RECOMMENDED: a metadata file, either tab-separated (format: tabular/tsv) or comma-separated (format: csv)
+
+            The metadata file can look like this, for example:
+
+            ::
+
+                sample_id,group
+                A1,control
+                B1,control
+                A1,A1
+                Test,
+                ,Test
+
+            Important notes:
+
+            - The metadata file is required to have a column 'sample_id' with the sample names. When using the paired collection option, the sample name is the name of the pair (for example, if 'A1' is the name of the pair in the collection, 'A1' has to be written in the sample_id column).
+            - The column 'group' can be called anything. All samples with the same ID will be merged together in the output file. In the example file, the output 'control_forward.fastq.gz' will contain the forward reads from 'A1' and 'B1'.
+            - When there is an empty entry in any column, that line will be ignored!
+            - When using the single reads option, note that the 'sample_id' column has to contain the complete name of the input without the file extension; for example, for the input 'test_read.fastq' the entry in the metadata table has to be 'test_read'.
+            - If a metadata file is given, only the samples listed in this file are taken into account. The input collection may therefore contain additional samples; they are ignored if they are not listed in the metadata file!
+
+            **Output**
+
+            - For each group stated in the 'group' column, a forward file [{group_name}_{forward_suffix}.fastq.gz] and a reverse file [{group_name}_{reverse_suffix}.fastq.gz] will be created.
+            - When no metadata file is given, all inputs which match the 'forward_suffix' and the 'reverse_suffix' will be merged together into one file each for forward and reverse!
+
+        ]]>
+    </help>
+    <citations> <!-- BibTeX entry pointing at the GitHub repository of fastq-groupmerge -->
+        <citation type="bibtex">@misc{BibEntry2025Oct,
+            title = {{fastq-groupmerge}},
+            author = {Santino Faack (SantaMcCloud)},
+            journal = {GitHub},
+            year = {2025},
+            month = oct,
+            url = {https://github.com/SantaMcCloud/fastq-groupmerge}
+        }</citation>
+    </citations>
+</tool>
Binary file test-data/A1_R1.fastq.gz has changed
Binary file test-data/A1_R2.fastq.gz has changed
Binary file test-data/A1_forward.fastq.gz has changed
Binary file test-data/A1_reverse.fastq.gz has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/A2_R1.fastq	Sun Nov 09 22:44:31 2025 +0000
@@ -0,0 +1,4 @@
+@S0R226/1
+GGTTTACCAATTGAAAATGCAATTCAAAAATTAGGTGTCAATCGTAAAGAAATGCCGACATACGAATTTAGAGCACTTTGTGAGAAATATGCGCGTGAACAAGTTGCAATTCAAATCGGTGATTTTAAATCGTTAGGTAAAAGTGGGGAT
++
+DAAGGGCGBIEHHKKGHKGKFKKKJGJKIGIIJJ@KHDHHKIJE@GGE=IKCEJKJKEEEEIG8EGJEICJGEE0EIE;3$HCDEA6EEDEF?E$DCE4<@$EEEEEEDEEDEEED$E)D@FDCDDEECEEEEEDCE=$;$EDECA;C4:
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/A2_R2.fastq	Sun Nov 09 22:44:31 2025 +0000
@@ -0,0 +1,4 @@
+@S0R226/2
+ATGCCCTTCATCGCCATTACCGCAAATACACGAATTTGTGTTGCTTCAAAATCTTTATTGAGTGTTAAATAAGGATTCTTATAATCCCCAATTTTACATAACTATTTAAAATAAGCCATTTGAATTGCAATTTATTTATGAGCAGATTGC
++
+<CAGGG@GIIIIEJKK9J$CK=KHKHKCKKJKIKJGKJKICJBFAJ=BA>CJA:<0EJADIGCAEIE@EEEE:G$BE=EEBEEDEEEBEEEBDFEEE$9CDE$EE$E@;B$C$EEEAC;E6E1DEECEED$;C$E$$EA;$BE5$D$@$=
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/B1_forward.fastq	Sun Nov 09 22:44:31 2025 +0000
@@ -0,0 +1,4 @@
+@S0R15658/1
+AATGGGAAGATGATTTTAGGGAATATCTCAAAATGCTGGATGAGACTAAACCTGTAGTCTTATGTGGGGACTTAAGCGTCGCTCATAAAGAGATTGACTTGAAAAATCCTTCAGCGAATCGTAAAAACCCTGGCTTTAGTTATCAAGAAC
++
+AADGGGEEIIHII<JCKKJKKKFFJFJKIJIJKKKII@JHKKKDKEEJKJJHEKJK$CIEH>JEKGIEGDGE@EE$K$EIEEEADC?DEEE?EEEECDC1E9EEEDEEDEE=)CCCEDE1BC??FE7?7CAEDEDAAC3E$EE$;;CEE$
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/B1_reverse.fastq	Sun Nov 09 22:44:31 2025 +0000
@@ -0,0 +1,4 @@
+@S0R11796/2
+AACCCCGTTGCTCACGACGGAATCGTTGTAAGTGATCTTTCTTAAGTTTTGTAATATTTTGACCAGTTACACCAATGGTAAGGCTGGTCACCGAGTCAATTGTGGAAATGCCATTAAGAAGCGTCGTTTTACCAGAACCTTATGGTCCCA
++
+ADD>E$3E=I=?IHHJKKKJK$GKKFHKJGFKKK*KK:FKJGKCJCIH8I:KHDKDF>B$DEK<B$848DKEFIGD;EG@)$:ECGGE6?EDEEEEEE$$:FEEEDA$$??$?BEEAECCFAEE=DD4@D6CDEBCAECB$EC9$=EA$;
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/B2_R1.fastq	Sun Nov 09 22:44:31 2025 +0000
@@ -0,0 +1,4 @@
+@S0R6353/1
+ATATCCAAAATCTTCATACACGTTGTATTCAAGAGTTTAATAAACAAATTTATTTGGTGGGGGGCGTTGACCGCGCATGTAATTTCGATGCATTTACGGTTGGTCACTCAATGCCAATCAACGAATCACTTTAAAAAATAGAAAGTGCAG
++
+DDDGGGGGHDBHGK;KKJJJJKDHKJFJKJHEIHJBJIJKJJEJCEEKJJBKHDCCJEIEK8FDD?EJEIG:E:FEC,DHEEGCE@EEE@-DAEEEE'DCEADCC=EE?AEEED;EEE==E?EEE?CE@EE;$@DBCB$DDDCB$D$E:$
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/B2_R2.fastq	Sun Nov 09 22:44:31 2025 +0000
@@ -0,0 +1,4 @@
+@S0R15658/2
+CGAAGTAATCAATTCTCCACCCTATGTTTCGACTACGTGCATTGGCATGGTAACTCCACCAGGCATAACGATCGATTGCGTTTGGATGTAATTCACGGAACGAATCACTAAAGCCAGCATCTGAAAGATTGGTGAACTGTTCACGTTATG
++
+D8DEEGGGIDIGIJGKKK@KICKHG@JKKGKKGK@JG=KEEKICEJK$EKKIKJKCJEKIHKG$EGEJIEA7EECEE6EEIEDGCC??E=FE:EEED3EBEEC)EEE$EDEE$AECEEED(E$=?D$EEE$@DBC5$:;EE$DEBA=$?$
Binary file test-data/Test.fastq.gz has changed
Binary file test-data/control_forward.fastq.gz has changed
Binary file test-data/control_reverse.fastq.gz has changed
Binary file test-data/merged_forward.fastq.gz has changed
Binary file test-data/merged_reverse.fastq.gz has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/metadata_1.csv	Sun Nov 09 22:44:31 2025 +0000
@@ -0,0 +1,6 @@
+sample_id	TEST_COLUMN
+A1	control
+B1	control
+A1	single
+Test
+	Test
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/metadata_2.csv	Sun Nov 09 22:44:31 2025 +0000
@@ -0,0 +1,3 @@
+sample_id,group
+A2,treatment
+B2,treatment
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/metadata_single.csv	Sun Nov 09 22:44:31 2025 +0000
@@ -0,0 +1,5 @@
+sample_id,group
+A1_forward,Test
+A1_reverse,Test
+B1_reverse,Test
+B1_forward,Test
Binary file test-data/single_forward.fastq.gz has changed
Binary file test-data/single_reverse.fastq.gz has changed
Binary file test-data/treatment_forward.fastq.gz has changed
Binary file test-data/treatment_reverse.fastq.gz has changed