Mercurial > repos > iuc > hicstuff_pipeline

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hicstuff_pipeline.xml	Fri Nov 25 11:32:55 2022 +0000
@@ -0,0 +1,107 @@
+<tool id="hicstuff_pipeline" name="hicstuff full pipeline" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
+    <description>generates a Hi-C contact matrix</description>
+    <macros>
+        <import>macros.xml</import>
+        <token name="@VERSION_SUFFIX@">0</token>
+    </macros>
+    <expand macro="requirements" />
+    <command detect_errors="exit_code"><![CDATA[
+hicstuff pipeline
+    --genome '$genome'
+    --outdir results
+    --aligner $aligner
+    $circular
+    $duplicates
+    --enzyme '$enzyme'
+    $filter
+    --mapping $mapping
+    --matfmt $matfmt
+    --quality-min $quality_min
+    --size $size
+    --threads \${GALAXY_SLOTS:-1}
+    ## Positional arguments: forward reads first, then reverse reads.
+    ## Both branches address their inputs through the enclosing paired_cond conditional.
+    #if $paired_cond.paired_select == "paired"
+        '$paired_cond.reads.forward'
+        '$paired_cond.reads.reverse'
+    #else
+        '$paired_cond.forward_reads'
+        '$paired_cond.reverse_reads'
+    #end if
+    ]]></command>
+    <inputs>
+        <param type="data" name="genome" format="fasta,fasta.gz" label="Genome fasta file"/>
+        <conditional name="paired_cond">
+            <param name="paired_select" type="select" label="Paired reads">
+                <option value="paired">In a dataset pair</option>
+                <option value="separate">In two separate datasets</option>
+            </param>
+            <when value="paired">
+                <param name="reads" type="data_collection" collection_type="paired" format="fastqsanger,fastqsanger.gz" label="Paired reads"/>
+            </when>
+            <when value="separate">
+                <param name="forward_reads" type="data" format="fastqsanger,fastqsanger.gz" label="Forward reads"/>
+                <param name="reverse_reads" type="data" format="fastqsanger,fastqsanger.gz" label="Reverse reads"/>
+            </when>
+        </conditional>
+        <param argument="--aligner" type="select" label="Alignment software to use" help="Minimap2 should only be used for reads > 100 bp">
+            <option value="bowtie2" selected="true">bowtie2</option>
+            <option value="minimap2">minimap2</option>
+            <option value="bwa">bwa</option>
+        </param>
+        <param argument="--circular" type="boolean" truevalue="--circular" falsevalue="" label="Circular genome"/>
+        <param argument="--duplicates" type="boolean" truevalue="--duplicates" falsevalue="" label="Removes PCR duplicates" help="PCR duplicates are defined as sets of pairs having identical mapping positions for both reads."/>
+        <param argument="--enzyme" type="text" value="5000" label="Bin size or enzyme" help="Restriction enzyme or 'mnase' if a string, or chunk size (i.e. resolution) if a number. Can also be multiple comma-separated enzymes."/>
+        <param argument="--filter" type="boolean" truevalue="--filter" falsevalue="" label="Filters out spurious 3C events, such as self religations or undigested fragments" help="This is only really useful at very fine resolutions (1-2kb) and not needed most of the time. This option is only meaningful when --enzyme is given a restriction enzyme and not a bin size."/>
+        <param argument="--mapping" type="select" label="Parameter of mapping" help="'normal': Directly map reads without any process. 'iterative': Map reads iteratively using iteralign, by truncating reads to 20bp and then repeatedly extending to align them. 'cutsite': Cut reads at the religation sites of the given enzyme using cutsite, create new pairs of reads and then align them ; enzyme is required">
+            <option value="normal" selected="true">normal</option>
+            <option value="iterative">iterative</option>
+            <option value="cutsite">cutsite</option>
+        </param>
+        <param argument="--matfmt" type="select" label="Format of the output sparse matrix" help="Available formats are bg2 (bedgraph2d), graal (graal-compatible plain text COO format) and cool, a binary format that is probably the most appropriate for large genomes.">
+            <option value="bg2">bg2</option>
+            <option value="cool">cool</option>
+            <option value="graal" selected="true">graal</option>
+        </param>
+        <param argument="--quality-min" type="integer" value="30" min="0" label="Minimum mapping quality for selecting contacts"/>
+        <param argument="--size" type="integer" value="0" min="0" label="Minimum size threshold to consider contigs. Keep all contigs by default."/>
+    </inputs>
+    <outputs>
+        <!-- Files are collected from the "results" directory that the command passes as the output directory. -->
+        <data name="abs_fragments_contacts_weighted" from_work_dir="./results/abs_fragments_contacts_weighted.txt" format="tabular"/>
+        <data name="fragments_list" from_work_dir="./results/fragments_list.txt" format="tabular"/>
+        <data name="info_contigs" from_work_dir="./results/info_contigs.txt" format="tabular"/>
+    </outputs>
+    <tests>
+        <!-- Separate forward/reverse datasets; all options not set here keep their defaults. -->
+        <test>
+            <param name="genome" value="seq.fa.gz"/>
+            <conditional name="paired_cond">
+                <param name="paired_select" value="separate"/>
+                <param name="forward_reads" value="sample.reads_for.fastq.gz"/>
+                <param name="reverse_reads" value="sample.reads_rev.fastq.gz"/>
+            </conditional>
+            <output name="info_contigs" file="info_contigs.txt"/>
+            <assert_stderr>
+                <has_text text="Contact map generated"/>
+            </assert_stderr>
+        </test>
+    </tests>
+    <help><![CDATA[
+
+hicstuff is a toolkit to generate and manipulate Hi-C matrices.
+
+The "hicstuff full pipeline" tool generates a Hi-C contact matrix.
+Output files can be used with instaGRAAL downstream.
+
+-----------
+Input files
+-----------
+* the fasta genome file
+* forward reads
+* reverse reads
+
+------------
+Output files
+------------
+* abs_fragments_contacts_weighted.txt: Sparse matrix file with 3 columns: the row, column and value of each nonzero pixel. The first row contains the shape and total number of nonzero pixels in the matrix.
+* fragments_list.txt: Contains genomic coordinates of the matrix bins (row/columns).
+* info_contigs.txt: Contains chromosome names, their lengths and number of bins.
+
+    ]]></help>
+    <expand macro="citations" />
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml	Fri Nov 25 11:32:55 2022 +0000
@@ -0,0 +1,34 @@
+<macros>
+    <token name="@TOOL_VERSION@">3.1.5</token>
+    <token name="@PROFILE@">21.09</token>
+    <xml name="requirements">
+        <requirements>
+            <!-- Pinned through the shared version token so the package and tool versions cannot drift apart. -->
+            <requirement type="package" version="@TOOL_VERSION@">hicstuff</requirement>
+            <yield/>
+        </requirements>
+    </xml>
+    <xml name="citations">
+        <!-- Citations for hicstuff: one Zenodo DOI entry plus one BibTeX record; the trailing yield is the slot where an expanding tool can add its own entries. -->
+        <!-- NOTE(review): the BibTeX record names version v2.3.1 and mixes two Zenodo record numbers (4066351 in doi, 4066363 in url); confirm against Zenodo. -->
+        <citations>
+            <citation type="doi">10.5281/zenodo.4066363</citation>
+            <citation type="bibtex">
+@software{cyril_matthey_doret_2020_4066351,
+  author       = {Cyril Matthey-Doret and
+                  Lyam Baudry and
+                  Amaury Bignaud and
+                  Axel Cournac and
+                  Remi-Montagne and
+                  Nadège Guiglielmoni and
+                  Théo Foutel-Rodier and
+                  Vittore F. Scolari},
+  title        = {hicstuff: Simple library/pipeline to generate and handle Hi-C data },
+  month        = oct,
+  year         = 2020,
+  publisher    = {Zenodo},
+  version      = {v2.3.1},
+  doi          = {10.5281/zenodo.4066351},
+  url          = {http://doi.org/10.5281/zenodo.4066363}
+}</citation>
+            <yield />
+        </citations>
+    </xml>
+</macros>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/info_contigs.txt	Fri Nov 25 11:32:55 2022 +0000
@@ -0,0 +1,3 @@
+contig	length	n_frags	cumul_length
+seq1	60000	12	0
+seq2	20000	4	12
Binary file test-data/sample.reads_for.fastq.gz has changed
Binary file test-data/sample.reads_rev.fastq.gz has changed
Binary file test-data/seq.fa.gz has changed