Mercurial > repos > iuc > hicstuff_pipeline
view hicstuff_pipeline.xml @ 1:6956f0783d77 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/hicstuff commit c1b3c8d3e6a2f18ae89918e89d8dc8a3a0a6d24c
author | iuc |
---|---|
date | Fri, 04 Oct 2024 08:48:01 +0000 |
parents | 1efd17d2bfdb |
children | 7beecdd0976f |
line wrap: on
line source
<tool id="hicstuff_pipeline" name="hicstuff full pipeline" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
    <!--
        Galaxy wrapper for "hicstuff pipeline": takes a genome FASTA plus a pair of
        read files and produces a Hi-C contact matrix, a fragment list and a contig
        info table (see the outputs section).
        @TOOL_VERSION@, @PROFILE@ and the "requirements"/"citations" macros are not
        defined in this file; presumably they come from the imported macros.xml.
    -->
    <description>generates a Hi-C contact matrix</description>
    <macros>
        <import>macros.xml</import>
        <token name="@VERSION_SUFFIX@">1</token>
    </macros>
    <expand macro="requirements" />
    <!--
        Reads are passed as the two trailing positional arguments. Every parameter
        that lives inside the "paired_cond" conditional is referenced through its
        fully qualified name ($paired_cond.<name>) in both branches, so the template
        does not depend on unqualified name lookup of nested parameters.
    -->
    <command detect_errors="exit_code"><![CDATA[
        hicstuff pipeline
            --genome '$genome'
            --outdir results
            --aligner $aligner
            $circular
            $duplicates
            --enzyme '$enzyme'
            $filter
            --mapping $mapping
            --matfmt $matfmt
            --quality-min $quality_min
            --size $size
            --threads "\${GALAXY_SLOTS:-1}"
            #if $paired_cond.paired_select == "paired"
                '$paired_cond.reads.forward'
                '$paired_cond.reads.reverse'
            #else
                '$paired_cond.forward_reads'
                '$paired_cond.reverse_reads'
            #end if
    ]]></command>
    <inputs>
        <param type="data" name="genome" format="fasta,fasta.gz" label="Genome fasta file"/>
        <!-- Reads are supplied either as one paired collection or as two separate datasets. -->
        <conditional name="paired_cond">
            <param name="paired_select" type="select" label="Paired reads">
                <option value="paired">In a dataset pair</option>
                <option value="separate">In two separate datasets</option>
            </param>
            <when value="paired">
                <param name="reads" type="data_collection" collection_type="paired" format="fastqsanger,fastqsanger.gz" label="Paired reads"/>
            </when>
            <when value="separate">
                <param name="forward_reads" type="data" format="fastqsanger,fastqsanger.gz" label="Forward reads"/>
                <param name="reverse_reads" type="data" format="fastqsanger,fastqsanger.gz" label="Reverse reads"/>
            </when>
        </conditional>
        <param argument="--aligner" type="select" label="Alignment software to use" help="Minimap2 should only be used for reads &gt; 100 bp">
            <option value="bowtie2" selected="true">bowtie2</option>
            <option value="minimap2">minimap2</option>
            <option value="bwa">bwa</option>
        </param>
        <!-- Boolean flags expand to the bare option when checked and to nothing otherwise. -->
        <param argument="--circular" type="boolean" truevalue="--circular" falsevalue="" label="Circular genome"/>
        <param argument="--duplicates" type="boolean" truevalue="--duplicates" falsevalue="" label="Removes PCR duplicates" help="PCR duplicates are defined as sets of pairs having identical mapping positions for both reads."/>
        <param argument="--enzyme" type="text" value="5000" label="Bin size or enzyme" help="Restriction enzyme or 'mnase' if a string, or chunk size (i.e. resolution) if a number. Can also be multiple comma-separated enzymes."/>
        <param argument="--filter" type="boolean" truevalue="--filter" falsevalue="" label="Filters out spurious 3C events, such as self religations or undigested fragments" help="This is only really useful at very fine resolutions (1-2kb) and not needed most of the time. This option is only meaningful when --enzyme is given a restriction enzyme and not a bin size."/>
        <param argument="--mapping" type="select" label="Parameter of mapping" help="'normal': Directly map reads without any process. 'iterative': Map reads iteratively using iteralign, by truncating reads to 20bp and then repeatedly extending to align them. 'cutsite': Cut reads at the religation sites of the given enzyme using cutsite, create new pairs of reads and then align them ; enzyme is required">
            <option value="normal" selected="true">normal</option>
            <option value="iterative">iterative</option>
            <option value="cutsite">cutsite</option>
        </param>
        <!-- The selected matrix format decides which of the three contact-matrix outputs is created. -->
        <param argument="--matfmt" type="select" label="Format of the output sparse matrix" help="Available formats are bg2 (bedgraph2d), graal (graal-compatible plain text COO format) and cool, a binary format that is probably the most appropriate for large genomes.">
            <option value="bg2">bg2</option>
            <option value="cool">cool</option>
            <option value="graal" selected="true">graal</option>
        </param>
        <param argument="--quality-min" type="integer" value="30" label="Minimum mapping quality for selecting contacts"/>
        <param argument="--size" type="integer" value="0" label="Minimum size threshold to consider contigs. Keep all contigs by default."/>
    </inputs>
    <outputs>
        <!-- Exactly one contact-matrix output is kept, selected by the matfmt filter. -->
        <data name="abs_fragments_contacts_weighted_graal" from_work_dir="./results/abs_fragments_contacts_weighted.txt" format="tabular" label="${tool.name} on ${on_string}: Contact matrix (graal)">
            <filter>matfmt == "graal"</filter>
        </data>
        <data name="abs_fragments_contacts_weighted_bg2" from_work_dir="./results/abs_fragments_contacts_weighted.bg2" format="tabular" label="${tool.name} on ${on_string}: Contact matrix (bg2)">
            <filter>matfmt == "bg2"</filter>
        </data>
        <data name="abs_fragments_contacts_weighted_cool" from_work_dir="./results/abs_fragments_contacts_weighted.cool" format="cool" label="${tool.name} on ${on_string}: Contact matrix (cool)">
            <filter>matfmt == "cool"</filter>
        </data>
        <data name="fragments_list" from_work_dir="./results/fragments_list.txt" format="tabular" label="${tool.name} on ${on_string}: Fragment list"/>
        <data name="info_contigs" from_work_dir="./results/info_contigs.txt" format="tabular" label="${tool.name} on ${on_string}: Contig info"/>
    </outputs>
    <tests>
        <!-- graal format (the default matfmt), reads given as two separate datasets -->
        <test expect_num_outputs="3">
            <param name="genome" value="seq.fa.gz" />
            <param name="paired_cond|paired_select" value="separate"/>
            <param name="paired_cond|forward_reads" value="sample.reads_for.fastq.gz" />
            <param name="paired_cond|reverse_reads" value="sample.reads_rev.fastq.gz" />
            <output name="abs_fragments_contacts_weighted_graal">
                <assert_contents>
                    <has_n_lines n="77"/>
                    <has_n_columns n="3"/>
                </assert_contents>
            </output>
            <output name="fragments_list">
                <assert_contents>
                    <has_n_lines n="17"/>
                    <has_n_columns n="6"/>
                </assert_contents>
            </output>
            <output name="info_contigs" file="info_contigs.txt"/>
            <assert_stderr>
                <has_text text="Contact map generated" />
            </assert_stderr>
        </test>
        <!-- bg2 format -->
        <test expect_num_outputs="3">
            <param name="genome" value="seq.fa.gz" />
            <param name="paired_cond|paired_select" value="separate"/>
            <param name="paired_cond|forward_reads" value="sample.reads_for.fastq.gz" />
            <param name="paired_cond|reverse_reads" value="sample.reads_rev.fastq.gz" />
            <param name="matfmt" value="bg2"/>
            <output name="abs_fragments_contacts_weighted_bg2">
                <assert_contents>
                    <has_n_lines n="76"/>
                    <has_n_columns n="7"/>
                </assert_contents>
            </output>
            <output name="fragments_list">
                <assert_contents>
                    <has_n_lines n="17"/>
                    <has_n_columns n="6"/>
                </assert_contents>
            </output>
            <output name="info_contigs" file="info_contigs.txt"/>
            <assert_stderr>
                <has_text text="Contact map generated" />
            </assert_stderr>
        </test>
        <!-- cool format -->
        <test expect_num_outputs="3">
            <param name="genome" value="seq.fa.gz" />
            <param name="paired_cond|paired_select" value="separate"/>
            <param name="paired_cond|forward_reads" value="sample.reads_for.fastq.gz" />
            <param name="paired_cond|reverse_reads" value="sample.reads_rev.fastq.gz" />
            <param name="matfmt" value="cool"/>
            <output name="abs_fragments_contacts_weighted_cool">
                <assert_contents>
                    <has_h5_keys keys="bins,chroms,indexes,pixels"/>
                </assert_contents>
            </output>
            <output name="fragments_list">
                <assert_contents>
                    <has_n_lines n="17"/>
                    <has_n_columns n="6"/>
                </assert_contents>
            </output>
            <output name="info_contigs" file="info_contigs.txt"/>
            <assert_stderr>
                <has_text text="Contact map generated" />
            </assert_stderr>
        </test>
    </tests>
    <help><![CDATA[
hicstuff is a toolkit to generate and manipulate Hi-C matrices.

The "hicstuff full pipeline" tool generates a Hi-C contact matrix.
Output files can be used with instaGRAAL downstream.

-----------
Input files
-----------

* the fasta genome file
* forward reads
* reverse reads

Forward and reverse reads can be given either as two separate datasets or as a single paired dataset collection.

------------
Output files
------------

* Contact matrix: Sparse matrix file with 3 columns: the row, column and value of each nonzero pixel. The first row contains the shape and total number of nonzero pixels in the matrix.
* Fragment list: Contains genomic coordinates of the matrix bins (row/columns).
* Contig info: Contains chromosome names, their lengths and number of bins.
    ]]></help>
    <expand macro="citations" />
</tool>