Mercurial > repos > iuc > yahs
diff yahs.xml @ 0:d87433f2a54d draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/yahs commit 92823b21835509dde34557b6922e1b9a39311069
author | iuc |
---|---|
date | Wed, 29 Jun 2022 13:06:23 +0000 |
parents | |
children | a3a92e30a727 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/yahs.xml Wed Jun 29 13:06:23 2022 +0000 @@ -0,0 +1,227 @@ +<tool id="yahs" name="YAHS" version="@VERSION@+galaxy@VERSION_SUFFIX@" profile="20.01"> + <description>yet another HI-C scaffolding tool</description> + <macros> + <token name="@VERSION@">1.1a.2</token> + <token name="@VERSION_SUFFIX@">0</token> + </macros> + <requirements> + <requirement type="package" version="@VERSION@">yahs</requirement> + <requirement type="package" version="1.11">samtools</requirement> + <requirement type="package" version="3.9">python</requirement> + </requirements> + <command detect_errors="exit_code"><![CDATA[ + #if $function.function_select == "yahs": + ln -s '$function.fasta' input.fasta && + #if $function.bfile.ext == "bam": + ln -s '$function.bfile' input.bam && + #else if $function.bfile.ext == "bed": + ln -s '$function.bfile' input.bed && + #end if + #if $function.agp: + ln -s '$function.agp' input.agp && + #end if + samtools faidx input.fasta && + mkdir initial_break agp_out agp_break final_outs && + yahs input.fasta + #if $function.bfile.ext == "bam": + input.bam + #else if $function.bfile.ext == "bed": + input.bed + #end if + #if $agp: + -a input.agp + #end if + #if $function.res: + -r $function.res + #end if + #if $function.enzyme_conditional.enzyme_options == 'preconfigured': + #if $function.enzyme_conditional.preconfigured_enzymes == 'dovetail' + -e 'GATC' + #else if $function.enzyme_conditional.preconfigured_enzymes == 'arima1' + -e 'GATC,GANTC' + #else + -e 'GATC,GANTC,CTNAG,TTAA' + #end if + #else if $function.enzyme_conditional.enzyme_options == 'specific': + -e '${function.enzyme_conditional.manual_enzyme}' + #end if + #if $function.length: + -l $function.length + #end if + #if $function.quality: + -q $function.quality + #end if + $function.no_contig_ec + $function.no_scaffold_ec + -o yahs_out && + ls -lah && + python '$__tool_directory__/move_files.py' + && ls -lah ./* + #else: + ln -s '$function.fasta' input.fasta && + ln -s '$function.agp' input.agp && + agp_to_fasta input.agp input.fasta + #if $function.length: + -l $function.length + #end if + -o output.fasta + #end if + ]]></command> + <inputs> + <conditional name="function"> + <param name="function_select" type="select" label="Which function would you like to run?"> + <option value="yahs">YAHS</option> + <option value="agp_to_fasta">AGP to fasta</option> + </param> + <when value="yahs"> + <param name="fasta" type="data" format="fasta" label="Input contig sequences"/> + <param name="bfile" type="data" format="bam,bed" label="Alignment file of Hi-C reads to contigs"/> + <param name="agp" argument="-a" type="data" format="agp" optional="true" label="Input AGP file (for rescaffolding)" + help="You can specify a AGP format file to ask YaHS to do scaffolding with the scaffolds in the AGP file as the start point"/> + <param name="res" argument="-r" type="text" label="Resolutions" optional="true" + help="Comma separated, ascending list of range of resolutions with no spaces. Ex. 50000,100000,200000,500000,1000000,2000000,5000000. By default and the upper limit is automatically adjusted with the genome size"/> + <conditional name="enzyme_conditional"> + <param name="enzyme_options" type="select" label="Restriction enzyme used in Hi-C experiment" help="Hi-C experiments can use different restriction enzymes. + The enzyme frequency in contigs is used to normalize the Hi-C interaction frequency. Note that you need to specify the actual + sequence of the cutting site for a restriction enzyme and not the enzyme name. You can also specify DNASE as an enzyme if you + use an enzyme-free prep, e.g. Omin-C."> + <option value="not_especified">Not especified</option> + <option value="preconfigured">Preconfigured restriction enzymes</option> + <option value="specific">Enter a specific sequence</option> + </param> + <when value="not_especified"/> + <when value="preconfigured"> + <param name="preconfigured_enzymes" argument="-e" type="select" label="Preconfigured enzymes"> + <option value="dovetail">Dovetail Chicago, Dovetail Hi-C or Phase: GATC</option> + <option value="arima1">Arima Hi-C 1.0: GATC, GANTC</option> + <option value="arima2">Arima Hi-C 2.0: GATC, GANTC, CTNAG, TTAA</option> + </param> + </when> + <when value="specific"> + <param name="manual_enzyme" argument="-e" type="text" label="Restriction enzyme sequence(s)" + help="Restriction enzyme sequence. If multiple were used, include all as a comma separated list without spaces (ex. 'GATC,AAGCTT')."> + <validator type="expression" message="Only alphabetical letters and the comma can be used in to define restriction enzym sequences.">value.replace(',', '').isalpha()</validator> + </param> + </when> + </conditional> + <param name="length" argument="-l" type="integer" label="Minimum contig length included for scaffolding" min="1" optional="true"/> + <param name="quality" argument="-q" type="integer" label="Minimum read mapping quality (for BAM input only)" min="1" optional="true"/> + <param argument="--no-contig-ec" type="boolean" label="Skip assembly error correction step" truevalue="--no-contig-ec" falsevalue="" help="Automatically active if AGP output is set"/> + <param argument="--no-scaffold-ec" type="boolean" label="Skip assembly/scaffolding error check each round" truevalue="--no-scaffold-ec" falsevalue="" help="Will also prevent any break.agp output files"/> + </when> + <when value="agp_to_fasta"> + <param name="agp" type="data" format="agp" label="Input AGP file"/> + <param name="fasta" type="data" format="fasta" label="Contig fasta file"/> + <param name="length" type='integer' label="Output fasta line length" value="60" min="1"/> + </when> + </conditional> + </inputs> + <outputs> + <!-- INITIAL ROUNDS --> + <collection name="initial_agp_break" type="list" label="${tool.name} on ${on_string}: AGP initial break files"> + <filter>function['function_select'] == "yahs"</filter> + <discover_datasets pattern="__name_and_ext__" directory="initial_break" ext="agp"/> + </collection> + <collection name="agp_break" type="list" label="${tool.name} on ${on_string}: AGP break files"> + <filter> function['function_select'] == "yahs"</filter> + <discover_datasets pattern="__name_and_ext__" directory="agp_break" ext="agp"/> + </collection> + <collection name="agp_out" type="list" label="${tool.name} on ${on_string}: AGP scaffolding results files"> + <filter>function['function_select'] == "yahs"</filter> + <discover_datasets pattern="__name_and_ext__" directory="agp_out" ext="agp"/> + </collection> + <!-- FINAL FILES --> + <data format="agp" name="final_agp_out" label="${tool.name} on ${on_string}: Final scaffolds agp output" from_work_dir="final_outs/yahs_out_scaffolds_final.agp"> + <filter>function['function_select'] == "yahs"</filter> + </data> + <data format="fasta" name="final_fasta_out" label="${tool.name} on ${on_string}: Final scaffolds fasta output" from_work_dir="final_outs/yahs_out_scaffolds_final.fa"> + <filter>function['function_select'] == "yahs"</filter> + </data> + <data format="fasta" name="fasta_from_agp" label="${tool.name} on ${on_string}: agp to fasta" from_work_dir="output.fasta"> + <filter>function['function_select'] == "agp_to_fasta"</filter> + </data> + </outputs> + <tests> + <test expect_num_outputs="5"> + <conditional name="function"> + <param name="function_select" value="yahs"/> + <param name="fasta" value="test.fasta"/> + <param name="bfile" value="test.bed"/> + <param name="agp" value="test.agp"/> + <param name="res" value="50000,100000,150000,2000000,1000000"/> + </conditional> + <output name="final_agp_out" file="test_01_scaffolds_final.agp" ftype="agp"/> + <output name="final_fasta_out" file="test_01_scaffolds_final.fa" ftype="fasta"/> + <!-- COMMAND: yahs test.fasta test.bed -r 50000,100000,150000,2000000,1000000 -a test.agp -o test_1 --> + </test> + <test expect_num_outputs="5"> + <conditional name="function"> + <param name="function_select" value="yahs"/> + <param name="fasta" value="test.fasta"/> + <param name="bfile" value="test.bed"/> + <param name="no_contig_ec" value="--no-contig-ec"/> + <param name="no_scaffold_ec" value="--no-scaffold-ec"/> + </conditional> + <output name="final_agp_out" file="test_02_scaffolds_final.agp" ftype="agp"/> + <output name="final_fasta_out" file="test_02_scaffolds_final.fa" ftype="fasta"/> + <output_collection name="agp_break"> + <element name="yahs_out_no_break" file="test_02_no_break.agp" ftype="agp"/> + </output_collection> + <!-- COMMAND: yahs test.fasta test.bed -\-no-contig-ec -\-no-scaffold-ec -o test_2 --> + </test> + <test expect_num_outputs="5"> + <conditional name="function"> + <param name="function_select" value="yahs"/> + <param name="fasta" value="test.fasta"/> + <param name="bfile" value="test.bam"/> + <param name="qual" value="10"/> + <param name="length" value="20"/> + <conditional name="enzyme_conditional"> + <param name="enzyme_options" value="specific"/> + <param name="manual_enzyme" value="GATC,GANT"/> + </conditional> + </conditional> + <output name="final_agp_out" file="test_03_scaffolds_final.agp" ftype="agp"/> + <output name="final_fasta_out" file="test_03_scaffolds_final.fa" ftype="fasta"/> + <output_collection name="initial_agp_break"> + <element name="yahs_out_inital_break_01" file="test_03_initial_break.agp" ftype="agp"/> + </output_collection> + <!-- COMMAND: yahs test.fasta test.bed -q 10 -l 20 -e 'GATC,GANT' -o test_3 --> + </test> + <test expect_num_outputs="5"> + <conditional name="function"> + <param name="function_select" value="yahs"/> + <param name="fasta" value="test.fasta"/> + <param name="bfile" value="test.bam"/> + <param name="qual" value="10"/> + <param name="length" value="20"/> + <conditional name="enzyme_conditional"> + <param name="enzyme_options" value="preconfigured"/> + <param name="preconfigured_enzymes" value="arima1"/> + </conditional> + </conditional> + <output name="final_agp_out" file="test_03_scaffolds_final.agp" ftype="agp"/> + <output name="final_fasta_out" file="test_03_scaffolds_final.fa" ftype="fasta"/> + <output_collection name="initial_agp_break"> + <element name="yahs_out_inital_break_01" file="test_04_initial_break.agp" ftype="agp"/> + </output_collection> + <!-- COMMAND: yahs test.fasta test.bed -q 10 -l 20 -e 'GATC,GANT' -o test_3 --> + </test> + <test expect_num_outputs="1"> + <conditional name="function"> + <param name="function_select" value="agp_to_fasta"/> + <param name="fasta" value="test.fasta"/> + <param name="agp" value="test.agp"/> + <param name="length" value="20"/> + </conditional> + <output name="fasta_from_agp" file="test_05.fasta" ftype="fasta"/> + <!-- COMMAND: agp_to_fasta teast.fasta test.agp -l 20 -o test_4 --> + </test> + </tests> + <help><![CDATA[ + YaHS is scaffolding tool using Hi-C data. It relies on a new algothrim for contig joining detection which considers the topological distribution of Hi-C signals aiming to distingush real interaction signals from mapping nosies. YaHS has been tested in a wide range of genome assemblies. Compared to other Hi-C scaffolding tools, it usually generates more contiguous scaffolds - especially with a higher N90 and L90 statistics. It is also super fast - takes less than 5 minutes to reconstruct the human genome from an assembly of 5,483 contigs with ~45X Hi-C data. + ]]></help> + <citations> + <citation type="doi">10.5281/zenodo.5848772</citation> + </citations> +</tool>