Mercurial > repos > iuc > yahs
view yahs.xml @ 3:39495e107274 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/yahs commit 968382ccb84765d3e8a6e67c43f095a8cc5ab8f5
author | iuc |
---|---|
date | Wed, 28 Sep 2022 09:41:02 +0000 |
parents | fc925f53cae7 |
children | 425e63adcc5d |
line wrap: on
line source
<tool id="yahs" name="YAHS" version="@VERSION@+galaxy@VERSION_SUFFIX@" profile="20.01">
    <!--
        Galaxy wrapper for YaHS. A single tool exposes two functions, selected
        through the "function" conditional:
          * yahs         : scaffold contigs with Hi-C alignments (BAM or BED)
          * agp_to_fasta : materialise a FASTA file from an AGP plus contigs
    -->
    <description>yet another HI-C scaffolding tool</description>
    <macros>
        <token name="@VERSION@">1.2a.2</token>
        <token name="@VERSION_SUFFIX@">0</token>
    </macros>
    <requirements>
        <requirement type="package" version="@VERSION@">yahs</requirement>
        <requirement type="package" version="1.11">samtools</requirement>
        <requirement type="package" version="3.9">python</requirement>
    </requirements>
    <command detect_errors="exit_code"><![CDATA[
        #if $function.function_select == "yahs":
            ## Stage the inputs under fixed names so the rest of the command is independent
            ## of the Galaxy dataset paths.
            ln -s '$function.fasta' input.fasta &&
            ## is_of_type also matches datatype subclasses, which a plain comparison of
            ## .ext against "bam" or "bed" would skip, leaving yahs without its alignment input.
            #if $function.bfile.is_of_type("bam"):
                #set $hic_input = 'input.bam'
            #else:
                #set $hic_input = 'input.bed'
            #end if
            ln -s '$function.bfile' $hic_input &&
            #if $function.agp:
                ln -s '$function.agp' input.agp &&
            #end if
            samtools faidx input.fasta &&
            ## These directories are read by the outputs section (discover_datasets and
            ## from_work_dir); presumably move_files.py sorts the yahs results into them.
            mkdir initial_break agp_out agp_break final_outs &&
            yahs --no-mem-check
                input.fasta
                $hic_input
                #if $function.agp:
                    -a input.agp
                #end if
                #if $function.res:
                    -r '$function.res'
                #end if
                #if $function.enzyme_conditional.enzyme_options == 'preconfigured':
                    #if $function.enzyme_conditional.preconfigured_enzymes == 'dovetail':
                        -e 'GATC'
                    #else if $function.enzyme_conditional.preconfigured_enzymes == 'arima1':
                        -e 'GATC,GANTC'
                    #else:
                        -e 'GATC,GANTC,CTNAG,TTAA'
                    #end if
                #else if $function.enzyme_conditional.enzyme_options == 'specific':
                    -e '${function.enzyme_conditional.manual_enzyme}'
                #end if
                #if $function.length:
                    -l $function.length
                #end if
                #if $function.quality:
                    -q $function.quality
                #end if
                $function.no_contig_ec
                $function.no_scaffold_ec
                -o yahs_out
                #if $log_out:
                    2> output.log
                #end if
            && ls -lah
            && python '$__tool_directory__/move_files.py'
            && ls -lah ./*
        #else:
            ln -s '$function.fasta' input.fasta &&
            ln -s '$function.agp' input.agp &&
            agp_to_fasta input.agp input.fasta
                #if $function.length:
                    -l $function.length
                #end if
                -o output.fasta
                #if $log_out:
                    2> output.log
                #end if
        #end if
    ]]></command>
    <inputs>
        <conditional name="function">
            <param name="function_select" type="select" label="Which function would you like to run?">
                <option value="yahs">YAHS</option>
                <option value="agp_to_fasta">AGP to fasta</option>
            </param>
            <when value="yahs">
                <param name="fasta" type="data" format="fasta" label="Input contig sequences"/>
                <param name="bfile" type="data" format="bam,bed" label="Alignment file of Hi-C reads to contigs"/>
                <param name="agp" argument="-a" type="data" format="agp" optional="true" label="Input AGP file (for rescaffolding)" help="You can specify an AGP format file to ask YaHS to do scaffolding with the scaffolds in the AGP file as the start point"/>
                <param name="res" argument="-r" type="text" label="Resolutions" optional="true" help="Comma separated, ascending list of range of resolutions with no spaces. Ex. 50000,100000,200000,500000,1000000,2000000,5000000. By default, the upper limit is automatically adjusted to the genome size"/>
                <conditional name="enzyme_conditional">
                    <param name="enzyme_options" type="select" label="Restriction enzyme used in Hi-C experiment" help="Hi-C experiments can use different restriction enzymes. The enzyme frequency in contigs is used to normalize the Hi-C interaction frequency. Note that you need to specify the actual sequence of the cutting site for a restriction enzyme and not the enzyme name. You can also specify DNASE as an enzyme if you use an enzyme-free prep, e.g. Omni-C.">
                        <option value="not_specified">Not specified</option>
                        <option value="preconfigured">Preconfigured restriction enzymes</option>
                        <option value="specific">Enter a specific sequence</option>
                    </param>
                    <when value="not_specified"/>
                    <when value="preconfigured">
                        <param name="preconfigured_enzymes" argument="-e" type="select" label="Preconfigured enzymes">
                            <option value="dovetail">Dovetail Chicago, Dovetail Hi-C or Phase: GATC</option>
                            <option value="arima1">Arima Hi-C 1.0: GATC, GANTC</option>
                            <option value="arima2">Arima Hi-C 2.0: GATC, GANTC, CTNAG, TTAA</option>
                        </param>
                    </when>
                    <when value="specific">
                        <param name="manual_enzyme" argument="-e" type="text" label="Restriction enzyme sequence(s)" help="Restriction enzyme sequence. If multiple were used, include all as a comma separated list without spaces (ex. 'GATC,AAGCTT').">
                            <validator type="expression" message="Only alphabetical letters and the comma can be used to define restriction enzyme sequences.">value.replace(',', '').isalpha()</validator>
                        </param>
                    </when>
                </conditional>
                <param name="length" argument="-l" type="integer" label="Minimum contig length included for scaffolding" min="1" optional="true"/>
                <param name="quality" argument="-q" type="integer" label="Minimum read mapping quality (for BAM input only)" min="1" optional="true"/>
                <param argument="--no-contig-ec" type="boolean" label="Skip assembly error correction step" truevalue="--no-contig-ec" falsevalue="" help="Automatically active if AGP output is set"/>
                <param argument="--no-scaffold-ec" type="boolean" label="Skip assembly/scaffolding error check each round" truevalue="--no-scaffold-ec" falsevalue="" help="Will also prevent any break.agp output files"/>
            </when>
            <when value="agp_to_fasta">
                <param name="agp" type="data" format="agp" label="Input AGP file"/>
                <param name="fasta" type="data" format="fasta" label="Contig fasta file"/>
                <param name="length" type="integer" label="Output fasta line length" value="60" min="1"/>
            </when>
        </conditional>
        <!-- When enabled, stderr of yahs / agp_to_fasta is redirected to output.log and returned as a dataset. -->
        <param name="log_out" type="boolean" label="Output log file?" truevalue="yes" falsevalue="no"/>
    </inputs>
    <outputs>
        <!-- INITIAL ROUNDS -->
        <collection name="initial_agp_break" type="list" label="${tool.name} on ${on_string}: AGP initial break files">
            <filter>function['function_select'] == "yahs"</filter>
            <discover_datasets pattern="__name_and_ext__" directory="initial_break" ext="agp"/>
        </collection>
        <collection name="agp_break" type="list" label="${tool.name} on ${on_string}: AGP break files">
            <filter>function['function_select'] == "yahs"</filter>
            <discover_datasets pattern="__name_and_ext__" directory="agp_break" ext="agp"/>
        </collection>
        <collection name="agp_out" type="list" label="${tool.name} on ${on_string}: AGP scaffolding results files">
            <filter>function['function_select'] == "yahs"</filter>
            <discover_datasets pattern="__name_and_ext__" directory="agp_out" ext="agp"/>
        </collection>
        <!-- FINAL FILES -->
        <data format="agp" name="final_agp_out" label="${tool.name} on ${on_string}: Final scaffolds agp output" from_work_dir="final_outs/yahs_out_scaffolds_final.agp">
            <filter>function['function_select'] == "yahs"</filter>
        </data>
        <data format="fasta" name="final_fasta_out" label="${tool.name} on ${on_string}: Final scaffolds fasta output" from_work_dir="final_outs/yahs_out_scaffolds_final.fa">
            <filter>function['function_select'] == "yahs"</filter>
        </data>
        <data format="fasta" name="fasta_from_agp" label="${tool.name} on ${on_string}: agp to fasta" from_work_dir="output.fasta">
            <filter>function['function_select'] == "agp_to_fasta"</filter>
        </data>
        <data format="txt" name="log_file" from_work_dir="output.log" label="${tool.name} ${on_string}, log file">
            <filter>log_out</filter>
        </data>
    </outputs>
    <tests>
        <!-- TEST 1 -->
        <test expect_num_outputs="5">
            <conditional name="function">
                <param name="function_select" value="yahs"/>
                <param name="fasta" value="test.fasta"/>
                <param name="bfile" value="test.bed"/>
                <param name="agp" value="test.agp"/>
                <param name="res" value="50000,100000,150000,2000000,1000000"/>
            </conditional>
            <output name="final_agp_out" file="test_01_scaffolds_final.agp" ftype="agp"/>
            <output name="final_fasta_out" file="test_01_scaffolds_final.fa" ftype="fasta"/>
            <!-- COMMAND: yahs test.fasta test.bed -r 50000,100000,150000,2000000,1000000 -a test.agp -o test_1 -->
        </test>
        <!-- TEST 2 -->
        <test expect_num_outputs="5">
            <conditional name="function">
                <param name="function_select" value="yahs"/>
                <param name="fasta" value="test.fasta"/>
                <param name="bfile" value="test.bed"/>
                <param name="no_contig_ec" value="--no-contig-ec"/>
                <param name="no_scaffold_ec" value="--no-scaffold-ec"/>
            </conditional>
            <output name="final_agp_out" file="test_02_scaffolds_final.agp" ftype="agp"/>
            <output name="final_fasta_out" file="test_02_scaffolds_final.fa" ftype="fasta"/>
            <output_collection name="agp_break">
                <element name="yahs_out_no_break" file="test_02_no_break.agp" ftype="agp"/>
            </output_collection>
            <!-- COMMAND: yahs test.fasta test.bed -\-no-contig-ec -\-no-scaffold-ec -o test_2 -->
        </test>
        <!-- TEST 3 -->
        <test expect_num_outputs="6">
            <conditional name="function">
                <param name="function_select" value="yahs"/>
                <param name="fasta" value="test2.fasta"/>
                <param name="bfile" value="test2.bam"/>
                <param name="res" value="1000,2000,5000,10000,20000,50000,100000,200000,500000"/>
                <conditional name="enzyme_conditional">
                    <param name="enzyme_options" value="not_specified"/>
                </conditional>
            </conditional>
            <param name="log_out" value="yes"/>
            <output name="log_file" ftype="txt">
                <assert_contents>
                    <has_text text="[I::dump_links_from_bam_file] dumped 6399 read pairs from 17675 records: 6297 intra links + 102 inter links" />
                </assert_contents>
            </output>
            <!-- COMMAND: yahs test.fasta test.bam -r 1000,2000,5000,10000,20000,50000,100000,200000,500000 -o test_3 -->
        </test>
        <!-- TEST 4 -->
        <test expect_num_outputs="5">
            <conditional name="function">
                <param name="function_select" value="yahs"/>
                <param name="fasta" value="test2.fasta"/>
                <param name="bfile" value="test2.bed"/>
                <!-- The input is named "quality"; a param called "qual" matches no input and is never applied. -->
                <param name="quality" value="10"/>
                <param name="length" value="20"/>
                <conditional name="enzyme_conditional">
                    <param name="enzyme_options" value="not_specified"/>
                </conditional>
            </conditional>
            <output name="final_agp_out" file="test_04_scaffolds_final.agp" ftype="agp"/>
            <output name="final_fasta_out" file="test_04_scaffolds_final.fa" ftype="fasta"/>
            <output_collection name="initial_agp_break">
                <element name="yahs_out_inital_break_01" file="test_04_initial_break.agp" ftype="agp"/>
            </output_collection>
        </test>
        <!-- TEST 5 -->
        <test expect_num_outputs="1">
            <conditional name="function">
                <param name="function_select" value="agp_to_fasta"/>
                <param name="fasta" value="test.fasta"/>
                <param name="agp" value="test.agp"/>
                <param name="length" value="20"/>
            </conditional>
            <output name="fasta_from_agp" file="test_05.fasta" ftype="fasta"/>
            <!-- COMMAND: agp_to_fasta test.fasta test.agp -l 20 -o test_4 -->
        </test>
        <!-- TEST 6 -->
        <test expect_num_outputs="6">
            <conditional name="function">
                <param name="function_select" value="yahs"/>
                <param name="fasta" value="test.fasta"/>
                <param name="bfile" value="test.bed"/>
                <param name="agp" value="test.agp"/>
                <param name="res" value="50000,100000,150000,2000000,1000000"/>
            </conditional>
            <param name="log_out" value="yes"/>
            <output name="final_agp_out" file="test_01_scaffolds_final.agp" ftype="agp"/>
            <output name="final_fasta_out" file="test_01_scaffolds_final.fa" ftype="fasta"/>
            <output name="log_file" ftype="txt">
                <assert_contents>
                    <has_text text="[I::main] CMD: yahs"/>
                </assert_contents>
            </output>
            <!-- COMMAND: yahs test.fasta test.bed -r 50000,100000,150000,2000000,1000000 -a test.agp -o test_1 -->
        </test>
    </tests>
    <help><![CDATA[
YaHS is a scaffolding tool using Hi-C data. It relies on a new algorithm for contig joining detection which considers the topological distribution of Hi-C signals aiming to distinguish real interaction signals from mapping noise. YaHS has been tested in a wide range of genome assemblies. Compared to other Hi-C scaffolding tools, it usually generates more contiguous scaffolds - especially with a higher N90 and L90 statistics. It is also super fast - takes less than 5 minutes to reconstruct the human genome from an assembly of 5,483 contigs with ~45X Hi-C data.
    ]]></help>
    <citations>
        <citation type="doi">10.5281/zenodo.5848772</citation>
    </citations>
</tool>