Mercurial > repos > iuc > salsa
comparison salsa2.xml @ 4:9a22227bb6d0 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/salsa2 commit 41b8952cd0739db0b5eb62d5dd9ccc7bc0f1925f
author | iuc |
---|---|
date | Thu, 19 May 2022 14:17:24 +0000 |
parents | f77f7a7f3b83 |
children | 58a870ef434c |
comparison
equal
deleted
inserted
replaced
3:f77f7a7f3b83 | 4:9a22227bb6d0 |
---|---|
1 <tool id="salsa" name="SALSA" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="20.01"> | 1 <tool id="salsa" name="SALSA" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="20.01"> |
2 <description>scaffold long read assemblies with Hi-C</description> | 2 <description>scaffold long read assemblies with Hi-C</description> |
3 <macros> | |
4 <token name="@TOOL_VERSION@">2.3</token> | |
5 <token name="@VERSION_SUFFIX@">3</token> | |
6 </macros> | |
3 <xrefs> | 7 <xrefs> |
4 <xref type="bio.tools">SALSA</xref> | 8 <xref type="bio.tools">SALSA</xref> |
5 </xrefs> | 9 </xrefs> |
6 <macros> | |
7 <token name="@TOOL_VERSION@">2.3</token> | |
8 <token name="@VERSION_SUFFIX@">2</token> | |
9 </macros> | |
10 <requirements> | 10 <requirements> |
11 <requirement type="package" version="@TOOL_VERSION@">salsa2</requirement> | 11 <requirement type="package" version="@TOOL_VERSION@">salsa2</requirement> |
12 <requirement type="package" version="1.11">samtools</requirement> | 12 <requirement type="package" version="1.11">samtools</requirement> |
13 </requirements> | 13 </requirements> |
14 <command detect_errors="exit_code"><![CDATA[ | 14 <command detect_errors="exit_code"><![CDATA[ |
37 -g '$gfa_file' | 37 -g '$gfa_file' |
38 #end if | 38 #end if |
39 #if $iter: | 39 #if $iter: |
40 -i '$iter' | 40 -i '$iter' |
41 #end if | 41 #end if |
42 #if $gensize: | |
43 -s '$gensize' | |
44 #end if | |
45 -m '$clean' | |
42 -o ./out | 46 -o ./out |
43 ]]></command> | 47 ]]></command> |
44 <inputs> | 48 <inputs> |
45 <param name="fasta_in" type="data" format="fasta" label="Initial assembly file" help="Headers must not contain ':'."/> | 49 <param name="fasta_in" type="data" format="fasta" label="Initial assembly file" help="Headers must not contain ':'."/> |
46 <param name="bed_file" type="data" format="bed" label="Bed alignment" help="To start scaffolding with SALSA, reads need to be mapped to the assembly. | 50 <param name="bed_file" type="data" format="bed" label="Bed alignment" help="To start scaffolding with SALSA, reads need to be mapped to the assembly. |
47 BWA or BOWTIE2 are recommended. SALSA requires a bed file as the input. The alignment bam file can be converted using the bamToBed command from | 51 BWA or BOWTIE2 are recommended. SALSA requires a bed file as the input. The alignment bam file can be converted using the bamToBed command from |
48 the Bedtools package."/> | 52 the Bedtools package."/> |
49 <param name="cutoff" argument="-c" type="integer" min="1" label="Cutoff" optional="true" help="Minimum contig length to scaffold"/> | 53 <param name="cutoff" argument="-c" type="integer" min="1" label="Cutoff" optional="true" help="Minimum contig length to scaffold"/> |
50 <param name="gfa_file" argument="-g" type="data" format="gfa1,gfa2" optional="true" label="Sequence graphs" | 54 <param name="gfa_file" argument="-g" type="data" format="gfa1,gfa2" optional="true" label="Sequence graphs" |
51 help="An assembly graph can be optionally provided to guide the scaffolding, potentially reducing the scaffolding errors"/> | 55 help="An assembly graph can be optionally provided to guide the scaffolding, potentially reducing the scaffolding errors"/> |
52 <conditional name="enzyme_conditional"> | 56 <conditional name="enzyme_conditional"> |
53 <param name="enzyme_options" type="select" label="Enzyme selection" help="Hi-C experiments can use different restriction enzymes. | 57 <param name="enzyme_options" type="select" label="Enzyme selection" help="Hi-C experiments can use different restriction enzymes. |
54 The enzyme frequency in contigs is used to normalize the Hi-C interaction frequency. Note that you need to specify the actual | 58 The enzyme frequency in contigs is used to normalize the Hi-C interaction frequency. Note that you need to specify the actual |
55 sequence of the cutting site for a restriction enzyme and not the enzyme name. You can also specify DNASE as an enzyme if you | 59 sequence of the cutting site for a restriction enzyme and not the enzyme name. You can also specify DNASE as an enzyme if you |
56 use an enzyme-free prep, e.g. Omni-C."> | 60 use an enzyme-free prep, e.g. Omni-C."> |
57 <option value="preconfigured">Preconfigured restriction enzymes</option> | 61 <option value="preconfigured">Preconfigured restriction enzymes</option> |
58 <option value="specific">Enter a specific sequence</option> | 62 <option value="specific">Enter a specific sequence</option> |
59 </param> | 63 </param> |
60 <when value="preconfigured"> | 64 <when value="preconfigured"> |
69 help="Restriction enzyme sequence. If multiple were used, include all as a comma separated list without spaces (ex. 'GATC,AAGCTT')."> | 73 help="Restriction enzyme sequence. If multiple were used, include all as a comma separated list without spaces (ex. 'GATC,AAGCTT')."> |
70 <validator type="expression" message="Only alphabetical letters and the comma can be used to define restriction enzyme sequences.">value.replace(',', '').isalpha()</validator> | 74 <validator type="expression" message="Only alphabetical letters and the comma can be used to define restriction enzyme sequences.">value.replace(',', '').isalpha()</validator> |
71 </param> | 75 </param> |
72 </when> | 76 </when> |
73 </conditional> | 77 </conditional> |
74 <param name="iter" argument="-i" type="integer" min="0" max="20" label="Iterations" optional="true" | 78 <param name="iter" argument="-i" type="integer" min="0" max="20" label="Iterations" optional="true" |
75 help="SALSA will scaffold through sequential iterations. The default number of iterations is 3. Increasing the number of iterations will | 79 help="SALSA will scaffold through sequential iterations. The default number of iterations is 3. Increasing the number of iterations will |
76 potentially increase the number of joins, however it could also introduce additional misjoins"/> | 80 potentially increase the number of joins, however it could also introduce additional misjoins"/> |
81 <param name="clean" argument="-m" type="boolean" label="Clean Assembly" checked="false" truevalue='yes' falsevalue="no" help="Set this option to 'yes' if you want to find misassemblies in input assembly" /> | |
82 <param name="gensize" argument="-s" type="integer" label="Expected Genome Size" optional="true" help="Expected Genome size of the assembled genome. If not set, Salsa will estimate genome size." /> | |
77 </inputs> | 83 </inputs> |
78 <outputs> | 84 <outputs> |
79 <data name="scaffolds_fasta" format="fasta" from_work_dir="out/scaffolds_FINAL.fasta" label="${tool.name} on ${on_string}: FASTA assembly"/> | 85 <data name="scaffolds_fasta" format="fasta" from_work_dir="out/scaffolds_FINAL.fasta" label="${tool.name} on ${on_string}: FASTA assembly"/> |
80 <data name="scaffolds_agp" format="tabular" from_work_dir="out/scaffolds_FINAL.agp" label="${tool.name} on ${on_string}: agp output"/> | 86 <data name="scaffolds_agp" format="tabular" from_work_dir="out/scaffolds_FINAL.agp" label="${tool.name} on ${on_string}: agp output"/> |
81 </outputs> | 87 </outputs> |
85 <param name="length" value="test.fai"/> | 91 <param name="length" value="test.fai"/> |
86 <param name="bed_file" value="test.bed"/> | 92 <param name="bed_file" value="test.bed"/> |
87 <param name="gfa_file" value="test.gfa1"/> | 93 <param name="gfa_file" value="test.gfa1"/> |
88 <conditional name="enzyme_conditional"> | 94 <conditional name="enzyme_conditional"> |
89 <param name="enzyme_options" value="specific"/> | 95 <param name="enzyme_options" value="specific"/> |
90 <param name="manual_enzyme" value="GATC,GANTC"/> | 96 <param name="manual_enzyme" value="GATC,GANTC"/> |
91 </conditional> | 97 </conditional> |
92 <param name="enzyme" value="GATC,GANTC"/> | 98 <param name="enzyme" value="GATC,GANTC"/> |
93 <param name="cutoff" value="1000"/> | 99 <param name="cutoff" value="1000"/> |
94 <param name="iter" value="3"/> | 100 <param name="iter" value="3"/> |
101 <param name="clean" value="yes"/> | |
95 <output name="scaffolds_fasta" file="out.fasta"/> | 102 <output name="scaffolds_fasta" file="out.fasta"/> |
96 <output name="scaffolds_agp" file="out.agp"/> | 103 <output name="scaffolds_agp" file="out.agp"/> |
97 </test> | 104 </test> |
98 <!--Test manual enzyme--> | 105 <!--Test manual enzyme--> |
99 <test> | 106 <test> |
100 <param name="fasta_in" value="test.fasta"/> | 107 <param name="fasta_in" value="test.fasta"/> |
101 <param name="bed_file" value="test.bed"/> | 108 <param name="bed_file" value="test.bed"/> |
102 <param name="gfa_file" value="test.gfa1"/> | 109 <param name="gfa_file" value="test.gfa1"/> |
103 <conditional name="enzyme_conditional"> | 110 <conditional name="enzyme_conditional"> |
104 <param name="enzyme_options" value="specific"/> | 111 <param name="enzyme_options" value="specific"/> |
105 <param name="manual_enzyme" value="GATC,GANTC"/> | 112 <param name="manual_enzyme" value="GATC,GANTC"/> |
106 </conditional> | 113 </conditional> |
107 <param name="cutoff" value="1000"/> | 114 <param name="cutoff" value="1000"/> |
108 <param name="iter" value="3"/> | 115 <param name="iter" value="3"/> |
116 <param name="clean" value="yes"/> | |
109 <output name="scaffolds_fasta" file="out.fasta"/> | 117 <output name="scaffolds_fasta" file="out.fasta"/> |
110 <output name="scaffolds_agp" file="out.agp"/> | 118 <output name="scaffolds_agp" file="out.agp"/> |
111 </test> | 119 </test> |
112 <!--Test predefined enzyme--> | 120 <!--Test predefined enzyme--> |
113 <test> | 121 <test> |
114 <param name="fasta_in" value="test.fasta"/> | 122 <param name="fasta_in" value="test.fasta"/> |
115 <param name="bed_file" value="test.bed"/> | 123 <param name="bed_file" value="test.bed"/> |
116 <param name="gfa_file" value="test.gfa1"/> | 124 <param name="gfa_file" value="test.gfa1"/> |
117 <conditional name="enzyme_conditional"> | 125 <conditional name="enzyme_conditional"> |
118 <param name="enzyme_options" value="preconfigured"/> | 126 <param name="enzyme_options" value="preconfigured"/> |
119 <param name="preconfigured_enzymes" value="arima1"/> | 127 <param name="preconfigured_enzymes" value="arima1"/> |
120 </conditional> | 128 </conditional> |
121 <param name="cutoff" value="1000"/> | 129 <param name="cutoff" value="1000"/> |
122 <param name="iter" value="3"/> | 130 <param name="iter" value="3"/> |
131 <param name="clean" value="yes"/> | |
123 <output name="scaffolds_fasta" file="out.fasta"/> | 132 <output name="scaffolds_fasta" file="out.fasta"/> |
124 <output name="scaffolds_agp" file="out.agp"/> | 133 <output name="scaffolds_agp" file="out.agp"/> |
125 </test> | 134 </test> |
126 </tests> | 135 </tests> |
127 <help><![CDATA[ | 136 <help><![CDATA[ |
136 | 145 |
137 .. class:: infomark | 146 .. class:: infomark |
138 | 147 |
139 **Mapping reads** | 148 **Mapping reads** |
140 | 149 |
141 To start the scaffolding, the first step is to map reads to the assembly. We recommend using the `BWA <https://usegalaxy.eu/root?tool_id=toolshed.g2.bx.psu.edu/repos/devteam/bwa/bwa_mem/0.7.17.2>`_ | 150 To start the scaffolding, the first step is to map reads to the assembly. We recommend using the `BWA <https://usegalaxy.eu/root?tool_id=toolshed.g2.bx.psu.edu/repos/devteam/bwa/bwa_mem/0.7.17.2>`_ |
142 or `BOWTIE2 <https://usegalaxy.eu/root?tool_id=toolshed.g2.bx.psu.edu/repos/devteam/bowtie2/bowtie2/2.4.2+galaxy0>`_ aligner to map reads. The read mapping generates a bam file. SALSA requires | 151 or `BOWTIE2 <https://usegalaxy.eu/root?tool_id=toolshed.g2.bx.psu.edu/repos/devteam/bowtie2/bowtie2/2.4.2+galaxy0>`_ aligner to map reads. The read mapping generates a bam file. SALSA requires |
143 a BED file as the input. The bam file can be converted using the bamToBed command from the `Bedtools package <http://bedtools.readthedocs.io/en/latest/>`_. Also, SALSA requires BED files to be sorted by the | 152 a BED file as the input. The bam file can be converted using the bamToBed command from the `Bedtools package <http://bedtools.readthedocs.io/en/latest/>`_. Also, SALSA requires BED files to be sorted by the |
144 read name, rather than the alignment coordinates. Once you have a bam file, convert it with bamToBed and sort the result by read name to get the BED file needed as an input to SALSA. | 153 read name, rather than the alignment coordinates. Once you have a bam file, convert it with bamToBed and sort the result by read name to get the BED file needed as an input to SALSA. |
145 | 154 |
146 Since Hi-C reads and alignments contain experimental artifacts, the alignments need some postprocessing. To align and postprocess | 155 Since Hi-C reads and alignments contain experimental artifacts, the alignments need some postprocessing. To align and postprocess |
147 the alignments, you can use the pipeline released by Arima Genomics which can be found in the `GitHub repository <https://github.com/ArimaGenomics>`_. | 156 the alignments, you can use the pipeline released by Arima Genomics which can be found in the `GitHub repository <https://github.com/ArimaGenomics>`_. |
148 | 157 |
149 Additional information on how to generate/filter the bam file can be found `here <https://github.com/marbl/SALSA#mapping-reads>`_. | 158 Additional information on how to generate/filter the bam file can be found `here <https://github.com/marbl/SALSA#mapping-reads>`_. |
150 | 159 |
151 ]]></help> | 160 ]]></help> |