comparison mixcr_analyze.xml @ 0:d38cfb922f95 draft default tip

"planemo upload for repository https://github.com/galaxyproject/iuc/tree/master/tools/mixcr commit b847d69ff272b194e29858c173a7343442f905b2"
author iuc
date Thu, 10 Oct 2019 18:03:22 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:d38cfb922f95
1 <tool id="mixcr_analyze" name="MiXCR Analyze" version="@VERSION@.0">
2 <description>immuno clonotypes from sequence data</description>
3 <macros>
4 <import>mixcr_macros.xml</import>
5 </macros>
6 <expand macro="requirements" />
7 <command detect_errors="exit_code"><![CDATA[
8 #import os.path
9 #import re
10 #def clean(name)
11 #set $base_name = $os.path.basename($name)
12 #set $name_clean = re.sub('[^\w\-_\.]', '_', $base_name)
13 #return $name_clean
14 #end def
15 #if $imgt.library_selector == 'history':
16 #set $libname = $re.sub('\.gz$','',$clean($imgt.library.name))
17 ln -s -f '$imgt.library' $libname &&
18 #end if
19 #if str( $fastq_input.fastq_input_selector ) == "paired":
20 #set $fq1 = $clean($fastq_input.fastq_input1.name)
21 ln -s -f '$fastq_input.fastq_input1' $fq1 &&
22 #set $fq2 = $clean($fastq_input.fastq_input2.name)
23 ln -s -f '$fastq_input.fastq_input2' $fq2 &&
24 #else:
25 #set $fq1 = $clean($fastq_input.fastq_input1.name)
26 ln -s -f '$fastq_input.fastq_input1' $fq1 &&
27 #end if
28 mixcr analyze $analyze.pipeline --starting-material $starting_material
29 #if $analyze.pipeline == 'amplicon':
30 --5-end $analyze.primers5end
31 --3-end $analyze.primers3end
32 --adapters $analyze.adapters
33 #end if
34 #if $imgt.library_selector == 'history':
35 --align "--library $libname"
36 #set $taxonId = str($imgt.species).split(':')[0]
37 --species $taxonId
38 ## #elif $imgt.library_selector == 'cached':
39 #else
40 --species $imgt.species
41 #end if
42 $contig_assembly $impute_germline_on_export $only_productive
43 --receptor-type $receptor_type
44 #if str( $fastq_input.fastq_input_selector ) == "paired":
45 $fq1 $fq2
46 #else:
47 $fq1
48 #end if
49 mixcr_analysis
50 ]]></command> <!-- Symlinks the inputs under sanitized names (characters outside word chars, '-', '_' and '.' become '_'), then runs mixcr analyze with the output prefix mixcr_analysis. Only a literal trailing ".gz" is removed from the history library link name. -->
51 <inputs>
52 <conditional name="analyze"> <!-- Pipeline choice; its value is passed to mixcr analyze as the pipeline name. -->
53 <param name="pipeline" type="select" label="amplicon or shotgun data" help="">
54 <option value="amplicon">amplicon: enriched targeted TCR/IG libraries (5’RACE, Amplicon, Multiplex, etc)</option>
55 <option value="shotgun">shotgun: non-enriched RNA-seq or non-targeted genomic data</option>
56 </param>
57 <when value="amplicon"> <!-- The three selects below feed the 5-end, 3-end and adapters options in the command template. -->
58 <param name="primers5end" type="select" label="5’-end of the library.">
59 <help>
60 There are two possible values:
61 no-v-primers — no V gene primers (e.g. 5’RACE with template switch oligo or a like),
62 v-primers — V gene single primer / multiple.
63 </help>
64 <option value="no-v-primers">no-v-primers</option>
65 <option value="v-primers">v-primers</option>
66 </param>
67 <param name="primers3end" type="select" label="3’-end of the library.">
68 <help>
69 There are three possible values:
70 j-primers — J gene single primer / multiplex,
71 j-c-intron-primers — J-C intron single primer / multiplex,
72 c-primers — C gene single primer / multiplex (e.g. IGHC primers specific to different immunoglobulin isotypes).
73 </help>
74 <option value="j-primers">j-primers</option>
75 <option value="j-c-intron-primers">j-c-intron-primers</option>
76 <option value="c-primers">c-primers</option>
77 </param>
78 <param name="adapters" type="select" label="Presence of PCR primers and/or adapter sequences">
79 <help>
80 If sequences of primers used for PCR or adapters are present in sequencing data,
81 it may influence the accuracy of V, J and C gene segments identification and CDR3 mapping.
82 </help>
83 <option value="adapters-present">adapters-present</option>
84 <option value="no-adapters">no-adapters</option>
85 </param>
86 </when>
87 <when value="shotgun"/> <!-- The shotgun pipeline defines no additional parameters. -->
88 </conditional>
89 <param name="starting_material" type="select" label="Type of starting material: RNA or DNA" help=""> <!-- Selected value is passed to the starting-material option of mixcr analyze. -->
90 <option value="rna">RNA</option>
91 <option value="dna">DNA</option>
92 </param>
93 <conditional name="fastq_input"> <!-- Read layout: the paired branch takes two datasets, the single branch takes one. -->
94 <param name="fastq_input_selector" type="select" label="Single or Paired-end reads" help="Select between paired and single end data">
95 <option value="single">single-end fastq</option>
96 <option value="paired">paired-end fastq</option>
97 </param>
98 <when value="paired">
99 <param name="fastq_input1" type="data" format="fastqsanger,fastqsanger.gz,fastqsanger.bz2" label="Select first set of reads" help="Specify dataset with forward reads"/>
100 <param name="fastq_input2" type="data" format="fastqsanger,fastqsanger.gz,fastqsanger.bz2" label="Select second set of reads" help="Specify dataset with reverse reads"/>
101 </when>
102 <when value="single"> <!-- Unlike the paired branch, single-end input also accepts the fasta format. -->
103 <param name="fastq_input1" type="data" format="fastqsanger,fastqsanger.gz,fastqsanger.bz2,fasta" label="Select sequence dataset" help="Specify dataset with single reads"/>
104 </when>
105 </conditional>
106 <conditional name="imgt"> <!-- Reference library source: the MiXCR builtin library, or a repseqio IMGT library dataset taken from the history. -->
107 <param name="library_selector" type="select" label="Library selector" help="Select between the MiXCR builtin library and a repseqio IMGT library from your history">
108 <option value="builtin">MiXCR builtin library</option>
109 <!-- <option value="cached">repseqio IMGT library</option> -->
110 <option value="history">history repseqio IMGT library</option>
111 </param>
112 <when value="builtin">
113 <param name="species" type="text" label="Species"> <!-- NOTE(review): option values mix a taxon id (9606) with names; confirm each is accepted by the species option of mixcr. -->
114 <option value="9606">HomoSapiens</option>
115 <option value="MusMusculus">MusMusculus</option>
116 <option value="rat">rat</option>
117 </param>
118 </when>
119 <!--
120 <when value="cached">
121 <param name="library" type="select" label="repseqio IMGT library">
122 <options from_data_table="imgt_library">
123 <column name="name" index="1"/>
124 <column name="value" index="2"/>
125 </options>
126 </param>
127 <param name="species" type="select" label="Species">
128 <options from_data_table="imgt_library">
129 <column name="name" index="3"/>
130 <column name="value" index="3"/>
131 <filter type="param_value" ref="library" column="2" />
132 <filter type="multiple_splitter" column="3" separator=","/>
133 </options>
134 </param>
135 </when>
136 -->
137 <when value="history">
138 <param name="library" type="data" format="imgt.json" label="repseqio IMGT library">
139 <help><![CDATA[
140 Data coming from IMGT server may be used for academic research only,
141 provided that it is referred to IMGT®, and cited as:
142 "IMGT®, the international ImMunoGeneTics information system® http://www.imgt.org (founder and director: Marie-Paule Lefranc, Montpellier, France)."
143
144 MiXCR is free for academic and non-profit use (see License).
145 ]]></help>
146 </param>
147 <param name="species" type="select" label="Species"> <!-- Options come from the taxon_names metadata of the selected library; the command template keeps only the text before the first ':' of the chosen value. -->
148 <options>
149 <filter type="data_meta" ref="library" key="taxon_names" />
150 </options>
151 </param>
152 </when>
153 </conditional>
154 <param name="contig_assembly" type="boolean" truevalue="--contig-assembly" falsevalue="" checked="false" label="Assemble full receptor sequences." help="This option may slow down the computation."/> <!-- Each of the three booleans expands to its flag when checked and to an empty string otherwise. -->
155 <param name="impute_germline_on_export" type="boolean" truevalue="--impute-germline-on-export" falsevalue="" checked="false" label="Use germline segments (printed with lowercase letters) for uncovered gene features"/>
156 <param name="only_productive" type="boolean" truevalue="--only-productive" falsevalue="" checked="false" label="Filter out-of-frame and stop-codons in export"/>
157 <param name="receptor_type" type="select" label="Dedicated receptor type for analysis"> <!-- Selected value is passed to the receptor-type option; xcr is preselected. -->
158 <option value="xcr" selected="true">xcr (all T- and B-cell receptor chains are analyzed)</option>
159 <option value="tcr">tcr</option>
160 <option value="bcr">bcr</option>
161 <option value="tra">tra</option>
162 <option value="trb">trb</option>
163 <option value="trg">trg</option>
164 <option value="trd">trd</option>
165 <option value="igh">igh</option>
166 <option value="igk">igk</option>
167 <option value="igl">igl</option>
168 </param>
169 </inputs>
170 <outputs> <!-- Both datasets are picked up from the working directory; their file names start with the mixcr_analysis prefix given at the end of the command. -->
171 <data name="report" format="txt" label="${tool.name} on ${on_string}: report" from_work_dir="mixcr_analysis.report"/>
172 <data name="clonotypes" format="tabular" label="${tool.name} on ${on_string}: clonotypes.ALL" from_work_dir="mixcr_analysis.clonotypes.ALL.txt">
173 <actions> <!-- Sets tabular metadata: one comment (header) line and the fixed list of column names. -->
174 <action name="comment_lines" type="metadata" default="1" />
175 <action name="column_names" type="metadata" default="cloneId,cloneCount,cloneFraction,targetSequences,targetQualities,allVHitsWithScore,allDHitsWithScore,allJHitsWithScore,allCHitsWithScore,allVAlignments,allDAlignments,allJAlignments,allCAlignments,nSeqFR1,minQualFR1,nSeqCDR1,minQualCDR1,nSeqFR2,minQualFR2,nSeqCDR2,minQualCDR2,nSeqFR3,minQualFR3,nSeqCDR3,minQualCDR3,nSeqFR4,minQualFR4,aaSeqFR1,aaSeqCDR1,aaSeqFR2,aaSeqCDR2,aaSeqFR3,aaSeqCDR3,aaSeqFR4,refPoints" />
176 </actions>
177 </data>
178 </outputs>
179 <tests> <!-- Two tests on the same paired IGH reads with the builtin library; they differ only in the pipeline settings. -->
180 <test> <!-- Shotgun pipeline. -->
181 <conditional name="analyze">
182 <param name="pipeline" value="shotgun"/>
183 </conditional>
184 <param name="starting_material" value="rna"/>
185 <conditional name="fastq_input">
186 <param name="fastq_input_selector" value="paired"/>
187 <param name="fastq_input1" value="sample_IGH_R1.fastq" ftype="fastqsanger"/>
188 <param name="fastq_input2" value="sample_IGH_R2.fastq" ftype="fastqsanger"/>
189 </conditional>
190 <conditional name="imgt">
191 <param name="library_selector" value="builtin"/>
192 <param name="species" value="9606"/>
193 </conditional>
194 <param name="contig_assembly" value="True"/>
195 <param name="impute_germline_on_export" value="True"/>
196 <param name="only_productive" value="False"/>
197 <param name="receptor_type" value="xcr"/>
198 <output name="report">
199 <assert_contents>
200 <has_text text="Final clonotype count" />
201 </assert_contents>
202 </output>
203 <output name="clonotypes">
204 <assert_contents>
205 <has_text text="CARDDGGGKGDYGRLW" />
206 </assert_contents>
207 </output>
208 </test>
209 <test> <!-- Amplicon pipeline with v-primers, j-primers and no-adapters. -->
210 <conditional name="analyze">
211 <param name="pipeline" value="amplicon"/>
212 <param name="primers5end" value="v-primers"/>
213 <param name="primers3end" value="j-primers"/>
214 <param name="adapters" value="no-adapters"/>
215 </conditional>
216 <param name="starting_material" value="rna"/>
217 <conditional name="fastq_input">
218 <param name="fastq_input_selector" value="paired"/>
219 <param name="fastq_input1" value="sample_IGH_R1.fastq" ftype="fastqsanger"/>
220 <param name="fastq_input2" value="sample_IGH_R2.fastq" ftype="fastqsanger"/>
221 </conditional>
222 <conditional name="imgt">
223 <param name="library_selector" value="builtin"/>
224 <param name="species" value="9606"/>
225 </conditional>
226 <param name="contig_assembly" value="True"/>
227 <param name="impute_germline_on_export" value="True"/>
228 <param name="only_productive" value="False"/>
229 <param name="receptor_type" value="xcr"/>
230 <output name="report">
231 <assert_contents>
232 <has_text text="Final clonotype count" />
233 </assert_contents>
234 </output>
235 <output name="clonotypes">
236 <assert_contents>
237 <has_text text="CARDDGGGKGDYGRLW" />
238 </assert_contents>
239 </output>
240 </test>
241
242 </tests>
243 <help><![CDATA[
244 **MiXCR** **a universal tool for fast and accurate analysis of T- and B- cell receptor repertoire sequencing data**
245
246 MiXCR_ is a universal framework that processes big immunome data from raw sequences to quantitated clonotypes. MiXCR_ efficiently handles paired- and single-end reads, considers sequence quality, corrects PCR errors and identifies germline hypermutations. The software supports both partial- and full-length profiling and employs all available RNA or DNA information, including sequences upstream of V and downstream of J gene segments.
247
248 **MiXCR is free for academic and non-profit use** (see License_).
249
250 This tool runs the MiXCR_ analyze_ pipeline.
251 Generally, there are two distinct types of library preparation which correspond to the two analyze pipelines:
252
253 - analyze_ amplicon_ for analysis of targeted TCR/IG library amplification (5’RACE, Amplicon, Multiplex, etc).
254 - analyze_ shotgun_ for analysis of random fragments (RNA-Seq, Exome-Seq, etc).
255
256
257 MiXCR_ has builtin libraries for human, mouse and rat. Additional compiled IMGT_ libraries can be imported into your Galaxy history as datatype: *imgt.json* from: https://github.com/repseqio/library-imgt/releases
258
259 NOTE: The imgt.201822-5.sv4.json.gz release has the rattus genus taxonId:10114 for rat, whereas the mixcr builtin library has the rattus norvegicus species taxId:10116 for rat. If you encounter imgt library loading errors from mixcr, you may have to substitute 10116 for 10114 in the imgt.201822-5.sv4.json.gz file.
260
261 **Data coming from IMGT server may be used for academic research only**, provided that it is referred to IMGT®, and cited as "IMGT®, the international ImMunoGeneTics information system® http://www.imgt.org (founder and director: Marie-Paule Lefranc, Montpellier, France)."
262
263 .. _MiXCR: https://mixcr.readthedocs.io/en/latest/index.html
264 .. _analyze: https://mixcr.readthedocs.io/en/latest/analyze.html
265 .. _amplicon: https://mixcr.readthedocs.io/en/latest/analyze.html#analysis-of-targeted-tcr-ig-libraries
266 .. _shotgun: https://mixcr.readthedocs.io/en/latest/analyze.html#analysis-of-non-enriched-or-random-fragments
267 .. _License: https://mixcr.readthedocs.io/en/latest/license.html#license
268 .. _IMGT: https://github.com/repseqio/library-imgt/releases
269 ]]></help>
270 <expand macro="citations" />
271 </tool>