annotate shm_csr.xml @ 81:b6f9a640e098 draft

Uploaded
author davidvanzessen
date Fri, 19 Feb 2021 15:10:54 +0000
parents
children 729738462297
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
81
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
1 <tool id="shm_csr" name="SHM &amp; CSR pipeline" version="1.0">
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
2 <description></description>
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
3 <requirements>
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
4 <requirement type="package" version="2.7">python</requirement>
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
5 <requirement type="package" version="1.16.0">numpy</requirement>
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
6 <requirement type="package" version="1.2.0">xlrd</requirement>
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
7 <requirement type="package" version="3.0.0">r-ggplot2</requirement>
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
8 <requirement type="package" version="1.4.3">r-reshape2</requirement>
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
9 <requirement type="package" version="0.5.0">r-scales</requirement>
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
10 <requirement type="package" version="3.4_5">r-seqinr</requirement>
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
11 <requirement type="package" version="1.11.4">r-data.table</requirement>
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
12 </requirements>
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
13 <command interpreter="bash">
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
14 #if str ( $filter_unique.filter_unique_select ) == "remove":
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
15 wrapper.sh $in_file custom $out_file $out_file.files_path "${in_file.name}" "-" $functionality $unique $naive_output_cond.naive_output $naive_output_ca $naive_output_cg $naive_output_cm $naive_output_ce $naive_output_all $filter_unique.filter_unique_select $filter_unique.filter_unique_clone_count $class_filter_cond.class_filter $empty_region_filter $fast
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
16 #else:
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
17 wrapper.sh $in_file custom $out_file $out_file.files_path "${in_file.name}" "-" $functionality $unique $naive_output_cond.naive_output $naive_output_ca $naive_output_cg $naive_output_cm $naive_output_ce $naive_output_all $filter_unique.filter_unique_select 2 $class_filter_cond.class_filter $empty_region_filter $fast
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
18 #end if
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
19 </command>
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
20 <inputs>
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
21 <param name="in_file" type="data" format="data" label="IMGT zip file to be analysed" />
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
22 <param name="empty_region_filter" type="select" label="Sequence starts at" help="" >
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
23 <option value="leader" selected="true">Leader: include FR1, CDR1, FR2, CDR2, FR3 in filters</option>
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
24 <option value="FR1">FR1: include CDR1,FR2,CDR2,FR3 in filters</option> <!-- not pre-selected: a single-choice select takes exactly one default, which is the "leader" option -->
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
25 <option value="CDR1">CDR1: include FR2,CDR2,FR3 in filters</option>
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
26 <option value="FR2">FR2: include CDR2,FR3 in filters</option>
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
27 </param>
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
28 <param name="functionality" type="select" label="Functionality filter" help="" >
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
29 <option value="productive" selected="true">Productive (Productive and Productive see comment)</option>
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
30 <option value="unproductive">Unproductive (Unproductive and Unproductive see comment)</option>
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
31 <option value="remove_unknown">Productive and Unproductive (Productive, Productive see comment, Unproductive, Unproductive and Unproductive see comment)</option>
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
32 </param>
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
33 <conditional name="filter_unique">
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
34 <param name="filter_unique_select" type="select" label="Filter unique sequences" help="See below for an example.">
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
35 <option value="remove" selected="true">Remove uniques (Based on nucleotide sequence + C)</option>
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
36 <option value="remove_vjaa">Remove uniques (Based on V+J+CDR3 (AA))</option>
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
37 <option value="keep">Keep uniques (Based on nucleotide sequence + C)</option>
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
38 <option value="no">No</option>
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
39 </param>
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
40 <when value="remove">
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
41 <param name="filter_unique_clone_count" size="4" type="integer" label="How many sequences should be in a group to keep 1 of them" value="2" min="2"/>
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
42 </when>
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
43 <when value="remove_vjaa"></when><when value="keep"></when> <!-- every option of filter_unique_select gets an explicit (here empty) case -->
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
44 <when value="no"></when>
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
45 </conditional>
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
46 <param name="unique" type="select" label="Remove duplicates based on" help="" >
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
47 <option value="VGene,CDR3.IMGT.AA,best_match_class">Top.V.Gene, CDR3 (AA), C region</option>
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
48 <option value="VGene,CDR3.IMGT.AA">Top.V.Gene, CDR3 (AA)</option>
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
49 <option value="CDR3.IMGT.AA,best_match_class">CDR3 (AA), C region</option>
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
50 <option value="CDR3.IMGT.AA">CDR3 (AA)</option>
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
51
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
52 <option value="VGene,CDR3.IMGT.seq,best_match_class">Top.V.Gene, CDR3 (nt), C region</option>
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
53 <option value="VGene,CDR3.IMGT.seq">Top.V.Gene, CDR3 (nt)</option>
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
54 <option value="CDR3.IMGT.seq,best_match_class">CDR3 (nt), C region</option>
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
55 <option value="CDR3.IMGT.seq">CDR3 (nt)</option>
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
56 <option value="Sequence.ID" selected="true">Don't remove duplicates</option>
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
57 </param>
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
58 <conditional name="class_filter_cond">
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
59 <param name="class_filter" type="select" label="Human Class/Subclass filter" help="" >
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
60 <option value="70_70" selected="true">>70% class and >70% subclass</option>
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
61 <option value="60_55">>60% class and >55% subclass</option>
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
62 <option value="70_0">>70% class</option>
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
63 <option value="60_0">>60% class</option>
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
64 <option value="19_0">>19% class</option>
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
65 <option value="101_101">Do not assign (sub)class</option>
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
66 </param>
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
67 <when value="70_70"></when>
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
68 <when value="60_55"></when>
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
69 <when value="70_0"></when>
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
70 <when value="60_0"></when>
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
71 <when value="19_0"></when>
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
72 <when value="101_101"></when>
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
73 </conditional>
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
74 <conditional name="naive_output_cond">
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
75 <param name="naive_output" type="select" label="Output new IMGT archives per class into your history?">
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
76 <option value="yes">Yes</option>
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
77 <option value="no" selected="true">No</option>
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
78 </param>
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
79 <when value="yes"></when>
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
80 <when value="no"></when>
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
81 </conditional>
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
82 <param name="fast" type="select" label="Fast" help="Skips generating the new ZIP files and Change-O/Baseline" >
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
83 <option value="yes">Yes</option>
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
84 <option value="no" selected="true">No</option>
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
85 </param>
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
86 </inputs>
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
87 <outputs>
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
88 <data format="html" name="out_file" label = "SHM &amp; CSR on ${in_file.name}"/>
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
89 <data format="imgt_archive" name="naive_output_ca" label = "Filtered IMGT IGA: ${in_file.name}" >
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
90 <filter>naive_output_cond['naive_output'] == "yes"</filter>
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
91 <filter>class_filter_cond['class_filter'] != "101_101"</filter>
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
92 </data>
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
93 <data format="imgt_archive" name="naive_output_cg" label = "Filtered IMGT IGG: ${in_file.name}" >
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
94 <filter>naive_output_cond['naive_output'] == "yes"</filter>
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
95 <filter>class_filter_cond['class_filter'] != "101_101"</filter>
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
96 </data>
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
97 <data format="imgt_archive" name="naive_output_cm" label = "Filtered IMGT IGM: ${in_file.name}" >
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
98 <filter>naive_output_cond['naive_output'] == "yes"</filter>
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
99 <filter>class_filter_cond['class_filter'] != "101_101"</filter>
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
100 </data>
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
101 <data format="imgt_archive" name="naive_output_ce" label = "Filtered IMGT IGE: ${in_file.name}" >
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
102 <filter>naive_output_cond['naive_output'] == "yes"</filter>
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
103 <filter>class_filter_cond['class_filter'] != "101_101"</filter>
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
104 </data>
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
105 <data format="imgt_archive" name="naive_output_all" label = "Filtered IMGT all: ${in_file.name}" >
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
106 <filter>naive_output_cond['naive_output'] == "yes"</filter>
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
107 <filter>class_filter_cond['class_filter'] == "101_101"</filter>
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
108 </data>
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
109 </outputs>
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
110 <tests>
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
111 <test>
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
112 <param name="fast" value="yes"/>
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
113 <output name="out_file" file="test1.html"/>
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
114 </test>
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
115 </tests>
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
116 <help>
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
117 <![CDATA[
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
118 **References**
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
119
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
120 Yaari, G. and Uduman, M. and Kleinstein, S. H. (2012). Quantifying selection in high-throughput Immunoglobulin sequencing data sets. In *Nucleic Acids Research, 40 (17), pp. e134–e134.* [`doi:10.1093/nar/gks457`_]
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
121
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
122 .. _doi:10.1093/nar/gks457: http://dx.doi.org/10.1093/nar/gks457
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
123
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
124 Gupta, Namita T. and Vander Heiden, Jason A. and Uduman, Mohamed and Gadala-Maria, Daniel and Yaari, Gur and Kleinstein, Steven H. (2015). Change-O: a toolkit for analyzing large-scale B cell immunoglobulin repertoire sequencing data: Table 1. *In Bioinformatics, 31 (20), pp. 3356–3358.* [`doi:10.1093/bioinformatics/btv359`_]
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
125
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
126 .. _doi:10.1093/bioinformatics/btv359: http://dx.doi.org/10.1093/bioinformatics/btv359
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
127
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
128 -----
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
129
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
130 **Input files**
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
131
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
132 IMGT/HighV-QUEST .zip and .txz are accepted as input files. The file to be analysed can be selected using the dropdown menu.
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
133
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
134 .. class:: infomark
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
135
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
136 Note: Files can be uploaded by using “get data” and “upload file” and selecting “IMGT archive” as a file type. Special characters should be avoided in the file names of the uploaded samples as these can cause errors when running the immune repertoire pipeline. Underscores are allowed in the file names.
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
137
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
138 -----
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
139
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
140 **Sequence starts at**
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
141
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
142 Identifies the region which will be included in the analysis (analysed region)
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
143
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
144 - Sequences which are missing a gene region (FR1/CDR1 etc) in the analysed region are excluded.
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
145 - Sequences containing an ambiguous base in the analysed region or the CDR3 are excluded.
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
146 - All other filtering/analysis is based on the analysed region.
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
147
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
148 -----
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
149
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
150 **Functionality filter**
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
151
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
152 Allows filtering on productive rearrangements, unproductive rearrangements or both based on the assignment provided by IMGT.
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
153
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
154 **Filter unique sequences**
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
155
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
156 *Remove unique:*
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
157
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
158
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
159 This filter consists of two different steps.
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
160
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
161 Step 1: removes all sequences of which the nucleotide sequence in the “analysed region” and the CDR3 (see sequence starts at filter) occurs only once. (Sub)classes are not taken into account in this filter step.
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
162
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
163 Step 2: removes all duplicate sequences (sequences with the exact same nucleotide sequence in the analysed region, the CDR3 and the same (sub)class).
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
164
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
165 .. class:: infomark
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
166
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
167 This means that sequences with the same nucleotide sequence but a different (sub)class will be included in the results of both (sub)classes.
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
168
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
169 *Keep unique:*
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
170
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
171 Removes all duplicate sequences (sequences with the exact same nucleotide sequence in the analysed region and the same (sub)class).
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
172
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
173 Example of the sequences that are included using either the “remove unique filter” or the “keep unique filter”
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
174
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
175 +--------------------------+
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
176 | unique filter |
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
177 +--------+--------+--------+
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
178 | values | remove | keep |
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
179 +--------+--------+--------+
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
180 | A | A | A |
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
181 +--------+--------+--------+
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
182 | A | B | B |
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
183 +--------+--------+--------+
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
184 | B | D | C |
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
185 +--------+--------+--------+
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
186 | B | | D |
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
187 +--------+--------+--------+
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
188 | C | | |
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
189 +--------+--------+--------+
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
190 | D | | |
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
191 +--------+--------+--------+
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
192 | D | | |
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
193 +--------+--------+--------+
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
194
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
195 -----
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
196
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
197 **Remove duplicates based on**
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
198
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
199 Allows the selection of a single sequence per clone. Different definitions of a clone can be chosen.
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
200
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
201 .. class:: infomark
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
202
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
203 Note: The first sequence (in the data set) of each clone is always included in the analysis. When that first sequence is unmatched (no subclass assigned), the first matched sequence will be included. This means that altering the data order (for instance by sorting) can change the sequence which is included in the analysis and can therefore slightly influence the results.
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
204
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
205 -----
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
206
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
207 **Human Class/Subclass filter**
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
208
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
209 .. class:: warningmark
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
210
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
211 Note: This filter should only be applied when analysing human IGH data in which a (sub)class specific sequence is present. Otherwise please select the do not assign (sub)class option to prevent errors when running the pipeline.
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
212
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
213 The class percentage is based on the ‘chunk hit percentage’ (see below). The subclass percentage is based on the ‘nt hit percentage’ (see below).
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
214
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
215 The SHM & CSR pipeline identifies human Cµ, Cα, Cγ and Cε constant genes by dividing the reference sequences for the subclasses (NG_001019) into 8-nucleotide chunks which overlap by 4 nucleotides. These overlapping chunks are then individually aligned in the right order to each input sequence. This alignment is used to calculate the chunk hit percentage and the nt hit percentage.
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
216
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
217 *Chunk hit percentage*: The percentage of the chunks that is aligned
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
218
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
219 *Nt hit percentage*: The percentage of chunks covering the subclass specific nucleotide match with the different subclasses. The most stringent filter for the subclass is 70% ‘nt hit percentage’ which means that 5 out of 7 subclass specific nucleotides for Cα or 6 out of 8 subclass specific nucleotides of Cγ should match with the specific subclass.
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
220 The option “>19% class” can be chosen when you are only interested in the class (Cα/Cγ/Cµ/Cɛ) of your sequences and your sequences are not long enough to assign the subclasses.
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
221
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
222 -----
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
223
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
224 **Output new IMGT archives per class into your history?**
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
225
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
226 If yes is selected, additional output files (one for each class) will be added to the history which contain information of the sequences that passed the selected filtering criteria. These files are in the same format as the IMGT/HighV-QUEST output files and therefore are also compatible with many other analysis programs, such as the Immune repertoire pipeline.
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
227
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
228 -----
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
229
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
230 **Execute**
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
231
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
232 Upon pressing execute a new analysis is added to your history (right side of the page). Initially this analysis will be grey; once the analysis starts running, its colour in the history will change to yellow. When the analysis is finished it will turn green in the history. The analysis can then be opened by clicking on the eye icon of the analysis of interest. When an analysis turns red, an error has occurred while running the analysis. If you click on the analysis title, additional information about the analysis can be found. In addition, a bug icon appears, where more information on the error can be found.
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
233
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
234 ]]>
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
235 </help>
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
236 <citations>
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
237 <citation type="doi">10.1093/nar/gks457</citation>
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
238 <citation type="doi">10.1093/bioinformatics/btv359</citation>
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
239 </citations>
b6f9a640e098 Uploaded
davidvanzessen
parents:
diff changeset
240 </tool>