annotate je-demultiplex.xml @ 7:8f16495dc5f2 draft

planemo upload for repository https://git.embl.de/grp-gbcs/Je/tree/master/src/galaxy commit e217faa15f73427979bb212036cb130a14c59750
author gbcs-embl-heidelberg
date Mon, 05 Mar 2018 07:12:52 -0500
parents 222819c87d90
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
3
8930b411a9d7 planemo upload for repository https://git.embl.de/grp-gbcs/Je/tree/master/src/galaxy commit dd9e62bdb01d1252a90ce778103ce9b6b4a8cd52-dirty
gbcs-embl-heidelberg
parents: 0
diff changeset
1 <tool id="je_demultiplex" name="Je-Demultiplex" version="@VERSION_STRING@">
0
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
2 <description>demultiplexes fastq files</description>
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
3 <macros>
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
4 <import>macros.xml</import>
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
5 </macros>
5
222819c87d90 planemo upload for repository https://git.embl.de/grp-gbcs/Je/tree/master/src/galaxy commit 0eefd837333dae6fbecaf4f55b053268d844eff6
gbcs-embl-heidelberg
parents: 3
diff changeset
6 <expand macro="requirements" />
0
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
7 <stdio>
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
8 <exit_code range="1:" level="fatal" description="Tool exception" />
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
9 </stdio>
5
222819c87d90 planemo upload for repository https://git.embl.de/grp-gbcs/Je/tree/master/src/galaxy commit 0eefd837333dae6fbecaf4f55b053268d844eff6
gbcs-embl-heidelberg
parents: 3
diff changeset
10 <expand macro="version_command" />
222819c87d90 planemo upload for repository https://git.embl.de/grp-gbcs/Je/tree/master/src/galaxy commit 0eefd837333dae6fbecaf4f55b053268d844eff6
gbcs-embl-heidelberg
parents: 3
diff changeset
11 <command>
0
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
12 <![CDATA[
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
13 je demultiplex
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
14
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
15 ## Fastq inputs
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
16 @single_or_paired_cmd@
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
17 #if str( $library.type ) != "single":
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
18 @demultiplex_paired_end_cmd_options@
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
19 #end if
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
20
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
21 @barcode_option_cmd@
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
22 @barcode_len_cmd@
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
23 C=$CLIP_BARCODE
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
24
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
25 @demultiplexer_common_options_cmd@
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
26 @common_options_cmd@
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
27
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
28 @demultiplexer_common_output_options_cmd@
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
29 @demultiplexer_common_outputs_cmd@
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
30
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
31 ]]>
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
32 </command>
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
33 <configfiles>
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
34 <expand macro="barcode_config_file"></expand>
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
35 </configfiles>
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
36 <inputs>
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
37 <!-- single/paired - similar to macro 'single_or_paired_general' -->
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
38 <expand macro="single_or_paired_general">
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
39 <expand macro="demultiplex_paired_end_options"/>
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
40 </expand>
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
41
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
42 <expand macro="barcode_option"/>
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
43 <expand macro="barcode_len_option"/>
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
44 <expand macro="clip_barcode"/>
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
45
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
46 <expand macro="demultiplexer_common_options"/>
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
47
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
48 <expand macro="common_options"/>
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
49
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
50 <expand macro="demultiplexer_common_output_options"/>
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
51
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
52 </inputs>
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
53 <outputs>
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
54 <expand macro="demultiplexer_common_outputs"/>
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
55 </outputs>
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
56
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
57 <tests>
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
58 <test>
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
59 <!-- simple test on single end data -->
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
60 <param name="type" value="single"/>
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
61 <param name="input_1" value="file_1_sequence.txt" ftype="fastqsanger"/>
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
62 <param name="BARCODE_FILE" value="barcodes_SE.txt" ftype="tabular"/>
7
8f16495dc5f2 planemo upload for repository https://git.embl.de/grp-gbcs/Je/tree/master/src/galaxy commit e217faa15f73427979bb212036cb130a14c59750
gbcs-embl-heidelberg
parents: 5
diff changeset
63 <output name="METRICS_FILE_NAME" file="summary_SE.txt" ftype="tabular" lines_diff="4"/>
8f16495dc5f2 planemo upload for repository https://git.embl.de/grp-gbcs/Je/tree/master/src/galaxy commit e217faa15f73427979bb212036cb130a14c59750
gbcs-embl-heidelberg
parents: 5
diff changeset
64 <output name="DEMULTIPLEX_RESULTS" ftype="tabular">
0
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
65 <discovered_dataset designation="unassigned_1" file="unassigned_1_SE.txt" />
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
66 </output>
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
67 </test>
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
68 <test>
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
69 <!-- more complex test on paired end data with different barcode for fwd/rev -->
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
70 <param name="type" value="paired"/>
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
71 <param name="input_1" value="file_1_sequence.txt" ftype="fastqsanger"/>
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
72 <param name="input_2" value="file_2_sequence.txt" ftype="fastqsanger"/>
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
73
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
74 <param name="BPOS" value="BOTH"/>
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
75 <param name="BM" value="BOTH"/>
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
76 <param name="BRED" value="false"/>
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
77
7
8f16495dc5f2 planemo upload for repository https://git.embl.de/grp-gbcs/Je/tree/master/src/galaxy commit e217faa15f73427979bb212036cb130a14c59750
gbcs-embl-heidelberg
parents: 5
diff changeset
78 <param name="COLLECT_OUTPUTS" value="false" />
0
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
79 <param name="barcode_list_type_con" value="text"/>
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
80 <param name="barcode_text"
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
81 value="sample1 CACTGT:GTATAG&#10;sample2 ATTCCG:TCCGTC&#10;sample3 GCTACC:TGGTCA&#10;sample4 CGAAAC:CACTGT"/>
7
8f16495dc5f2 planemo upload for repository https://git.embl.de/grp-gbcs/Je/tree/master/src/galaxy commit e217faa15f73427979bb212036cb130a14c59750
gbcs-embl-heidelberg
parents: 5
diff changeset
82 <output name="METRICS_FILE_NAME" file="summary_PE.txt" ftype="tabular" lines_diff="4"/>
8f16495dc5f2 planemo upload for repository https://git.embl.de/grp-gbcs/Je/tree/master/src/galaxy commit e217faa15f73427979bb212036cb130a14c59750
gbcs-embl-heidelberg
parents: 5
diff changeset
83 <output name="DEMULTIPLEX_RESULTS" ftype="tabular">
0
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
84 <discovered_dataset designation="unassigned_1" file="unassigned_1_PE.txt" />
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
85 <discovered_dataset designation="unassigned_2" file="unassigned_2_PE.txt" />
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
86 <discovered_dataset designation="sample4_CGAAACCACTGT_2" file="sample4_CGAAACCACTGT_2.txt"/>
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
87 <discovered_dataset designation="sample4_CGAAACCACTGT_1" file="sample4_CGAAACCACTGT_1.txt"/>
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
88 <discovered_dataset designation="sample3_GCTACCTGGTCA_2" file="sample3_GCTACCTGGTCA_2.txt"/>
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
89 <discovered_dataset designation="sample3_GCTACCTGGTCA_1" file="sample3_GCTACCTGGTCA_1.txt"/>
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
90 <discovered_dataset designation="sample2_ATTCCGTCCGTC_2" file="sample2_ATTCCGTCCGTC_2.txt"/>
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
91 <discovered_dataset designation="sample2_ATTCCGTCCGTC_1" file="sample2_ATTCCGTCCGTC_1.txt"/>
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
92 <discovered_dataset designation="sample1_CACTGTGTATAG_2" file="sample1_CACTGTGTATAG_2.txt"/>
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
93 <discovered_dataset designation="sample1_CACTGTGTATAG_1" file="sample1_CACTGTGTATAG_1.txt"/>
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
94 </output>
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
95 </test>
7
8f16495dc5f2 planemo upload for repository https://git.embl.de/grp-gbcs/Je/tree/master/src/galaxy commit e217faa15f73427979bb212036cb130a14c59750
gbcs-embl-heidelberg
parents: 5
diff changeset
96 <test>
8f16495dc5f2 planemo upload for repository https://git.embl.de/grp-gbcs/Je/tree/master/src/galaxy commit e217faa15f73427979bb212036cb130a14c59750
gbcs-embl-heidelberg
parents: 5
diff changeset
97 <!-- Repeat of previous but with collection outputs -->
8f16495dc5f2 planemo upload for repository https://git.embl.de/grp-gbcs/Je/tree/master/src/galaxy commit e217faa15f73427979bb212036cb130a14c59750
gbcs-embl-heidelberg
parents: 5
diff changeset
98 <param name="type" value="paired"/>
8f16495dc5f2 planemo upload for repository https://git.embl.de/grp-gbcs/Je/tree/master/src/galaxy commit e217faa15f73427979bb212036cb130a14c59750
gbcs-embl-heidelberg
parents: 5
diff changeset
99 <param name="input_1" value="file_1_sequence.txt" ftype="fastqsanger"/>
8f16495dc5f2 planemo upload for repository https://git.embl.de/grp-gbcs/Je/tree/master/src/galaxy commit e217faa15f73427979bb212036cb130a14c59750
gbcs-embl-heidelberg
parents: 5
diff changeset
100 <param name="input_2" value="file_2_sequence.txt" ftype="fastqsanger"/>
8f16495dc5f2 planemo upload for repository https://git.embl.de/grp-gbcs/Je/tree/master/src/galaxy commit e217faa15f73427979bb212036cb130a14c59750
gbcs-embl-heidelberg
parents: 5
diff changeset
101
8f16495dc5f2 planemo upload for repository https://git.embl.de/grp-gbcs/Je/tree/master/src/galaxy commit e217faa15f73427979bb212036cb130a14c59750
gbcs-embl-heidelberg
parents: 5
diff changeset
102 <param name="BPOS" value="BOTH"/>
8f16495dc5f2 planemo upload for repository https://git.embl.de/grp-gbcs/Je/tree/master/src/galaxy commit e217faa15f73427979bb212036cb130a14c59750
gbcs-embl-heidelberg
parents: 5
diff changeset
103 <param name="BM" value="BOTH"/>
8f16495dc5f2 planemo upload for repository https://git.embl.de/grp-gbcs/Je/tree/master/src/galaxy commit e217faa15f73427979bb212036cb130a14c59750
gbcs-embl-heidelberg
parents: 5
diff changeset
104 <param name="BRED" value="false"/>
8f16495dc5f2 planemo upload for repository https://git.embl.de/grp-gbcs/Je/tree/master/src/galaxy commit e217faa15f73427979bb212036cb130a14c59750
gbcs-embl-heidelberg
parents: 5
diff changeset
105 <param name="barcode_list_type_con" value="text"/>
8f16495dc5f2 planemo upload for repository https://git.embl.de/grp-gbcs/Je/tree/master/src/galaxy commit e217faa15f73427979bb212036cb130a14c59750
gbcs-embl-heidelberg
parents: 5
diff changeset
106 <param name="barcode_text"
8f16495dc5f2 planemo upload for repository https://git.embl.de/grp-gbcs/Je/tree/master/src/galaxy commit e217faa15f73427979bb212036cb130a14c59750
gbcs-embl-heidelberg
parents: 5
diff changeset
107 value="sample1 CACTGT:GTATAG&#10;sample2 ATTCCG:TCCGTC&#10;sample3 GCTACC:TGGTCA&#10;sample4 CGAAAC:CACTGT"/>
8f16495dc5f2 planemo upload for repository https://git.embl.de/grp-gbcs/Je/tree/master/src/galaxy commit e217faa15f73427979bb212036cb130a14c59750
gbcs-embl-heidelberg
parents: 5
diff changeset
108 <param name="COLLECT_OUTPUTS" value="true" />
8f16495dc5f2 planemo upload for repository https://git.embl.de/grp-gbcs/Je/tree/master/src/galaxy commit e217faa15f73427979bb212036cb130a14c59750
gbcs-embl-heidelberg
parents: 5
diff changeset
109
8f16495dc5f2 planemo upload for repository https://git.embl.de/grp-gbcs/Je/tree/master/src/galaxy commit e217faa15f73427979bb212036cb130a14c59750
gbcs-embl-heidelberg
parents: 5
diff changeset
110 <output_collection name="COLLECTION_1" type="list">
8f16495dc5f2 planemo upload for repository https://git.embl.de/grp-gbcs/Je/tree/master/src/galaxy commit e217faa15f73427979bb212036cb130a14c59750
gbcs-embl-heidelberg
parents: 5
diff changeset
111 <element name="sample4_CGAAACCACTGT_1.txt" value="sample4_CGAAACCACTGT_1.txt"/> <!-- sample4, read 1: element name matches its expected file, as in COLLECTION_2 -->
8f16495dc5f2 planemo upload for repository https://git.embl.de/grp-gbcs/Je/tree/master/src/galaxy commit e217faa15f73427979bb212036cb130a14c59750
gbcs-embl-heidelberg
parents: 5
diff changeset
112 <element name="sample3_GCTACCTGGTCA_1.txt" value="sample3_GCTACCTGGTCA_1.txt"/>
8f16495dc5f2 planemo upload for repository https://git.embl.de/grp-gbcs/Je/tree/master/src/galaxy commit e217faa15f73427979bb212036cb130a14c59750
gbcs-embl-heidelberg
parents: 5
diff changeset
113 <element name="sample2_ATTCCGTCCGTC_1.txt" value="sample2_ATTCCGTCCGTC_1.txt"/>
8f16495dc5f2 planemo upload for repository https://git.embl.de/grp-gbcs/Je/tree/master/src/galaxy commit e217faa15f73427979bb212036cb130a14c59750
gbcs-embl-heidelberg
parents: 5
diff changeset
114 <element name="sample1_CACTGTGTATAG_1.txt" value="sample1_CACTGTGTATAG_1.txt"/>
8f16495dc5f2 planemo upload for repository https://git.embl.de/grp-gbcs/Je/tree/master/src/galaxy commit e217faa15f73427979bb212036cb130a14c59750
gbcs-embl-heidelberg
parents: 5
diff changeset
115 </output_collection>
8f16495dc5f2 planemo upload for repository https://git.embl.de/grp-gbcs/Je/tree/master/src/galaxy commit e217faa15f73427979bb212036cb130a14c59750
gbcs-embl-heidelberg
parents: 5
diff changeset
116 <output_collection name="COLLECTION_2" type="list">
8f16495dc5f2 planemo upload for repository https://git.embl.de/grp-gbcs/Je/tree/master/src/galaxy commit e217faa15f73427979bb212036cb130a14c59750
gbcs-embl-heidelberg
parents: 5
diff changeset
117 <element name="sample4_CGAAACCACTGT_2.txt" value="sample4_CGAAACCACTGT_2.txt"/>
8f16495dc5f2 planemo upload for repository https://git.embl.de/grp-gbcs/Je/tree/master/src/galaxy commit e217faa15f73427979bb212036cb130a14c59750
gbcs-embl-heidelberg
parents: 5
diff changeset
118 <element name="sample3_GCTACCTGGTCA_2.txt" value="sample3_GCTACCTGGTCA_2.txt"/>
8f16495dc5f2 planemo upload for repository https://git.embl.de/grp-gbcs/Je/tree/master/src/galaxy commit e217faa15f73427979bb212036cb130a14c59750
gbcs-embl-heidelberg
parents: 5
diff changeset
119 <element name="sample2_ATTCCGTCCGTC_2.txt" value="sample2_ATTCCGTCCGTC_2.txt"/>
8f16495dc5f2 planemo upload for repository https://git.embl.de/grp-gbcs/Je/tree/master/src/galaxy commit e217faa15f73427979bb212036cb130a14c59750
gbcs-embl-heidelberg
parents: 5
diff changeset
120 <element name="sample1_CACTGTGTATAG_2.txt" value="sample1_CACTGTGTATAG_2.txt"/>
8f16495dc5f2 planemo upload for repository https://git.embl.de/grp-gbcs/Je/tree/master/src/galaxy commit e217faa15f73427979bb212036cb130a14c59750
gbcs-embl-heidelberg
parents: 5
diff changeset
121 </output_collection>
8f16495dc5f2 planemo upload for repository https://git.embl.de/grp-gbcs/Je/tree/master/src/galaxy commit e217faa15f73427979bb212036cb130a14c59750
gbcs-embl-heidelberg
parents: 5
diff changeset
122 </test>
0
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
123 </tests>
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
124
5
222819c87d90 planemo upload for repository https://git.embl.de/grp-gbcs/Je/tree/master/src/galaxy commit 0eefd837333dae6fbecaf4f55b053268d844eff6
gbcs-embl-heidelberg
parents: 3
diff changeset
125 <help>
0
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
126 <![CDATA[
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
127 **What it does**
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
128
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
129 Je demultiplex: A fastq file demultiplexer with optional handling of Unique Molecular Identifiers for further use
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
130 in 'markdupes' module.
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
131 Input files are fastq files, and can be in gzip compressed format.
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
132
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
133 Author: Charles Girardot (charles.girardot@embl.de).
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
134
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
135 Wrapper by: Jelle Scholtalbers (jelle.scholtalbers@embl.de).
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
136
7
8f16495dc5f2 planemo upload for repository https://git.embl.de/grp-gbcs/Je/tree/master/src/galaxy commit e217faa15f73427979bb212036cb130a14c59750
gbcs-embl-heidelberg
parents: 5
diff changeset
137 With contributions by: Mehmet Tekman (@mtekman)
8f16495dc5f2 planemo upload for repository https://git.embl.de/grp-gbcs/Je/tree/master/src/galaxy commit e217faa15f73427979bb212036cb130a14c59750
gbcs-embl-heidelberg
parents: 5
diff changeset
138
0
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
139 ------
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
140
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
141 **Know what you are doing**
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
142
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
143 .. class:: warningmark
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
144
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
145 You will want to read the `documentation`__.
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
146
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
147 .. __: http://gbcs.embl.de/portal/Je
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
148
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
149 ------
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
150
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
151 **Parameter list**
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
152
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
153 This is an exhaustive list of options::
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
154
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
155 FASTQ_FILE1=File
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
156 F1=File
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
157
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
158 Input fastq file (optionally gzipped) for single end data, or first read in paired end
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
159 data.
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
160
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
161 Required.
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
162
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
163 FASTQ_FILE2=File
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
164 F2=File
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
165
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
166 Input fastq file (optionally gzipped) for the second read of paired end data.
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
167
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
168 Default value: null.
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
169
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
170 BARCODE_FILE=File
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
171 BF=File
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
172
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
173 Barcode file describing sequence list and sample names. Tab-delimited file with 2
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
174 columns, with the sample in col1 and the corresponding barcode in col2.
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
175 Simple barcode file format : 2 tab-delimited colums
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
176 If multiple barcode map to the same sample, either line can be duplicated e.g.
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
177 sample1 ATAT
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
178 sample1 GAGG
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
179 sample2 CCAA
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
180 sample2 TGTG
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
181 Or barcodes can be combined using the OR operator '|' i.e. the file above can be
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
182 re-written like
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
183 sample1 ATAT|GAGG
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
184 sample2 CCAA|TGTG
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
185 Finally, for the special situation of paired-end data in which barcodes differ at both
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
186 ends (ie BPOS=BOTH BRED=false BM=BOTH , see BRED option description), barcodes for read_1
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
187 and read_2 can be distinguished using a ':' separator i.e.
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
188 sample1 ATAT:GAGG
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
189 sample2 CCAA:TGTG
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
190 This above syntax means that sample 1 is encoded with ATAT barcode at read_1 AND GAGG
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
191 barcode at read_2. Note that you can still combine barcodes using | e.g.
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
192 sample1 ATAT|GAGG:CCAA|TGTG
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
193 would mean that sample 1 is mapped by the combination of barcode: ATAT OR GAGG at read_1
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
194 AND CCAA OR TGTG at read_2.
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
195 Extended barcode file format : 3 (single-end) or 4 (paired-end) tab-delimited colums
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
196 same as the simple barcode file format but the extra columns contains the file name(s)
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
197 to use to name output files. A unique extra column is expected for single-end while 2
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
198 extra columns are expected for paired-end. In case, lines are duplicated (multiple
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
199 barcodesmapping the same sample), the same file name should be indicated in the third
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
200 (and fourth) column(s).
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
201 sample1 ATAT spl1_1.txt.gz spl1_2.txt.gz
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
202 sample1 GAGG spl1_1.txt.gz spl1_2.txt.gz
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
203 sample2 CCAA spl2_1.txt.gz spl2_2.txt.gz
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
204 Or
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
205 sample1 ATAT|GAGG:CCAA|TGTG spl1_1.txt.gz spl1_2.txt.gz
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
206 Ns in barcode sequence are allowed and are used to flag positions that should be ignored
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
207 in sample matching
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
208 i.e. they will be clipped off the read sequence (like in iCLIP protocol).
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
209
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
210 Required.
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
211
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
212 BARCODE_READ_POS=BarcodePosition
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
213 BPOS=BarcodePosition
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
214
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
215 For paired-end data, where to expect the barcode(s) :
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
216 READ_1 (beginning of read from FASTQ_FILE_1),
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
217 READ_2 (beginning of read from FASTQ_FILE_2),
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
218 BOTH (beginning of both reads).
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
219 Automatically set to READ_1 in single end mode.
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
220
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
221 Default value: BOTH. This option can be set to 'null' to clear the default value.
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
222 Possible values: {READ_1, READ_2, BOTH, NONE}
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
223
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
224 BCLEN=String
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
225 LEN=String
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
226
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
227 Length of the barcode sequences, optional. Taken from barcode file when not given.
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
228 In situations where BARCODE_READ_POS == BOTH AND REDUNDANT_BARCODES=false, two distinct
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
229 length can be provided using the syntax LEN=X:Z where X and Z are 2 integers representing
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
230 the barcode length for read_1 and read_2 respectively.
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
231
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
232 Default value: null.
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
233
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
234 BARCODE_FOR_SAMPLE_MATCHING=BarcodePosition
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
235 BM=BarcodePosition
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
236
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
237 Indicates which barcode(s) should be used for sample lookup
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
238 Automatically set to READ_1 in single end mode.
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
239 For paired-end data and when BARCODE_READ_POS == BOTH, which barcode should be used to
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
240 resolve sample:
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
241 use BM=READ_1 (beginning of read from FASTQ_FILE_1) if only this read should be used
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
242 for sample matching:
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
243 use BM=READ_2 (beginning of read from FASTQ_FILE_2) if only this read should be used
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
244 for sample matching:
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
245 use BM=BOTH (beginning of both reads) if both should be used.
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
246
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
247 When BM=BOTH, the behaviour is different based on the value of REDUNDANT_BARCODES :
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
248 If REDUNDANT_BARCODES=true, the two barcodes are considered to map to the same sample
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
249 and 'Je demultiplex' uses the two barcodes according to the STRICT value.
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
250 If REDUNDANT_BARCODES=false, the barcode file should map a couple of barcodes to each
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
251 sample (e.g. sample1 => AGAGTG:TTGATA) and 'Je demultiplex' needs both barcodes to find
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
252 the relevant sample. Note that this is the only situation in which all barcode matching
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
253 options (MM, MMD, Q) accept different values for both barcodes in the form X:Z where X
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
254 and Z are 2 integers.
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
255
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
256 Default value: BOTH. This option can be set to 'null' to clear the default value.
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
257 Possible values: {READ_1, READ_2, BOTH, NONE}
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
258
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
259
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
260 REDUNDANT_BARCODES=Boolean
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
261 BRED=Boolean
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
262
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
263 This option only applies for paired-end data with BARCODE_READ_POS set to 'BOTH'
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
264 Indicates if both reads' barcodes encode redundant information or if barcodes are
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
265 supposed to be identical at both ends (or to resolve to the same sample when a pool of
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
266 barcodes is used per sample).
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
267 When REDUNDANT_BARCODES=false, the 2 barcodes potentially encode
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
268 different information. For example, only one of the barcodes encodes the sample identity
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
269 while
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
270 the second barcode might be a random barcode (UMI) to tell apart PCR artefacts from real
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
271 duplicates.
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
272 Another example is when both barcodes should be used in a combined fashion to resolve the
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
273 sample.
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
274 In the first example, you should use BPOS=BOTH BRED=false BM=READ_1.
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
275 In the second example, you should have BPOS=BOTH BRED=false BM=BOTH.
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
276 Note that with BPOS=BOTH BRED=true BM=BOTH, the behavior would be different as
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
277 'demultiplex' would then check the STRICT option to perform sample resolution.
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
278 Importantly, when BARCODE_READ_POS (BPOS) == BOTH AND REDUNDANT_BARCODES=false, BCLEN,
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
279 barcode matching options (MM, MMD, Q) and read trimming/clipping options (XT, ZT) accept
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
280 different values for both barcodes in the form X:Z where X and Z are 2 integers.
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
281
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
282 Default value: true. This option can be set to 'null' to clear the default value.
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
283 Possible values: {true, false}
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
284
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
285 STRICT=Boolean
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
286 S=Boolean
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
287
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
288 For paired-end data and when two distinct barcodes/indices are used to encode samples,
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
289 this option tells if both barcodes should resolve to the same sample.
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
290 When true and if only one of the two reads has a barcode match, the read pair is
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
291 'unassigned'.
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
292 When false and if only one of the two reads has a barcode match, the read pair is
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
293 assigned to the
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
294 corresponding sample.
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
295 When reads resolve to different samples, the read pair is always 'unassigned'.
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
296
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
297 Default value: false. This option can be set to 'null' to clear the default value.
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
298 Possible values: {true, false}
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
299
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
300 MAX_MISMATCHES=String
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
301 MM=String
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
302
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
303 Maximum mismatches for a barcode to be considered a match. In situations where both
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
304 barcodes are used for sample matching i.e. BPOS=BOTH BM=BOTH (or 2 INDEX_FILE given), two
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
305 distinct
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
306 values can be given here using the syntax MM=X:Z where X and Z are 2 integers to use for
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
307 read_1 and read_2 respectively.
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
308 MM=null is like MM=0
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
309
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
310 Default value: 1. This option can be set to 'null' to clear the default value.
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
311
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
312 MIN_MISMATCH_DELTA=String
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
313 MMD=String
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
314
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
315 Minimum difference between the number of mismatches against the best and the second best
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
316 barcode. When MMD is not respected, the read remains unassigned.
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
317 When two distinct barcodes are used for sample matching (dual encoding), two distinct
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
318 values can be given using the syntax MMD=X:Z where X and Z are 2 integers to use for
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
319 first (e.g. from read_1 or index_1) and second barcode (e.g. from read_2 or index_2) respectively.
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
320 MMD=null is like MMD=0
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
321
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
322 Default value: 1. This option can be set to 'null' to clear the default value.
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
323
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
324 MIN_BASE_QUALITY=String
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
325 Q=String
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
326
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
327 Minimum base quality during barcode matching: bases whose quality is less than this
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
328 cutoff are always considered as a mismatch. When two distinct barcodes are used for sample
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
329 matching (dual encoding), two distinct values can be given using the syntax Q=X:Z where X
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
330 and Z are 2 integers to use for first (e.g. from read_1 or index_1) and second barcode
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
331 (e.g. from read_2 or index_2) respectively.
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
332 Q=null is like Q=0.
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
333
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
334 Default value: 10. This option can be set to 'null' to clear the default value.
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
335
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
336 XTRIMLEN=String
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
337 XT=String
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
338
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
339 Optional extra number of bases to be trimmed right after the barcode (only used if
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
340 CLIP_BARCODE=true).
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
341 When running paired-end, two distinct values can be given using the syntax XT=X:Z where X
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
342 and Z are 2 integers to use for read_1 and read_2 respectively. Note that even when
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
343 BPOS=READ_1 or BPOS=READ_2, an X:Z syntax can be given to trim the read w/o barcode so as to
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
344 end up with reads of the same length (note that this can also be operated using ZT). If a
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
345 unique value is given, e.g. XT=1, while running paired-end the following rule applies:
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
346 (1) BPOS=READ_1 or BPOS=READ_2, no trim is applied at the read w/o barcode
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
347 (2) BPOS=BOTH, the value is used for both reads.
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
348
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
349 Note that XT=null is like XT=0.
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
350 Default value: 0. This option can be set to 'null' to clear the default value.
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
351
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
352 ZTRIMLEN=String
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
353 ZT=String
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
354
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
355 Optional extra number of bases to be trimmed from the read end i.e. 3' end.
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
356 When running paired-end, two distinct values can be given here using the syntax ZT=X:Z
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
357 where X and Z are 2 integers to use for read_1 and read_2 respectively. Note that even
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
358 when BPOS=READ_1 or BPOS=READ_2, an X:Z syntax can be given to trim the read w/o barcode
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
359 as to end up with reads of the same length (note that this can also be operated using
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
360 XT). Note that if a single value is passed, the value always applies to both reads in
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
361 paired-end mode without further consideration.
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
362 ZT=null is like ZT=0.
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
363
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
364 Default value: 0. This option can be set to 'null' to clear the default value.
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
365
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
366 CLIP_BARCODE=Boolean
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
367 C=Boolean
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
368
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
369 Clip barcode sequence from read sequence, as well as XTRIMLEN (and ZTRIMLEN) bases if
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
370 applicable, before writing to output file.
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
371 If false, reads are written without modification to output file.
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
372 Apply to both barcodes when BPOS=BOTH.
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
373
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
374 Default value: true. This option can be set to 'null' to clear the default value.
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
375 Possible values: {true, false}
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
376
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
377 ADD_BARCODE_TO_HEADER=Boolean
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
378 ADD=Boolean
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
379
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
380 Add barcode at the end of the read header. Apply to both barcodes when BPOS=BOTH.
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
381 If true, the string ':barcode' is added at the end of the read header with a ':' added
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
382 only if current read header does not end with ':'.
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
383 If both reads of the pair have a barcode (i.e. BARCODE_READ_POS == BOTH), then the second
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
384 read also has its own matched barcode written. Else, the read without a barcode receives
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
385 the barcode from the barcoded read.
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
386 For example:
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
387 @D3FCO8P1:178:C1WLBACXX:7:1101:1836:1965 2:N:0:
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
388 becomes:
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
389 @D3FCO8P1:178:C1WLBACXX:7:1101:1836:1965 2:N:0:BARCODE
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
390
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
391 When barcodes contain random positions, i.e. 'N', (for example like in the iCLIP
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
392 protocol) or are UMIs, the added sequence is the sequence clipped from the read and NOT
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
393 the matched barcode.
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
394
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
395 Default value: true. This option can be set to 'null' to clear the default value.
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
396 Possible values: {true, false}
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
397
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
398
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
399 ENSURE_IDENTICAL_HEADER_NAMES=Boolean
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
400 SAME_HEADERS=Boolean
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
401
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
402 Makes sure that headers of both reads of a pair are identical, using the following read
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
403 header pattern (for both reads of a pair):
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
404 @D3FCO8P1:178:C1WLBACXX:7:1101:1836:1965 SAMPLEBARCODE_READ1:SAMPLEBARCODE_READ2(:CLIPPED_SEQ_FROMREAD1:CLIPPED_SEQ_FROMREAD2)
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
405 This option only makes sense in
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
406 paired end mode and ADD=true. Some (if not all) mappers will indeed complain when the
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
407 read headers are not identical. When molecular barcodes are present in reads (either as
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
408 additional barcodes or as degenerate barcodes ie with 'N') and the RCHAR is used, you
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
409 will end with (problematic) read headers like this:
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
410 HISEQ:44:C6KC0ANXX:5:1101:1491:1994:1:N:0:TAGAACAC:TGGAGTAG
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
411 HISEQ:44:C6KC0ANXX:5:1101:1491:1994:3:N:0:TAGAACAC:CGTTGTAT
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
412 SAME_HEADERS=true will instead generate the following identical header for both reads:
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
413 HISEQ:44:C6KC0ANXX:5:1101:1491:1994:TAGAACAC:TGGAGTAG:CGTTGTAT
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
414 Note that we also clipped the useless '1:N:0' and '3:N:0' as they will also result in
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
415 generating different headers.
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
416 Important: this option will force RCHAR=: UNLESS you specify RCHAR=null ; in which
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
417 case a space will be preserved ie:
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
418 HISEQ:44:C6KC0ANXX:5:1101:1491:1994 TAGAACAC:TGGAGTAG:CGTTGTAT
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
419
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
420 Default value: true. This option can be set to 'null' to clear the default value.
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
421 Possible values: {true, false}
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
422
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
423
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
424 READ_NAME_REPLACE_CHAR=String
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
425 RCHAR=String
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
426
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
427 Replace spaces in read name/header using provided character. This is particularly handy
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
428 when you need to retain ADDed barcode in read name/header during mapping (everything
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
429 after space in read name is usually clipped in BAM files). For example, with RCHAR=':':
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
430 @D3FCO8P1:178:C1WLBACXX:7:1101:1836:1965 2:N:0:
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
431 becomes
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
432 @D3FCO8P1:178:C1WLBACXX:7:1101:1836:1965:2:N:0:BARCODE
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
433 Default value: null.
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
434
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
435 QUALITY_FORMAT=FastqQualityFormat
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
436 V=FastqQualityFormat
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
437
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
438 A value describing how the quality values are encoded in the fastq. Either 'Solexa' for
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
439 pre-pipeline 1.3 style scores (solexa scaling + 66), 'Illumina' for pipeline 1.3 and
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
440 above (phred scaling + 64) or 'Standard' for phred scaled scores with a character shift
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
441 of 33. If this value is not specified (or 'null' is given), the quality format will be
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
442 detected.
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
443
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
444 Default value: Standard. This option can be set to 'null' to clear the default value.
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
445 Possible values: {Solexa, Illumina, Standard}
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
446
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
447 KEEP_UNASSIGNED_READ=Boolean
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
448 UN=Boolean
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
449
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
450 Should un-assigned reads be saved in files or simply ignored. File names are
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
451 automatically created or can be given using UF1 & UF2 options.
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
452
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
453 Default value: true. This option can be set to 'null' to clear the default value.
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
454 Possible values: {true, false}
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
455
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
456 BARCODE_DIAG_FILE=String
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
457 DIAG=String
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
458
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
459 Name for a barcode match reporting file (not generated by default). Either a name (in
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
460 which case the file will be created in the output dir) or full path. This file will
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
461 contain a line per read pair with the barcode best matching the read subsequence or
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
462 'null' when no match is found according to matching parameters ; and the final selected
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
463 sample. This file is useful for debugging or further processing in case both ends are
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
464 barcoded.
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
465 N.B: this file will have a size of about one of the fastq input files.
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
466
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
467 Default value: null.
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
468 ]]>
5
222819c87d90 planemo upload for repository https://git.embl.de/grp-gbcs/Je/tree/master/src/galaxy commit 0eefd837333dae6fbecaf4f55b053268d844eff6
gbcs-embl-heidelberg
parents: 3
diff changeset
469 </help>
222819c87d90 planemo upload for repository https://git.embl.de/grp-gbcs/Je/tree/master/src/galaxy commit 0eefd837333dae6fbecaf4f55b053268d844eff6
gbcs-embl-heidelberg
parents: 3
diff changeset
470 <expand macro="citations"/>
0
424f44e2124e Initial upload
gbcs-embl-heidelberg
parents:
diff changeset
471 </tool>