annotate make_families.xml @ 6:9a0bee12b583 draft default tip

planemo upload for repository https://github.com/galaxyproject/dunovo commit b'9fbe4b8871038d2945fa4b9ee0b5f89cf4e4f494\n'-dirty
author nick
date Mon, 06 Feb 2017 23:39:11 -0500
parents 4bc49a5769ee
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
d2e46adc199e planemo upload commit 35b743e6492923c0e2b1e5e434eaf4e56d268108
nick
parents:
diff changeset
1 <?xml version="1.0"?>
6
9a0bee12b583 planemo upload for repository https://github.com/galaxyproject/dunovo commit b'9fbe4b8871038d2945fa4b9ee0b5f89cf4e4f494\n'-dirty
nick
parents: 5
diff changeset
2 <tool id="make_families" name="Du Novo: Make families" version="0.6">
2
ba2a53b970ca planemo upload commit 670b3282d2c120882b956ad617e61369467fb0fe
nick
parents: 1
diff changeset
3 <description>of duplex sequencing reads</description>
0
d2e46adc199e planemo upload commit 35b743e6492923c0e2b1e5e434eaf4e56d268108
nick
parents:
diff changeset
4 <requirements>
6
9a0bee12b583 planemo upload for repository https://github.com/galaxyproject/dunovo commit b'9fbe4b8871038d2945fa4b9ee0b5f89cf4e4f494\n'-dirty
nick
parents: 5
diff changeset
5 <requirement type="package" version="0.6">duplex</requirement>
0
d2e46adc199e planemo upload commit 35b743e6492923c0e2b1e5e434eaf4e56d268108
nick
parents:
diff changeset
6 <requirement type="set_environment">DUPLEX_DIR</requirement>
d2e46adc199e planemo upload commit 35b743e6492923c0e2b1e5e434eaf4e56d268108
nick
parents:
diff changeset
7 </requirements>
2
ba2a53b970ca planemo upload commit 670b3282d2c120882b956ad617e61369467fb0fe
nick
parents: 1
diff changeset
8 <!-- TODO: Add dependency on coreutils to get paste? -->
6
9a0bee12b583 planemo upload for repository https://github.com/galaxyproject/dunovo commit b'9fbe4b8871038d2945fa4b9ee0b5f89cf4e4f494\n'-dirty
nick
parents: 5
diff changeset
9 <command>paste '$fastq1' '$fastq2'
0
d2e46adc199e planemo upload commit 35b743e6492923c0e2b1e5e434eaf4e56d268108
nick
parents:
diff changeset
10 | paste - - - -
6
9a0bee12b583 planemo upload for repository https://github.com/galaxyproject/dunovo commit b'9fbe4b8871038d2945fa4b9ee0b5f89cf4e4f494\n'-dirty
nick
parents: 5
diff changeset
11 | awk -f "\$DUPLEX_DIR/make-barcodes.awk" -v TAG_LEN=$taglen -v INVARIANT=$invariant
0
d2e46adc199e planemo upload commit 35b743e6492923c0e2b1e5e434eaf4e56d268108
nick
parents:
diff changeset
12 | sort
6
9a0bee12b583 planemo upload for repository https://github.com/galaxyproject/dunovo commit b'9fbe4b8871038d2945fa4b9ee0b5f89cf4e4f494\n'-dirty
nick
parents: 5
diff changeset
13 &gt; '$output'
0
d2e46adc199e planemo upload commit 35b743e6492923c0e2b1e5e434eaf4e56d268108
nick
parents:
diff changeset
14 </command>
d2e46adc199e planemo upload commit 35b743e6492923c0e2b1e5e434eaf4e56d268108
nick
parents:
diff changeset
15 <inputs>
d2e46adc199e planemo upload commit 35b743e6492923c0e2b1e5e434eaf4e56d268108
nick
parents:
diff changeset
16 <param name="fastq1" type="data" format="fastq" label="Sequencing reads, mate 1"/>
d2e46adc199e planemo upload commit 35b743e6492923c0e2b1e5e434eaf4e56d268108
nick
parents:
diff changeset
17 <param name="fastq2" type="data" format="fastq" label="Sequencing reads, mate 2"/>
d2e46adc199e planemo upload commit 35b743e6492923c0e2b1e5e434eaf4e56d268108
nick
parents:
diff changeset
18 <param name="taglen" type="integer" value="12" min="0" label="Tag length" help="length of each random barcode on the ends of the fragments"/>
d2e46adc199e planemo upload commit 35b743e6492923c0e2b1e5e434eaf4e56d268108
nick
parents:
diff changeset
19 <param name="invariant" type="integer" value="5" min="0" label="Invariant sequence length" help="length of the sequence between the tag and actual sample sequence (the restriction site, normally)"/>
d2e46adc199e planemo upload commit 35b743e6492923c0e2b1e5e434eaf4e56d268108
nick
parents:
diff changeset
20 </inputs>
d2e46adc199e planemo upload commit 35b743e6492923c0e2b1e5e434eaf4e56d268108
nick
parents:
diff changeset
21 <outputs>
d2e46adc199e planemo upload commit 35b743e6492923c0e2b1e5e434eaf4e56d268108
nick
parents:
diff changeset
22 <data name="output" format="tabular"/>
d2e46adc199e planemo upload commit 35b743e6492923c0e2b1e5e434eaf4e56d268108
nick
parents:
diff changeset
23 </outputs>
d2e46adc199e planemo upload commit 35b743e6492923c0e2b1e5e434eaf4e56d268108
nick
parents:
diff changeset
24 <tests>
d2e46adc199e planemo upload commit 35b743e6492923c0e2b1e5e434eaf4e56d268108
nick
parents:
diff changeset
25 <test>
d2e46adc199e planemo upload commit 35b743e6492923c0e2b1e5e434eaf4e56d268108
nick
parents:
diff changeset
26 <param name="fastq1" value="smoke_1.fq"/>
d2e46adc199e planemo upload commit 35b743e6492923c0e2b1e5e434eaf4e56d268108
nick
parents:
diff changeset
27 <param name="fastq2" value="smoke_2.fq"/>
d2e46adc199e planemo upload commit 35b743e6492923c0e2b1e5e434eaf4e56d268108
nick
parents:
diff changeset
28 <param name="taglen" value="5"/>
d2e46adc199e planemo upload commit 35b743e6492923c0e2b1e5e434eaf4e56d268108
nick
parents:
diff changeset
29 <param name="invariant" value="1"/>
d2e46adc199e planemo upload commit 35b743e6492923c0e2b1e5e434eaf4e56d268108
nick
parents:
diff changeset
30 <output name="output" file="smoke.families.tsv"/>
d2e46adc199e planemo upload commit 35b743e6492923c0e2b1e5e434eaf4e56d268108
nick
parents:
diff changeset
31 </test>
d2e46adc199e planemo upload commit 35b743e6492923c0e2b1e5e434eaf4e56d268108
nick
parents:
diff changeset
32 <test>
d2e46adc199e planemo upload commit 35b743e6492923c0e2b1e5e434eaf4e56d268108
nick
parents:
diff changeset
33 <param name="fastq1" value="smoke_1.fq"/>
d2e46adc199e planemo upload commit 35b743e6492923c0e2b1e5e434eaf4e56d268108
nick
parents:
diff changeset
34 <param name="fastq2" value="smoke_2.fq"/>
d2e46adc199e planemo upload commit 35b743e6492923c0e2b1e5e434eaf4e56d268108
nick
parents:
diff changeset
35 <param name="taglen" value="5"/>
d2e46adc199e planemo upload commit 35b743e6492923c0e2b1e5e434eaf4e56d268108
nick
parents:
diff changeset
36 <param name="invariant" value="0"/>
d2e46adc199e planemo upload commit 35b743e6492923c0e2b1e5e434eaf4e56d268108
nick
parents:
diff changeset
37 <output name="output" file="smoke.families.i0.tsv"/>
d2e46adc199e planemo upload commit 35b743e6492923c0e2b1e5e434eaf4e56d268108
nick
parents:
diff changeset
38 </test>
d2e46adc199e planemo upload commit 35b743e6492923c0e2b1e5e434eaf4e56d268108
nick
parents:
diff changeset
39 </tests>
6
9a0bee12b583 planemo upload for repository https://github.com/galaxyproject/dunovo commit b'9fbe4b8871038d2945fa4b9ee0b5f89cf4e4f494\n'-dirty
nick
parents: 5
diff changeset
40 <citations>
9a0bee12b583 planemo upload for repository https://github.com/galaxyproject/dunovo commit b'9fbe4b8871038d2945fa4b9ee0b5f89cf4e4f494\n'-dirty
nick
parents: 5
diff changeset
41 <citation type="bibtex">@article{Stoler2016,
9a0bee12b583 planemo upload for repository https://github.com/galaxyproject/dunovo commit b'9fbe4b8871038d2945fa4b9ee0b5f89cf4e4f494\n'-dirty
nick
parents: 5
diff changeset
42 author = {Stoler, Nicholas and Arbeithuber, Barbara and Guiblet, Wilfried and Makova, Kateryna D and Nekrutenko, Anton},
9a0bee12b583 planemo upload for repository https://github.com/galaxyproject/dunovo commit b'9fbe4b8871038d2945fa4b9ee0b5f89cf4e4f494\n'-dirty
nick
parents: 5
diff changeset
43 doi = {10.1186/s13059-016-1039-4},
9a0bee12b583 planemo upload for repository https://github.com/galaxyproject/dunovo commit b'9fbe4b8871038d2945fa4b9ee0b5f89cf4e4f494\n'-dirty
nick
parents: 5
diff changeset
44 issn = {1474-760X},
9a0bee12b583 planemo upload for repository https://github.com/galaxyproject/dunovo commit b'9fbe4b8871038d2945fa4b9ee0b5f89cf4e4f494\n'-dirty
nick
parents: 5
diff changeset
45 journal = {Genome biology},
9a0bee12b583 planemo upload for repository https://github.com/galaxyproject/dunovo commit b'9fbe4b8871038d2945fa4b9ee0b5f89cf4e4f494\n'-dirty
nick
parents: 5
diff changeset
46 number = {1},
9a0bee12b583 planemo upload for repository https://github.com/galaxyproject/dunovo commit b'9fbe4b8871038d2945fa4b9ee0b5f89cf4e4f494\n'-dirty
nick
parents: 5
diff changeset
47 pages = {180},
9a0bee12b583 planemo upload for repository https://github.com/galaxyproject/dunovo commit b'9fbe4b8871038d2945fa4b9ee0b5f89cf4e4f494\n'-dirty
nick
parents: 5
diff changeset
48 pmid = {27566673},
9a0bee12b583 planemo upload for repository https://github.com/galaxyproject/dunovo commit b'9fbe4b8871038d2945fa4b9ee0b5f89cf4e4f494\n'-dirty
nick
parents: 5
diff changeset
49 publisher = {Genome Biology},
9a0bee12b583 planemo upload for repository https://github.com/galaxyproject/dunovo commit b'9fbe4b8871038d2945fa4b9ee0b5f89cf4e4f494\n'-dirty
nick
parents: 5
diff changeset
50 title = {{Streamlined analysis of duplex sequencing data with Du Novo.}},
9a0bee12b583 planemo upload for repository https://github.com/galaxyproject/dunovo commit b'9fbe4b8871038d2945fa4b9ee0b5f89cf4e4f494\n'-dirty
nick
parents: 5
diff changeset
51 url = {http://www.ncbi.nlm.nih.gov/pubmed/27566673},
9a0bee12b583 planemo upload for repository https://github.com/galaxyproject/dunovo commit b'9fbe4b8871038d2945fa4b9ee0b5f89cf4e4f494\n'-dirty
nick
parents: 5
diff changeset
52 volume = {17},
9a0bee12b583 planemo upload for repository https://github.com/galaxyproject/dunovo commit b'9fbe4b8871038d2945fa4b9ee0b5f89cf4e4f494\n'-dirty
nick
parents: 5
diff changeset
53 year = {2016}
9a0bee12b583 planemo upload for repository https://github.com/galaxyproject/dunovo commit b'9fbe4b8871038d2945fa4b9ee0b5f89cf4e4f494\n'-dirty
nick
parents: 5
diff changeset
54 }</citation>
9a0bee12b583 planemo upload for repository https://github.com/galaxyproject/dunovo commit b'9fbe4b8871038d2945fa4b9ee0b5f89cf4e4f494\n'-dirty
nick
parents: 5
diff changeset
55 </citations>
0
d2e46adc199e planemo upload commit 35b743e6492923c0e2b1e5e434eaf4e56d268108
nick
parents:
diff changeset
56 <help>
d2e46adc199e planemo upload commit 35b743e6492923c0e2b1e5e434eaf4e56d268108
nick
parents:
diff changeset
57
d2e46adc199e planemo upload commit 35b743e6492923c0e2b1e5e434eaf4e56d268108
nick
parents:
diff changeset
58 **What it does**
d2e46adc199e planemo upload commit 35b743e6492923c0e2b1e5e434eaf4e56d268108
nick
parents:
diff changeset
59
d2e46adc199e planemo upload commit 35b743e6492923c0e2b1e5e434eaf4e56d268108
nick
parents:
diff changeset
60 This tool processes raw duplex sequencing data: it removes the barcodes from the reads and uses them to group the reads into families that come from the same fragment.
d2e46adc199e planemo upload commit 35b743e6492923c0e2b1e5e434eaf4e56d268108
nick
parents:
diff changeset
61
d2e46adc199e planemo upload commit 35b743e6492923c0e2b1e5e434eaf4e56d268108
nick
parents:
diff changeset
62 -----
d2e46adc199e planemo upload commit 35b743e6492923c0e2b1e5e434eaf4e56d268108
nick
parents:
diff changeset
63
d2e46adc199e planemo upload commit 35b743e6492923c0e2b1e5e434eaf4e56d268108
nick
parents:
diff changeset
64 **Output**
d2e46adc199e planemo upload commit 35b743e6492923c0e2b1e5e434eaf4e56d268108
nick
parents:
diff changeset
65
d2e46adc199e planemo upload commit 35b743e6492923c0e2b1e5e434eaf4e56d268108
nick
parents:
diff changeset
66 The output will be a tabular file where each line corresponds to a pair of input reads.
d2e46adc199e planemo upload commit 35b743e6492923c0e2b1e5e434eaf4e56d268108
nick
parents:
diff changeset
67
d2e46adc199e planemo upload commit 35b743e6492923c0e2b1e5e434eaf4e56d268108
nick
parents:
diff changeset
68 The columns are::
d2e46adc199e planemo upload commit 35b743e6492923c0e2b1e5e434eaf4e56d268108
nick
parents:
diff changeset
69
d2e46adc199e planemo upload commit 35b743e6492923c0e2b1e5e434eaf4e56d268108
nick
parents:
diff changeset
70 1: barcode (both tags joined and ordered)
d2e46adc199e planemo upload commit 35b743e6492923c0e2b1e5e434eaf4e56d268108
nick
parents:
diff changeset
71 2: tag order in barcode ("ab" or "ba")
d2e46adc199e planemo upload commit 35b743e6492923c0e2b1e5e434eaf4e56d268108
nick
parents:
diff changeset
72 3: read1 name
d2e46adc199e planemo upload commit 35b743e6492923c0e2b1e5e434eaf4e56d268108
nick
parents:
diff changeset
73 4: read1 sequence (minus the tag and invariant sequences)
d2e46adc199e planemo upload commit 35b743e6492923c0e2b1e5e434eaf4e56d268108
nick
parents:
diff changeset
74 5: read1 quality scores (minus the same tag and invariant)
d2e46adc199e planemo upload commit 35b743e6492923c0e2b1e5e434eaf4e56d268108
nick
parents:
diff changeset
75 6: read2 name
d2e46adc199e planemo upload commit 35b743e6492923c0e2b1e5e434eaf4e56d268108
nick
parents:
diff changeset
76 7: read2 sequence (minus the tag and invariant sequences)
d2e46adc199e planemo upload commit 35b743e6492923c0e2b1e5e434eaf4e56d268108
nick
parents:
diff changeset
77 8: read2 quality scores (minus the same tag and invariant)
d2e46adc199e planemo upload commit 35b743e6492923c0e2b1e5e434eaf4e56d268108
nick
parents:
diff changeset
78
d2e46adc199e planemo upload commit 35b743e6492923c0e2b1e5e434eaf4e56d268108
nick
parents:
diff changeset
79 -----
d2e46adc199e planemo upload commit 35b743e6492923c0e2b1e5e434eaf4e56d268108
nick
parents:
diff changeset
80
d2e46adc199e planemo upload commit 35b743e6492923c0e2b1e5e434eaf4e56d268108
nick
parents:
diff changeset
81 **Barcode creation**
d2e46adc199e planemo upload commit 35b743e6492923c0e2b1e5e434eaf4e56d268108
nick
parents:
diff changeset
82
d2e46adc199e planemo upload commit 35b743e6492923c0e2b1e5e434eaf4e56d268108
nick
parents:
diff changeset
83 For each pair, the tool will remove the tag at the beginning of each read and create a barcode by concatenating the two tags. The order of the tags within the barcode is determined by a string comparison, so the same two tags always produce an identical barcode no matter which read each tag came from. The original tag order will be noted in the second column.
d2e46adc199e planemo upload commit 35b743e6492923c0e2b1e5e434eaf4e56d268108
nick
parents:
diff changeset
84
d2e46adc199e planemo upload commit 35b743e6492923c0e2b1e5e434eaf4e56d268108
nick
parents:
diff changeset
85 Since pairs from opposite strands will have the same tags, but in the reverse order, this produces the same barcode for reads from the same fragment, regardless of strand. Then a simple sort will group all reads from the same fragment together, separated into strands by the different "order" values.
d2e46adc199e planemo upload commit 35b743e6492923c0e2b1e5e434eaf4e56d268108
nick
parents:
diff changeset
86
d2e46adc199e planemo upload commit 35b743e6492923c0e2b1e5e434eaf4e56d268108
nick
parents:
diff changeset
87 Examples::
d2e46adc199e planemo upload commit 35b743e6492923c0e2b1e5e434eaf4e56d268108
nick
parents:
diff changeset
88
d2e46adc199e planemo upload commit 35b743e6492923c0e2b1e5e434eaf4e56d268108
nick
parents:
diff changeset
89 +---------------+-----------------+
d2e46adc199e planemo upload commit 35b743e6492923c0e2b1e5e434eaf4e56d268108
nick
parents:
diff changeset
90 | input tags | output |
d2e46adc199e planemo upload commit 35b743e6492923c0e2b1e5e434eaf4e56d268108
nick
parents:
diff changeset
91 +-------+-------+-------+---------+
d2e46adc199e planemo upload commit 35b743e6492923c0e2b1e5e434eaf4e56d268108
nick
parents:
diff changeset
92 | read1 | read2 | order | barcode |
d2e46adc199e planemo upload commit 35b743e6492923c0e2b1e5e434eaf4e56d268108
nick
parents:
diff changeset
93 +-------+-------+-------+---------+
d2e46adc199e planemo upload commit 35b743e6492923c0e2b1e5e434eaf4e56d268108
nick
parents:
diff changeset
94 | ATG | CCT | ab | ATGCCT |
d2e46adc199e planemo upload commit 35b743e6492923c0e2b1e5e434eaf4e56d268108
nick
parents:
diff changeset
95 +-------+-------+-------+---------+
d2e46adc199e planemo upload commit 35b743e6492923c0e2b1e5e434eaf4e56d268108
nick
parents:
diff changeset
96 | CCT | ATG | ba | ATGCCT |
d2e46adc199e planemo upload commit 35b743e6492923c0e2b1e5e434eaf4e56d268108
nick
parents:
diff changeset
97 +-------+-------+-------+---------+
d2e46adc199e planemo upload commit 35b743e6492923c0e2b1e5e434eaf4e56d268108
nick
parents:
diff changeset
98
d2e46adc199e planemo upload commit 35b743e6492923c0e2b1e5e434eaf4e56d268108
nick
parents:
diff changeset
99 </help>
d2e46adc199e planemo upload commit 35b743e6492923c0e2b1e5e434eaf4e56d268108
nick
parents:
diff changeset
100 </tool>