annotate hd.xml @ 29:6b15b3b6405c draft

planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
author mheinzl
date Wed, 24 Jul 2019 05:58:15 -0400
parents 15d5da04ef70
children 46bfbec0f9e6
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
239c4448a163 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
1 <?xml version="1.0" encoding="UTF-8"?>
29
6b15b3b6405c planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents: 26
diff changeset
2 <tool id="hd" name="HD:" version="1.0.1">
20
b084b6a8e3ac planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents: 19
diff changeset
3 <description>hamming distance analysis of duplex tags</description>
0
239c4448a163 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
4 <requirements>
239c4448a163 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
5 <requirement type="package" version="2.7">python</requirement>
19
2e9f7ea7ae93 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents: 18
diff changeset
6 <requirement type="package" version="1.4.0">matplotlib</requirement>
0
239c4448a163 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
7 </requirements>
239c4448a163 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
8 <command>
29
6b15b3b6405c planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents: 26
diff changeset
9 python2 '$__tool_directory__/hd.py' --inputFile '$inputFile' --inputName1 '$inputFile.name' --sample_size $sampleSize --subset_tag $subsetTag --nproc $nproc $onlyDCS $rel_freq --minFS $minFS --maxFS $maxFS
22
7e570ba56b83 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents: 20
diff changeset
10 $nr_above_bars --output_pdf '$output_pdf' --output_tabular '$output_tabular' --output_chimeras_tabular '$output_chimeras_tabular'
0
239c4448a163 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
11 </command>
239c4448a163 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
12 <inputs>
20
b084b6a8e3ac planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents: 19
diff changeset
13 <param name="inputFile" type="data" format="tabular" label="Dataset 1: input tags" optional="false" help="Input in tabular format with the family size, tag and the direction of the strand ('ab' or 'ba') for each family."/>
0
239c4448a163 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
14 <param name="sampleSize" type="integer" label="number of tags in the sample" value="1000" min="0" help="specifies the number of tags in one analysis. If sample size is 0, all tags of the dataset are compared against all tags."/>
19
2e9f7ea7ae93 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents: 18
diff changeset
15 <param name="minFS" type="integer" label="minimum family size of the tags" min="1" value="1" help="filters the tags after their family size: Families with a smaller size are skipped. Default: min. family size = 1."/>
2e9f7ea7ae93 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents: 18
diff changeset
16 <param name="maxFS" type="integer" label="max family size of the tags" min="0" value="0" help="filters the tags after their family size: Families with a larger size are skipped. If max. family size is 0, no upper bound is defined and the maximum family size in the analysis will be the maximum family size of the whole dataset. Default: max. family size = 0."/>
2e9f7ea7ae93 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents: 18
diff changeset
17 <param name="onlyDCS" type="boolean" label="only DCS in the analysis?" truevalue="" falsevalue="--only_DCS" checked="False" help="Only tags, which have a partner tag (ab and ba) in the dataset, are included in the analysis."/> <!-- NOTE(review): the only_DCS option is emitted when the box is UNchecked (falsevalue) and omitted when it is checked; with checked="False" it is therefore passed by default. Presumably hd.py treats the option as a negating switch; confirm against hd.py before swapping truevalue/falsevalue. -->
29
6b15b3b6405c planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents: 26
diff changeset
18 <param name="rel_freq" type="boolean" label="relative frequency?" truevalue="" falsevalue="--rel_freq" checked="False" help="If True, the relative frequencies instead of the absolute values are displayed in the plots."/> <!-- NOTE(review): the rel_freq option is emitted when the box is UNchecked (falsevalue) and omitted when it is checked; with checked="False" it is therefore passed by default. Presumably hd.py treats the option as a negating switch; confirm against hd.py before swapping truevalue/falsevalue. -->
19
2e9f7ea7ae93 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents: 18
diff changeset
19 <param name="subsetTag" type="integer" label="shorten tag in the analysis?" value="0" min="0" help="By this parameter an analysis with shorter tag length is simulated. If this parameter is 0 (by default), the tags with its original length are used in the analysis."/>
0
239c4448a163 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
20 <param name="nproc" type="integer" label="number of processors" value="8" min="1" help="Number of processor used for computing."/>
19
2e9f7ea7ae93 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents: 18
diff changeset
21 <param name="nr_above_bars" type="boolean" label="include numbers above bars?" truevalue="--nr_above_bars" falsevalue="" checked="True" help="The absolute and relative values of the data can be included or removed from the plots. "/>
14
883e6381ba29 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 38f5c032262361131c645812dd3dc639be6a5f4e
mheinzl
parents: 13
diff changeset
22
0
239c4448a163 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
23 </inputs>
239c4448a163 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
24 <outputs>
22
7e570ba56b83 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents: 20
diff changeset
25 <data name="output_pdf" format="pdf" />
19
2e9f7ea7ae93 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents: 18
diff changeset
26 <data name="output_tabular" format="tabular"/>
22
7e570ba56b83 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents: 20
diff changeset
27 <data name="output_chimeras_tabular" format="tabular"/>
7e570ba56b83 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents: 20
diff changeset
28
0
239c4448a163 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
29 </outputs>
19
2e9f7ea7ae93 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents: 18
diff changeset
30 <tests>
2e9f7ea7ae93 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents: 18
diff changeset
31 <test>
25
9e384b0741f1 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents: 24
diff changeset
32 <param name="inputFile" value="hd_data.tab"/>
19
2e9f7ea7ae93 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents: 18
diff changeset
33 <param name="sampleSize" value="0"/>
25
9e384b0741f1 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents: 24
diff changeset
34 <output name="output_pdf" file="hd_output.pdf" lines_diff="6"/>
9e384b0741f1 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents: 24
diff changeset
35 <output name="output_tabular" file="hd_output.tab"/>
9e384b0741f1 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents: 24
diff changeset
36 <output name="output_chimeras_tabular" file="hd_output_chimeras.tab"/>
19
2e9f7ea7ae93 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents: 18
diff changeset
37 </test>
2e9f7ea7ae93 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit dfaab79252a858e8df16bbea3607ebf1b6962e5a-dirty
mheinzl
parents: 18
diff changeset
38 </tests>
0
239c4448a163 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
39 <help> <![CDATA[
239c4448a163 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
40 **What it does**
23
ffd105ac12fb planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents: 22
diff changeset
41
ffd105ac12fb planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents: 22
diff changeset
42 Tags used in Duplex Sequencing (DS) are randomized 12-mers. Since each DNA fragment is labeled by two tags at each end there are theoretically 4 to the power of (12+12) unique combinations. However, the input DNA in a typical DS experiment contains only ~1,000,000 molecules creating a large tag-to-input excess (4^24 ≫ 1,000,000). Because of such excess it is, theoretically, highly unlikely to observe distinct input DNA molecules tagged by barcodes that are highly similar to each other.
ffd105ac12fb planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents: 22
diff changeset
43
ffd105ac12fb planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents: 22
diff changeset
44 This tool shows whether the dataset contains tags that are highly similar to each other. It uses `Hamming distance <https://en.wikipedia.org/wiki/Hamming_distance>`_ as a measure of similarity. In this context the Hamming distance is simply the number of positions at which two tags differ.
ffd105ac12fb planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents: 22
diff changeset
45
0
239c4448a163 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
46 **Input**
23
ffd105ac12fb planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents: 22
diff changeset
47
20
b084b6a8e3ac planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents: 19
diff changeset
48 This tool expects a tabular file with the tags of all families, their sizes and information about forward (ab) and reverse (ba) strands::
23
ffd105ac12fb planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents: 22
diff changeset
49
ffd105ac12fb planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents: 22
diff changeset
50 1 AAAAAAAAAAAATGTTGGAATCTT ba
ffd105ac12fb planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents: 22
diff changeset
51 10 AAAAAAAAAAAGGCGGTCCACCCC ab
ffd105ac12fb planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents: 22
diff changeset
52 28 AAAAAAAAAAATGGTATGGACCGA ab
ffd105ac12fb planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents: 22
diff changeset
53
ffd105ac12fb planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents: 22
diff changeset
54 .. class:: infomark
20
b084b6a8e3ac planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents: 19
diff changeset
55
b084b6a8e3ac planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents: 19
diff changeset
56 **How to generate the input**
b084b6a8e3ac planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents: 19
diff changeset
57
b084b6a8e3ac planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents: 19
diff changeset
58 The first step of the `Du Novo Analysis Pipeline <https://doi.org/10.1186/s13059-016-1039-4>`_ is the **Make Families** tool that produces output in this form::
b084b6a8e3ac planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents: 19
diff changeset
59
23
ffd105ac12fb planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents: 22
diff changeset
60 1 2 3 4
ffd105ac12fb planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents: 22
diff changeset
61 ------------------------------------------------------
ffd105ac12fb planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents: 22
diff changeset
62 AAAAAAAAAAAAAAATAGCTCGAT ba read1 CGCTACGTGACTGGGTCATG
ffd105ac12fb planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents: 22
diff changeset
63 AAAAAAAAAAAAAAATAGCTCGAT ba read2 CGCTACGTGACTGGGTCATG
ffd105ac12fb planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents: 22
diff changeset
64 AAAAAAAAAAAAAAATAGCTCGAT ba read3 CGCTACGTGACTGGGTCATG
20
b084b6a8e3ac planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents: 19
diff changeset
65
23
ffd105ac12fb planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents: 22
diff changeset
66 we only need columns 1 and 2. These two columns can be extracted from this dataset using **Cut** tool::
20
b084b6a8e3ac planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents: 19
diff changeset
67
23
ffd105ac12fb planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents: 22
diff changeset
68 1 2
ffd105ac12fb planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents: 22
diff changeset
69 ---------------------------
ffd105ac12fb planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents: 22
diff changeset
70 AAAAAAAAAAAAAAATAGCTCGAT ba
ffd105ac12fb planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents: 22
diff changeset
71 AAAAAAAAAAAAAAATAGCTCGAT ba
ffd105ac12fb planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents: 22
diff changeset
72 AAAAAAAAAAAAAAATAGCTCGAT ba
20
b084b6a8e3ac planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents: 19
diff changeset
73
23
ffd105ac12fb planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents: 22
diff changeset
74 now one needs to count the number of occurrences of each unique tag. This is done using the **Unique lines** tool, which adds an additional column containing the counts (column 1)::
20
b084b6a8e3ac planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents: 19
diff changeset
75
b084b6a8e3ac planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents: 19
diff changeset
76
23
ffd105ac12fb planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents: 22
diff changeset
77 1 2 3
ffd105ac12fb planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents: 22
diff changeset
78 -----------------------------
ffd105ac12fb planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents: 22
diff changeset
79 3 AAAAAAAAAAAAAAATAGCTCGAT ba
20
b084b6a8e3ac planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit e76960d95c059a78d880ed5ecd6202f54b091025
mheinzl
parents: 19
diff changeset
80
23
ffd105ac12fb planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents: 22
diff changeset
81 these data can now be used in this tool.
0
239c4448a163 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
82
239c4448a163 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
83 **Output**
23
ffd105ac12fb planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents: 22
diff changeset
84
ffd105ac12fb planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents: 22
diff changeset
85 The output is one PDF file with the plots of the Hamming distance, a tabular file with the data of the plot for each dataset and a tabular file with the chimeric tags. The PDF file contains several pages:
ffd105ac12fb planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents: 22
diff changeset
86
ffd105ac12fb planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents: 22
diff changeset
87 1. The first page contains a graph representing the Hamming distance stratified by family size.
ffd105ac12fb planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents: 22
diff changeset
88 2. The second page contains the same information as the first page, but plotted the other way around: a family size distribution which is stratified by the Hamming distance.
ffd105ac12fb planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents: 22
diff changeset
89 3. The third page contains the **first step** of the **chimera analysis**: HDs of the individual parts of the tags and their sums. First the tags are split into two halves (denoted as a and b in the graph) and the minimum HD for part a (=HD a) is calculated. In the next step the data are restricted to only those tags that showed the minimum HD in half a. The HD of the second half is then calculated by comparing the b halves of the sample to this subset of halves from the step before and taking the maximum HD (=HD b'). Finally, the same approach is repeated, this time starting with the calculation of the minimum HD of part b (=HD b) followed by the calculation of the maximum HD of part a (=HD a'), to identify all possible chimeras in the dataset.
26
15d5da04ef70 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents: 25
diff changeset
90 4. The fourth page contains the **second step** of the **chimera analysis**: the absolute difference between the partial HDs (=delta HD). The HDs of the two halves of a chimeric read normally differ strongly and therefore, the difference (=absolute delta) between those HDs should be very large, which makes it possible to distinguish chimeras from true molecules. To get a more accurate number of chimeric tags in the later steps, the maximum difference is selected, since the calculation of the HDs of the parts was performed twice for each tag in the previous step (third page).
24
3bc67ac46740 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents: 23
diff changeset
91 5. The fifth page contains the **third step** of the **chimera analysis**: the relative differences of the partial HDs (=relative delta HD). Since it is not known whether the absolute difference originates due to a low and a very large HD in both halves or one half is completely identical (HD=0) to a second molecule, the relative difference is calculated by dividing the absolute difference by the HD of the whole tag (=sum of the partial HDs). The plot can be interpreted as the following:
23
ffd105ac12fb planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents: 22
diff changeset
92
ffd105ac12fb planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents: 22
diff changeset
93 - Low relative differences indicate that the total HD is split almost equally between the partial HDs. This case would be expected if all tags originate from different molecules.
ffd105ac12fb planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents: 22
diff changeset
94 - Higher relative differences occur either due to low total HDs and/or larger absolute differences, both things that indicate that 2 tags were originally the same tag.
ffd105ac12fb planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents: 22
diff changeset
95 - A relative difference of 1 means that one part of the tags is identical. Since it is very unlikely that by chance two different tags have a HD of 0 between one of their parts, the HDs in the other part are probably artificially introduced (chimeric reads).
ffd105ac12fb planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents: 22
diff changeset
96
29
6b15b3b6405c planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents: 26
diff changeset
97 6. The sixth page contains a graph representing the **HD of the chimeric tags**, which is at the same time the HD of the non-identical halves of the chimeric tags with a relative difference of 1 from the previous page.
6b15b3b6405c planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents: 26
diff changeset
98
6b15b3b6405c planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit 5b3ab8c6467fe3a52e89f5a7d175bd8a0189018a-dirty
mheinzl
parents: 26
diff changeset
99 7. The last page is only generated when the parameter "only DCS in the analysis?" is **False**. The graph represents the **HD of the chimeric tags** which is at the same time the HD of the non-identical halves of the chimeric tags and indicates if they can form a DCS or not.
23
ffd105ac12fb planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents: 22
diff changeset
100
ffd105ac12fb planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents: 22
diff changeset
101 .. class:: infomark
ffd105ac12fb planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents: 22
diff changeset
102
ffd105ac12fb planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents: 22
diff changeset
103 **Note:**
26
15d5da04ef70 planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents: 25
diff changeset
104 It is possible that both halves of a chimera show a HD of 0 to two different tags. This means that each of its halves is identical to the corresponding half of a different tag and therefore, it shows different HDs in the respective other part of the tag. In such a case the maximum HD is selected in the plots, but both matches are reported in the output file containing the chimeric tags, as in the following example (the identical part of the tag is marked with asterisks):
23
ffd105ac12fb planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents: 22
diff changeset
105
ffd105ac12fb planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents: 22
diff changeset
106 e.g. AAAAAAAAAAAT ATTCACCCTTGT
ffd105ac12fb planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents: 22
diff changeset
107
ffd105ac12fb planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents: 22
diff changeset
108 ***AAAAAAAAAAAT*** ATCATAGACTCT and AAAAAAAAAAAA ***ATTCACCCTTGT***
ffd105ac12fb planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents: 22
diff changeset
109
ffd105ac12fb planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents: 22
diff changeset
110
ffd105ac12fb planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents: 22
diff changeset
111
0
239c4448a163 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
112 **About Author**
239c4448a163 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
113
23
ffd105ac12fb planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents: 22
diff changeset
114 Author: Monika Heinzl
0
239c4448a163 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
115
23
ffd105ac12fb planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents: 22
diff changeset
116 Department: Institute of Bioinformatics, Johannes Kepler University Linz, Austria
0
239c4448a163 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
117
23
ffd105ac12fb planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
mheinzl
parents: 22
diff changeset
118 Contact: monika.heinzl@edumail.at
0
239c4448a163 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
119
239c4448a163 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
120 ]]>
239c4448a163 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
121
239c4448a163 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
122 </help>
239c4448a163 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
123 <citations>
239c4448a163 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
124 <citation type="bibtex">
239c4448a163 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
125 @misc{duplex,
239c4448a163 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
126 author = {Heinzl, Monika},
239c4448a163 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
127 year = {2018},
239c4448a163 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
128 title = {Development of algorithms for the analysis of duplex sequencing data}
239c4448a163 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
129 }
239c4448a163 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
130 </citation>
239c4448a163 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
131 </citations>
239c4448a163 planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 6055f8c5c052f528ff85fb5e0d43b4500830637a
mheinzl
parents:
diff changeset
132 </tool>