annotate gene_fraction/src/alignment_util.h @ 0:f95150c37d38 draft default tip

planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
author chrisd
date Sun, 21 Feb 2016 23:31:55 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
f95150c37d38 planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
1 #ifndef ALIGNMENT_UTIL_H
f95150c37d38 planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
2 #define ALIGNMENT_UTIL_H
f95150c37d38 planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
3
f95150c37d38 planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
4 #include <string>
f95150c37d38 planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
5 #include <vector>
f95150c37d38 planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
6
f95150c37d38 planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
7 #include <boost/algorithm/string.hpp>
f95150c37d38 planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
8
f95150c37d38 planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
9 #include "int_util.h"
f95150c37d38 planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
10
f95150c37d38 planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
11 // macro to check if read mapped
f95150c37d38 planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
12 #define READ_UNMAPPED 4
f95150c37d38 planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
13
f95150c37d38 planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
14 /**
f95150c37d38 planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
15 * Splits alignment into separate parts
f95150c37d38 planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
16 */
f95150c37d38 planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
17 std::vector<std::string> split_alignment(std::string &alignment) {
f95150c37d38 planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
18 std::vector<std::string> parts;
f95150c37d38 planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
19
f95150c37d38 planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
20 boost::trim_if(alignment, boost::is_any_of("\t "));
f95150c37d38 planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
21 // split on tab delimeter
f95150c37d38 planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
22 boost::split(parts, alignment, boost::is_any_of("\t "), boost::token_compress_on);
f95150c37d38 planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
23
f95150c37d38 planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
24 return parts;
f95150c37d38 planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
25 }
f95150c37d38 planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
26
f95150c37d38 planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
27 /**
f95150c37d38 planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
28 * Validates bit flag
f95150c37d38 planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
29 */
f95150c37d38 planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
30 bool is_good_flag(const int &bit_flag) {
f95150c37d38 planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
31 if( (bit_flag & READ_UNMAPPED) > 0) return false;
f95150c37d38 planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
32 return true;
f95150c37d38 planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
33 }
f95150c37d38 planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
34
f95150c37d38 planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
35 /**
f95150c37d38 planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
36 * Validates rname
f95150c37d38 planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
37 */
f95150c37d38 planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
38 bool is_good_rname(const std::string &rname) {
f95150c37d38 planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
39 return rname.compare("*") != 0;
f95150c37d38 planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
40 }
f95150c37d38 planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
41
f95150c37d38 planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
42 /**
f95150c37d38 planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
43 * Validates pos
f95150c37d38 planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
44 */
f95150c37d38 planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
45 bool is_good_pos(const int &pos) {
f95150c37d38 planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
46 return pos > 0;
f95150c37d38 planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
47 }
f95150c37d38 planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
48
f95150c37d38 planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
49 /**
f95150c37d38 planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
50 * Validates cigar
f95150c37d38 planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
51 */
f95150c37d38 planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
52 bool is_good_cigar(const std::string &cigar) {
f95150c37d38 planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
53 return cigar.compare("*") != 0;
f95150c37d38 planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
54 }
f95150c37d38 planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
55
f95150c37d38 planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
56 /**
f95150c37d38 planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
57 * Validates seq
f95150c37d38 planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
58 */
f95150c37d38 planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
59 bool is_good_seq(const std::string &seq) {
f95150c37d38 planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
60 return seq.compare("*") != 0;
f95150c37d38 planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
61 }
f95150c37d38 planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
62
f95150c37d38 planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
63 /**
f95150c37d38 planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
64 * Validates alignment fields
f95150c37d38 planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
65 */
f95150c37d38 planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
66 bool fields_are_good(std::vector<std::string> &parts) {
f95150c37d38 planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
67 int bit_flag = s_to_i(parts[1]);
f95150c37d38 planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
68 int pos = s_to_i(parts[3]);
f95150c37d38 planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
69
f95150c37d38 planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
70 std::string rname = parts[2];
f95150c37d38 planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
71 std::string cigar = parts[5];
f95150c37d38 planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
72 std::string seq = parts[9];
f95150c37d38 planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
73
f95150c37d38 planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
74 if(!(is_good_flag(bit_flag))) return false;
f95150c37d38 planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
75 if(!(is_good_pos(pos))) return false;
f95150c37d38 planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
76 if(!(is_good_rname(rname))) return false;
f95150c37d38 planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
77 if(!(is_good_cigar(cigar))) return false;
f95150c37d38 planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
78 if(!(is_good_seq(seq))) return false;
f95150c37d38 planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
79
f95150c37d38 planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
80 return true;
f95150c37d38 planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
81 }
f95150c37d38 planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
82
f95150c37d38 planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
83 /**
f95150c37d38 planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
84 * Stores alignments that pass validity checks
f95150c37d38 planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
85 */
f95150c37d38 planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
86 bool is_good_alignment(std::string &alignment) {
f95150c37d38 planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
87 std::vector<std::string> alignment_parts;
f95150c37d38 planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
88
f95150c37d38 planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
89 alignment_parts = split_alignment(alignment);
f95150c37d38 planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
90
f95150c37d38 planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
91 if(!(fields_are_good(alignment_parts)))
f95150c37d38 planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
92 return false;
f95150c37d38 planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
93 return true;
f95150c37d38 planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
94 }
f95150c37d38 planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
95
f95150c37d38 planemo upload for repository https://github.com/ChrisD11/Tools commit ddc95e5d6b5f2c0a5340c0bc384aa822db8856d5
chrisd
parents:
diff changeset
96 #endif /* ALIGNMENT_UTIL_H */