Mercurial > repos > cstrittmatter > mitokmer
annotate kmer_read_m3.cpp @ 2:a0852bb4b09b draft
planemo upload commit 03463f4b0598df5619def5230de3fb758b4090ba-dirty
author | cstrittmatter |
---|---|
date | Thu, 25 Apr 2019 07:59:05 -0400 |
parents | 1472b4f4fbfe |
children |
rev | line source |
---|---|
0
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
1 // newkmer_5.cpp load k-mers for all species |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
2 // test reads by SmithWaterman |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
3 // MKM 1 APR 2016 |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
4 //-std=c++0x -D__NO_INLINE__ ... at end: -lz |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
5 //#include "stdafx.h" |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
6 #include <fstream> |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
7 #include <stdlib.h> |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
8 #include <stdio.h> |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
9 #include <math.h> |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
10 #include <iostream> |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
11 #include <sstream> |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
12 #include <iomanip> |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
13 #include <ostream> |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
14 #include <string> |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
15 #include <vector> |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
16 #include <unordered_set> |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
17 #include <time.h> |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
18 //#include <sys/time.h> |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
19 #include <cstddef> |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
20 #include <cstring> |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
21 #include <algorithm> |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
22 #include <set> |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
23 #include <zlib.h> |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
24 #include <dirent.h> |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
25 using namespace std; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
26 |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
27 const int minalign = 0; //120; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
28 unordered_set<int> myset = { 0}; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
29 |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
30 typedef int ptype; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
31 typedef unsigned long long ui64; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
32 typedef unsigned long long ktype; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
33 typedef unsigned int vtype; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
34 typedef unsigned short int otype; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
35 typedef unsigned long long itype; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
36 |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
37 const int KSIZE = 30; //kmer size |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
38 const int MAXREP = 2048; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
39 const int REPSHIFT = 11; //(log of maxrep) |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
40 const int SAVENUM = 12; //save this many reads |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
41 const itype MAXHASH = (1 << 30); //pow2 size of hashtable |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
42 const int MAXREPROBE = 16; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
43 const int GAPO = 11; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
44 const int GAPX = 1; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
45 const int MATCH = 5; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
46 const int MISMATCH = -4; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
47 const bool INIGAPPEN = false; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
48 const int MAXALIGN = 1024; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
49 const int beam = 8; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
50 |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
51 vector<string> accession; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
52 vector<int> targno; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
53 int mcount, savepos, tct, num_orgs=0, save_target=0; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
54 vector<int> gcount, ucount; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
55 ofstream outread; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
56 set<ktype> kmer_seen; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
57 string fdir = ""; //directory for fasta.gz files |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
58 |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
59 const char bases[4] = { 'A', 'C', 'G', 'T' }; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
60 const ui64 one = 1; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
61 const ui64 two = 2; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
62 const ui64 three = 3; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
63 const ui64 mask = (one << (KSIZE * 2)) - 1; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
64 const ui64 hiC = one << (KSIZE - 1) * 2; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
65 const ui64 hiG = two << (KSIZE - 1) * 2; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
66 const ui64 hiT = three << (KSIZE - 1) * 2; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
67 const ui64 hi1 = one << 62; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
68 const ui64 hi2 = two << 62; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
69 const ui64 hi3 = three << 62; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
70 const ui64 himask = hi1 - one; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
71 |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
72 const unsigned BUFLEN = 0x4000; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
73 |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
74 void error(const char* const msg) |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
75 { |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
76 cerr << msg << "\n"; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
77 exit(255); |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
78 } |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
79 |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
80 string int2str(ktype key) |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
81 { |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
82 string sequence = ""; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
83 for (int i = 0; i < KSIZE; ++i) |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
84 { |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
85 sequence = bases[key & 3] + sequence; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
86 key >>= 2; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
87 } |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
88 return sequence; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
89 } |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
90 |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
91 class Tree1 |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
92 { |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
93 public: |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
94 |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
95 vector<vector<int>> children; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
96 vector<int> parent; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
97 int root; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
98 |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
99 Tree1(int nt) |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
100 { |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
101 vector<int> blank; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
102 root = 1; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
103 for (int i = 0; i < nt; i++) |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
104 { |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
105 parent.push_back(root); |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
106 children.push_back(blank); |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
107 } |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
108 } |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
109 |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
110 ~Tree1() |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
111 { |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
112 } |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
113 |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
114 void add_edge(int x, int y) |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
115 { |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
116 parent[y] = x; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
117 children[x].push_back(y); |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
118 } |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
119 |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
120 int msca(int x, int y) |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
121 { |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
122 //return most specific common ancestor of nodes x and y |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
123 set<int> ancestors; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
124 int z; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
125 |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
126 ancestors.insert(root); |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
127 z = x; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
128 //cout << x << " " << y << " " << z << endl; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
129 while (z != root) |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
130 { |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
131 ancestors.insert(z); |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
132 z = get_parent(z); |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
133 //cout << x << " " << y << " " << z << endl; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
134 } |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
135 z = y; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
136 if (ancestors.find(y) != ancestors.end()) |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
137 return x; //x is descendent of y |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
138 while (ancestors.find(z) == ancestors.end()) |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
139 { |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
140 z = get_parent(z); |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
141 if (z==x) //y is descendent of x |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
142 return y; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
143 //cout << x << " " << y << " " << z << endl; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
144 } |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
145 return z; //common ancesctor |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
146 } |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
147 |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
148 int get_parent(int x) |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
149 { |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
150 if (x != root && x > 0) |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
151 return parent[x]; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
152 else |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
153 return root; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
154 } |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
155 |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
156 }; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
157 |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
158 Tree1 *taxonomy; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
159 |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
160 class Hashtable |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
161 { |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
162 public: |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
163 |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
164 itype size; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
165 |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
166 struct Cell |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
167 { |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
168 ktype key; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
169 vtype value; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
170 int org; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
171 ptype position; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
172 bool fstrand; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
173 } *pHash; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
174 |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
175 Hashtable() |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
176 { |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
177 size = 0; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
178 pHash = new Cell[MAXHASH]; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
179 HashClear(); |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
180 //for (int i = 1; i<MAXREPROBE; i++) |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
181 // reprobeVal[i] = reprobeVal[i - 1] + i; //i*i/2+i/2 |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
182 } |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
183 |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
184 ~Hashtable() |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
185 { |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
186 delete[] pHash; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
187 size = 0; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
188 } |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
189 |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
190 // from code.google.com/p/smhasher/wiki/MurmurHash3 |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
191 itype integerHash(ui64 k) |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
192 { |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
193 k ^= k >> 33; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
194 k *= 0xff51afd7ed558ccd; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
195 k ^= k >> 33; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
196 k *= 0xc4ceb9fe1a85ec53; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
197 k ^= k >> 33; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
198 return (itype) k; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
199 } |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
200 |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
201 void HashClear() |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
202 { |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
203 memset(pHash, 0, MAXHASH * sizeof(Cell)); |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
204 } |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
205 |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
206 int getHash(ktype key, otype &org, ptype &position, bool &fstrand) |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
207 { |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
208 //0 target=bad |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
209 //returns target if probe present in table |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
210 //sets position and fstrand |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
211 itype index, reprobe, i, hash; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
212 |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
213 position = 0; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
214 hash = integerHash(key); |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
215 reprobe = 0; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
216 i = 0; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
217 do |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
218 { |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
219 index = ((hash + reprobe) & (MAXHASH-1)); //for power of 2 size |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
220 reprobe += ++i; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
221 if (pHash[index].value == 0) |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
222 { //empty cell |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
223 return 0; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
224 } |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
225 else if (pHash[index].key == key) |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
226 { |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
227 position = pHash[index].position; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
228 fstrand = pHash[index].fstrand; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
229 org = pHash[index].org; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
230 return (pHash[index].value); |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
231 } |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
232 } while (reprobe < MAXHASH && i < MAXREPROBE); |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
233 |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
234 return 0; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
235 } |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
236 |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
237 void add_kmer(ktype key, vtype target, otype org, ptype position, bool fstrand) |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
238 { //add kmer to table |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
239 itype index, reprobe, i, hash; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
240 bool notdone=true; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
241 |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
242 hash = integerHash(key); |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
243 reprobe = 0; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
244 i = 0; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
245 do |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
246 { |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
247 index = ((hash + reprobe) & (MAXHASH-1)); //for power of 2 size |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
248 reprobe += ++i; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
249 |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
250 if (pHash[index].value == 0) |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
251 { //empty cell |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
252 notdone = false; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
253 pHash[index].key = key; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
254 pHash[index].value = target; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
255 pHash[index].org = org; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
256 pHash[index].position = position; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
257 pHash[index].fstrand = fstrand; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
258 if (++size > MAXHASH - 32) |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
259 { |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
260 cout << "out of memory in table " << endl; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
261 exit(1); |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
262 } |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
263 } |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
264 } while (notdone); |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
265 } |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
266 |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
267 }; //class Hashtable |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
268 Hashtable *ht; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
269 |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
270 string process_seq(string seq) |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
271 { |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
272 //format nucleotide sequence to all caps ACTG, all else is N |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
273 string seq2; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
274 string::iterator it1; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
275 char base; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
276 |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
277 seq2 = ""; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
278 for (it1 = seq.begin(); it1 != seq.end(); ++it1) |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
279 { |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
280 base = *it1; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
281 switch (base) |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
282 { |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
283 case 'a': |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
284 seq2 += "A"; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
285 break; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
286 case 'c': |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
287 seq2 += "C"; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
288 break; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
289 case 'g': |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
290 seq2 += "G"; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
291 break; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
292 case 't': |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
293 seq2 += "T"; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
294 break; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
295 case 'A': |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
296 seq2 += "A"; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
297 break; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
298 case 'C': |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
299 seq2 += "C"; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
300 break; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
301 case 'G': |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
302 seq2 += "G"; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
303 break; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
304 case 'T': |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
305 seq2 += "T"; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
306 break; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
307 default: |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
308 seq2 += "N"; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
309 break; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
310 } |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
311 } |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
312 |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
313 return seq2; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
314 } |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
315 |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
316 string load_data3(gzFile in) |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
317 { //read gzip fasta file and extract nt sequence |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
318 char buf[BUFLEN]; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
319 char* offset = buf; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
320 string line; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
321 string sequence = ""; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
322 |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
323 while (true) |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
324 { |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
325 int err, len = sizeof(buf)-(offset-buf); |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
326 if (len == 0) error("Buffer to small for input line lengths"); |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
327 |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
328 len = gzread(in, offset, len); |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
329 |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
330 if (len == 0) break; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
331 if (len < 0) error(gzerror(in, &err)); |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
332 |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
333 char* cur = buf; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
334 char* end = offset+len; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
335 |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
336 for (char* eol; (cur<end) && (eol = find(cur, end, '\n')) < end; cur = eol + 1) |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
337 { |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
338 line = string(cur, eol); |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
339 if (line.back() == '\r') |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
340 line.pop_back(); |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
341 if (line.length() > 0) |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
342 { |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
343 if (line.at(0) == '>') |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
344 { |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
345 sequence += "N"; //contig separator |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
346 } |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
347 else |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
348 { |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
349 sequence += process_seq(line); |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
350 } |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
351 } |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
352 //cout << line << endl; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
353 } |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
354 |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
355 // any trailing data in [eol, end) now is a partial line |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
356 offset = copy(cur, end, buf); |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
357 } |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
358 |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
359 // trailing data without eol |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
360 line = string(buf, offset); |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
361 if (gzclose(in) != Z_OK) error("failed gzclose"); |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
362 |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
363 return sequence; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
364 } |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
365 |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
366 int *tableM, *tableI, *tableD; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
367 |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
368 void tableinit() |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
369 { |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
370 int i,j; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
371 const int NINF = -2000000000; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
372 |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
373 tableM[0] = 0; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
374 tableI[0] = NINF; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
375 tableD[0] = NINF; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
376 for (i = 1; i<MAXALIGN; i++) //initialize upper row |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
377 { |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
378 j = (0 > i - beam - 1) ? 0 : i - beam - 1; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
379 tableI[j*MAXALIGN + i] = NINF; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
380 if (INIGAPPEN) |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
381 { |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
382 tableD[j*MAXALIGN + i] = -GAPO - (i - 1)*GAPX; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
383 tableM[j*MAXALIGN + i] = -GAPO - (i - 1)*GAPX; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
384 } |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
385 else |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
386 { |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
387 tableD[0 + i] = 0; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
388 tableM[0 + i] = 0; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
389 } |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
390 } |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
391 for (j = 1; j<MAXALIGN; j++) //init left column |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
392 { |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
393 i = (0 > j - beam - 1) ? 0 : j - beam - 1; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
394 tableD[j*MAXALIGN + i] = NINF; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
395 if (INIGAPPEN) |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
396 { |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
397 tableI[j*MAXALIGN + i] = -GAPO - (j - 1)*GAPX; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
398 tableM[j*MAXALIGN + i] = -GAPO - (j - 1)*GAPX; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
399 } |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
400 else |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
401 { |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
402 tableI[j*MAXALIGN + i] = 0; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
403 tableM[j*MAXALIGN + i] = 0; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
404 } |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
405 } |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
406 } |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
407 |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
408 int align(string dna1, string dna2) |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
409 { |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
410 //smith waterman alignment with beam |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
411 //returns alignment score |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
412 int b1, b2, len1, len2, tindex; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
413 int b,bb,c,cc; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
414 int i, j, maxval, startx, starty; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
415 int i1, i2; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
416 |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
417 len1 = dna1.length() + 1; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
418 len2 = dna2.length() + 1; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
419 |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
420 j=1; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
421 do |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
422 { |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
423 i = (j <= beam) ? j : j - beam; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
424 i2 = (j + beam > len1) ? len1 : j + beam; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
425 do |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
426 { |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
427 maxval = max(max(tableM[(j-1)*MAXALIGN + i-1], tableI[(j-1)*MAXALIGN + i-1]), tableD[(j-1)*MAXALIGN + i-1]); |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
428 //copy/replace |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
429 if (dna1[i-1] != dna2[j-1]) |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
430 maxval += MISMATCH; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
431 else maxval += MATCH; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
432 tableM[j*MAXALIGN + i] = maxval; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
433 |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
434 b=tableM[(j-1)*MAXALIGN + i] - GAPO; //new gap |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
435 bb = tableI[(j-1)*MAXALIGN + i] -GAPX; //extend |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
436 tableI[j*MAXALIGN + i] = (bb<b) ? b : bb; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
437 |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
438 c=tableM[j*MAXALIGN + i-1] - GAPO; //new gap |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
439 cc = tableD[j*MAXALIGN + i-1] -GAPX; //extend |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
440 tableD[j*MAXALIGN + i] = (cc<c) ? c : cc; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
441 } while(++i < i2); |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
442 } while(++j < len2); |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
443 |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
444 i = len1 - 1; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
445 j = len2 - 1; //process last entry |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
446 |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
447 maxval = max(max(tableM[j*MAXALIGN + i], tableI[j*MAXALIGN + i]), tableD[j*MAXALIGN + i]); |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
448 |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
449 return maxval; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
450 |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
451 } |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
452 |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
453 int process_read(string &sequence, string acc, int start, int stop) |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
454 { |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
455 //returns id of read, 0 = no match, updates gcount, ucount |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
456 //will do align for first minalign of each target |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
457 //will save first SAVENUM reads of each target |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
458 |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
459 char base; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
460 int it1, it2; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
461 string kmer, sequence2, dna1, dna1r, dna2, fname, finalkeystr; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
462 ktype keyF, keyR, key; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
463 int i, cpos, minscr, gpos; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
464 bool fstrand1, fstrand2; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
465 int readlength = stop - start + 1, readlen2; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
466 int st2, scr, stlen2; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
467 ktype target, final_targ=0; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
468 ptype position; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
469 otype org; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
470 int maxct, totct, rtarg=0; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
471 bool reject = false; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
472 |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
473 cpos = 0; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
474 keyF = 0; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
475 keyR = 0; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
476 gpos = 0; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
477 minscr = 5 * sequence.length() / 2; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
478 for (it1 = start; it1 <= stop; ++it1) |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
479 { |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
480 base = sequence.at(it1); |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
481 switch (base) |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
482 { |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
483 case 'A': |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
484 keyF = (keyF << 2) & mask; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
485 keyR = (keyR >> 2) | hiT; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
486 cpos++; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
487 break; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
488 case 'C': |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
489 keyF = ((keyF << 2) & mask) | 1; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
490 keyR = (keyR >> 2) | hiG; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
491 cpos++; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
492 break; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
493 case 'G': |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
494 keyF = ((keyF << 2) & mask) | 2; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
495 keyR = (keyR >> 2) | hiC; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
496 cpos++; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
497 break; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
498 case 'T': |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
499 keyF = ((keyF << 2) & mask) | 3; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
500 keyR = keyR >> 2; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
501 cpos++; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
502 break; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
503 case 'a': |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
504 keyF = (keyF << 2) & mask; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
505 keyR = (keyR >> 2) | hiT; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
506 cpos++; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
507 break; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
508 case 'c': |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
509 keyF = ((keyF << 2) & mask) | 1; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
510 keyR = (keyR >> 2) | hiG; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
511 cpos++; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
512 break; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
513 case 'g': |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
514 keyF = ((keyF << 2) & mask) | 2; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
515 keyR = (keyR >> 2) | hiC; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
516 cpos++; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
517 break; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
518 case 't': |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
519 keyF = ((keyF << 2) & mask) | 3; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
520 keyR = keyR >> 2; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
521 cpos++; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
522 break; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
523 default: |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
524 cpos = 0; //ambiguous character |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
525 keyF = 0; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
526 keyR = 0; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
527 break; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
528 } |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
529 gpos++; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
530 if (cpos == KSIZE) |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
531 { |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
532 key = keyF < keyR ? keyF : keyR; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
533 target = ht->getHash(key, org, position, fstrand2); |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
534 //position is ending base, 1-based |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
535 if (target > 0 && gcount[target] < minalign && target != final_targ) |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
536 { //do alignment on initial reads of a species |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
537 fname = fdir + accession[org] + ".fasta.gz"; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
538 sequence2 = load_data3(gzopen(fname.c_str(), "rb")); |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
539 stlen2 = sequence2.length(); |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
540 fstrand1 = (keyF < keyR); |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
541 readlen2 = readlength; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
542 if (fstrand1 == fstrand2) |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
543 { |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
544 st2 = position - it1; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
545 if (st2 < 0) st2 = 0; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
546 if (st2 + readlen2 > stlen2) readlen2 = stlen2 - st2; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
547 dna1 = sequence.substr(start, readlength); |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
548 dna2 = sequence2.substr(st2, readlen2); |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
549 } |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
550 else |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
551 { |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
552 //reverse complement dna1 |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
553 st2 = position - KSIZE + 2 + it1 - readlength; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
554 if (st2 < 0) st2 = 0; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
555 if (st2 + readlen2 > stlen2) readlen2 = stlen2 - st2; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
556 //dna1 = sequence.substr(start, readlength); |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
557 dna1 = ""; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
558 for (it2 = stop; it2 >= start; --it2) |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
559 { |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
560 base = sequence.at(it2); |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
561 switch (base) |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
562 { |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
563 case 'A': |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
564 dna1.push_back('T'); |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
565 break; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
566 case 'C': |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
567 dna1.push_back('G'); |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
568 break; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
569 case 'G': |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
570 dna1.push_back('C'); |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
571 break; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
572 case 'T': |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
573 dna1.push_back('A'); |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
574 break; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
575 default: |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
576 dna1.push_back('N'); |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
577 break; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
578 } |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
579 } |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
580 dna2 = sequence2.substr(st2, readlen2); |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
581 } |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
582 scr = align(dna1, dna2); |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
583 if (scr < minscr) |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
584 { |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
585 rtarg = target; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
586 target = 0; //fail |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
587 reject = true; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
588 //cout << rtarg << ": failed " << scr << endl; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
589 } |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
590 } |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
591 if (final_targ > 0 && target > 0) |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
592 { |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
593 final_targ = taxonomy->msca(target, final_targ); |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
594 finalkeystr = int2str(key); |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
595 } |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
596 else if (target > 0) |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
597 { |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
598 final_targ = target; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
599 finalkeystr = int2str(key); |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
600 } |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
601 if (target > 1) |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
602 { |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
603 if (kmer_seen.find(key) == kmer_seen.end()) |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
604 { |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
605 ucount[target]++; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
606 kmer_seen.insert(key); |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
607 } |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
608 } |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
609 cpos--; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
610 } |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
611 } //it |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
612 /* |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
613 if (final_targ > 1 && gcount[final_targ] < SAVENUM && save_target == 0) |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
614 { |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
615 outread << ">" << final_targ << ":" << acc << endl << sequence.substr(start, stop-start+1) << endl; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
616 outread << "[" << finalkeystr << "]" << endl; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
617 } |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
618 if (final_targ > 1 && final_targ == save_target) |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
619 { |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
620 outread << ">" << final_targ << ":" << acc << endl << sequence.substr(start, stop-start+1) << endl; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
621 } */ |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
622 gcount[final_targ]++; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
623 tct++; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
624 |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
625 return final_targ; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
626 } |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
627 |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
628 |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
629 void process_kmer(string sequence, vtype target, otype org, ptype position, bool fstrand) |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
630 { //adds kmer from datafile into hashtable |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
631 char base; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
632 string::iterator it1; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
633 int cpos, gpos; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
634 ui64 keyF; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
635 |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
636 cpos = 0; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
637 gpos = 0; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
638 keyF = 0; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
639 |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
640 for (it1 = sequence.begin(); it1 != sequence.end(); ++it1) |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
641 { |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
642 base = *it1; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
643 switch (base) |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
644 { |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
645 case 'A': |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
646 keyF = (keyF << 2) & mask; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
647 cpos++; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
648 break; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
649 case 'C': |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
650 keyF = ((keyF << 2) & mask) | 1; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
651 cpos++; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
652 break; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
653 case 'G': |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
654 keyF = ((keyF << 2) & mask) | 2; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
655 cpos++; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
656 break; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
657 case 'T': |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
658 keyF = ((keyF << 2) & mask) | 3; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
659 cpos++; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
660 break; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
661 default: |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
662 cpos = 0; //ambiguous character |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
663 keyF = 0; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
664 break; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
665 } |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
666 if (cpos == KSIZE) |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
667 { |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
668 ht->add_kmer(keyF, target, org, position, fstrand); |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
669 cpos--; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
670 } |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
671 gpos++; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
672 } //it |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
673 |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
674 } |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
675 |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
676 void process_qual(string acc, string &seq, string &qual) |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
677 { //uses PHRED 33 scores |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
678 //to trim read and pass it to process_read |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
679 const int cutoff_qual = 17; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
680 const char cutoff_char = 32 + cutoff_qual; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
681 int start, stop, target, i; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
682 int window_size = 4; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
683 int window_val, window_cut = cutoff_qual * window_size; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
684 string trim_seq; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
685 |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
686 stop = seq.length()-1; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
687 start = 0; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
688 //trim all low qual bases from end |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
689 while (qual.at(start) < cutoff_char && start < stop) |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
690 start++; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
691 while (qual.at(stop) < cutoff_char && stop > start) |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
692 stop--; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
693 //trim by window |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
694 if (start < stop - window_size) |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
695 { |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
696 window_val = 0; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
697 for (i=0; i<window_size; i++) |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
698 window_val += qual.at(start+i) - 32; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
699 while (window_val < window_cut && start < stop - window_size) |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
700 { |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
701 window_val += qual.at(start+window_size) - qual.at(start); |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
702 start++; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
703 } |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
704 } |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
705 if (start < stop - window_size) |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
706 { |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
707 window_val = 0; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
708 for (i=0; i<window_size; i++) |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
709 window_val += qual.at(stop-i) - 32; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
710 while (window_val < window_cut && start < stop - window_size) |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
711 { |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
712 window_val += qual.at(stop-window_size) - qual.at(stop); |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
713 stop--; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
714 } |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
715 } |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
716 //trim_seq = seq.substr(start,stop-start+1); |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
717 if (stop - start >= KSIZE) |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
718 { |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
719 target = process_read(seq,acc,start,stop); |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
720 } |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
721 |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
722 } |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
723 |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
724 void process_fqgz(gzFile in) |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
725 { //read gzip fastq file and extract sequence and quality lines |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
726 //passes them to process_qual |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
727 char buf[BUFLEN]; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
728 char* offset = buf; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
729 string line, seq, acc; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
730 int mod4=0; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
731 |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
732 while (true) |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
733 { |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
734 int err, len = sizeof(buf)-(offset-buf); |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
735 if (len == 0) error("Buffer to small for input line lengths"); |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
736 |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
737 len = gzread(in, offset, len); |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
738 |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
739 if (len == 0) break; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
740 if (len < 0) error(gzerror(in, &err)); |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
741 |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
742 char* cur = buf; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
743 char* end = offset+len; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
744 |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
745 for (char* eol; (cur<end) && (eol = find(cur, end, '\n')) < end; cur = eol + 1) |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
746 { |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
747 line = string(cur, eol); |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
748 if (line.back() == '\r') //windows |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
749 line.pop_back(); |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
750 if (line.length() > 0) |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
751 { |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
752 if (mod4 == 1) |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
753 { |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
754 seq = line; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
755 } |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
756 else if (mod4 == 0) |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
757 { |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
758 acc = line; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
759 } |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
760 else if (mod4 == 3) |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
761 { |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
762 process_qual(acc, seq, line); |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
763 } |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
764 mod4 = (mod4 + 1) % 4; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
765 |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
766 } |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
767 //cout << line << endl; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
768 } |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
769 |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
770 // any trailing data in [eol, end) now is a partial line |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
771 offset = copy(cur, end, buf); |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
772 } |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
773 |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
774 // trailing data without eol |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
775 line = string(buf, offset); |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
776 |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
777 if (gzclose(in) != Z_OK) error("failed gzclose"); |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
778 } |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
779 |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
780 void process_fagz(gzFile in) |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
781 { //read gzip fasta file and extract sequence |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
782 //passes read to process_read |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
783 char buf[BUFLEN]; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
784 char* offset = buf; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
785 string line; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
786 string sequence = "", acc = ""; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
787 int c1, c2, target; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
788 |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
789 cout << "true" << endl; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
790 |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
791 while (true) |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
792 { |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
793 int err, len = sizeof(buf)-(offset-buf); |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
794 if (len == 0) error("Buffer to small for input line lengths"); |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
795 |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
796 len = gzread(in, offset, len); |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
797 |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
798 if (len == 0) break; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
799 if (len < 0) error(gzerror(in, &err)); |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
800 |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
801 char* cur = buf; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
802 char* end = offset+len; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
803 |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
804 for (char* eol; (cur<end) && (eol = find(cur, end, '\n')) < end; cur = eol + 1) |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
805 { |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
806 line = string(cur, eol); |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
807 if (line.back() == '\r') |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
808 line.pop_back(); |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
809 if (line.length() > 0) |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
810 { |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
811 if (line.at(0) == '>') |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
812 { |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
813 if (sequence.length() > KSIZE) |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
814 { // process previous sequence |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
815 target = process_read(sequence, acc, 0, sequence.length()-1); |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
816 } |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
817 sequence = ""; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
818 acc = line.substr(1,line.length()-1); |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
819 } |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
820 else |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
821 { |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
822 sequence += line; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
823 } |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
824 } |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
825 //cout << line << endl; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
826 } |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
827 |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
828 // any trailing data in [eol, end) now is a partial line |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
829 offset = copy(cur, end, buf); |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
830 } |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
831 if (sequence.length() > KSIZE) |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
832 { //last one |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
833 target = process_read(sequence, acc, 0, sequence.length()-1); |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
834 } |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
835 // trailing data without eol |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
836 line = string(buf, offset); |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
837 if (gzclose(in) != Z_OK) error("failed gzclose"); |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
838 |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
839 } |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
840 |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
841 int process_gzkmers(gzFile in) |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
842 { |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
843 |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
844 char buf[BUFLEN]; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
845 char* offset = buf; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
846 string line; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
847 int target, count, org, position, tct=0; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
848 string sequence; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
849 char strand; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
850 bool fstrand; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
851 |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
852 while (true) |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
853 { |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
854 int err, len = sizeof(buf)-(offset-buf); |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
855 if (len == 0) error("Buffer to small for input line lengths"); |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
856 |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
857 len = gzread(in, offset, len); |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
858 |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
859 if (len == 0) break; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
860 if (len < 0) error(gzerror(in, &err)); |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
861 |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
862 char* cur = buf; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
863 char* end = offset+len; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
864 |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
865 for (char* eol; (cur<end) && (eol = find(cur, end, '\n')) < end; cur = eol + 1) |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
866 { |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
867 line = string(cur, eol); |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
868 if (line.back() == '\r') |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
869 line.pop_back(); |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
870 if (line.length() > 0) |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
871 { |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
872 replace(line.begin(), line.end(), ',', ' '); |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
873 istringstream ss(line); |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
874 if (ss >> sequence >> target >> org >> position >> strand >> count) |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
875 { |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
876 fstrand = (strand == 'F'); |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
877 process_kmer(sequence, target, org, position, fstrand); |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
878 tct++; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
879 } |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
880 } |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
881 //cout << line << endl; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
882 } |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
883 |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
884 // any trailing data in [eol, end) now is a partial line |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
885 offset = copy(cur, end, buf); |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
886 } |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
887 // trailing data without eol |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
888 line = string(buf, offset); |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
889 if (gzclose(in) != Z_OK) error("failed gzclose"); |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
890 |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
891 return tct; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
892 |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
893 } |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
894 |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
895 void process_fq(string rname) |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
896 { //read fastq file and extract sequence and quality lines |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
897 //passes them to process_qual |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
898 string line, seq, acc= "", lseq; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
899 ifstream fin; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
900 int target; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
901 int mod4=0; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
902 |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
903 fin.open(rname); |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
904 if (fin) |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
905 { |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
906 while (getline(fin, line)) |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
907 { |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
908 if (line.back() == '\r') |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
909 line.pop_back(); |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
910 stringstream linestream(line); |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
911 linestream >> lseq; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
912 if (lseq.length() > 0) |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
913 { |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
914 if (mod4 == 1) |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
915 { |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
916 seq = lseq; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
917 } |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
918 else if (mod4 == 0) |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
919 { |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
920 acc = lseq; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
921 } |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
922 else if (mod4 == 3) |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
923 { |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
924 process_qual(acc, seq, lseq); |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
925 } |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
926 mod4 = (mod4 + 1) % 4; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
927 } |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
928 } |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
929 fin.close(); |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
930 } |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
931 } |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
932 |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
933 void process_fa(string rname) |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
934 { //read unzipped fasta file and extract sequence |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
935 //passes read to process_read |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
936 string sequence = "", line, lseq, acc= ""; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
937 ifstream fin; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
938 int target; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
939 |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
940 fin.open(rname); |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
941 if (fin) |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
942 { |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
943 while (getline(fin, line)) |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
944 { |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
945 if (line.back() == '\r') |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
946 line.pop_back(); |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
947 stringstream linestream(line); |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
948 linestream >> lseq; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
949 if (lseq[0] == '>') |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
950 { |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
951 if (sequence.length() > KSIZE) |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
952 { |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
953 target = process_read(sequence, acc, 0, sequence.length()-1); |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
954 } |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
955 sequence = ""; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
956 acc = lseq.substr(1,lseq.length()-1); |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
957 } |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
958 else |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
959 sequence += lseq; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
960 } |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
961 fin.close(); |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
962 if (sequence.length() > KSIZE) |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
963 { //last one |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
964 target = process_read(sequence, acc, 0, sequence.length()-1); |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
965 } |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
966 } |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
967 else |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
968 { |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
969 cout << "nark " << rname << endl; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
970 } |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
971 } |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
972 |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
973 int main(int argc, char* argv[]) |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
974 { |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
975 int i,j,num_jobs; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
976 vtype target; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
977 ptype position; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
978 char strand; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
979 bool fstrand; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
980 string header, jstr, fname, line, r1name, r2name = "", oname1, oname2; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
981 int refi, targi, strni, count, org, num_targ; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
982 ifstream fin; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
983 string genus, species, acc, sequence, lseq, remainder; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
984 vector<vector<string>> fnames; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
985 vector<string> jnames; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
986 vector<int> jcounts; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
987 vector<string>::iterator it2; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
988 string trname, s1, s2, s3,s4; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
989 string argst, wdir = "", dname = "", jname = "", jfile = "", jdir = ""; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
990 string iname = "mitochondria_data.txt"; //text file of groups for probe design |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
991 string tname = "mitochondria_tree.txt"; //edges in taxonomy tree |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
992 string pname = "mitochondria_probes.txt.gz"; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
993 if (argc > 1) |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
994 { |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
995 for (i = 1; i < argc; i++) |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
996 { |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
997 argst = argv[i]; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
998 if (argst == "-wdir") |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
999 { |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
1000 wdir = argv[i+1]; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
1001 } |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
1002 if (argst == "-f1") |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
1003 { |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
1004 r1name =argv[i+1]; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
1005 } |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
1006 if (argst == "-f2") |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
1007 { |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
1008 r2name =argv[i+1]; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
1009 } |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
1010 } |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
1011 } |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
1012 iname = wdir + iname; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
1013 tname = wdir + tname; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
1014 pname = wdir + pname; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
1015 |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
1016 ht = new Hashtable(); |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
1017 //load strain list |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
1018 |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
1019 |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
1020 cout << "r1 " << r1name << endl; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
1021 cout << "r2 " << r2name << endl; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
1022 cout << "wd " << wdir << endl; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
1023 fin.open(iname); |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
1024 if (fin) |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
1025 { |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
1026 while (getline(fin, line)) |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
1027 { |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
1028 if (line.back() == '\r') |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
1029 line.pop_back(); |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
1030 if (line.length() > 1) |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
1031 { |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
1032 stringstream linestream(line); |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
1033 linestream >> targi >> acc; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
1034 accession.push_back(acc); |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
1035 targno.push_back(targi); |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
1036 if (targi > num_targ) |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
1037 num_targ = targi; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
1038 num_orgs++; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
1039 } |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
1040 } |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
1041 fin.close(); |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
1042 cout << num_orgs << " strains" << endl; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
1043 num_targ++; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
1044 } |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
1045 taxonomy = new Tree1(num_targ); |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
1046 |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
1047 //load taxonomy tree |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
1048 fin.open(tname); |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
1049 if (fin) |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
1050 { |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
1051 while (getline(fin, line)) |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
1052 { |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
1053 if (line.back() == '\r') |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
1054 line.pop_back(); |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
1055 stringstream linestream(line); |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
1056 linestream >> i >> j; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
1057 taxonomy->add_edge(i,j); |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
1058 } |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
1059 } |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
1060 else exit(1); |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
1061 fin.close(); |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
1062 cout << "tree loaded" << endl; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
1063 |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
1064 //load kmer database |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
1065 tct = process_gzkmers(gzopen(pname.c_str(), "rb")); |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
1066 cout << tct << " kmers loaded" << endl; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
1067 if (tct < 2) exit(1); |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
1068 |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
1069 //process reads |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
1070 gcount.resize(num_targ,0); |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
1071 ucount.resize(num_targ,0); |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
1072 fill(gcount.begin(), gcount.end(), 0); |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
1073 fill(ucount.begin(), ucount.end(), 0); |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
1074 kmer_seen.clear(); |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
1075 tct = 0; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
1076 s1 = ".fastq.gz"; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
1077 s2 = ".fasta"; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
1078 s3 = ".fastq"; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
1079 s4 = ".fasta.gz"; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
1080 int slen = r1name.length(); |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
1081 char sch = r1name.at(slen-1); |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
1082 cout << slen << " : " << sch << endl; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
1083 if (equal(s1.rbegin(), s1.rend(), r1name.rbegin())) |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
1084 { |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
1085 process_fqgz(gzopen(r1name.c_str(), "rb")); |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
1086 } |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
1087 else if (equal(s2.rbegin(), s2.rend(), r1name.rbegin())) |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
1088 { |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
1089 process_fa(r1name.c_str()); |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
1090 } |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
1091 else if (equal(s3.rbegin(), s3.rend(), r1name.rbegin())) |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
1092 { |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
1093 process_fq(r1name.c_str()); |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
1094 } |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
1095 else if (equal(s4.rbegin(), s4.rend(), r1name.rbegin())) |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
1096 { |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
1097 process_fagz(gzopen(r1name.c_str(), "rb")); |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
1098 } |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
1099 cout << tct << " reads loaded" << endl; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
1100 if (r2name.length() > 1 && r2name != "none") |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
1101 { //second file? |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
1102 if (equal(s1.rbegin(), s1.rend(), r2name.rbegin())) |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
1103 { |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
1104 process_fqgz(gzopen(r2name.c_str(), "rb")); |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
1105 cout << tct << " reads loaded" << endl; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
1106 } |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
1107 else if (equal(s2.rbegin(), s2.rend(), r2name.rbegin())) |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
1108 { |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
1109 process_fa(r2name.c_str()); |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
1110 } |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
1111 else if (equal(s3.rbegin(), s3.rend(), r2name.rbegin())) |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
1112 { |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
1113 process_fq(r2name.c_str()); |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
1114 } |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
1115 else if (equal(s4.rbegin(), s4.rend(), r2name.rbegin())) |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
1116 { |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
1117 process_fagz(gzopen(r2name.c_str(), "rb")); |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
1118 } |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
1119 cout << tct << " reads loaded" << endl; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
1120 } |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
1121 |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
1122 oname2 = wdir + "result.txt"; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
1123 ofstream out2(oname2); |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
1124 for (i=0; i<num_targ; i++) |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
1125 out2 << i << "," << gcount[i] << "," << ucount[i] << endl; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
1126 out2.close(); |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
1127 |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
1128 delete taxonomy; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
1129 delete ht; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
1130 |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
1131 return 0; |
1472b4f4fbfe
planemo upload commit a4aeec0f52695b1cbda85ed1faf85d66ba897ab2
cstrittmatter
parents:
diff
changeset
|
1132 } |