Mercurial > repos > matces > carpet_toolsuite
comparison carpet-src-1/tools/CARPET/com_uni.cpp @ 0:cdd489d98766
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
author | matces |
---|---|
date | Tue, 07 Jun 2011 16:50:41 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:cdd489d98766 |
---|---|
1 /* | |
2 * Copyright 2009 Matteo Cesaroni, Lucilla Luzi | |
3 * | |
4 * This program is free software; you can redistribute it and/or modify | |
5 * it under the terms of the GNU Lesser General Public License as published by | |
6 * the Free Software Foundation; either version 3 of the License, or (at your | |
7 * option) any later version. | |
8 * | |
9 * This program is distributed in the hope that it will be useful, | |
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
12 * GNU Lesser General Public License for more details. | |
13 | |
14 */ | |
15 | |
16 #include <iostream> | |
17 #include <fstream> | |
18 #include <string> | |
19 #include <vector> | |
20 #include <algorithm> | |
21 #include <sstream> | |
22 #include <deque> | |
23 #include <map> | |
24 #include <ctime> | |
25 #include <cstdlib> | |
26 | |
27 using namespace std; | |
28 | |
/*
 * Split `str` into the maximal runs of characters that are not in
 * `delimiters` and append each run to `tokens`.
 *
 * - `tokens` is appended to, never cleared.
 * - Consecutive delimiters, as well as leading and trailing ones, are
 *   collapsed: no empty token is ever produced.
 * - `delimiters` is a set of single characters; it defaults to a space.
 */
inline void Tokenize(const std::string& str,
                     std::vector<std::string>& tokens,
                     const std::string& delimiters = " ")
{
    // Start of the first token (npos when the string holds only delimiters).
    std::string::size_type tokenStart = str.find_first_not_of(delimiters, 0);

    while (tokenStart != std::string::npos)
    {
        const std::string::size_type tokenEnd = str.find_first_of(delimiters, tokenStart);
        if (tokenEnd == std::string::npos)
        {
            // No further delimiter: the token runs to the end of the string.
            tokens.push_back(str.substr(tokenStart));
            break;
        }
        tokens.push_back(str.substr(tokenStart, tokenEnd - tokenStart));
        // Jump over the delimiter run to the start of the next token.
        tokenStart = str.find_first_not_of(delimiters, tokenEnd);
    }
}
48 | |
49 | |
/*
 * One feature row read from an input file.
 *
 * main() fills the members from the tab-separated tokens of a line:
 * tokens 3 and 4 become the integer coordinates, the remaining members
 * keep the token text unchanged. The first token (used as the map key)
 * is not stored here.
 */
struct Probe {
    int inizioprobe;      // start coordinate (token 3)
    int fineprobe;        // end coordinate (token 4)
    std::string score;    // token 5
    std::string campo1;   // token 1
    std::string campo2;   // token 2
    std::string strand;   // token 6
    std::string index;    // token 8
    int file;             // 1 or 2: which input file the row came from (0 = not set)

    // Zero the integer members so a default-constructed Probe never carries
    // indeterminate values when it is copied into a container.
    Probe() : inizioprobe(0), fineprobe(0), file(0) {}
};
60 | |
61 struct Comparatore2 { | |
62 bool operator()(const Probe& s1, const Probe& s2) const { | |
63 if (s1.inizioprobe < s2.inizioprobe) { | |
64 return true; | |
65 } | |
66 else if (s1.inizioprobe == s2.inizioprobe) { | |
67 if (s1.fineprobe < s2.fineprobe) { | |
68 return true; | |
69 } | |
70 else if (s1.fineprobe == s2.fineprobe){ | |
71 return true; | |
72 } | |
73 else { | |
74 return false; | |
75 } | |
76 | |
77 } | |
78 else { | |
79 return false; | |
80 } | |
81 | |
82 } | |
83 }; | |
84 | |
85 int main (int argc, char * const argv[]) { | |
86 | |
87 string concatenate=argv[3]; | |
88 int dist_t=atoi(argv[1]); | |
89 string choice=argv[2]; | |
90 | |
91 string name_out= argv[6]; | |
92 ofstream resfile; | |
93 resfile.open (name_out.c_str()); | |
94 | |
95 | |
96 | |
97 if (concatenate=="no" && choice!="union"){ | |
98 | |
99 //print "flank=$win , type=$type col6=%overlap, concatenate=$concatenate"; | |
100 if (choice=="common"){ | |
101 cout<<"flank="<<dist_t<<" , type="<<choice<<" , col6=%overlap, concatenate="<<concatenate; | |
102 } | |
103 if (choice=="unique"){ | |
104 cout<<"flank="<<dist_t<<" , type="<<choice<<" , col6=score o p-value"; | |
105 } | |
106 | |
107 string line; | |
108 Probe thisprobe; | |
109 Probe thisanno; | |
110 int overlap=0; | |
111 vector<string> arraypro; | |
112 map<string, vector<Probe> > seq; | |
113 map<string, vector<Probe> > annotation; | |
114 map<string, vector<Probe> >::iterator itseq; | |
115 | |
116 ifstream seque_file(argv[4]); | |
117 while (getline(seque_file, line)) { | |
118 string s4; | |
119 s4.assign(line, 0, 1); | |
120 if (line=="" || s4=="#"){ | |
121 continue; | |
122 } | |
123 arraypro.clear(); | |
124 Tokenize(line, arraypro, "\t"); | |
125 string chr2 = (arraypro[0].c_str()); | |
126 thisprobe.inizioprobe=atoi(arraypro[3].c_str()); | |
127 thisprobe.fineprobe=atoi(arraypro[4].c_str()); | |
128 thisprobe.campo1=(arraypro[1].c_str()); | |
129 thisprobe.campo2=(arraypro[2].c_str()); | |
130 thisprobe.score=(arraypro[5].c_str()); | |
131 thisprobe.strand=(arraypro[6].c_str()); | |
132 thisprobe.index=(arraypro[8].c_str()); | |
133 seq[chr2].push_back(thisprobe); | |
134 } | |
135 | |
136 | |
137 ifstream anno_file(argv[5]); | |
138 while (getline(anno_file, line)) { | |
139 string s4; | |
140 s4.assign(line, 0, 1); | |
141 if (line=="" || s4=="#"){ | |
142 continue; | |
143 } | |
144 arraypro.clear(); | |
145 Tokenize(line, arraypro, "\t"); | |
146 string chr3= (arraypro[0].c_str()); | |
147 thisanno.inizioprobe=atoi(arraypro[3].c_str()); | |
148 thisanno.fineprobe=atoi(arraypro[4].c_str()); | |
149 thisanno.campo1=(arraypro[1].c_str()); | |
150 thisanno.campo2=(arraypro[2].c_str()); | |
151 thisanno.score=(arraypro[5].c_str()); | |
152 thisanno.strand=(arraypro[6].c_str()); | |
153 thisanno.index=(arraypro[8].c_str()); | |
154 annotation[chr3].push_back(thisanno); | |
155 } | |
156 | |
157 | |
158 for ( itseq=seq.begin() ; itseq != seq.end(); itseq++ ){ | |
159 | |
160 vector <Probe> seq_chr = (*itseq).second; | |
161 vector <Probe> anno_chr = annotation[(*itseq).first]; | |
162 if(anno_chr.size()==0 && choice=="unique"){ | |
163 for (int i=0; i<seq_chr.size();i++){ | |
164 resfile<<(*itseq).first<<"\t"<<seq_chr[i].campo1<<"\t"<<seq_chr[i].campo2<<"\t"<<seq_chr[i].inizioprobe<<"\t"<<seq_chr[i].fineprobe<<"\t"<<seq_chr[i].score<<"\t"<<seq_chr[i].strand<<"\t.\t"<<seq_chr[i].index<<endl; | |
165 } | |
166 continue; | |
167 } | |
168 if(anno_chr.size()==0 && choice=="common"){ | |
169 continue; | |
170 } | |
171 sort (seq_chr.begin(),seq_chr.end(),Comparatore2()); | |
172 sort (anno_chr.begin(),anno_chr.end(),Comparatore2()); | |
173 | |
174 int finefine=0; | |
175 | |
176 for (int i=0; i<anno_chr.size();i++){ | |
177 if(anno_chr[i].fineprobe<=finefine){ | |
178 anno_chr[i].fineprobe=finefine; | |
179 } | |
180 if(anno_chr[i].fineprobe>finefine){ | |
181 finefine=anno_chr[i].fineprobe; | |
182 } | |
183 } | |
184 | |
185 for (int i=0; i<seq_chr.size();i++){ | |
186 int start_array=0; | |
187 int fine_array=anno_chr.size(); | |
188 int pos=1; | |
189 int trovato=0; | |
190 | |
191 while (pos>0){ | |
192 pos=(fine_array-start_array)/2; | |
193 int position=start_array+pos; | |
194 | |
195 if((seq_chr[i].inizioprobe-dist_t)<anno_chr[position].inizioprobe){ | |
196 fine_array=position; | |
197 } | |
198 if((seq_chr[i].inizioprobe-dist_t)>anno_chr[position].inizioprobe){ | |
199 start_array=position; | |
200 } | |
201 if((seq_chr[i].inizioprobe-dist_t)<=anno_chr[position].fineprobe && (seq_chr[i].fineprobe+dist_t)>=anno_chr[position].inizioprobe){ | |
202 if (choice=="common"){ | |
203 if (seq_chr[i].inizioprobe<=anno_chr[position].inizioprobe && seq_chr[i].fineprobe<=anno_chr[position].fineprobe){ | |
204 overlap=(seq_chr[i].fineprobe-anno_chr[position].inizioprobe)*100/(seq_chr[i].fineprobe-seq_chr[i].inizioprobe); | |
205 } | |
206 if (seq_chr[i].inizioprobe<=anno_chr[position].inizioprobe && seq_chr[i].fineprobe>=anno_chr[position].fineprobe){ | |
207 overlap=(anno_chr[position].fineprobe-anno_chr[position].inizioprobe)*100/(seq_chr[i].fineprobe-seq_chr[i].inizioprobe); | |
208 } | |
209 if (seq_chr[i].inizioprobe>=anno_chr[position].inizioprobe && seq_chr[i].fineprobe>=anno_chr[position].fineprobe){ | |
210 overlap=(anno_chr[position].fineprobe-seq_chr[i].inizioprobe)*100/(seq_chr[i].fineprobe-seq_chr[i].inizioprobe); | |
211 } | |
212 if (seq_chr[i].inizioprobe>=anno_chr[position].inizioprobe && seq_chr[i].fineprobe<=anno_chr[position].fineprobe){ | |
213 overlap=100; | |
214 } | |
215 if (overlap<0){ | |
216 overlap=-1; | |
217 } | |
218 resfile<<(*itseq).first<<"\t"<<seq_chr[i].campo1<<"\t"<<seq_chr[i].campo2<<"\t"<<seq_chr[i].inizioprobe<<"\t"<<seq_chr[i].fineprobe<<"\t"<<overlap<<"\t"<<seq_chr[i].strand<<"\t.\t"<<"ValueA:"<<seq_chr[i].score<<"~"<<"ValueB:"<<anno_chr[position].score<<endl; | |
219 } | |
220 trovato=1; | |
221 break; | |
222 } | |
223 } | |
224 if (choice=="unique" && trovato==0){ | |
225 resfile<<(*itseq).first<<"\t"<<seq_chr[i].campo1<<"\t"<<seq_chr[i].campo2<<"\t"<<seq_chr[i].inizioprobe<<"\t"<<seq_chr[i].fineprobe<<"\t"<<seq_chr[i].score<<"\t"<<seq_chr[i].strand<<"\t.\t"<<seq_chr[i].index<<endl; | |
226 } | |
227 } | |
228 } | |
229 } | |
230 | |
231 if (concatenate=="yes" || choice == "union"){ | |
232 | |
233 cout<<"flank="<<dist_t<<" , type="<<choice<<" , col6=#overlaping regions, concatenate="<<concatenate; | |
234 | |
235 string line; | |
236 Probe thisprobe; | |
237 Probe thisanno; | |
238 vector<string> arraypro; | |
239 map<string, vector<Probe> > seq; | |
240 map<string, vector<Probe> > annotation; | |
241 map<string, vector<Probe> >::iterator itseq; | |
242 string concatenate=argv[3]; | |
243 int dist_t=atoi(argv[1]); | |
244 string choice=argv[2]; | |
245 | |
246 ifstream seque_file(argv[4]); | |
247 while (getline(seque_file, line)) { | |
248 string s4; | |
249 s4.assign(line, 0, 1); | |
250 if (line=="" || s4=="#"){ | |
251 continue; | |
252 } | |
253 arraypro.clear(); | |
254 Tokenize(line, arraypro, "\t"); | |
255 string chr2 = (arraypro[0].c_str()); | |
256 thisprobe.inizioprobe=atoi(arraypro[3].c_str()); | |
257 thisprobe.fineprobe=atoi(arraypro[4].c_str()); | |
258 thisprobe.campo2=(arraypro[2].c_str()); | |
259 thisprobe.campo1=(arraypro[1].c_str()); | |
260 thisprobe.score=(arraypro[5].c_str()); | |
261 thisprobe.index=(arraypro[8].c_str()); | |
262 thisprobe.strand=(arraypro[6].c_str()); | |
263 thisprobe.file=1; | |
264 seq[chr2].push_back(thisprobe); | |
265 } | |
266 | |
267 ifstream anno_file(argv[5]); | |
268 while (getline(anno_file, line)) { | |
269 string s4; | |
270 s4.assign(line, 0, 1); | |
271 if (line=="" || s4=="#"){ | |
272 continue; | |
273 } | |
274 arraypro.clear(); | |
275 Tokenize(line, arraypro, "\t"); | |
276 string chr3= (arraypro[0].c_str()); | |
277 thisanno.inizioprobe=atoi(arraypro[3].c_str()); | |
278 thisanno.fineprobe=atoi(arraypro[4].c_str()); | |
279 thisanno.campo2=(arraypro[2].c_str()); | |
280 thisanno.campo1=(arraypro[1].c_str()); | |
281 thisanno.score=(arraypro[5].c_str()); | |
282 thisanno.index=(arraypro[8].c_str()); | |
283 thisanno.strand=(arraypro[6].c_str()); | |
284 thisanno.file=2; | |
285 seq[chr3].push_back(thisanno); | |
286 } | |
287 | |
288 int inizio; | |
289 int fine; | |
290 string annot; | |
291 int overlap; | |
292 int inizio_ann; | |
293 int fine_ann; | |
294 | |
295 for ( itseq=seq.begin() ; itseq != seq.end(); itseq++ ){ | |
296 | |
297 vector <Probe> seq_chr = (*itseq).second; | |
298 sort (seq_chr.begin(),seq_chr.end(),Comparatore2()); | |
299 | |
300 for (int i=0; i<seq_chr.size();i++){ | |
301 inizio = seq_chr[i].inizioprobe; | |
302 fine=seq_chr[i].fineprobe; | |
303 int file_t=0; | |
304 int file_t2=0; | |
305 int entrato=1; | |
306 int z=1; | |
307 if(seq_chr[i].file==1){ | |
308 file_t=1; | |
309 } | |
310 if(seq_chr[i].file==2){ | |
311 file_t2=1; | |
312 } | |
313 if(i==(seq_chr.size()-1)){ | |
314 if (choice=="union"){ | |
315 resfile<<(*itseq).first<<"\tfile_"<<seq_chr[i].file<<"\tunique\t"<<seq_chr[i].inizioprobe<<"\t"<<seq_chr[i].fineprobe<<"\t"<<seq_chr[i].score<<"\t"<<seq_chr[i].strand<<"\t.\t"<<seq_chr[i].index<<endl; | |
316 } | |
317 } | |
318 | |
319 //cout<<"x"<<(*itseq).first<<"\t"<<seq_chr[i].inizioprobe<<"\t"<<seq_chr[i].fineprobe<<"\t"<<seq_chr[i].file<<endl; | |
320 for (int y=i+1; y<seq_chr.size(); y++){ | |
321 if((inizio-dist_t)<=seq_chr[y].fineprobe && (fine+dist_t)>=seq_chr[y].inizioprobe){ | |
322 if(seq_chr[y].file==1){ | |
323 file_t=1; | |
324 } | |
325 if(seq_chr[y].file==2){ | |
326 file_t2=1; | |
327 } | |
328 if(seq_chr[y].fineprobe>fine){ | |
329 fine=seq_chr[y].fineprobe; | |
330 } | |
331 entrato=2; | |
332 i++; | |
333 z++; | |
334 } | |
335 if(seq_chr[y].inizioprobe>fine || y==seq_chr.size()-1){ | |
336 if (choice == "union" && entrato==1){ | |
337 resfile<<(*itseq).first<<"\tfile_"<<seq_chr[i].file<<"\tunique\t"<<inizio<<"\t"<<fine<<"\t"<<seq_chr[i].score<<"\t"<<seq_chr[i].strand<<"\t.\t"<<seq_chr[i].index<<endl; | |
338 } | |
339 if (choice == "union" && entrato==2){ | |
340 resfile<<(*itseq).first<<"\tcommon\tcommon\t"<<inizio<<"\t"<<fine<<"\t"<<z<<"\t.\t.\tcommon"<<endl; | |
341 } | |
342 if (choice == "common" && entrato==2 && file_t == 1 && file_t2 == 1 && concatenate=="yes"){ | |
343 resfile<<(*itseq).first<<"\t"<<seq_chr[i].campo1<<"\t"<<seq_chr[i].campo2<<"\t"<<inizio<<"\t"<<fine<<"\t"<<z<<"\t"<<seq_chr[i].strand<<"\t.\t"<<seq_chr[i].index<<endl; | |
344 } | |
345 break; | |
346 } | |
347 } | |
348 } | |
349 } | |
350 } | |
351 } | |
352 |