comparison carpet-src-1/tools/CARPET/com_uni.cpp @ 0:cdd489d98766

Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
author matces
date Tue, 07 Jun 2011 16:50:41 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:cdd489d98766
1 /*
2 * Copyright 2009 Matteo Cesaroni, Lucilla Luzi
3 *
4 * This program is free software; ; you can redistribute it and/or modify
5 * it under the terms of the GNU Lesser General Public License as published by
6 * the Free Software Foundation; either version 3 of the License, or (at your
7 * option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13
14 */
15
16 #include <iostream>
17 #include <fstream>
18 #include <string>
19 #include <vector>
20 #include <algorithm>
21 #include <sstream>
22 #include <deque>
23 #include <map>
24 #include <ctime>
25 #include <cstdlib>
26
27 using namespace std;
28
29 inline void Tokenize(const string& str,
30 vector<string>& tokens,
31 const string& delimiters = " ")
32 {
33 // Skip delimiters at beginning.
34 string::size_type lastPos = str.find_first_not_of(delimiters, 0);
35 // Find first "non-delimiter".
36 string::size_type pos = str.find_first_of(delimiters, lastPos);
37
38 while (string::npos != pos || string::npos != lastPos)
39 {
40 // Found a token, add it to the vector.
41 tokens.push_back(str.substr(lastPos, pos - lastPos));
42 // Skip delimiters. Note the "not_of"
43 lastPos = str.find_first_not_of(delimiters, pos);
44 // Find next "non-delimiter"
45 pos = str.find_first_of(delimiters, lastPos);
46 }
47 }
48
49
50 typedef struct {
51 int inizioprobe;
52 int fineprobe;
53 string score;
54 string campo1;
55 string campo2;
56 string strand;
57 string index;
58 int file;
59 } Probe;
60
61 struct Comparatore2 {
62 bool operator()(const Probe& s1, const Probe& s2) const {
63 if (s1.inizioprobe < s2.inizioprobe) {
64 return true;
65 }
66 else if (s1.inizioprobe == s2.inizioprobe) {
67 if (s1.fineprobe < s2.fineprobe) {
68 return true;
69 }
70 else if (s1.fineprobe == s2.fineprobe){
71 return true;
72 }
73 else {
74 return false;
75 }
76
77 }
78 else {
79 return false;
80 }
81
82 }
83 };
84
85 int main (int argc, char * const argv[]) {
86
87 string concatenate=argv[3];
88 int dist_t=atoi(argv[1]);
89 string choice=argv[2];
90
91 string name_out= argv[6];
92 ofstream resfile;
93 resfile.open (name_out.c_str());
94
95
96
97 if (concatenate=="no" && choice!="union"){
98
99 //print "flank=$win , type=$type col6=%overlap, concatenate=$concatenate";
100 if (choice=="common"){
101 cout<<"flank="<<dist_t<<" , type="<<choice<<" , col6=%overlap, concatenate="<<concatenate;
102 }
103 if (choice=="unique"){
104 cout<<"flank="<<dist_t<<" , type="<<choice<<" , col6=score o p-value";
105 }
106
107 string line;
108 Probe thisprobe;
109 Probe thisanno;
110 int overlap=0;
111 vector<string> arraypro;
112 map<string, vector<Probe> > seq;
113 map<string, vector<Probe> > annotation;
114 map<string, vector<Probe> >::iterator itseq;
115
116 ifstream seque_file(argv[4]);
117 while (getline(seque_file, line)) {
118 string s4;
119 s4.assign(line, 0, 1);
120 if (line=="" || s4=="#"){
121 continue;
122 }
123 arraypro.clear();
124 Tokenize(line, arraypro, "\t");
125 string chr2 = (arraypro[0].c_str());
126 thisprobe.inizioprobe=atoi(arraypro[3].c_str());
127 thisprobe.fineprobe=atoi(arraypro[4].c_str());
128 thisprobe.campo1=(arraypro[1].c_str());
129 thisprobe.campo2=(arraypro[2].c_str());
130 thisprobe.score=(arraypro[5].c_str());
131 thisprobe.strand=(arraypro[6].c_str());
132 thisprobe.index=(arraypro[8].c_str());
133 seq[chr2].push_back(thisprobe);
134 }
135
136
137 ifstream anno_file(argv[5]);
138 while (getline(anno_file, line)) {
139 string s4;
140 s4.assign(line, 0, 1);
141 if (line=="" || s4=="#"){
142 continue;
143 }
144 arraypro.clear();
145 Tokenize(line, arraypro, "\t");
146 string chr3= (arraypro[0].c_str());
147 thisanno.inizioprobe=atoi(arraypro[3].c_str());
148 thisanno.fineprobe=atoi(arraypro[4].c_str());
149 thisanno.campo1=(arraypro[1].c_str());
150 thisanno.campo2=(arraypro[2].c_str());
151 thisanno.score=(arraypro[5].c_str());
152 thisanno.strand=(arraypro[6].c_str());
153 thisanno.index=(arraypro[8].c_str());
154 annotation[chr3].push_back(thisanno);
155 }
156
157
158 for ( itseq=seq.begin() ; itseq != seq.end(); itseq++ ){
159
160 vector <Probe> seq_chr = (*itseq).second;
161 vector <Probe> anno_chr = annotation[(*itseq).first];
162 if(anno_chr.size()==0 && choice=="unique"){
163 for (int i=0; i<seq_chr.size();i++){
164 resfile<<(*itseq).first<<"\t"<<seq_chr[i].campo1<<"\t"<<seq_chr[i].campo2<<"\t"<<seq_chr[i].inizioprobe<<"\t"<<seq_chr[i].fineprobe<<"\t"<<seq_chr[i].score<<"\t"<<seq_chr[i].strand<<"\t.\t"<<seq_chr[i].index<<endl;
165 }
166 continue;
167 }
168 if(anno_chr.size()==0 && choice=="common"){
169 continue;
170 }
171 sort (seq_chr.begin(),seq_chr.end(),Comparatore2());
172 sort (anno_chr.begin(),anno_chr.end(),Comparatore2());
173
174 int finefine=0;
175
176 for (int i=0; i<anno_chr.size();i++){
177 if(anno_chr[i].fineprobe<=finefine){
178 anno_chr[i].fineprobe=finefine;
179 }
180 if(anno_chr[i].fineprobe>finefine){
181 finefine=anno_chr[i].fineprobe;
182 }
183 }
184
185 for (int i=0; i<seq_chr.size();i++){
186 int start_array=0;
187 int fine_array=anno_chr.size();
188 int pos=1;
189 int trovato=0;
190
191 while (pos>0){
192 pos=(fine_array-start_array)/2;
193 int position=start_array+pos;
194
195 if((seq_chr[i].inizioprobe-dist_t)<anno_chr[position].inizioprobe){
196 fine_array=position;
197 }
198 if((seq_chr[i].inizioprobe-dist_t)>anno_chr[position].inizioprobe){
199 start_array=position;
200 }
201 if((seq_chr[i].inizioprobe-dist_t)<=anno_chr[position].fineprobe && (seq_chr[i].fineprobe+dist_t)>=anno_chr[position].inizioprobe){
202 if (choice=="common"){
203 if (seq_chr[i].inizioprobe<=anno_chr[position].inizioprobe && seq_chr[i].fineprobe<=anno_chr[position].fineprobe){
204 overlap=(seq_chr[i].fineprobe-anno_chr[position].inizioprobe)*100/(seq_chr[i].fineprobe-seq_chr[i].inizioprobe);
205 }
206 if (seq_chr[i].inizioprobe<=anno_chr[position].inizioprobe && seq_chr[i].fineprobe>=anno_chr[position].fineprobe){
207 overlap=(anno_chr[position].fineprobe-anno_chr[position].inizioprobe)*100/(seq_chr[i].fineprobe-seq_chr[i].inizioprobe);
208 }
209 if (seq_chr[i].inizioprobe>=anno_chr[position].inizioprobe && seq_chr[i].fineprobe>=anno_chr[position].fineprobe){
210 overlap=(anno_chr[position].fineprobe-seq_chr[i].inizioprobe)*100/(seq_chr[i].fineprobe-seq_chr[i].inizioprobe);
211 }
212 if (seq_chr[i].inizioprobe>=anno_chr[position].inizioprobe && seq_chr[i].fineprobe<=anno_chr[position].fineprobe){
213 overlap=100;
214 }
215 if (overlap<0){
216 overlap=-1;
217 }
218 resfile<<(*itseq).first<<"\t"<<seq_chr[i].campo1<<"\t"<<seq_chr[i].campo2<<"\t"<<seq_chr[i].inizioprobe<<"\t"<<seq_chr[i].fineprobe<<"\t"<<overlap<<"\t"<<seq_chr[i].strand<<"\t.\t"<<"ValueA:"<<seq_chr[i].score<<"~"<<"ValueB:"<<anno_chr[position].score<<endl;
219 }
220 trovato=1;
221 break;
222 }
223 }
224 if (choice=="unique" && trovato==0){
225 resfile<<(*itseq).first<<"\t"<<seq_chr[i].campo1<<"\t"<<seq_chr[i].campo2<<"\t"<<seq_chr[i].inizioprobe<<"\t"<<seq_chr[i].fineprobe<<"\t"<<seq_chr[i].score<<"\t"<<seq_chr[i].strand<<"\t.\t"<<seq_chr[i].index<<endl;
226 }
227 }
228 }
229 }
230
231 if (concatenate=="yes" || choice == "union"){
232
233 cout<<"flank="<<dist_t<<" , type="<<choice<<" , col6=#overlaping regions, concatenate="<<concatenate;
234
235 string line;
236 Probe thisprobe;
237 Probe thisanno;
238 vector<string> arraypro;
239 map<string, vector<Probe> > seq;
240 map<string, vector<Probe> > annotation;
241 map<string, vector<Probe> >::iterator itseq;
242 string concatenate=argv[3];
243 int dist_t=atoi(argv[1]);
244 string choice=argv[2];
245
246 ifstream seque_file(argv[4]);
247 while (getline(seque_file, line)) {
248 string s4;
249 s4.assign(line, 0, 1);
250 if (line=="" || s4=="#"){
251 continue;
252 }
253 arraypro.clear();
254 Tokenize(line, arraypro, "\t");
255 string chr2 = (arraypro[0].c_str());
256 thisprobe.inizioprobe=atoi(arraypro[3].c_str());
257 thisprobe.fineprobe=atoi(arraypro[4].c_str());
258 thisprobe.campo2=(arraypro[2].c_str());
259 thisprobe.campo1=(arraypro[1].c_str());
260 thisprobe.score=(arraypro[5].c_str());
261 thisprobe.index=(arraypro[8].c_str());
262 thisprobe.strand=(arraypro[6].c_str());
263 thisprobe.file=1;
264 seq[chr2].push_back(thisprobe);
265 }
266
267 ifstream anno_file(argv[5]);
268 while (getline(anno_file, line)) {
269 string s4;
270 s4.assign(line, 0, 1);
271 if (line=="" || s4=="#"){
272 continue;
273 }
274 arraypro.clear();
275 Tokenize(line, arraypro, "\t");
276 string chr3= (arraypro[0].c_str());
277 thisanno.inizioprobe=atoi(arraypro[3].c_str());
278 thisanno.fineprobe=atoi(arraypro[4].c_str());
279 thisanno.campo2=(arraypro[2].c_str());
280 thisanno.campo1=(arraypro[1].c_str());
281 thisanno.score=(arraypro[5].c_str());
282 thisanno.index=(arraypro[8].c_str());
283 thisanno.strand=(arraypro[6].c_str());
284 thisanno.file=2;
285 seq[chr3].push_back(thisanno);
286 }
287
288 int inizio;
289 int fine;
290 string annot;
291 int overlap;
292 int inizio_ann;
293 int fine_ann;
294
295 for ( itseq=seq.begin() ; itseq != seq.end(); itseq++ ){
296
297 vector <Probe> seq_chr = (*itseq).second;
298 sort (seq_chr.begin(),seq_chr.end(),Comparatore2());
299
300 for (int i=0; i<seq_chr.size();i++){
301 inizio = seq_chr[i].inizioprobe;
302 fine=seq_chr[i].fineprobe;
303 int file_t=0;
304 int file_t2=0;
305 int entrato=1;
306 int z=1;
307 if(seq_chr[i].file==1){
308 file_t=1;
309 }
310 if(seq_chr[i].file==2){
311 file_t2=1;
312 }
313 if(i==(seq_chr.size()-1)){
314 if (choice=="union"){
315 resfile<<(*itseq).first<<"\tfile_"<<seq_chr[i].file<<"\tunique\t"<<seq_chr[i].inizioprobe<<"\t"<<seq_chr[i].fineprobe<<"\t"<<seq_chr[i].score<<"\t"<<seq_chr[i].strand<<"\t.\t"<<seq_chr[i].index<<endl;
316 }
317 }
318
319 //cout<<"x"<<(*itseq).first<<"\t"<<seq_chr[i].inizioprobe<<"\t"<<seq_chr[i].fineprobe<<"\t"<<seq_chr[i].file<<endl;
320 for (int y=i+1; y<seq_chr.size(); y++){
321 if((inizio-dist_t)<=seq_chr[y].fineprobe && (fine+dist_t)>=seq_chr[y].inizioprobe){
322 if(seq_chr[y].file==1){
323 file_t=1;
324 }
325 if(seq_chr[y].file==2){
326 file_t2=1;
327 }
328 if(seq_chr[y].fineprobe>fine){
329 fine=seq_chr[y].fineprobe;
330 }
331 entrato=2;
332 i++;
333 z++;
334 }
335 if(seq_chr[y].inizioprobe>fine || y==seq_chr.size()-1){
336 if (choice == "union" && entrato==1){
337 resfile<<(*itseq).first<<"\tfile_"<<seq_chr[i].file<<"\tunique\t"<<inizio<<"\t"<<fine<<"\t"<<seq_chr[i].score<<"\t"<<seq_chr[i].strand<<"\t.\t"<<seq_chr[i].index<<endl;
338 }
339 if (choice == "union" && entrato==2){
340 resfile<<(*itseq).first<<"\tcommon\tcommon\t"<<inizio<<"\t"<<fine<<"\t"<<z<<"\t.\t.\tcommon"<<endl;
341 }
342 if (choice == "common" && entrato==2 && file_t == 1 && file_t2 == 1 && concatenate=="yes"){
343 resfile<<(*itseq).first<<"\t"<<seq_chr[i].campo1<<"\t"<<seq_chr[i].campo2<<"\t"<<inizio<<"\t"<<fine<<"\t"<<z<<"\t"<<seq_chr[i].strand<<"\t.\t"<<seq_chr[i].index<<endl;
344 }
345 break;
346 }
347 }
348 }
349 }
350 }
351 }
352