Mercurial > repos > matces > carpet_toolsuite
comparison carpet-src-1/tools/CARPET/com_uni.cpp @ 0:cdd489d98766
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
| author | matces |
|---|---|
| date | Tue, 07 Jun 2011 16:50:41 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:cdd489d98766 |
|---|---|
| 1 /* | |
| 2 * Copyright 2009 Matteo Cesaroni, Lucilla Luzi | |
| 3 * | |
| 4 * This program is free software; you can redistribute it and/or modify | |
| 5 * it under the terms of the GNU Lesser General Public License as published by | |
| 6 * the Free Software Foundation; either version 3 of the License, or (at your | |
| 7 * option) any later version. | |
| 8 * | |
| 9 * This program is distributed in the hope that it will be useful, | |
| 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
| 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
| 12 * GNU General Public License for more details. | |
| 13 | |
| 14 */ | |
| 15 | |
| 16 #include <iostream> | |
| 17 #include <fstream> | |
| 18 #include <string> | |
| 19 #include <vector> | |
| 20 #include <algorithm> | |
| 21 #include <sstream> | |
| 22 #include <deque> | |
| 23 #include <map> | |
| 24 #include <ctime> | |
| 25 #include <cstdlib> | |
| 26 | |
| 27 using namespace std; | |
| 28 | |
// Split `str` into the maximal runs of characters that do not occur in
// `delimiters` and append each run to `tokens`, in order of appearance.
//
// Runs of consecutive delimiters act as a single separator, so an empty
// token is never produced. `tokens` is appended to, not cleared.
inline void Tokenize(const std::string& str,
                     std::vector<std::string>& tokens,
                     const std::string& delimiters = " ")
{
    // Start of the first token: the first character that is not a delimiter.
    std::string::size_type token_begin = str.find_first_not_of(delimiters);

    while (token_begin != std::string::npos)
    {
        // The token runs up to (not including) the next delimiter.
        const std::string::size_type token_end =
            str.find_first_of(delimiters, token_begin);

        if (token_end == std::string::npos)
        {
            // No further delimiter: the token extends to the end of `str`.
            tokens.push_back(str.substr(token_begin));
            break;
        }

        tokens.push_back(str.substr(token_begin, token_end - token_begin));

        // Jump over the delimiter run that follows this token.
        token_begin = str.find_first_not_of(delimiters, token_end);
    }
}
| 48 | |
| 49 | |
// One record read from an input file. main() fills the fields from the
// tab-separated columns of a line (column numbers are 1-based):
//   inizioprobe <- column 4 (start), fineprobe <- column 5 (end),
//   campo1 <- column 2, campo2 <- column 3, score <- column 6,
//   strand <- column 7, index <- column 9.
// `file` records which input the record came from (1 or 2) when both inputs
// are merged into a single collection.
//
// The integer members are zero-initialised so that a Probe whose `file` (or
// any other integer field) is never assigned can still be copied into a
// container without reading an indeterminate value.
struct Probe {
    int inizioprobe;   // start coordinate
    int fineprobe;     // end coordinate
    std::string score;
    std::string campo1;
    std::string campo2;
    std::string strand;
    std::string index;
    int file;          // 1 = first input, 2 = second input, 0 = not set

    Probe() : inizioprobe(0), fineprobe(0), file(0) {}
};
| 60 | |
| 61 struct Comparatore2 { | |
| 62 bool operator()(const Probe& s1, const Probe& s2) const { | |
| 63 if (s1.inizioprobe < s2.inizioprobe) { | |
| 64 return true; | |
| 65 } | |
| 66 else if (s1.inizioprobe == s2.inizioprobe) { | |
| 67 if (s1.fineprobe < s2.fineprobe) { | |
| 68 return true; | |
| 69 } | |
| 70 else if (s1.fineprobe == s2.fineprobe){ | |
| 71 return true; | |
| 72 } | |
| 73 else { | |
| 74 return false; | |
| 75 } | |
| 76 | |
| 77 } | |
| 78 else { | |
| 79 return false; | |
| 80 } | |
| 81 | |
| 82 } | |
| 83 }; | |
| 84 | |
| 85 int main (int argc, char * const argv[]) { | |
| 86 | |
| 87 string concatenate=argv[3]; | |
| 88 int dist_t=atoi(argv[1]); | |
| 89 string choice=argv[2]; | |
| 90 | |
| 91 string name_out= argv[6]; | |
| 92 ofstream resfile; | |
| 93 resfile.open (name_out.c_str()); | |
| 94 | |
| 95 | |
| 96 | |
| 97 if (concatenate=="no" && choice!="union"){ | |
| 98 | |
| 99 //print "flank=$win , type=$type col6=%overlap, concatenate=$concatenate"; | |
| 100 if (choice=="common"){ | |
| 101 cout<<"flank="<<dist_t<<" , type="<<choice<<" , col6=%overlap, concatenate="<<concatenate; | |
| 102 } | |
| 103 if (choice=="unique"){ | |
| 104 cout<<"flank="<<dist_t<<" , type="<<choice<<" , col6=score o p-value"; | |
| 105 } | |
| 106 | |
| 107 string line; | |
| 108 Probe thisprobe; | |
| 109 Probe thisanno; | |
| 110 int overlap=0; | |
| 111 vector<string> arraypro; | |
| 112 map<string, vector<Probe> > seq; | |
| 113 map<string, vector<Probe> > annotation; | |
| 114 map<string, vector<Probe> >::iterator itseq; | |
| 115 | |
| 116 ifstream seque_file(argv[4]); | |
| 117 while (getline(seque_file, line)) { | |
| 118 string s4; | |
| 119 s4.assign(line, 0, 1); | |
| 120 if (line=="" || s4=="#"){ | |
| 121 continue; | |
| 122 } | |
| 123 arraypro.clear(); | |
| 124 Tokenize(line, arraypro, "\t"); | |
| 125 string chr2 = (arraypro[0].c_str()); | |
| 126 thisprobe.inizioprobe=atoi(arraypro[3].c_str()); | |
| 127 thisprobe.fineprobe=atoi(arraypro[4].c_str()); | |
| 128 thisprobe.campo1=(arraypro[1].c_str()); | |
| 129 thisprobe.campo2=(arraypro[2].c_str()); | |
| 130 thisprobe.score=(arraypro[5].c_str()); | |
| 131 thisprobe.strand=(arraypro[6].c_str()); | |
| 132 thisprobe.index=(arraypro[8].c_str()); | |
| 133 seq[chr2].push_back(thisprobe); | |
| 134 } | |
| 135 | |
| 136 | |
| 137 ifstream anno_file(argv[5]); | |
| 138 while (getline(anno_file, line)) { | |
| 139 string s4; | |
| 140 s4.assign(line, 0, 1); | |
| 141 if (line=="" || s4=="#"){ | |
| 142 continue; | |
| 143 } | |
| 144 arraypro.clear(); | |
| 145 Tokenize(line, arraypro, "\t"); | |
| 146 string chr3= (arraypro[0].c_str()); | |
| 147 thisanno.inizioprobe=atoi(arraypro[3].c_str()); | |
| 148 thisanno.fineprobe=atoi(arraypro[4].c_str()); | |
| 149 thisanno.campo1=(arraypro[1].c_str()); | |
| 150 thisanno.campo2=(arraypro[2].c_str()); | |
| 151 thisanno.score=(arraypro[5].c_str()); | |
| 152 thisanno.strand=(arraypro[6].c_str()); | |
| 153 thisanno.index=(arraypro[8].c_str()); | |
| 154 annotation[chr3].push_back(thisanno); | |
| 155 } | |
| 156 | |
| 157 | |
| 158 for ( itseq=seq.begin() ; itseq != seq.end(); itseq++ ){ | |
| 159 | |
| 160 vector <Probe> seq_chr = (*itseq).second; | |
| 161 vector <Probe> anno_chr = annotation[(*itseq).first]; | |
| 162 if(anno_chr.size()==0 && choice=="unique"){ | |
| 163 for (int i=0; i<seq_chr.size();i++){ | |
| 164 resfile<<(*itseq).first<<"\t"<<seq_chr[i].campo1<<"\t"<<seq_chr[i].campo2<<"\t"<<seq_chr[i].inizioprobe<<"\t"<<seq_chr[i].fineprobe<<"\t"<<seq_chr[i].score<<"\t"<<seq_chr[i].strand<<"\t.\t"<<seq_chr[i].index<<endl; | |
| 165 } | |
| 166 continue; | |
| 167 } | |
| 168 if(anno_chr.size()==0 && choice=="common"){ | |
| 169 continue; | |
| 170 } | |
| 171 sort (seq_chr.begin(),seq_chr.end(),Comparatore2()); | |
| 172 sort (anno_chr.begin(),anno_chr.end(),Comparatore2()); | |
| 173 | |
| 174 int finefine=0; | |
| 175 | |
| 176 for (int i=0; i<anno_chr.size();i++){ | |
| 177 if(anno_chr[i].fineprobe<=finefine){ | |
| 178 anno_chr[i].fineprobe=finefine; | |
| 179 } | |
| 180 if(anno_chr[i].fineprobe>finefine){ | |
| 181 finefine=anno_chr[i].fineprobe; | |
| 182 } | |
| 183 } | |
| 184 | |
| 185 for (int i=0; i<seq_chr.size();i++){ | |
| 186 int start_array=0; | |
| 187 int fine_array=anno_chr.size(); | |
| 188 int pos=1; | |
| 189 int trovato=0; | |
| 190 | |
| 191 while (pos>0){ | |
| 192 pos=(fine_array-start_array)/2; | |
| 193 int position=start_array+pos; | |
| 194 | |
| 195 if((seq_chr[i].inizioprobe-dist_t)<anno_chr[position].inizioprobe){ | |
| 196 fine_array=position; | |
| 197 } | |
| 198 if((seq_chr[i].inizioprobe-dist_t)>anno_chr[position].inizioprobe){ | |
| 199 start_array=position; | |
| 200 } | |
| 201 if((seq_chr[i].inizioprobe-dist_t)<=anno_chr[position].fineprobe && (seq_chr[i].fineprobe+dist_t)>=anno_chr[position].inizioprobe){ | |
| 202 if (choice=="common"){ | |
| 203 if (seq_chr[i].inizioprobe<=anno_chr[position].inizioprobe && seq_chr[i].fineprobe<=anno_chr[position].fineprobe){ | |
| 204 overlap=(seq_chr[i].fineprobe-anno_chr[position].inizioprobe)*100/(seq_chr[i].fineprobe-seq_chr[i].inizioprobe); | |
| 205 } | |
| 206 if (seq_chr[i].inizioprobe<=anno_chr[position].inizioprobe && seq_chr[i].fineprobe>=anno_chr[position].fineprobe){ | |
| 207 overlap=(anno_chr[position].fineprobe-anno_chr[position].inizioprobe)*100/(seq_chr[i].fineprobe-seq_chr[i].inizioprobe); | |
| 208 } | |
| 209 if (seq_chr[i].inizioprobe>=anno_chr[position].inizioprobe && seq_chr[i].fineprobe>=anno_chr[position].fineprobe){ | |
| 210 overlap=(anno_chr[position].fineprobe-seq_chr[i].inizioprobe)*100/(seq_chr[i].fineprobe-seq_chr[i].inizioprobe); | |
| 211 } | |
| 212 if (seq_chr[i].inizioprobe>=anno_chr[position].inizioprobe && seq_chr[i].fineprobe<=anno_chr[position].fineprobe){ | |
| 213 overlap=100; | |
| 214 } | |
| 215 if (overlap<0){ | |
| 216 overlap=-1; | |
| 217 } | |
| 218 resfile<<(*itseq).first<<"\t"<<seq_chr[i].campo1<<"\t"<<seq_chr[i].campo2<<"\t"<<seq_chr[i].inizioprobe<<"\t"<<seq_chr[i].fineprobe<<"\t"<<overlap<<"\t"<<seq_chr[i].strand<<"\t.\t"<<"ValueA:"<<seq_chr[i].score<<"~"<<"ValueB:"<<anno_chr[position].score<<endl; | |
| 219 } | |
| 220 trovato=1; | |
| 221 break; | |
| 222 } | |
| 223 } | |
| 224 if (choice=="unique" && trovato==0){ | |
| 225 resfile<<(*itseq).first<<"\t"<<seq_chr[i].campo1<<"\t"<<seq_chr[i].campo2<<"\t"<<seq_chr[i].inizioprobe<<"\t"<<seq_chr[i].fineprobe<<"\t"<<seq_chr[i].score<<"\t"<<seq_chr[i].strand<<"\t.\t"<<seq_chr[i].index<<endl; | |
| 226 } | |
| 227 } | |
| 228 } | |
| 229 } | |
| 230 | |
| 231 if (concatenate=="yes" || choice == "union"){ | |
| 232 | |
| 233 cout<<"flank="<<dist_t<<" , type="<<choice<<" , col6=#overlaping regions, concatenate="<<concatenate; | |
| 234 | |
| 235 string line; | |
| 236 Probe thisprobe; | |
| 237 Probe thisanno; | |
| 238 vector<string> arraypro; | |
| 239 map<string, vector<Probe> > seq; | |
| 240 map<string, vector<Probe> > annotation; | |
| 241 map<string, vector<Probe> >::iterator itseq; | |
| 242 string concatenate=argv[3]; | |
| 243 int dist_t=atoi(argv[1]); | |
| 244 string choice=argv[2]; | |
| 245 | |
| 246 ifstream seque_file(argv[4]); | |
| 247 while (getline(seque_file, line)) { | |
| 248 string s4; | |
| 249 s4.assign(line, 0, 1); | |
| 250 if (line=="" || s4=="#"){ | |
| 251 continue; | |
| 252 } | |
| 253 arraypro.clear(); | |
| 254 Tokenize(line, arraypro, "\t"); | |
| 255 string chr2 = (arraypro[0].c_str()); | |
| 256 thisprobe.inizioprobe=atoi(arraypro[3].c_str()); | |
| 257 thisprobe.fineprobe=atoi(arraypro[4].c_str()); | |
| 258 thisprobe.campo2=(arraypro[2].c_str()); | |
| 259 thisprobe.campo1=(arraypro[1].c_str()); | |
| 260 thisprobe.score=(arraypro[5].c_str()); | |
| 261 thisprobe.index=(arraypro[8].c_str()); | |
| 262 thisprobe.strand=(arraypro[6].c_str()); | |
| 263 thisprobe.file=1; | |
| 264 seq[chr2].push_back(thisprobe); | |
| 265 } | |
| 266 | |
| 267 ifstream anno_file(argv[5]); | |
| 268 while (getline(anno_file, line)) { | |
| 269 string s4; | |
| 270 s4.assign(line, 0, 1); | |
| 271 if (line=="" || s4=="#"){ | |
| 272 continue; | |
| 273 } | |
| 274 arraypro.clear(); | |
| 275 Tokenize(line, arraypro, "\t"); | |
| 276 string chr3= (arraypro[0].c_str()); | |
| 277 thisanno.inizioprobe=atoi(arraypro[3].c_str()); | |
| 278 thisanno.fineprobe=atoi(arraypro[4].c_str()); | |
| 279 thisanno.campo2=(arraypro[2].c_str()); | |
| 280 thisanno.campo1=(arraypro[1].c_str()); | |
| 281 thisanno.score=(arraypro[5].c_str()); | |
| 282 thisanno.index=(arraypro[8].c_str()); | |
| 283 thisanno.strand=(arraypro[6].c_str()); | |
| 284 thisanno.file=2; | |
| 285 seq[chr3].push_back(thisanno); | |
| 286 } | |
| 287 | |
| 288 int inizio; | |
| 289 int fine; | |
| 290 string annot; | |
| 291 int overlap; | |
| 292 int inizio_ann; | |
| 293 int fine_ann; | |
| 294 | |
| 295 for ( itseq=seq.begin() ; itseq != seq.end(); itseq++ ){ | |
| 296 | |
| 297 vector <Probe> seq_chr = (*itseq).second; | |
| 298 sort (seq_chr.begin(),seq_chr.end(),Comparatore2()); | |
| 299 | |
| 300 for (int i=0; i<seq_chr.size();i++){ | |
| 301 inizio = seq_chr[i].inizioprobe; | |
| 302 fine=seq_chr[i].fineprobe; | |
| 303 int file_t=0; | |
| 304 int file_t2=0; | |
| 305 int entrato=1; | |
| 306 int z=1; | |
| 307 if(seq_chr[i].file==1){ | |
| 308 file_t=1; | |
| 309 } | |
| 310 if(seq_chr[i].file==2){ | |
| 311 file_t2=1; | |
| 312 } | |
| 313 if(i==(seq_chr.size()-1)){ | |
| 314 if (choice=="union"){ | |
| 315 resfile<<(*itseq).first<<"\tfile_"<<seq_chr[i].file<<"\tunique\t"<<seq_chr[i].inizioprobe<<"\t"<<seq_chr[i].fineprobe<<"\t"<<seq_chr[i].score<<"\t"<<seq_chr[i].strand<<"\t.\t"<<seq_chr[i].index<<endl; | |
| 316 } | |
| 317 } | |
| 318 | |
| 319 //cout<<"x"<<(*itseq).first<<"\t"<<seq_chr[i].inizioprobe<<"\t"<<seq_chr[i].fineprobe<<"\t"<<seq_chr[i].file<<endl; | |
| 320 for (int y=i+1; y<seq_chr.size(); y++){ | |
| 321 if((inizio-dist_t)<=seq_chr[y].fineprobe && (fine+dist_t)>=seq_chr[y].inizioprobe){ | |
| 322 if(seq_chr[y].file==1){ | |
| 323 file_t=1; | |
| 324 } | |
| 325 if(seq_chr[y].file==2){ | |
| 326 file_t2=1; | |
| 327 } | |
| 328 if(seq_chr[y].fineprobe>fine){ | |
| 329 fine=seq_chr[y].fineprobe; | |
| 330 } | |
| 331 entrato=2; | |
| 332 i++; | |
| 333 z++; | |
| 334 } | |
| 335 if(seq_chr[y].inizioprobe>fine || y==seq_chr.size()-1){ | |
| 336 if (choice == "union" && entrato==1){ | |
| 337 resfile<<(*itseq).first<<"\tfile_"<<seq_chr[i].file<<"\tunique\t"<<inizio<<"\t"<<fine<<"\t"<<seq_chr[i].score<<"\t"<<seq_chr[i].strand<<"\t.\t"<<seq_chr[i].index<<endl; | |
| 338 } | |
| 339 if (choice == "union" && entrato==2){ | |
| 340 resfile<<(*itseq).first<<"\tcommon\tcommon\t"<<inizio<<"\t"<<fine<<"\t"<<z<<"\t.\t.\tcommon"<<endl; | |
| 341 } | |
| 342 if (choice == "common" && entrato==2 && file_t == 1 && file_t2 == 1 && concatenate=="yes"){ | |
| 343 resfile<<(*itseq).first<<"\t"<<seq_chr[i].campo1<<"\t"<<seq_chr[i].campo2<<"\t"<<inizio<<"\t"<<fine<<"\t"<<z<<"\t"<<seq_chr[i].strand<<"\t.\t"<<seq_chr[i].index<<endl; | |
| 344 } | |
| 345 break; | |
| 346 } | |
| 347 } | |
| 348 } | |
| 349 } | |
| 350 } | |
| 351 } | |
| 352 |
