Mercurial > repos > matces > carpet_toolsuite
view carpet-src-1/tools/CARPET/com_uni.cpp @ 0:cdd489d98766
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
author | matces |
---|---|
date | Tue, 07 Jun 2011 16:50:41 -0400 |
parents | |
children |
line wrap: on
line source
/*
 * Copyright 2009 Matteo Cesaroni, Lucilla Luzi
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or (at your
 * option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 */

#include <iostream>
#include <fstream>
#include <string>
#include <vector>
#include <algorithm>
#include <sstream>
#include <deque>
#include <map>
#include <ctime>
#include <cstdlib>
#include <cstddef>

using namespace std;

/**
 * Splits `str` on any character of `delimiters` and appends the pieces to
 * `tokens`.
 *
 * Runs of consecutive delimiters are treated as a single separator, so empty
 * fields are never produced. `tokens` is appended to, not cleared.
 */
inline void Tokenize(const string& str, vector<string>& tokens,
                     const string& delimiters = " ")
{
    // Skip delimiters at beginning.
    string::size_type lastPos = str.find_first_not_of(delimiters, 0);
    // Find first "non-delimiter".
    string::size_type pos = str.find_first_of(delimiters, lastPos);

    while (string::npos != pos || string::npos != lastPos) {
        // Found a token, add it to the vector.
        tokens.push_back(str.substr(lastPos, pos - lastPos));
        // Skip delimiters.  Note the "not_of".
        lastPos = str.find_first_not_of(delimiters, pos);
        // Find next "non-delimiter".
        pos = str.find_first_of(delimiters, lastPos);
    }
}

/**
 * One region read from an input file.
 *
 * Fields are filled from the tab-separated columns of a line:
 * column 1 -> campo1, column 2 -> campo2, column 3 -> inizioprobe (start),
 * column 4 -> fineprobe (end), column 5 -> score, column 6 -> strand,
 * column 8 -> index. Column 0 (used as the grouping key) and column 7 are not
 * stored here.
 */
struct Probe {
    int inizioprobe;  // region start
    int fineprobe;    // region end
    string score;
    string campo1;
    string campo2;
    string strand;
    string index;
    int file;         // 1 = first input file, 2 = second input file
};

/**
 * Orders regions by start, then by end.
 *
 * Two regions with identical start and end compare as equivalent (neither is
 * "less"); std::sort requires this strict weak ordering and has undefined
 * behaviour otherwise.
 */
struct Comparatore2 {
    bool operator()(const Probe& s1, const Probe& s2) const
    {
        if (s1.inizioprobe != s2.inizioprobe) {
            return s1.inizioprobe < s2.inizioprobe;
        }
        return s1.fineprobe < s2.fineprobe;
    }
};

// Regions grouped by the value of their first column.
typedef map<string, vector<Probe> > ProbeMap;

// Highest column index read from a line is 8, so 9 fields are required.
static const size_t kMinFields = 9;

/**
 * Reads the regions of `path` into `byChrom`, tagging each with `fileTag`.
 *
 * Empty lines and lines starting with '#' are ignored. Lines with fewer than
 * kMinFields tab-separated fields are skipped without a diagnostic: they
 * cannot be parsed, and nothing is written to stderr for them so that callers
 * which treat stderr output as a failure are not affected.
 *
 * @return false when the file cannot be opened, true otherwise.
 */
static bool loadProbes(const char* path, int fileTag, ProbeMap& byChrom)
{
    ifstream input(path);
    if (!input) {
        return false;
    }

    string line;
    vector<string> fields;
    while (getline(input, line)) {
        if (line.empty() || line[0] == '#') {
            continue;
        }
        fields.clear();
        Tokenize(line, fields, "\t");
        if (fields.size() < kMinFields) {
            continue;
        }

        Probe region;
        region.inizioprobe = atoi(fields[3].c_str());
        region.fineprobe = atoi(fields[4].c_str());
        region.campo1 = fields[1];
        region.campo2 = fields[2];
        region.score = fields[5];
        region.strand = fields[6];
        region.index = fields[8];
        region.file = fileTag;
        byChrom[fields[0]].push_back(region);
    }
    return true;
}

/** Writes a region unchanged, with "." in the eighth column. */
static void writeProbeLine(ostream& out, const string& chrom, const Probe& probe)
{
    out << chrom << "\t" << probe.campo1 << "\t" << probe.campo2 << "\t"
        << probe.inizioprobe << "\t" << probe.fineprobe << "\t" << probe.score
        << "\t" << probe.strand << "\t.\t" << probe.index << '\n';
}

/** Writes a "union" line for a region that was not merged with any other. */
static void writeUnionUnique(ostream& out, const string& chrom,
                             const Probe& probe, int start, int end)
{
    out << chrom << "\tfile_" << probe.file << "\tunique\t" << start << "\t"
        << end << "\t" << probe.score << "\t" << probe.strand << "\t.\t"
        << probe.index << '\n';
}

/**
 * Percentage of `probe` covered by `anno`, using integer arithmetic.
 *
 * Returns 100 when the probe lies inside the annotation and -1 when the
 * computed value is negative (the two only matched through the flank).
 * A zero-length probe that is not contained in the annotation also yields -1;
 * the percentage is undefined there and the division must not be attempted.
 */
static int overlapPercent(const Probe& probe, const Probe& anno)
{
    const bool startsAtOrAfter = probe.inizioprobe >= anno.inizioprobe;
    const bool endsAtOrBefore = probe.fineprobe <= anno.fineprobe;
    const bool endsAtOrAfter = probe.fineprobe >= anno.fineprobe;

    if (startsAtOrAfter && endsAtOrBefore) {
        return 100;
    }

    const int length = probe.fineprobe - probe.inizioprobe;
    if (length == 0) {
        return -1;
    }

    int percent;
    if (startsAtOrAfter && endsAtOrAfter) {
        // Probe overhangs the annotation on the right.
        percent = (anno.fineprobe - probe.inizioprobe) * 100 / length;
    } else if (endsAtOrAfter) {
        // Probe spans the whole annotation.
        percent = (anno.fineprobe - anno.inizioprobe) * 100 / length;
    } else {
        // Probe overhangs the annotation on the left.
        percent = (probe.fineprobe - anno.inizioprobe) * 100 / length;
    }
    return percent < 0 ? -1 : percent;
}

/**
 * For every region of `seq`, looks for a region of `annotation` with the same
 * key whose interval intersects it once widened by `dist_t` on both sides.
 *
 * With choice "common" each matched region is written with its overlap
 * percentage and both scores; with choice "unique" each unmatched region is
 * written unchanged. Both maps are sorted in place, and annotation ends are
 * overwritten (see below).
 */
static void reportAgainstAnnotation(ProbeMap& seq, ProbeMap& annotation,
                                    const string& choice, int dist_t,
                                    ostream& out)
{
    for (ProbeMap::iterator itseq = seq.begin(); itseq != seq.end(); ++itseq) {
        const string& chrom = itseq->first;
        vector<Probe>& probes = itseq->second;

        // find() rather than operator[]: a missing key must not be inserted.
        ProbeMap::iterator annoIt = annotation.find(chrom);
        if (annoIt == annotation.end() || annoIt->second.empty()) {
            if (choice == "unique") {
                for (size_t i = 0; i < probes.size(); ++i) {
                    writeProbeLine(out, chrom, probes[i]);
                }
            }
            continue;
        }
        vector<Probe>& annos = annoIt->second;

        sort(probes.begin(), probes.end(), Comparatore2());
        sort(annos.begin(), annos.end(), Comparatore2());

        // Replace every end by the running maximum so that ends, like starts,
        // never decrease; the bisection below depends on that.
        // NOTE(review): the match (and the reported overlap) is therefore
        // computed against the widened end, not the annotation's own end.
        int furthestEnd = 0;
        for (size_t i = 0; i < annos.size(); ++i) {
            furthestEnd = max(furthestEnd, annos[i].fineprobe);
            annos[i].fineprobe = furthestEnd;
        }

        for (size_t i = 0; i < probes.size(); ++i) {
            const Probe& probe = probes[i];
            const int queryStart = probe.inizioprobe - dist_t;
            const int queryEnd = probe.fineprobe + dist_t;

            int lo = 0;
            int hi = static_cast<int>(annos.size());
            int half = 1;
            bool found = false;
            while (half > 0) {
                // The body also runs once with half == 0 so that the element
                // at `lo` is examined before giving up.
                half = (hi - lo) / 2;
                const int position = lo + half;
                const Probe& anno = annos[position];

                if (queryStart < anno.inizioprobe) {
                    hi = position;
                } else if (queryStart > anno.inizioprobe) {
                    lo = position;
                }

                if (queryStart <= anno.fineprobe && queryEnd >= anno.inizioprobe) {
                    if (choice == "common") {
                        out << chrom << "\t" << probe.campo1 << "\t"
                            << probe.campo2 << "\t" << probe.inizioprobe << "\t"
                            << probe.fineprobe << "\t"
                            << overlapPercent(probe, anno) << "\t"
                            << probe.strand << "\t.\t" << "ValueA:"
                            << probe.score << "~" << "ValueB:" << anno.score
                            << '\n';
                    }
                    found = true;
                    break;
                }

                if (queryStart == anno.inizioprobe) {
                    // Neither bound moved, so another pass would examine the
                    // same element forever.
                    break;
                }
            }

            if (choice == "unique" && !found) {
                writeProbeLine(out, chrom, probe);
            }
        }
    }
}

/**
 * Merges neighbouring regions of `regions` (both input files pooled) whose
 * intervals intersect once widened by `dist_t`, and writes the result.
 *
 * choice "union": unmerged regions are written as "unique", merged groups as
 * "common" with the number of members in column 6.
 * choice "common" with concatenate "yes": only merged groups containing
 * regions from both files are written.
 * `regions` is sorted in place.
 */
static void reportMerged(ProbeMap& regions, const string& choice,
                         const string& concatenate, int dist_t, ostream& out)
{
    for (ProbeMap::iterator it = regions.begin(); it != regions.end(); ++it) {
        const string& chrom = it->first;
        vector<Probe>& sorted = it->second;
        sort(sorted.begin(), sorted.end(), Comparatore2());

        for (size_t i = 0; i < sorted.size(); i++) {
            const int inizio = sorted[i].inizioprobe;
            int fine = sorted[i].fineprobe;
            bool fromFile1 = (sorted[i].file == 1);
            bool fromFile2 = (sorted[i].file == 2);
            bool merged = false;
            int members = 1;

            // The last region has no successor, so the scan below never
            // reports it; emit it here.
            if (i == sorted.size() - 1 && choice == "union") {
                writeUnionUnique(out, chrom, sorted[i], sorted[i].inizioprobe,
                                 sorted[i].fineprobe);
            }

            for (size_t y = i + 1; y < sorted.size(); y++) {
                const Probe& next = sorted[y];

                if ((inizio - dist_t) <= next.fineprobe &&
                    (fine + dist_t) >= next.inizioprobe) {
                    if (next.file == 1) {
                        fromFile1 = true;
                    }
                    if (next.file == 2) {
                        fromFile2 = true;
                    }
                    if (next.fineprobe > fine) {
                        fine = next.fineprobe;
                    }
                    merged = true;
                    // Advance the outer index so an absorbed region is not
                    // used as the start of a new group.
                    i++;
                    members++;
                }

                if (next.inizioprobe > fine || y == sorted.size() - 1) {
                    // `i` now designates the last region absorbed into the
                    // group (or the starting region when nothing merged).
                    const Probe& last = sorted[i];
                    if (choice == "union" && !merged) {
                        writeUnionUnique(out, chrom, last, inizio, fine);
                    }
                    if (choice == "union" && merged) {
                        out << chrom << "\tcommon\tcommon\t" << inizio << "\t"
                            << fine << "\t" << members << "\t.\t.\tcommon"
                            << '\n';
                    }
                    if (choice == "common" && merged && fromFile1 &&
                        fromFile2 && concatenate == "yes") {
                        out << chrom << "\t" << last.campo1 << "\t"
                            << last.campo2 << "\t" << inizio << "\t" << fine
                            << "\t" << members << "\t" << last.strand
                            << "\t.\t" << last.index << '\n';
                    }
                    break;
                }
            }
        }
    }
}

/**
 * Entry point.
 *
 * Arguments: argv[1] flank distance, argv[2] choice ("common", "unique" or
 * "union"), argv[3] concatenate ("yes" or "no"), argv[4] first input file,
 * argv[5] second input file, argv[6] output file.
 *
 * A one-line summary of the settings is written to stdout; results go to the
 * output file. Returns 1 when arguments are missing or a file cannot be
 * opened.
 */
int main (int argc, char * const argv[])
{
    if (argc < 7) {
        cerr << "Usage: com_uni <flank> <common|unique|union> <yes|no> "
                "<file_1> <file_2> <output>" << endl;
        return 1;
    }

    const int dist_t = atoi(argv[1]);
    const string choice = argv[2];
    const string concatenate = argv[3];
    const string name_out = argv[6];

    ofstream resfile(name_out.c_str());
    if (!resfile) {
        cerr << "Cannot open output file: " << name_out << endl;
        return 1;
    }

    if (concatenate == "no" && choice != "union") {
        if (choice == "common") {
            cout << "flank=" << dist_t << " , type=" << choice
                 << " , col6=%overlap, concatenate=" << concatenate;
        }
        if (choice == "unique") {
            cout << "flank=" << dist_t << " , type=" << choice
                 << " , col6=score o p-value";
        }

        ProbeMap seq;
        ProbeMap annotation;
        if (!loadProbes(argv[4], 1, seq)) {
            cerr << "Cannot open input file: " << argv[4] << endl;
            return 1;
        }
        if (!loadProbes(argv[5], 2, annotation)) {
            cerr << "Cannot open input file: " << argv[5] << endl;
            return 1;
        }
        reportAgainstAnnotation(seq, annotation, choice, dist_t, resfile);
    }

    if (concatenate == "yes" || choice == "union") {
        cout << "flank=" << dist_t << " , type=" << choice
             << " , col6=#overlaping regions, concatenate=" << concatenate;

        // Both files are pooled into one map; Probe::file records the origin.
        ProbeMap pooled;
        if (!loadProbes(argv[4], 1, pooled)) {
            cerr << "Cannot open input file: " << argv[4] << endl;
            return 1;
        }
        if (!loadProbes(argv[5], 2, pooled)) {
            cerr << "Cannot open input file: " << argv[5] << endl;
            return 1;
        }
        reportMerged(pooled, choice, concatenate, dist_t, resfile);
    }

    return 0;
}