Mercurial > repos > youngkim > ezbamqc
view ezBAMQC/src/ezBAMQC/Mappability.cpp @ 0:dfa3745e5fd8
Uploaded
author | youngkim |
---|---|
date | Thu, 24 Mar 2016 17:12:52 -0400 |
parents | |
children |
line wrap: on
line source
// // Mappablity.cpp // BAMQC_c++ // // Created by Ying Jin on 10/28/15. // Copyright (c) 2015 Ying Jin. All rights reserved. // #include "Mappability.h" #include "htslib/sam.h" //#include <stdio> #include <iostream> #include <fstream> #include <stdlib.h> #include <vector> int bam_cigar2mapped_read_len(int n_cigar, const uint32_t *cigar) { int k, l; for (k = l = 0; k < n_cigar; ++k) { if (bam_cigar_op(cigar[k])==BAM_CMATCH) { l += bam_cigar_oplen(cigar[k]);} } return l; } void bam_cigar2Clip(int n_cigar, const uint32_t *cigar,std::vector<std::pair<int, int> > * clip_pos_len) { int k, pos; //std::cout << "number of cigar " << n_cigar <<std::endl; for (k = pos = 0; k < n_cigar; ++k) if (bam_cigar_op(cigar[k]) == BAM_CSOFT_CLIP ) { clip_pos_len->push_back(std::pair<int,int> (pos,bam_cigar_oplen(cigar[k]))); pos += bam_cigar_oplen(cigar[k]); } else { if (bam_cigar_op(cigar[k]) == BAM_CMATCH || bam_cigar_op(cigar[k]) == BAM_CINS || bam_cigar_op(cigar[k]) == BAM_CEQUAL || bam_cigar_op(cigar[k]) == BAM_CDIFF ) { pos += bam_cigar_oplen(cigar[k]);} } return ; } Clipping_prof::Clipping_prof(std::string outfile_data,std::string outfile_fig,int qcut,int sample_size) { clip_data_file = outfile_data + ".clipping_profile.xls"; clip_script_file = outfile_data + ".clipping_profile.r"; clip_fig_file = outfile_fig+ ".clipping_profile.png"; mapq_fig_file = outfile_fig+ ".mapq_profile.png"; mapq_data_file = outfile_data + ".mapq_profile.xls"; mapq_script_file = outfile_data + ".mapq_profile.r"; readlen_data_file = outfile_data + ".readlen_profile.xls"; readlen_fig_file = outfile_fig+ ".readlen_profile.png"; readlen_script_file = outfile_data + ".ReadLen_plot.r"; samplesize = sample_size; q_cutoff = qcut; read_len =0; } Clipping_prof::~Clipping_prof(){ } Clipping_prof::Clipping_prof(int mapq,int sample_size){ q_cutoff = mapq; samplesize = sample_size; read_len =0; } int Clipping_prof::get_max_read_len(){ return this->read_len; } void Clipping_prof::write(int total_read) { std::string read_pos =""; std::string clip_count = ""; std::string mapq_val = ""; std::string mapq_count = ""; std::string readlen_val = ""; std::string readlen_count = ""; int max_mapq = 0; if (read_len > MAX_READ_LEN) { std::cout << "read length greater than 10000." << std::endl; return; } try{ std::ofstream OUT ; OUT.open (readlen_data_file, std::ofstream::out); OUT << "Position\tRead_Total\tRead_Len_mapped\n"; //soft_clip_profile[0] = 1; for(auto& kv : readLen_list ){ readlen_val += ',' + std::to_string(kv.first); readlen_count += ',' + std::to_string(kv.second); OUT << kv.first << '\t' << total_read << '\t' << kv.second << '\n'; } OUT.close(); }catch(std::ofstream::failure e ){ std::cout << "Error in writing clipping profile." << std::endl; return; } try{ std::ofstream OUT ; OUT.open (clip_data_file, std::ofstream::out); OUT << "Position\tRead_Total\tRead_clipped\n"; //soft_clip_profile[0] = 1; for(int i=0; i< read_len; i++ ){ read_pos += ',' + std::to_string(i); clip_count += ',' + std::to_string(soft_clip_profile[i]); OUT << i << '\t' << total_read << '\t' << soft_clip_profile[i] << '\n'; } OUT.close(); }catch(std::ofstream::failure e ){ std::cout << "Error in writing clipping profile." << std::endl; return; } try{ if(read_pos != ""){ std::ofstream ROUT ; ROUT.open(clip_script_file,std::ofstream::out); ROUT << "png(\"" << clip_fig_file << "\",width=500,height=500,units=\"px\")\n"; ROUT << "read_pos=c(" << read_pos.substr(1) << ")\n" ; //ROUT <<'read_pos=read_pos[2:length(read_pos)]\n'; ROUT << "count=c(" << clip_count.substr(1) << ")\n"; //ROUT << "count=count[2:length(count)]\n"; ROUT << "plot(read_pos,1-(count/" << total_read <<"),pch=20,xlab=\"Position of reads\",ylab=\"Mappability\",col=\"blue\")\n"; ROUT << "dev.state=dev.off()\n"; ROUT.close(); } }catch(std::ofstream::failure e ){ std::cout << "Error in writing clipping script file." << std::endl; return; } try{ std::ofstream OUT; OUT.open(mapq_data_file,std::ofstream::out); OUT << "MAPQ\tRead_Total\tRead_with_mapq\n"; for(auto& kv :mapqlist ){ mapq_val += ',' + std::to_string(kv.first); mapq_count += ',' + std::to_string(kv.second); if (kv.first > max_mapq ){ max_mapq = kv.first; } OUT << std::to_string(kv.first) + '\t' + std::to_string(total_read) + '\t' + std::to_string(kv.second) +'\n'; } OUT.close(); }catch(std::ofstream::failure e ){ std::cout << "Error in writing mapping quality profile." << std::endl; return; } if (mapqlist.size() > 0 ){ try { if(mapq_val != ""){ std::ofstream ROUT; ROUT.open(mapq_script_file,std::ofstream::out); ROUT << "png(\"" << mapq_fig_file << "\",width=500,height=500,units=\"px\")\n"; ROUT << "mapq_val=c(" << mapq_val.substr(1) << ")\n"; ROUT<< "mapq_count=c(" << mapq_count.substr(1) << ")\n"; ROUT << "xname=c(\"<3\",\"<10\",\"<20\",\"<30\",\"30-" << std::to_string(max_mapq) << "\")\n"; ROUT <<"freq = rep(0,5)\n"; ROUT << "freq[1] = sum(mapq_count[which(mapq_val<3)])/" << std::to_string(total_read) << "*100\n"; ROUT <<"freq[2] = sum(mapq_count[which(mapq_val<10)])/" << std::to_string(total_read) << "*100\n"; ROUT << "freq[3] = sum(mapq_count[which(mapq_val<20)])/" << std::to_string(total_read) << "*100\n"; ROUT << "freq[4] = sum(mapq_count[which(mapq_val<30)])/" << std::to_string(total_read) << "*100\n"; ROUT<< "freq[5] = 100\n"; ROUT << "barplot(freq,beside=T,xlab=\"Mapping Quality\",border=\"NA\",space=1.5,main=\"Mapping Quality\",ylim=c(0,100),ylab=\"Cumulative proportion (%)\",col=\"blue\",names.arg=xname)\n"; ROUT << "dev.state=dev.off()\n"; ROUT.close(); } }catch(std::ofstream::failure e ){ std::cout << "Error in writing mapping quality script file." << std::endl; return; } } //std::cout << readLen_list.size() << std::endl; if (readLen_list.size() > 0) { try { std::ofstream ROUT; ROUT.open(readlen_script_file,std::ofstream::out); ROUT << "png(\"" << readlen_fig_file << "\",width=500,height=500,units=\"px\")\n"; ROUT << "readlen_val=c(" << readlen_val.substr(1) << ")\n"; ROUT<< "readlen_count=c(" << readlen_count.substr(1) << ")\n"; ROUT << "plot(readlen_val,(readlen_count/" << total_read <<"),pch=20,xlab=\"Mapped Read Length\",ylab=\"Proportion\",col=\"blue\")\n"; ROUT << "dev.state=dev.off()\n"; ROUT.close(); }catch(std::ofstream::failure e ){ std::cout << "Error in writing mapping quality script file." << std::endl; return; } } } void Clipping_prof::add(Clipping_prof * clip_prof) { for (auto& j : clip_prof->mapqlist){ std::map<int,int>::iterator it; it = mapqlist.find(j.first); if (it != mapqlist.end()){ mapqlist[j.first] += j.second; } else { mapqlist[j.first] = j.second; } } for (int i=0;i< MAX_READ_LEN; i++){ soft_clip_profile[i] += clip_prof->soft_clip_profile[i]; } if (clip_prof->get_max_read_len() > read_len) { read_len = clip_prof->get_max_read_len(); } for (auto& kv : clip_prof->readLen_list){ std::map<int,int>::iterator it; it = readLen_list.find(kv.first); if (it != readLen_list.end()){ readLen_list[kv.first] += kv.second; } else { readLen_list[kv.first] = kv.second; } } } void Clipping_prof::set_qual(int mapq) { std::map<int,int>::iterator it; it = mapqlist.find(mapq); if (it != mapqlist.end()) { mapqlist[mapq] += 1; } else{ mapqlist[mapq] = 1; } } void Clipping_prof::set(int len,uint32_t n_cigar, uint32_t * cigar,int mapq) { int r = bam_cigar2mapped_read_len(n_cigar,cigar); //std::cout << "read len " << r << std::endl; //std::cout << "number of cigar " << n_cigar <<std::endl; if(read_len < len) { read_len = len; } std::map<int,int>::iterator it; it = mapqlist.find(mapq); if (it != mapqlist.end()) { mapqlist[mapq] += 1; } else{ mapqlist[mapq] = 1; } //std::cout << "after mapq " << mapq << std::endl; it = readLen_list.find(r); if(it!=readLen_list.end()){ readLen_list[r] += 1; } else{ readLen_list[r] = 1; } //std::cout << "after read len " << r << std::endl; std::vector<std::pair<int, int> > soft_clip_pos_len ; bam_cigar2Clip(n_cigar,cigar,&soft_clip_pos_len); //std::cout << "soft clip len " << soft_clip_pos_len.size() << std::endl; if(soft_clip_pos_len.size() > 0) { for (auto p : soft_clip_pos_len){ int pos = p.first; int len = p.second; //std::cout << pos << "\t" << len << std::endl; for (int j = 0; j< len;j++){ soft_clip_profile[pos+j] +=1; } } } }