Mercurial > repos > vipints > deseq_hts
diff deseq-hts_1.0/mex/read.cpp @ 0:94a108763d9e draft
deseq-hts version 1.0 wraps the DESeq 1.6.0
author | vipints |
---|---|
date | Wed, 09 May 2012 20:43:47 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/deseq-hts_1.0/mex/read.cpp Wed May 09 20:43:47 2012 -0400 @@ -0,0 +1,214 @@ +/* +* This program is free software; you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation; either version 3 of the License, or +* (at your option) any later version. +* +* Written (W) 2010-2011 Jonas Behr, Regina Bohnert, Gunnar Raetsch +* Copyright (C) 2010-2011 Max Planck Society +*/ + + +#include "read.h" + +CRead::CRead() { + read_id = NULL; + sam_line = NULL; + start_pos = 0; + matches = 0; + mismatches = 0; + multiple_alignment_index = 0; + strand = NULL; + left = false; + right = false; + reverse = false; +} + +CRead::~CRead() { + delete[] read_id; + delete[] sam_line; + delete[] strand; +} + +/* + * Augments 'coverage' array at the positions covered by the read in the queried interval. + */ +void CRead::get_coverage(int p_start_pos, int p_end_pos, uint32_t* coverage) +{ + // block1 block2 + // |=====|======|============|===========|======|====| + // ^ ^ ^ + // p_start_pos | p_end_pos + // start_pos + // |0000001111111111111000000000000111111100000| + // *coverage + int len = p_end_pos-p_start_pos+1; + for (uint32_t n = 0; n < block_starts.size(); n++) { + int32_t from, to; + from = block_starts[n]+start_pos-p_start_pos; + to = block_starts[n]+start_pos-p_start_pos+block_lengths[n]; + if (from < 0) + from = 0; + if (to < 0) + continue; + else if (to > len) + to = len; + for (int bp=from; bp<to; bp++) { + coverage[bp]++; + } + } +} +int CRead::get_last_position() +{ + if (block_starts.size()>0) // this if for some reason zero in case of softclips + return start_pos+block_starts.back()+block_lengths.back(); + return -1; +} + +/* + * Adds the column indices (= positions) covered by the read to 'reads' array in current row (= read). + * These indices can be used to build up a sparse matrix of reads x positions. + */ +void CRead::get_reads_sparse(int p_start_pos, int p_end_pos, double* reads, uint32_t & reads_c, uint32_t row_idx) { + int len = p_end_pos-p_start_pos+1; + for (uint32_t n = 0; n < block_starts.size(); n++) { + uint32_t from, to; + if (block_starts[n]+start_pos-p_start_pos >= 0) + from = block_starts[n]+start_pos-p_start_pos; + else + from = 0; + if (block_starts[n]+start_pos-p_start_pos+block_lengths[n] >= 0) + to = block_starts[n]+start_pos-p_start_pos+block_lengths[n]; + else + to = 0; + for (int bp=from; bp<to&bp<len; bp++) { + reads[reads_c] = row_idx+1; // row indices for sparse matrix + reads[reads_c+1] = bp+1; // column indices for sparse matrix + reads_c += 2; + } + } +} + +void CRead::get_acc_splice_sites(vector<int>* acc_pos) +{ + if (strand[0]=='+') + { + for (int k=1;k<block_starts.size(); k++) + acc_pos->push_back(start_pos+block_starts[k]-1); + } + else if (strand[0]=='-') + { + for (int k=1;k<block_starts.size(); k++) + acc_pos->push_back(start_pos+block_starts[k-1]+block_lengths[k-1]-2); + } +} + +void CRead::get_don_splice_sites(vector<int>* don_pos) +{ + + if (strand[0]=='+') + { + for (int k=1;k<block_starts.size(); k++) + don_pos->push_back(start_pos+block_starts[k-1]+block_lengths[k-1]-2); + } + else if (strand[0]=='-') + { + for (int k=1;k<block_starts.size(); k++) + don_pos->push_back(start_pos+block_starts[k]-1); + } +} + +int CRead::min_exon_len() +{ + int min = 1e8; + for (int k=0;k<block_starts.size(); k++) + if (block_lengths[k]<min) + min = block_lengths[k]; + return min; +} + +int CRead::max_intron_len() +{ + int max = 0; + for (int k=1;k<block_starts.size(); k++) + if (block_starts[k]-(block_starts[k-1]+block_lengths[k-1])>max) + max = block_starts[k]-(block_starts[k-1]+block_lengths[k-1]); + return max; +} + +/* + * Adds start and end of introns in the read consecutively to the 'introns' vector. + */ +void CRead::get_introns(vector<int>* introns) +{ + for (int i=1; i<block_starts.size(); i++) + { + int istart = block_starts[i-1]+block_lengths[i-1]+start_pos; + int iend = block_starts[i]+start_pos-1; + introns->push_back(istart); + introns->push_back(iend); + //fprintf(stdout, "%i intron: %d->%d\n", i, istart, iend); + } +} +void CRead::get_introns(vector<uint32_t>* intron_starts, vector<uint32_t>* intron_ends, vector<uint32_t>* block_len1, vector<uint32_t>* block_len2) +{ + for (int i=1; i<block_starts.size(); i++) + { + uint32_t istart = block_starts[i-1]+block_lengths[i-1]+start_pos; + uint32_t iend = block_starts[i]+start_pos-1; + intron_starts->push_back(istart); + intron_ends->push_back(iend); + block_len1->push_back(block_lengths[i-1]) ; + block_len2->push_back(block_lengths[i]) ; + } +} + +bool CRead::operator==(const CRead& read) const +{ + if (block_starts.size()!=read.block_starts.size()) + return false; + if (block_lengths.size()!=read.block_lengths.size()) + return false; + if (start_pos!=read.start_pos) + return false; + if (strand[0] != read.strand[0]) + return false; + for (int i=0; i<block_starts.size(); i++) + if (block_starts[i]!=read.block_starts[i]) + return false; + for (int i=0; i<block_lengths.size(); i++) + if (block_lengths[i]!=read.block_lengths[i]) + return false; + return true; +} + +void CRead::print() +{ + fprintf(stdout, "start_pos: %d\n", start_pos); + fprintf(stdout, "starts:"); + for (int i=0; i<block_starts.size(); i++) + { + fprintf(stdout, " %d", block_starts[i]); + } + fprintf(stdout, "\n"); + + fprintf(stdout, "lengths:"); + for (int i=0; i<block_starts.size(); i++) + { + fprintf(stdout, " %d", block_lengths[i]); + } + fprintf(stdout, "\n"); +} + +void CRead::set_strand(char s) +{ + delete[] strand; + strand = new char [2]; + strand[0] = s; + strand[1] = '0'; +} + +int CRead::get_mismatches() +{ + return mismatches ; +}