Mercurial > repos > xilinxu > xilinxu
diff fastx_toolkit-0.0.6/src/libfastx/sequence_alignment.h @ 3:997f5136985f draft default tip
Uploaded
author | xilinxu |
---|---|
date | Thu, 14 Aug 2014 04:52:17 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fastx_toolkit-0.0.6/src/libfastx/sequence_alignment.h Thu Aug 14 04:52:17 2014 -0400 @@ -0,0 +1,250 @@ +/* + FASTX-toolkit - FASTA/FASTQ preprocessing tools. + Copyright (C) 2009 A. Gordon (gordon@cshl.edu) + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as + published by the Free Software Foundation, either version 3 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ +#ifndef __SEQUENCE_ALIGNMENT_HEADER__ +#define __SEQUENCE_ALIGNMENT_HEADER__ + +#include <err.h> + +struct SequenceAlignmentResults +{ + int alignment_found ; + + size_t query_size ; + size_t query_start ; + size_t query_end ; + + size_t target_size ; + size_t target_start ; + size_t target_end ; + + size_t gaps; + size_t neutral_matches ; + size_t matches ; + size_t mismatches ; + + float score ; + + std::string query_alignment ; + std::string target_alignment ; + + std::string query_sequence ; + std::string target_sequence ; + + SequenceAlignmentResults() : + alignment_found(false), + query_size(0), + query_start(0), + query_end(0), + + target_size(0), + target_start(0), + target_end(0), + + gaps(0), + neutral_matches(0), + matches(0), + mismatches(0), + + score(0) + { + } + + void print( std::ostream& ostrm = std::cout ) const; + + virtual ~SequenceAlignmentResults() {} +} ; + + +class SequenceAlignment +{ +protected: + typedef float score_type; + + typedef enum { + FROM_UPPER = 1, + FROM_LEFT = 2, + FROM_UPPER_LEFT = 3, + FROM_NOWHERE = 4 + //STOP_MARKER = 5 + } DIRECTION ; + + std::vector < score_type > query_border ; + std::vector < score_type > target_border ; + + std::vector< std::vector< score_type > > score_matrix ; + std::vector< std::vector< DIRECTION > > origin_matrix ; + std::vector< std::vector< char > > match_matrix ; + + score_type _gap_panelty ; + score_type _match_panelty ; + score_type _mismatch_panelty ; + score_type _neutral_panelty ; + + + SequenceAlignmentResults _alignment_results ; + + std::string _query_sequence; + std::string _target_sequence; + +public: + SequenceAlignment ( ) ; + virtual ~SequenceAlignment() {} + + size_t matrix_width() const { return score_matrix.size(); } + size_t matrix_height() const { return score_matrix[0].size(); } + + score_type gap_panelty() const { return _gap_panelty ; } + score_type match_panelty() const { return _match_panelty ; } + score_type mismatch_panelty() const { return _mismatch_panelty ; } + score_type neutral_panelty() const { return _neutral_panelty ; } + + const std::string& query_sequence() const { return _query_sequence; } + const std::string& target_sequence() const { return _target_sequence; } + + char query_nucleotide(size_t query_index) const { return _query_sequence[query_index] ; } + char target_nucleotide(size_t target_index) const { return _target_sequence[target_index] ; } + + const SequenceAlignmentResults& results() const { return _alignment_results; } + + char match_value ( const char q, const char t ) const + { + if ( q=='N' || t=='N' ) + return 'N' ; + + return ( q==t ) ? 'M' : 'x' ; + } + + char match ( const size_t query_index, const size_t target_index) const + { + return match_matrix[query_index][target_index]; + } + DIRECTION origin ( const size_t query_index, const size_t target_index) const + { + return origin_matrix[query_index][target_index]; + } + + score_type score ( const size_t query_index, const size_t target_index) const + { + return score_matrix[query_index][target_index]; + } + + score_type safe_score ( const ssize_t query_index, const ssize_t target_index) const + { + if (query_index==-1) + return target_border[target_index]; + if (target_index==-1) + return query_border[query_index]; + + return score_matrix[query_index][target_index]; + } + + score_type nucleotide_match_score(const size_t query_index, const size_t target_index) const + { + char q = query_nucleotide(query_index); + char t = target_nucleotide(target_index); + + if ( q=='N' && t=='N' ) + return 0.0 ; + + if ( q=='N' || t=='N' ) + return neutral_panelty() ; + + return ( q==t ) ? match_panelty() : mismatch_panelty() ; + } + + void print_matrix(std::ostream& strm = std::cout) const; + + #if 0 + score_type calculate_alignment_score(const size_t query_index, const size_t target_index) const + { + score_type score = -100000000; + + /* + score_type + + //Score from the left-cell + if ( query_index > 0 ) + if ( (score(query_index-1,target_index) + gap_panelty()) > score) + score = score_matrix[query_index-1][target_index] + gap_panelty(); + + //Score from the upper-cell + if ( target_index > 0 ) + if ((score_matrix[query_index][target_index-1] + gap_panelty()) > score) + score = score_matrix[query_index][target_index-1] + gap_panelty(); + + //Score from the upper-left-cell + if ( target_index>0 && query_index> 0) { + if (score_matrix[query_index-1][target_index-1] + match_score(query_index,target_index) > score) + score = score_matrix[query_index-1][target_index-1] + match_score(query_index,target_index) ; + }*/ + return score; + + } + #endif + + const SequenceAlignmentResults& align ( const std::string& query, const std::string& target ) ; + +protected: + void resize_matrix(size_t width, size_t height); + void populate_match_matrix(); + + virtual void reset_alignment_results() ; + + virtual void set_sequences ( const std::string& _query, const std::string &target ) ; + virtual void reset_matrix( size_t width, size_t height ) = 0 ; + virtual void populate_matrix ( ) = 0; + virtual void find_optimal_alignment ( ) = 0 ; + virtual void post_process() ; +} ; + +#if 0 +class LocalSequenceAlignment : public SequenceAlignment +{ +protected: + size_t highest_scored_query_index ; + size_t highest_scored_target_index ; + +public: + virtual void reset_matrix( size_t width, size_t height ) ; + virtual void populate_matrix ( ) ; + virtual void find_optimal_alignment ( ) ; +}; +#endif + + +class HalfLocalSequenceAlignment : public SequenceAlignment +{ +protected: + size_t highest_scored_query_index ; + size_t highest_scored_target_index ; + +public: + virtual void set_sequences ( const std::string& _query, const std::string &target ) ; + virtual void reset_matrix( size_t width, size_t height ) ; + virtual void populate_matrix ( ) ; + virtual void find_optimal_alignment ( ) ; + virtual void post_process() ; + + bool starting_point_close_to_end_of_sequences(const size_t query_index, const size_t target_index) const; + void find_alignment_starting_point(ssize_t &new_query_index, ssize_t &new_target_index) const; + + SequenceAlignmentResults find_optimal_alignment_from_point ( const size_t query_start, const size_t target_start ) const ; +}; + +#endif +