Mercurial > repos > xilinxu > xilinxu
view fastx_toolkit-0.0.6/src/libfastx/sequence_alignment.h @ 3:997f5136985f draft default tip
Uploaded
author | xilinxu |
---|---|
date | Thu, 14 Aug 2014 04:52:17 -0400 |
parents | |
children |
line wrap: on
line source
/* FASTX-toolkit - FASTA/FASTQ preprocessing tools. Copyright (C) 2009 A. Gordon (gordon@cshl.edu) This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more details. You should have received a copy of the GNU Affero General Public License along with this program. If not, see <http://www.gnu.org/licenses/>. */ #ifndef __SEQUENCE_ALIGNMENT_HEADER__ #define __SEQUENCE_ALIGNMENT_HEADER__ #include <err.h> struct SequenceAlignmentResults { int alignment_found ; size_t query_size ; size_t query_start ; size_t query_end ; size_t target_size ; size_t target_start ; size_t target_end ; size_t gaps; size_t neutral_matches ; size_t matches ; size_t mismatches ; float score ; std::string query_alignment ; std::string target_alignment ; std::string query_sequence ; std::string target_sequence ; SequenceAlignmentResults() : alignment_found(false), query_size(0), query_start(0), query_end(0), target_size(0), target_start(0), target_end(0), gaps(0), neutral_matches(0), matches(0), mismatches(0), score(0) { } void print( std::ostream& ostrm = std::cout ) const; virtual ~SequenceAlignmentResults() {} } ; class SequenceAlignment { protected: typedef float score_type; typedef enum { FROM_UPPER = 1, FROM_LEFT = 2, FROM_UPPER_LEFT = 3, FROM_NOWHERE = 4 //STOP_MARKER = 5 } DIRECTION ; std::vector < score_type > query_border ; std::vector < score_type > target_border ; std::vector< std::vector< score_type > > score_matrix ; std::vector< std::vector< DIRECTION > > origin_matrix ; std::vector< std::vector< char > > match_matrix ; score_type _gap_panelty ; score_type _match_panelty ; score_type _mismatch_panelty ; score_type _neutral_panelty ; SequenceAlignmentResults _alignment_results ; std::string _query_sequence; std::string _target_sequence; public: SequenceAlignment ( ) ; virtual ~SequenceAlignment() {} size_t matrix_width() const { return score_matrix.size(); } size_t matrix_height() const { return score_matrix[0].size(); } score_type gap_panelty() const { return _gap_panelty ; } score_type match_panelty() const { return _match_panelty ; } score_type mismatch_panelty() const { return _mismatch_panelty ; } score_type neutral_panelty() const { return _neutral_panelty ; } const std::string& query_sequence() const { return _query_sequence; } const std::string& target_sequence() const { return _target_sequence; } char query_nucleotide(size_t query_index) const { return _query_sequence[query_index] ; } char target_nucleotide(size_t target_index) const { return _target_sequence[target_index] ; } const SequenceAlignmentResults& results() const { return _alignment_results; } char match_value ( const char q, const char t ) const { if ( q=='N' || t=='N' ) return 'N' ; return ( q==t ) ? 'M' : 'x' ; } char match ( const size_t query_index, const size_t target_index) const { return match_matrix[query_index][target_index]; } DIRECTION origin ( const size_t query_index, const size_t target_index) const { return origin_matrix[query_index][target_index]; } score_type score ( const size_t query_index, const size_t target_index) const { return score_matrix[query_index][target_index]; } score_type safe_score ( const ssize_t query_index, const ssize_t target_index) const { if (query_index==-1) return target_border[target_index]; if (target_index==-1) return query_border[query_index]; return score_matrix[query_index][target_index]; } score_type nucleotide_match_score(const size_t query_index, const size_t target_index) const { char q = query_nucleotide(query_index); char t = target_nucleotide(target_index); if ( q=='N' && t=='N' ) return 0.0 ; if ( q=='N' || t=='N' ) return neutral_panelty() ; return ( q==t ) ? match_panelty() : mismatch_panelty() ; } void print_matrix(std::ostream& strm = std::cout) const; #if 0 score_type calculate_alignment_score(const size_t query_index, const size_t target_index) const { score_type score = -100000000; /* score_type //Score from the left-cell if ( query_index > 0 ) if ( (score(query_index-1,target_index) + gap_panelty()) > score) score = score_matrix[query_index-1][target_index] + gap_panelty(); //Score from the upper-cell if ( target_index > 0 ) if ((score_matrix[query_index][target_index-1] + gap_panelty()) > score) score = score_matrix[query_index][target_index-1] + gap_panelty(); //Score from the upper-left-cell if ( target_index>0 && query_index> 0) { if (score_matrix[query_index-1][target_index-1] + match_score(query_index,target_index) > score) score = score_matrix[query_index-1][target_index-1] + match_score(query_index,target_index) ; }*/ return score; } #endif const SequenceAlignmentResults& align ( const std::string& query, const std::string& target ) ; protected: void resize_matrix(size_t width, size_t height); void populate_match_matrix(); virtual void reset_alignment_results() ; virtual void set_sequences ( const std::string& _query, const std::string &target ) ; virtual void reset_matrix( size_t width, size_t height ) = 0 ; virtual void populate_matrix ( ) = 0; virtual void find_optimal_alignment ( ) = 0 ; virtual void post_process() ; } ; #if 0 class LocalSequenceAlignment : public SequenceAlignment { protected: size_t highest_scored_query_index ; size_t highest_scored_target_index ; public: virtual void reset_matrix( size_t width, size_t height ) ; virtual void populate_matrix ( ) ; virtual void find_optimal_alignment ( ) ; }; #endif class HalfLocalSequenceAlignment : public SequenceAlignment { protected: size_t highest_scored_query_index ; size_t highest_scored_target_index ; public: virtual void set_sequences ( const std::string& _query, const std::string &target ) ; virtual void reset_matrix( size_t width, size_t height ) ; virtual void populate_matrix ( ) ; virtual void find_optimal_alignment ( ) ; virtual void post_process() ; bool starting_point_close_to_end_of_sequences(const size_t query_index, const size_t target_index) const; void find_alignment_starting_point(ssize_t &new_query_index, ssize_t &new_target_index) const; SequenceAlignmentResults find_optimal_alignment_from_point ( const size_t query_start, const size_t target_start ) const ; }; #endif