diff fastx_toolkit-0.0.6/src/libfastx/sequence_alignment.h @ 3:997f5136985f draft default tip

Uploaded
author xilinxu
date Thu, 14 Aug 2014 04:52:17 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/fastx_toolkit-0.0.6/src/libfastx/sequence_alignment.h	Thu Aug 14 04:52:17 2014 -0400
@@ -0,0 +1,250 @@
+/*
+    FASTX-toolkit - FASTA/FASTQ preprocessing tools.
+    Copyright (C) 2009  A. Gordon (gordon@cshl.edu)
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU Affero General Public License as
+    published by the Free Software Foundation, either version 3 of the
+    License, or (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU Affero General Public License for more details.
+
+    You should have received a copy of the GNU Affero General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+#ifndef __SEQUENCE_ALIGNMENT_HEADER__
+#define __SEQUENCE_ALIGNMENT_HEADER__
+
+#include <err.h>
+
+// Record describing the outcome of one alignment; every numeric field starts at 0.
+struct SequenceAlignmentResults
+{
+	// Flag stored as int; the constructor sets it to 0.
+	int alignment_found;
+
+	// Query side: size plus start/end positions.
+	size_t query_size;
+	size_t query_start;
+	size_t query_end;
+
+	// Target side: size plus start/end positions.
+	size_t target_size;
+	size_t target_start;
+	size_t target_end;
+
+	// Tallies, one per category.
+	size_t gaps;
+	size_t neutral_matches;
+	size_t matches;
+	size_t mismatches;
+
+	float score;
+
+	// Alignment strings for each side.
+	std::string query_alignment;
+	std::string target_alignment;
+
+	// Sequence strings for each side.
+	std::string query_sequence;
+	std::string target_sequence;
+
+	// Zero all numeric members; the four strings default-construct to empty.
+	SequenceAlignmentResults()
+		: alignment_found(0)
+		, query_size(0), query_start(0), query_end(0)
+		, target_size(0), target_start(0), target_end(0)
+		, gaps(0), neutral_matches(0), matches(0), mismatches(0)
+		, score(0.0f)
+	{
+	}
+
+	// Output routine, declared here and defined elsewhere; ostrm defaults to std::cout.
+	void print(std::ostream& ostrm = std::cout) const;
+
+	// Virtual destructor with an empty body.
+	virtual ~SequenceAlignmentResults() {}
+};
+
+// Abstract base for score-matrix alignment of a query sequence against a target sequence.
+class SequenceAlignment
+{
+protected:
+	typedef float score_type;
+	// Value type of origin_matrix cells.
+	typedef enum {
+		FROM_UPPER = 1,
+		FROM_LEFT  = 2,
+		FROM_UPPER_LEFT = 3,
+		FROM_NOWHERE = 4
+		//STOP_MARKER = 5 
+	} DIRECTION ;
+	// Border scores: safe_score() reads these when one of its indices is -1.
+	std::vector < score_type > query_border ;
+	std::vector < score_type > target_border ;
+	// All three matrices are indexed [query_index][target_index].
+	std::vector< std::vector< score_type >  > score_matrix ;
+	std::vector< std::vector< DIRECTION >  > origin_matrix ;
+	std::vector< std::vector< char > > match_matrix ;
+	// Scoring parameters (the "panelty" spelling is part of the existing interface).
+	score_type _gap_panelty ;
+	score_type _match_panelty ;
+	score_type _mismatch_panelty ;
+	score_type _neutral_panelty ;
+
+	// Exposed read-only through results().
+	SequenceAlignmentResults _alignment_results ;
+
+	std::string _query_sequence;
+	std::string _target_sequence;
+
+public:
+	SequenceAlignment ( ) ;
+	virtual ~SequenceAlignment() {}
+	// Outer size of score_matrix, and size of its first entry (0 when the matrix is empty).
+	size_t matrix_width() const { return  score_matrix.size(); }
+	size_t matrix_height() const { return  score_matrix.empty() ? 0 : score_matrix[0].size(); }
+
+	score_type gap_panelty() const { return _gap_panelty ; }
+	score_type match_panelty() const { return _match_panelty ; }
+	score_type mismatch_panelty() const { return _mismatch_panelty ; }
+	score_type neutral_panelty() const { return _neutral_panelty ; }
+
+	const std::string& query_sequence() const { return _query_sequence; }
+	const std::string& target_sequence() const { return _target_sequence; }
+	// Unchecked character access into the stored sequences.
+	char query_nucleotide(size_t query_index) const { return _query_sequence[query_index] ; }
+	char target_nucleotide(size_t target_index) const { return _target_sequence[target_index] ; }
+
+	const SequenceAlignmentResults& results() const { return _alignment_results; }
+	// Classify a character pair: 'N' if either is 'N', else 'M' (equal) or 'x' (different).
+	char match_value ( const char q, const char t ) const
+	{
+		if ( q=='N' || t=='N' ) 
+			return 'N' ;
+
+		return ( q==t ) ? 'M' : 'x' ;
+	}
+	// Unchecked accessors for a single cell of the three matrices.
+	char match ( const size_t query_index, const size_t target_index) const 
+	{
+		return match_matrix[query_index][target_index];
+	}
+	DIRECTION origin (  const size_t query_index, const size_t target_index) const 
+	{
+		return origin_matrix[query_index][target_index];
+	}
+
+	score_type score ( const size_t query_index, const size_t target_index) const 
+	{
+		return score_matrix[query_index][target_index];
+	}
+	// Like score(), but an index of -1 selects the corresponding border vector instead.
+	score_type safe_score ( const ssize_t query_index, const ssize_t target_index) const 
+	{
+		// NOTE(review): (-1,-1) would index target_border with -1; confirm callers never pass it.
+		if (query_index==-1)
+			return target_border[target_index];
+		if (target_index==-1)
+			return query_border[query_index];
+		return score_matrix[query_index][target_index];
+	}
+	// Pair score: 0 if both are 'N', neutral if only one is, otherwise match or mismatch.
+	score_type nucleotide_match_score(const size_t query_index, const size_t target_index) const
+	{
+		char q = query_nucleotide(query_index);
+		char t = target_nucleotide(target_index);
+
+		if ( q=='N' && t=='N' )
+			return 0.0 ;
+
+		if ( q=='N' || t=='N' )
+			return neutral_panelty() ;
+
+		return ( q==t ) ? match_panelty() : mismatch_panelty() ;
+	}
+	// Declared here, defined outside this header; strm defaults to std::cout.
+	void print_matrix(std::ostream& strm = std::cout) const;
+
+	#if 0
+	score_type calculate_alignment_score(const size_t query_index, const size_t target_index) const
+	{
+		score_type score = -100000000;
+
+		/*
+		score_type
+
+		//Score from the left-cell
+		if ( query_index > 0 )
+			if ( (score(query_index-1,target_index) + gap_panelty()) > score)
+				score = score_matrix[query_index-1][target_index] + gap_panelty();
+
+		//Score from the upper-cell
+		if ( target_index  > 0 ) 
+			if ((score_matrix[query_index][target_index-1] + gap_panelty()) > score)
+				score = score_matrix[query_index][target_index-1] + gap_panelty();
+
+		//Score from the upper-left-cell
+		if ( target_index>0 && query_index> 0) {
+			if (score_matrix[query_index-1][target_index-1] + match_score(query_index,target_index) > score) 
+				score = score_matrix[query_index-1][target_index-1] + match_score(query_index,target_index) ;
+		}*/
+		return score;
+
+	}
+	#endif
+	// Public entry point: takes query and target, returns a results reference (defined elsewhere).
+	const SequenceAlignmentResults& align ( const std::string& query, const std::string& target ) ;
+
+protected:
+	void resize_matrix(size_t width, size_t height);
+	void populate_match_matrix();
+	// Virtual hooks; the three marked "= 0" must be supplied by a subclass.
+	virtual void reset_alignment_results() ; 
+
+	virtual void set_sequences ( const std::string& _query, const std::string &target ) ;
+	virtual void reset_matrix( size_t width, size_t height ) = 0 ;
+	virtual void populate_matrix ( ) = 0;
+	virtual void find_optimal_alignment ( ) = 0 ;
+	virtual void post_process() ;
+} ;
+// Compiled out by the #if 0 below: declaration of a LocalSequenceAlignment subclass.
+#if 0
+class LocalSequenceAlignment : public SequenceAlignment
+{
+protected:
+	size_t highest_scored_query_index ;
+	size_t highest_scored_target_index ;
+
+public:
+	virtual void reset_matrix( size_t width, size_t height )  ;
+	virtual void populate_matrix ( ) ;
+	virtual void find_optimal_alignment ( )  ;
+};
+#endif
+// Overrides the three pure-virtual steps of SequenceAlignment; definitions live outside this header.
+class HalfLocalSequenceAlignment : public SequenceAlignment
+{
+protected:
+	size_t highest_scored_query_index ;
+	size_t highest_scored_target_index ;
+
+public:
+	// Give both indices a defined value (0) so they are never read uninitialised.
+	HalfLocalSequenceAlignment() : highest_scored_query_index(0), highest_scored_target_index(0) {}
+	virtual void set_sequences ( const std::string& _query, const std::string &target ) ;
+	virtual void reset_matrix( size_t width, size_t height )  ;
+	virtual void populate_matrix ( ) ;
+	virtual void find_optimal_alignment ( )  ;
+	virtual void post_process() ;
+	// Non-virtual helpers, defined outside this header.
+	bool starting_point_close_to_end_of_sequences(const size_t query_index, const size_t target_index) const;
+	void find_alignment_starting_point(ssize_t &new_query_index, ssize_t &new_target_index) const;
+	SequenceAlignmentResults find_optimal_alignment_from_point ( const size_t query_start, const size_t target_start ) const ;
+};
+
+#endif
+