Mercurial > repos > dereeper > sniplay
view egglib/egglib-2.1.5/include/egglib-cpp/Align.hpp @ 9:98c37a5d67f4 draft
Uploaded
author | dereeper |
---|---|
date | Wed, 07 Feb 2018 22:08:47 -0500 |
parents | 420b57c3c185 |
children |
line wrap: on
line source
/* Copyright 2008-2009 Stéphane De Mita, Mathieu Siol This file is part of the EggLib library. EggLib is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. EggLib is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with EggLib. If not, see <http://www.gnu.org/licenses/>. */ #ifndef EGGLIB_ALIGN_HPP #define EGGLIB_ALIGN_HPP #include "Container.hpp" #include "CharMatrix.hpp" #include <vector> /** \mainpage Summary * * This is the automatically-generated reference manual of the C++ * egglib-cpp library. The library is presented as several modules, but * note that they are only used to structure the documentation. * * There is a single namespace (egglib) in which all classes are * defined. See an example of programming with egglib-cpp in the * EggLib package main documentation. Use "Modules" or "Classes" above * to navigate in the library reference manual. * */ /** \defgroup core core * * \brief Central core of the C++ library of Egglib * * Data storage classes, parsers/formatters and tools, plus exception * types. * */ namespace egglib { /** \brief Handles a sequence alignment * * \ingroup core * * Creation from a file or string stream should be performed using * the class Fasta. Align objects can be created by deep copy from * both Align and Container type. In the latter case, the length are * artificially equalized by "?" characters. Align objects can be * created from a DataMatrix object (and all the way arround) using * the specific class DMAConverter. * * Sequences are represented by two strings (name and sequence) and * an integer (group) that can be accessed or modified by index.The * order of sequences is guaranteed to be conserved, as if Align was * a list of triplets (name, sequence, group). * * The data matrix is implemented as continuous array (char**) and * allows efficient access and modification of data. For very large * data matrices you might claim immediately the required memory * using the constructor Align(unsigned int, char**). * */ class Align : public Container, public CharMatrix { public: /** \brief Creates an empty alignment * */ Align(); /** \brief Creates an alignment from a data matrix. * * Allows you to create an object from data stored in a char* * array. The array's dimensions must be passed to the * constructor, and as a result there is not need to * terminate each sequence by a NULL character. * * \param number_of_sequences the number of sequences (the * length of the first dimension of the array). * * \param alignment_length the length of sequences (the * length of all lines of the array). * * \param cstring_array the pointer to the data matrix. * */ Align(unsigned int number_of_sequences, unsigned int alignment_length, char const * const * const cstring_array); /** \brief Creates an alignment with given dimensions * * Allows you to allocate directly a data matrix of a given * size. Names are empty strings, groups 0, and all * characters are ?. * * \param number_of_sequences the number of sequences (the * length of the first dimension of the array). * * \param alignment_length the length of sequences (the * length of all lines of the array). * */ Align(unsigned int number_of_sequences, unsigned int alignment_length); /** \brief Copy constructor * */ Align(const Align& align); /** \brief Copy constructor accepting a Container object * * All but the longest sequences are padded with ? to match * the longest sequence's length. * */ Align(const Container& container); /** \brief Copy operator * */ Align& operator=(const Align& align); /** \brief Copy operator accepting a Container object * * All but the longest sequences are padded with ? to match * the longest sequence's length. * */ Align& operator=(const Container& container); /** \brief Destructor * */ virtual ~Align(); /** \brief Adds a sequence * * If the object already contains at least one sequence, the * new sequence must have the same length. Otherwise, a * EggUnalignedError is raised. * * \param name the name of the sequence. * \param sequence the sequence string. * \param group the group index of the sequence. * \return The new number of sequences. * */ virtual unsigned int append(const char* name, const char* sequence, unsigned int group=0); /** \brief Removes a position (column) of the alignment * * \param pos the position to remove in the alignment. * \return The new length of the alignment. * */ virtual unsigned int removePosition(unsigned int pos); /** \brief Removes a sequence from the alignment * * \param pos the index of the sequence to remove. * \return The new number of sequences. * */ virtual unsigned int remove(unsigned int pos); /** \brief Replace a sequence string * * The new sequence must have the same length than the * alignment. Otherwise, a EggUnalignedError is raised. * * \param seq the index of the sequence to change. * \param sequence the new sequence. * */ virtual void sequence(unsigned int seq, const char* sequence); /** \brief Gets the name of a given sequence * * \param pos the index of the sequence. * * \return The sequence string for that particular sequence. * */ virtual inline const char* sequence(unsigned int pos) const { return Container::sequence(pos); } /** \brief Alignment length * * Returns 0 if the alignment is empty. * */ virtual unsigned int ls() const; /** \brief Length of a given sequence * * Calling this function is exactly the same as calling ls() * (without arguments), regardless of the index provided, * except that an exception is thrown if the index is out of * bounds. Provided for compatibility with Container. * * \param pos the index of the sequence. * \return the length of the alignment. * */ virtual unsigned int ls(unsigned int pos) const; /** \brief Fast and unsecure accessor * * This accessor doesn't perform out-of-bound checking! * * \param s the index of the sequence (line). * \param p the position in the alignment (column). * \return The character at the given position. * */ inline char character(unsigned int s, unsigned int p) const { return sequences[s][p]; } /** \brief Gets a nucleotide * * This modifier does perform out-of-bound checking. * The specified position must exist. * * \param sequence the index of the sequence (line). * \param position the position in the alignment (column). * \return the character at the given position. * */ virtual char get(unsigned int sequence, unsigned int position) const; /** \brief Sets a matrix position to a new character * * This modifier does perform out-of-bound checking. * The specified position must exist. * * \param sequence the index of the sequence (line). * \param position the position in the alignment (column). * \param ch the new character value. */ virtual void set(unsigned int sequence, unsigned position, char ch); /** \brief Reverse a given column in binary data * * The specified column must contain only "0" ans "1" characters. * "0" is replaced by "1" and all the way around * */ void binSwitch(unsigned int pos); /** \brief Extracts specified positions (columns) of the alignment * * All the specified sites are extracted in the specified * order. This function is suitable for bootstrap (resample * allowing redrawing the same site) and permutations. * * This function doesn't perform out-of-bound checking. * * \param list_of_sites a vector containing alignment * positions. * * \return A copy of the object containing the specified * set of positions. * */ Align vslice(std::vector<unsigned int> list_of_sites); /** \brief Extracts a range of positions (columns) * * \param a the first position. * * \param b the index immediately passed the last sequence to * extract. * * \return A copy of the object containing the specified * range of sequences. * * Positions a to b-1 are extracted, provided that the * indices fit in the current length of sequences. To extract * all sequences, use align.vslice(0, align.ls()). * * Note: invalid ranges will be silently supported. If * a>=ls or b<=a, an empty object is returned. If b>ns, * ls will be substituted to a. */ Align vslice(unsigned int a, unsigned int b); /** \brief Deletes all the content of the object * */ virtual void clear(); /** \brief Same as ns() * */ inline unsigned int numberOfSequences() const { return _ns; } /** \brief Same as ls() * */ inline unsigned int numberOfSites() const { return _ls; } /** \brief Gets a group label (insecure) * */ inline unsigned int populationLabel(unsigned int sequenceIndex) const { return groups[sequenceIndex]; } /** \brief Just return the passed value * */ inline double sitePosition(unsigned int position) const { return (double) position; } protected: /// This function is not available for alignments virtual void appendSequence(unsigned int pos, const char* sequence) {} // Initializer (creates a valid empty alignment) virtual void init(); // Makes a deep copy of the specified data matrix - if cstring_array is NULL, then ignores it and pads with ?'s virtual void setFromSource(unsigned int number_of_sequences, unsigned int alignment_length, const char* const * const cstring_array); // Copies from a Container virtual void copyObject(const Container&); // Copies from an Align virtual void copyObject(const Align&); // Alignment length unsigned int _ls; }; } #endif