diff egglib/egglib-2.1.5/include/egglib-cpp/BaseDiversity.hpp @ 9:98c37a5d67f4 draft

Uploaded
author dereeper
date Wed, 07 Feb 2018 22:08:47 -0500
parents 420b57c3c185
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/egglib/egglib-2.1.5/include/egglib-cpp/BaseDiversity.hpp	Wed Feb 07 22:08:47 2018 -0500
@@ -0,0 +1,169 @@
+/*
+    Copyright 2009 Stéphane De Mita, Mathieu Siol
+
+    This file is part of the EggLib library.
+
+    EggLib is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    EggLib is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with EggLib.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef EGGLIB_BASEDIVERSITY_HPP
+#define EGGLIB_BASEDIVERSITY_HPP
+
+#include "CharMatrix.hpp"
+#include "SitePolymorphism.hpp"
+#include <string>
+
+/** \defgroup polymorphism polymorphism
+ *
+ * \brief Diversity analyses
+ *
+ * Two classes are contained in this module: NucleotideDiversity, that
+ * performs site-centered polymorphism analyses, and HaplotypeDiversity,
+ * that performs haplotype-centered analyses. The detection of
+ * polymorphic sites is common to both, through the base class
+ * BaseDiversity. However this phase must be repeated when stats from
+ * the two classes are needed. To reduce the computational burden, the
+ * function reserve() can be used, which directly allocates needed memory
+ * when the eventual number of polymorphic sites is known prior to
+ * analysis (even if not precisely). For both classes, a set of
+ * statistics are computed immediately upon load of a data set. For
+ * NucleotideDiversity, additional statistics are computed per group
+ * upon use of the corresponding accessors. This number of operations
+ * performed several times is strictly limited. This is particularly
+ * useful when different statistics are needed for a given alignment.
+ * However, this system allows not computing unnecessary statistics to
+ * a certain extent.
+ * 
+ */
+
+namespace egglib {
+
+    /** \brief Base class of diversity classes
+    *
+    * Factors out the analysis of polymorphic sites, which the
+    * diversity classes share, through importSites() and related accessors.
+    * Copying is disabled (private copy constructor and assignment).
+    * \ingroup polymorphism
+    *
+    */
+    class BaseDiversity {
+    
+        public:
+    
+           /** \brief Constructor
+            * 
+            */ 
+            BaseDiversity();
+            
+           /** \brief Destructor (virtual: this class is meant to be derived from)
+            * 
+            */ 
+            virtual ~BaseDiversity();
+            
+           /** \brief Reserve sufficient memory for a given number of
+            * polymorphic sites.
+            * 
+            * This method makes importSites() faster when you
+            * already know how many polymorphic sites to expect, since
+            * the necessary memory will be allocated prior to the screening
+            * of data. It is possible to use reserve() even with a
+            * number of sites that does not match what importSites()
+            * will find.
+            * 
+            * \param numberOfSites a strictly positive integer.
+            * 
+            */
+            virtual void reserve(unsigned int numberOfSites);
+
+            /// Gets a site by index, as a read-only pointer (valid index range not shown here; presumably below the number of polymorphic sites -- TODO confirm)
+            const SitePolymorphism* get_site(unsigned int index) const;
+
+            /// Gets a site position by index (presumably the same indexing as get_site() -- TODO confirm)
+            unsigned int get_position(unsigned int index) const;
+
+           /** \brief Predefined mapping string for DNA data
+            * (value defined outside this header; presumably meant for the characterMapping argument of importSites() -- TODO confirm)
+            */
+            static const std::string dnaMapping;
+
+
+           /** \brief Predefined mapping string for RNA data
+            * (value defined outside this header)
+            */
+            static const std::string rnaMapping;
+
+
+           /** \brief Predefined mapping string for amino acid data
+            * (value defined outside this header)
+            */
+            static const std::string aaMapping;
+
+
+            /// Clears and re-initializes object (presumably through clear() and init() -- TODO confirm)
+            virtual void reset();
+
+
+        protected:
+    
+            virtual void init();   // overridable; presumably sets members to their initial state -- TODO confirm
+            virtual void clear();  // overridable; presumably releases the memory held by the members -- TODO confirm
+    
+            // Screens data for polymorphic sites; most arguments have a same-named p_* option member below (exact semantics live in the implementation file)
+            void importSites(CharMatrix& data, bool allowMultipleMutations,
+                double minimumExploitableData, unsigned int ignoreFrequency,
+                std::string characterMapping, bool useZeroAsAncestral,
+                bool ignoreOutgroup);
+
+            // Per-site helpers (presumably called from importSites() -- TODO confirm)
+            void analyzeSite(CharMatrix& data, unsigned int index, double maxMissingData, bool ignoreOutgroup); // analyzes a site, adds a Site to the Site container if the site is polymorphic
+            unsigned int getPopIndex(unsigned int label) const;  // looks up a population by label; returns v_npop if not found
+            
+            SitePolymorphism** v_sites;  // holder of polymorphic site addresses
+            bool* v_orientables;         // stores whether the sites are orientable or not
+            unsigned int* v_sitePositions;   // stores position of sites
+
+            unsigned int  v_reserved; // presumably the number of sites reserved through reserve() -- TODO confirm
+            unsigned int  v_ns;       // maximum number of sequences analyzed (max of sites' ns)
+            unsigned int  v_S;        // number of polymorphic sites
+            unsigned int  v_So;       // number of orientable sites
+            unsigned int  v_eta;      // number of mutations (whatever multiple)
+            double        v_nseff;    // average number of analyzed sequences
+            unsigned int  v_lseff;    // number of analyzed sites
+            double        v_nseffo;   // average number of analyzed sequences for analyses with outgroup
+            unsigned int  v_lseffo;   // number of analyzed sites for analyses with outgroup
+            unsigned int  v_npop;     // number of populations
+            unsigned int *v_popLabel; // label of each pop
+
+            // options (named after the importSites() arguments, with a p_ prefix)
+            bool          p_allowMultipleMutations;
+            double        p_minimumExploitableData;
+            std::string   p_characterMapping;
+            unsigned int  p_pos_sep_mapping; // no matching importSites() argument; presumably a separator position within p_characterMapping -- TODO confirm
+            bool          p_useZeroAsAncestral;
+            unsigned int  p_ignoreFrequency;
+
+
+    
+        private: // copying is disabled by keeping the two members below private
+            
+            BaseDiversity(const BaseDiversity& source) { } // not available to users; the empty body copies nothing from source
+            
+            BaseDiversity& operator=(const BaseDiversity& source) { // not available to users; no-op that ignores source
+                return *this;
+            }
+
+    };
+}
+
+#endif