diff egglib/egglib-2.1.5/include/egglib-cpp/Fasta.hpp @ 9:98c37a5d67f4 draft

Uploaded
author dereeper
date Wed, 07 Feb 2018 22:08:47 -0500
parents 420b57c3c185
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/egglib/egglib-2.1.5/include/egglib-cpp/Fasta.hpp	Wed Feb 07 22:08:47 2018 -0500
@@ -0,0 +1,278 @@
+/*
+    Copyright 2008-2009 Stéphane De Mita, Mathieu Siol
+
+    This file is part of the EggLib library.
+
+    EggLib is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    EggLib is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with EggLib.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef EGGLIB_FASTA_HPP
+#define EGGLIB_FASTA_HPP
+
+#include <istream>
+#include <iostream>
+#include <string>
+#include "Container.hpp"
+
+namespace egglib {
+
+   /** \brief Fasta parser/formatter
+    *
+    * \ingroup core
+    *
+    * Reads a multifasta sequence file from a string, a stream or a file
+    * and returns a Container. See the description of the format below.
+    * Formats a fasta string from a sequence container object and places
+    * it in a string, a stream or a file. All methods are static and the
+    * class cannot be instantiated. The methods parsef and formatf will
+    * open the file for you while the others will read/write directly
+    * in a string or an already open stream.
+    * 
+    * Specifications of the fasta format:
+    * 
+    *    - The number of sequences is not limited.
+    * 
+    *    - Each sequence is preceded by a header limited to a single
+    *      line and starting by a ">" character.
+    * 
+    *    - The header length is not limited and all characters are
+    *      allowed but white spaces and special characters are
+    *      discouraged.
+    * 
+    *    - Group indices are specified by \@0, \@1, \@2...  strings
+    *      appearing at the end of the header string (just before the
+    *      carriage return). Note that group labels are ignored by
+    *      default.
+    * 
+    *    - Group indices are ignored unless specifically specified in a
+    *      parser's options.
+    * 
+    *    - The sequence itself continues on following lines until the
+    *      next ">" character or the end of the file.
+    * 
+    *    - White spaces, tabs and carriage returns are allowed at any
+    *      position. There is no limitation in length and different
+    *      sequences can have different lengths.
+    * 
+    *    - Although the standard is lower case characters, Fasta
+    *      assumes upper case characters and supports lower case
+    *      characters only by converting them to upper case characters.
+    *      Information coded by change in case is lost.
+    *
+    */
+    class Fasta {
+
+      public:
+
+       /** \brief Imports a fasta file
+         *
+         * Imports the content of the file as is. Calls the stream-based
+         * parse() method by creating its own istream.
+         *
+         * \param fname the name of a fasta file.
+         *
+         * \param importGroupLabels if set to true, scan automatically
+         * for groups. The format is \@ followed by an integer, placed
+         * at the end of the header string (sequences without labels
+         * will be treated as \@0).
+         *
+         * \return A Container object containing the sequences.
+         *
+         */
+        static Container parsef(const char* fname, bool importGroupLabels=false);
+
+
+       /** \brief Imports a fasta file into an existing Container
+         *
+         * Imports the content of the file as is. Calls the stream-based
+         * parse() method by creating its own istream. This
+         * method expects a reference to a Container to which the
+         * sequences will be appended.
+         *
+         * \param fname the name of a fasta file.
+         *
+         * \param container a Container instance, empty or not.
+         *
+         * \param importGroupLabels if set to true, scan automatically
+         * for groups. The format is \@ followed by an integer, placed
+         * at the end of the header string (sequences without labels
+         * will be treated as \@0).
+         *
+         * \return Nothing: the new sequences are appended to the
+         * Container passed as argument.
+         *
+         */
+        static void parsef(const char* fname, Container& container, bool importGroupLabels=false);
+
+
+       /** \brief Imports fasta data from a string
+         *
+         * Imports the content of the string as is. Calls the stream-based
+         * parse() method by creating its own istream.
+         *
+         * \param str a string containing the data.
+         *
+         * \param importGroupLabels if set to true, scan automatically
+         * for groups. The format is \@ followed by an integer, placed
+         * at the end of the header string (sequences without labels
+         * will be treated as \@0).
+         *
+         * \return A Container object containing the sequences.
+         *
+         */
+        static Container parse(const std::string& str, bool importGroupLabels=false);
+
+
+       /** \brief Imports fasta data from a string into an existing Container
+         *
+         * Imports the content of the string as is. Calls the stream-based
+         * parse() method by creating its own istream. This
+         * method expects a reference to a Container to which the
+         * sequences will be appended.
+         *
+         * \param str a string containing the data.
+         *
+         * \param container a Container instance, empty or not.
+         *
+         * \param importGroupLabels if set to true, scan automatically
+         * for groups. The format is \@ followed by an integer, placed
+         * at the end of the header string (sequences without labels
+         * will be treated as \@0).
+         *
+         * \return Nothing: new sequences are appended to the Container
+         * passed as argument.
+         *
+         */
+        static void parse(const std::string& str, Container& container, bool importGroupLabels=false);
+
+
+       /** \brief Imports fasta data from an open stream
+         *
+         * Imports the content of the stream as is.
+         *
+         * \param stream an open stream (file or string) containing the
+         * data.
+         *
+         * \param importGroupLabels if set to true, scan automatically
+         * for groups. The format is \@ followed by an integer, placed
+         * at the end of the header string (sequences without labels
+         * will be treated as \@0).
+         *
+         * \return A Container object containing the sequences.
+         *
+         */
+        static Container parse(std::istream& stream, bool importGroupLabels=false);
+
+
+       /** \brief Imports fasta data from an open stream into an existing Container
+         *
+         * Imports the content of the stream as is. This
+         * method expects a reference to a Container to which the
+         * sequences will be appended.
+         *
+         * \param stream an open stream (file or string) containing the
+         * data.
+         *
+         * \param container a Container instance, empty or not.
+         *
+         * \param importGroupLabels if set to true, scan automatically
+         * for groups. The format is \@ followed by an integer, placed
+         * at the end of the header string (sequences without labels
+         * will be treated as \@0).
+         *
+         * \return Nothing: the new sequences are appended to the
+         * Container passed as argument.
+         *
+         */
+        static void parse(std::istream& stream, Container& container, bool importGroupLabels=false);
+        
+        
+       /** \brief Exports sequences as fasta to a file
+        *
+        * \param fname the name of the file where to place the result.
+        *
+        * \param container Container object to export.
+        *
+        * \param exportGroupLabels if set to true, exports group
+        * indices as a \@x at the end of the sequence name, where x is
+        * the group index. Otherwise, this information is discarded.
+        *
+        * \param lineLength the number of characters to place on a
+        * single line. If zero, no newlines are inserted within
+        * sequences.
+        *
+        */
+        static void formatf(const char* fname, const Container& container, bool exportGroupLabels=false, unsigned int lineLength=50);
+
+
+       /** \brief Exports sequences as fasta to an open stream
+        *
+        * \param file an open stream.
+        *
+        * \param container Container object to export.
+        *
+        * \param exportGroupLabels if set to true, exports group
+        * indices as a \@x at the end of the sequence name, where x is
+        * the group index. Otherwise, this information is discarded.
+        *
+        * \param lineLength the number of characters to place on a
+        * single line. If zero, no newlines are inserted within
+        * sequences.
+        *
+        */
+        static void format(std::ostream& file, const Container& container, bool exportGroupLabels=false, unsigned int lineLength=50);
+
+
+       /** \brief Exports sequences as fasta to a string
+        *
+        * This method internally creates an ostringstream, calls the
+        * stream-based format() method and returns the
+        * resulting string.
+        *
+        * \param container Container object to export.
+        *
+        * \param exportGroupLabels if set to true, exports group
+        * indices as a \@x at the end of the sequence name, where x is
+        * the group index. Otherwise, this information is discarded.
+        *
+        * \param lineLength the number of characters to place on a
+        * single line. If zero, no newlines are inserted within
+        * sequences.
+        *
+        * \return The formatted string.
+        *
+        */
+        static std::string format(const Container& container, bool exportGroupLabels=false, unsigned int lineLength=50);
+
+          
+          
+      protected:
+        
+        /// Protected constructor: this class cannot be instantiated
+        Fasta() { }
+        
+        /// Protected copy constructor: this class cannot be copied
+        Fasta(const Fasta& source) { }
+        
+        /// Protected assignment operator: this class cannot be assigned or copied
+        Fasta& operator=(const Fasta& source) { return *this; }
+        
+        /// Protected destructor: this class cannot be instantiated
+        virtual ~Fasta() { }
+
+        
+    };
+}
+
+#endif