Mercurial > repos > xilinxu > xilinxu
view fastx_toolkit-0.0.6/src/libfastx/fastx.h @ 3:997f5136985f draft default tip
Uploaded
author | xilinxu |
---|---|
date | Thu, 14 Aug 2014 04:52:17 -0400 |
parents | |
children |
line wrap: on
line source
/*
    FASTX-toolkit - FASTA/FASTQ preprocessing tools.
    Copyright (C) 2009  A. Gordon (gordon@cshl.edu)

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU Affero General Public License as
    published by the Free Software Foundation, either version 3 of the
    License, or (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU Affero General Public License for more details.

    You should have received a copy of the GNU Affero General Public License
    along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __FASTX_HEADER__
#define __FASTX_HEADER__

/*
 * Public interface of libfastx: the FASTX reader/writer state structure and
 * the functions that operate on it.
 *
 * System headers are included before the extern "C" region so that they are
 * never wrapped in it. FILE and size_t are used by the declarations below, so
 * the header pulls them in itself instead of relying on the includer.
 */
#include <stddef.h>
#include <stdio.h>
#include <limits.h>

/* Strict ISO C modes on Linux do not expose PATH_MAX through <limits.h>;
 * fall back to the kernel header in that case. */
#ifndef PATH_MAX
#include <linux/limits.h>
#endif

#ifdef __cplusplus
extern "C" {
#endif

/* Bounds of the numeric quality scores and the width of that interval. */
#define MIN_QUALITY_VALUE (-50)
#define MAX_QUALITY_VALUE 50
#define QUALITY_VALUES_RANGE (MAX_QUALITY_VALUE-MIN_QUALITY_VALUE)

/* Capacity (in characters, excluding the terminator) of every per-record
 * text buffer in FASTX. May be overridden before including this header. */
#ifndef MAX_SEQ_LINE_LENGTH
#define MAX_SEQ_LINE_LENGTH (25000)
#endif

/* Which input formats fastx_init_reader() should accept. */
typedef enum {
	FASTA_ONLY=0,
	FASTA_OR_FASTQ=1,
	FASTQ_ONLY=2
} ALLOWED_INPUT_FILE_TYPES;

/* Whether 'N' (unknown base) is accepted in the input. */
typedef enum {
	DISALLOW_N=0,
	ALLOW_N=1
} ALLOWED_INPUT_UNKNOWN_BASES;

/* Whether lowercase nucleotides are accepted in the input. */
typedef enum {
	REQUIRE_UPPERCASE=0,
	ALLOW_LOWERCASE=1
} ALLOWED_INPUT_CASE;

/* Output format requested from fastx_init_writer(). */
typedef enum {
	OUTPUT_FASTA=0,
	OUTPUT_FASTQ_ASCII_QUAL=1,
	OUTPUT_FASTQ_NUMERIC_QUAL=2,
	OUTPUT_SAME_AS_INPUT=3
} OUTPUT_FILE_TYPE;

/*
 * FASTX is byte-packed. The packing is pushed here and popped right after the
 * structure so that it does NOT leak into declarations (or system headers)
 * that follow this header.
 */
#pragma pack(push, 1)
typedef struct
{
	/* Record data - common for FASTA/FASTQ */
	char	input_sequence_id_prefix[1];	//DON'T touch this - this hack will read the entire name into the variable 'name',
						//leaving the prefix ('>' or '@') in 'input_sequence_id_prefix'.
						//It relies on 'name' being laid out immediately after this member.
	char	name[MAX_SEQ_LINE_LENGTH+1];
	char	nucleotides[MAX_SEQ_LINE_LENGTH+1];

	/* Record data - only for FASTQ */
	char	input_name2_prefix[1];		//same hack as 'input_sequence_id_prefix'
	char	name2[MAX_SEQ_LINE_LENGTH+1];
	int	quality[MAX_SEQ_LINE_LENGTH+1];	//note: this is NOT ascii values, but numerical values
						//      numeric quality scores and ASCII quality scores
						//      are automatically converted to numbers (-15 to 40)
						// NOTE(review): MIN_QUALITY_VALUE/MAX_QUALITY_VALUE above
						//      span -50..50 - confirm which range applies.

	/* Configuration */
	int	allow_input_filetype;	// 0 = Allow only FASTA
					// (presumably one of ALLOWED_INPUT_FILE_TYPES - confirm)
	int	allow_N;		// 1 = N is valid nucleotide, 0 = only A/G/C/T are valid
	int	allow_lowercase;
	int	read_fastq;		// 1 = Input is FASTQ
					// (presumably only when 'allow_input_filetype' permits FASTQ - confirm)
	int	read_fastq_ascii;	// 1 = Input is FASTQ with ASCII quality scores (0 = with numeric quality scores)
	int	write_fastq;		// 0 = Write only FASTA (regardless of input type)
	int	write_fastq_ascii;	// 1 = Write ASCII quality scores, 0 = write numeric quality scores
	int	compress_output;	// 1 = pass output through GZIP

	int	copy_input_fastq_format_to_output ;	// 1 = copy 'read_fastq_ascii' to 'write_fastq_ascii'
							// so that the output format is the same as the input

	/* Internal data */
	int	allowed_nucleotides[256];	//quick lookup table for valid input
	char	output_sequence_id_prefix;	// '>' or '@', depending on the requested output type

	char	input_file_name[PATH_MAX];	//in linux, PATH_MAX is defined in <linux/limits.h>
	unsigned long long input_line_number;

	char	output_file_name[PATH_MAX];	//in linux, PATH_MAX is defined in <linux/limits.h>

	size_t	num_input_sequences;
	size_t	num_output_sequences;
	size_t	num_input_reads;
	size_t	num_output_reads;

	FILE*	input;
	FILE*	output;
} FASTX ;
#pragma pack(pop)

/*
 * Prepares pFASTX for reading records from 'filename', with the given
 * restrictions on file type, unknown bases and letter case.
 * (Behavior on failure is defined by the implementation, not visible here.)
 */
void fastx_init_reader(FASTX *pFASTX, const char *filename,
		ALLOWED_INPUT_FILE_TYPES allowed_input_filetype,
		ALLOWED_INPUT_UNKNOWN_BASES allow_N,
		ALLOWED_INPUT_CASE allow_lowercase);

// If the sequence identifier is collapsed (= "N-N") returns the reads_count,
// otherwise, returns 1
int get_reads_count(const FASTX *pFASTX);

/*
 * Prepares pFASTX for writing records to 'filename' in 'output_type' format;
 * 'compress_output' non-zero requests GZIP-compressed output (see the
 * 'compress_output' member).
 */
void fastx_init_writer(FASTX *pFASTX, const char *filename,
		OUTPUT_FILE_TYPE output_type,
		int compress_output);

/*
 * Reads the next record from the input into pFASTX.
 * NOTE(review): the meaning of the return value is defined by the
 * implementation - confirm before relying on it.
 */
int fastx_read_next_record(FASTX *pFASTX);

/* Writes the record currently held in pFASTX to the output. */
void fastx_write_record(FASTX *pFASTX);

/* Accessors for the sequence/read counters kept in FASTX. */
size_t num_input_sequences(const FASTX *pFASTX);
size_t num_input_reads(const FASTX *pFASTX);
size_t num_output_sequences(const FASTX *pFASTX);
size_t num_output_reads(const FASTX *pFASTX);

#ifdef __cplusplus
}
#endif

#endif