Mercurial > repos > xilinxu > xilinxu
comparison fastx_toolkit-0.0.6/src/libfastx/fastx.h @ 3:997f5136985f draft default tip
Uploaded
author | xilinxu |
---|---|
date | Thu, 14 Aug 2014 04:52:17 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
2:dfe9332138cf | 3:997f5136985f |
---|---|
1 /* | |
2 FASTX-toolkit - FASTA/FASTQ preprocessing tools. | |
3 Copyright (C) 2009 A. Gordon (gordon@cshl.edu) | |
4 | |
5 This program is free software: you can redistribute it and/or modify | |
6 it under the terms of the GNU Affero General Public License as | |
7 published by the Free Software Foundation, either version 3 of the | |
8 License, or (at your option) any later version. | |
9 | |
10 This program is distributed in the hope that it will be useful, | |
11 but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
13 GNU Affero General Public License for more details. | |
14 | |
15 You should have received a copy of the GNU Affero General Public License | |
16 along with this program. If not, see <http://www.gnu.org/licenses/>. | |
17 */ | |
18 #ifndef __FASTX_HEADER__ | |
19 #define __FASTX_HEADER__ | |
20 | |
21 #ifdef __cplusplus | |
22 extern "C" { | |
23 #endif | |
24 | |
25 #ifndef PATH_MAX | |
26 #include <linux/limits.h> | |
27 #endif | |
28 | |
29 #define MIN_QUALITY_VALUE (-50) | |
30 #define MAX_QUALITY_VALUE 50 | |
31 #define QUALITY_VALUES_RANGE (MAX_QUALITY_VALUE-MIN_QUALITY_VALUE) | |
32 | |
33 | |
34 #ifndef MAX_SEQ_LINE_LENGTH | |
35 #define MAX_SEQ_LINE_LENGTH (25000) | |
36 #endif | |
37 | |
38 typedef enum { | |
39 FASTA_ONLY=0, | |
40 FASTA_OR_FASTQ=1, | |
41 FASTQ_ONLY=2 | |
42 } ALLOWED_INPUT_FILE_TYPES; | |
43 | |
44 typedef enum { | |
45 DISALLOW_N=0, | |
46 ALLOW_N=1 | |
47 } ALLOWED_INPUT_UNKNOWN_BASES; | |
48 | |
49 typedef enum { | |
50 REQUIRE_UPPERCASE=0, | |
51 ALLOW_LOWERCASE=1 | |
52 } ALLOWED_INPUT_CASE; | |
53 | |
54 typedef enum { | |
55 OUTPUT_FASTA=0, | |
56 OUTPUT_FASTQ_ASCII_QUAL=1, | |
57 OUTPUT_FASTQ_NUMERIC_QUAL=2, | |
58 OUTPUT_SAME_AS_INPUT=3 | |
59 } OUTPUT_FILE_TYPE; | |
60 | |
61 #pragma pack(1) | |
62 typedef struct | |
63 { | |
64 /* Record data - common for FASTA/FASTQ */ | |
65 char input_sequence_id_prefix[1]; //DON'T touch this - this hack will read the entire name into the variable 'name', | |
66 //leaving the prefix ('>' or '@') in 'input_sequence_id_prefix'. | |
67 char name[MAX_SEQ_LINE_LENGTH+1]; | |
68 char nucleotides[MAX_SEQ_LINE_LENGTH+1]; | |
69 /* Record data - only for FASTQ */ | |
70 char input_name2_prefix[1]; //same hack as 'input_sequence_id_prefix' | |
71 char name2[MAX_SEQ_LINE_LENGTH+1]; | |
72 int quality[MAX_SEQ_LINE_LENGTH+1]; //note: this is NOT ascii values, but numerical values | |
73 // numeric quality scores and ASCII quality scores | |
74 // are automatically converted to numbers (-15 to 40) | |
75 | |
76 /* Configuration */ | |
77 int allow_input_filetype; // 0 = Allow only FASTA | |
78 int allow_N; // 1 = N is valid nucleotide, 0 = only A/G/C/T are valid | |
79 int allow_lowercase; | |
80 int read_fastq; // 1 = Input is FASTQ (only if 'allow_input_filetype' permits FASTQ) | |
81 int read_fastq_ascii; // 1 = Input is FASTQ with ASCII quality scores (0 = with numeric quality scores) | |
82 int write_fastq; // 0 = Write only FASTA (regardless of input type) | |
83 int write_fastq_ascii; // 1 = Write ASCII quality scores, 0 = write numeric quality scores | |
84 int compress_output; // 1 = pass output through GZIP | |
85 | |
86 int copy_input_fastq_format_to_output ; // 1 = copy 'read_fastq_ascii' to 'write_fastq_ascii' | |
87 // so that the output format is the same as the input | |
88 | |
89 | |
90 /* Internal data */ | |
91 int allowed_nucleotides[256]; //quick lookup table for valid input | |
92 char output_sequence_id_prefix; // '>' or '@', depending on the requested output type | |
93 | |
94 char input_file_name[PATH_MAX]; //in linux, PATH_MAX is defined in <linux/limits.h> | |
95 unsigned long long input_line_number; | |
96 char output_file_name[PATH_MAX]; //in linux, PATH_MAX is defined in <linux/limits.h> | |
97 | |
98 size_t num_input_sequences; | |
99 size_t num_output_sequences; | |
100 size_t num_input_reads; | |
101 size_t num_output_reads; | |
102 | |
103 FILE* input; | |
104 FILE* output; | |
105 } FASTX ; | |
106 | |
107 | |
108 void fastx_init_reader(FASTX *pFASTX, const char* filename, | |
109 ALLOWED_INPUT_FILE_TYPES allowed_input_filetype, | |
110 ALLOWED_INPUT_UNKNOWN_BASES allow_N, | |
111 ALLOWED_INPUT_CASE allow_lowercase); | |
112 | |
113 // If the sequence identifier is collapsed (= "N-N") returns the reads_count, | |
114 // otherwise, returns 1 | |
115 int get_reads_count(const FASTX *pFASTX); | |
116 | |
117 void fastx_init_writer(FASTX *pFASTX, | |
118 const char* filename, | |
119 OUTPUT_FILE_TYPE output_type, | |
120 int compress_output); | |
121 | |
122 int fastx_read_next_record(FASTX *pFASTX); | |
123 | |
124 void fastx_write_record(FASTX *pFASTX); | |
125 | |
126 size_t num_input_sequences(const FASTX *pFASTX); | |
127 size_t num_input_reads(const FASTX *pFASTX); | |
128 size_t num_output_sequences(const FASTX *pFASTX); | |
129 size_t num_output_reads(const FASTX *pFASTX); | |
130 | |
131 #ifdef __cplusplus | |
132 } | |
133 #endif | |
134 | |
135 #endif | |
136 |