annotate fastx_toolkit-0.0.6/src/libfastx/fastx.h @ 3:997f5136985f draft default tip

Uploaded
author xilinxu
date Thu, 14 Aug 2014 04:52:17 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
3
997f5136985f Uploaded
xilinxu
parents:
diff changeset
1 /*
997f5136985f Uploaded
xilinxu
parents:
diff changeset
2 FASTX-toolkit - FASTA/FASTQ preprocessing tools.
997f5136985f Uploaded
xilinxu
parents:
diff changeset
3 Copyright (C) 2009 A. Gordon (gordon@cshl.edu)
997f5136985f Uploaded
xilinxu
parents:
diff changeset
4
997f5136985f Uploaded
xilinxu
parents:
diff changeset
5 This program is free software: you can redistribute it and/or modify
997f5136985f Uploaded
xilinxu
parents:
diff changeset
6 it under the terms of the GNU Affero General Public License as
997f5136985f Uploaded
xilinxu
parents:
diff changeset
7 published by the Free Software Foundation, either version 3 of the
997f5136985f Uploaded
xilinxu
parents:
diff changeset
8 License, or (at your option) any later version.
997f5136985f Uploaded
xilinxu
parents:
diff changeset
9
997f5136985f Uploaded
xilinxu
parents:
diff changeset
10 This program is distributed in the hope that it will be useful,
997f5136985f Uploaded
xilinxu
parents:
diff changeset
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
997f5136985f Uploaded
xilinxu
parents:
diff changeset
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
997f5136985f Uploaded
xilinxu
parents:
diff changeset
13 GNU Affero General Public License for more details.
997f5136985f Uploaded
xilinxu
parents:
diff changeset
14
997f5136985f Uploaded
xilinxu
parents:
diff changeset
15 You should have received a copy of the GNU Affero General Public License
997f5136985f Uploaded
xilinxu
parents:
diff changeset
16 along with this program. If not, see <http://www.gnu.org/licenses/>.
997f5136985f Uploaded
xilinxu
parents:
diff changeset
17 */
997f5136985f Uploaded
xilinxu
parents:
diff changeset
18 #ifndef __FASTX_HEADER__
997f5136985f Uploaded
xilinxu
parents:
diff changeset
19 #define __FASTX_HEADER__
997f5136985f Uploaded
xilinxu
parents:
diff changeset
20
997f5136985f Uploaded
xilinxu
parents:
diff changeset
21 #ifdef __cplusplus
997f5136985f Uploaded
xilinxu
parents:
diff changeset
22 extern "C" {
997f5136985f Uploaded
xilinxu
parents:
diff changeset
23 #endif
997f5136985f Uploaded
xilinxu
parents:
diff changeset
24
997f5136985f Uploaded
xilinxu
parents:
diff changeset
25 #ifndef PATH_MAX
997f5136985f Uploaded
xilinxu
parents:
diff changeset
26 #include <linux/limits.h>
997f5136985f Uploaded
xilinxu
parents:
diff changeset
27 #endif
997f5136985f Uploaded
xilinxu
parents:
diff changeset
28
997f5136985f Uploaded
xilinxu
parents:
diff changeset
29 #define MIN_QUALITY_VALUE (-50)
997f5136985f Uploaded
xilinxu
parents:
diff changeset
30 #define MAX_QUALITY_VALUE 50
997f5136985f Uploaded
xilinxu
parents:
diff changeset
31 #define QUALITY_VALUES_RANGE (MAX_QUALITY_VALUE-MIN_QUALITY_VALUE)
997f5136985f Uploaded
xilinxu
parents:
diff changeset
32
997f5136985f Uploaded
xilinxu
parents:
diff changeset
33
997f5136985f Uploaded
xilinxu
parents:
diff changeset
34 #ifndef MAX_SEQ_LINE_LENGTH
997f5136985f Uploaded
xilinxu
parents:
diff changeset
35 #define MAX_SEQ_LINE_LENGTH (25000)
997f5136985f Uploaded
xilinxu
parents:
diff changeset
36 #endif
997f5136985f Uploaded
xilinxu
parents:
diff changeset
37
997f5136985f Uploaded
xilinxu
parents:
diff changeset
38 typedef enum {
997f5136985f Uploaded
xilinxu
parents:
diff changeset
39 FASTA_ONLY=0,
997f5136985f Uploaded
xilinxu
parents:
diff changeset
40 FASTA_OR_FASTQ=1,
997f5136985f Uploaded
xilinxu
parents:
diff changeset
41 FASTQ_ONLY=2
997f5136985f Uploaded
xilinxu
parents:
diff changeset
42 } ALLOWED_INPUT_FILE_TYPES;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
43
997f5136985f Uploaded
xilinxu
parents:
diff changeset
44 typedef enum {
997f5136985f Uploaded
xilinxu
parents:
diff changeset
45 DISALLOW_N=0,
997f5136985f Uploaded
xilinxu
parents:
diff changeset
46 ALLOW_N=1
997f5136985f Uploaded
xilinxu
parents:
diff changeset
47 } ALLOWED_INPUT_UNKNOWN_BASES;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
48
997f5136985f Uploaded
xilinxu
parents:
diff changeset
49 typedef enum {
997f5136985f Uploaded
xilinxu
parents:
diff changeset
50 REQUIRE_UPPERCASE=0,
997f5136985f Uploaded
xilinxu
parents:
diff changeset
51 ALLOW_LOWERCASE=1
997f5136985f Uploaded
xilinxu
parents:
diff changeset
52 } ALLOWED_INPUT_CASE;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
53
997f5136985f Uploaded
xilinxu
parents:
diff changeset
54 typedef enum {
997f5136985f Uploaded
xilinxu
parents:
diff changeset
55 OUTPUT_FASTA=0,
997f5136985f Uploaded
xilinxu
parents:
diff changeset
56 OUTPUT_FASTQ_ASCII_QUAL=1,
997f5136985f Uploaded
xilinxu
parents:
diff changeset
57 OUTPUT_FASTQ_NUMERIC_QUAL=2,
997f5136985f Uploaded
xilinxu
parents:
diff changeset
58 OUTPUT_SAME_AS_INPUT=3
997f5136985f Uploaded
xilinxu
parents:
diff changeset
59 } OUTPUT_FILE_TYPE;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
60
997f5136985f Uploaded
xilinxu
parents:
diff changeset
61 #pragma pack(1)
997f5136985f Uploaded
xilinxu
parents:
diff changeset
62 typedef struct
997f5136985f Uploaded
xilinxu
parents:
diff changeset
63 {
997f5136985f Uploaded
xilinxu
parents:
diff changeset
64 /* Record data - common for FASTA/FASTQ */
997f5136985f Uploaded
xilinxu
parents:
diff changeset
65 char input_sequence_id_prefix[1]; //DON'T touch this - this hack will read the entire name into the variable 'name',
997f5136985f Uploaded
xilinxu
parents:
diff changeset
66 //leaving the prefix ('>' or '@') in 'input_sequence_id_prefix'.
997f5136985f Uploaded
xilinxu
parents:
diff changeset
67 char name[MAX_SEQ_LINE_LENGTH+1];
997f5136985f Uploaded
xilinxu
parents:
diff changeset
68 char nucleotides[MAX_SEQ_LINE_LENGTH+1];
997f5136985f Uploaded
xilinxu
parents:
diff changeset
69 /* Record data - only for FASTQ */
997f5136985f Uploaded
xilinxu
parents:
diff changeset
70 char input_name2_prefix[1]; //same hack as 'input_sequence_id_prefix'
997f5136985f Uploaded
xilinxu
parents:
diff changeset
71 char name2[MAX_SEQ_LINE_LENGTH+1];
997f5136985f Uploaded
xilinxu
parents:
diff changeset
72 int quality[MAX_SEQ_LINE_LENGTH+1]; //note: this is NOT ascii values, but numerical values
997f5136985f Uploaded
xilinxu
parents:
diff changeset
73 // numeric quality scores and ASCII quality scores
997f5136985f Uploaded
xilinxu
parents:
diff changeset
74 // are automatically converted to numbers (-15 to 40)
997f5136985f Uploaded
xilinxu
parents:
diff changeset
75
997f5136985f Uploaded
xilinxu
parents:
diff changeset
76 /* Configuration */
997f5136985f Uploaded
xilinxu
parents:
diff changeset
77 int allow_input_filetype; // 0 = Allow only FASTA
997f5136985f Uploaded
xilinxu
parents:
diff changeset
78 int allow_N; // 1 = N is valid nucleotide, 0 = only A/G/C/T are valid
997f5136985f Uploaded
xilinxu
parents:
diff changeset
79 int allow_lowercase;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
80 int read_fastq; // 1 = Input is FASTQ (only if allow_input_filetype permits FASTQ)
997f5136985f Uploaded
xilinxu
parents:
diff changeset
81 int read_fastq_ascii; // 1 = Input is FASTQ with ASCII quality scores (0 = with numeric quality scores)
997f5136985f Uploaded
xilinxu
parents:
diff changeset
82 int write_fastq; // 0 = Write only FASTA (regardless of input type)
997f5136985f Uploaded
xilinxu
parents:
diff changeset
83 int write_fastq_ascii; // 1 = Write ASCII quality scores, 0 = write numeric quality scores
997f5136985f Uploaded
xilinxu
parents:
diff changeset
84 int compress_output; // 1 = pass output through GZIP
997f5136985f Uploaded
xilinxu
parents:
diff changeset
85
997f5136985f Uploaded
xilinxu
parents:
diff changeset
86 int copy_input_fastq_format_to_output ; // 1 = copy 'read_fastq_ascii' to 'write_fastq_ascii'
997f5136985f Uploaded
xilinxu
parents:
diff changeset
87 // so that the output format is the same as the input
997f5136985f Uploaded
xilinxu
parents:
diff changeset
88
997f5136985f Uploaded
xilinxu
parents:
diff changeset
89
997f5136985f Uploaded
xilinxu
parents:
diff changeset
90 /* Internal data */
997f5136985f Uploaded
xilinxu
parents:
diff changeset
91 int allowed_nucleotides[256]; //quick lookup table for valid input
997f5136985f Uploaded
xilinxu
parents:
diff changeset
92 char output_sequence_id_prefix; // '>' or '@', depending on the requested output type
997f5136985f Uploaded
xilinxu
parents:
diff changeset
93
997f5136985f Uploaded
xilinxu
parents:
diff changeset
94 char input_file_name[PATH_MAX]; //in linux, PATH_MAX is defined in <linux/limits.h>
997f5136985f Uploaded
xilinxu
parents:
diff changeset
95 unsigned long long input_line_number;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
96 char output_file_name[PATH_MAX]; //in linux, PATH_MAX is defined in <linux/limits.h>
997f5136985f Uploaded
xilinxu
parents:
diff changeset
97
997f5136985f Uploaded
xilinxu
parents:
diff changeset
98 size_t num_input_sequences;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
99 size_t num_output_sequences;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
100 size_t num_input_reads;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
101 size_t num_output_reads;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
102
997f5136985f Uploaded
xilinxu
parents:
diff changeset
103 FILE* input;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
104 FILE* output;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
105 } FASTX ;
997f5136985f Uploaded
xilinxu
parents:
diff changeset
106
997f5136985f Uploaded
xilinxu
parents:
diff changeset
107
997f5136985f Uploaded
xilinxu
parents:
diff changeset
108 void fastx_init_reader(FASTX *pFASTX, const char* filename,
997f5136985f Uploaded
xilinxu
parents:
diff changeset
109 ALLOWED_INPUT_FILE_TYPES allowed_input_filetype,
997f5136985f Uploaded
xilinxu
parents:
diff changeset
110 ALLOWED_INPUT_UNKNOWN_BASES allow_N,
997f5136985f Uploaded
xilinxu
parents:
diff changeset
111 ALLOWED_INPUT_CASE allow_lowercase);
997f5136985f Uploaded
xilinxu
parents:
diff changeset
112
997f5136985f Uploaded
xilinxu
parents:
diff changeset
113 // If the sequence identifier is collapsed (= "N-N") returns the reads_count,
997f5136985f Uploaded
xilinxu
parents:
diff changeset
114 // otherwise, returns 1
997f5136985f Uploaded
xilinxu
parents:
diff changeset
115 int get_reads_count(const FASTX *pFASTX);
997f5136985f Uploaded
xilinxu
parents:
diff changeset
116
997f5136985f Uploaded
xilinxu
parents:
diff changeset
117 void fastx_init_writer(FASTX *pFASTX,
997f5136985f Uploaded
xilinxu
parents:
diff changeset
118 const char* filename,
997f5136985f Uploaded
xilinxu
parents:
diff changeset
119 OUTPUT_FILE_TYPE output_type,
997f5136985f Uploaded
xilinxu
parents:
diff changeset
120 int compress_output);
997f5136985f Uploaded
xilinxu
parents:
diff changeset
121
997f5136985f Uploaded
xilinxu
parents:
diff changeset
122 int fastx_read_next_record(FASTX *pFASTX);
997f5136985f Uploaded
xilinxu
parents:
diff changeset
123
997f5136985f Uploaded
xilinxu
parents:
diff changeset
124 void fastx_write_record(FASTX *pFASTX);
997f5136985f Uploaded
xilinxu
parents:
diff changeset
125
997f5136985f Uploaded
xilinxu
parents:
diff changeset
126 size_t num_input_sequences(const FASTX *pFASTX);
997f5136985f Uploaded
xilinxu
parents:
diff changeset
127 size_t num_input_reads(const FASTX *pFASTX);
997f5136985f Uploaded
xilinxu
parents:
diff changeset
128 size_t num_output_sequences(const FASTX *pFASTX);
997f5136985f Uploaded
xilinxu
parents:
diff changeset
129 size_t num_output_reads(const FASTX *pFASTX);
997f5136985f Uploaded
xilinxu
parents:
diff changeset
130
997f5136985f Uploaded
xilinxu
parents:
diff changeset
131 #ifdef __cplusplus
997f5136985f Uploaded
xilinxu
parents:
diff changeset
132 }
997f5136985f Uploaded
xilinxu
parents:
diff changeset
133 #endif
997f5136985f Uploaded
xilinxu
parents:
diff changeset
134
997f5136985f Uploaded
xilinxu
parents:
diff changeset
135 #endif
997f5136985f Uploaded
xilinxu
parents:
diff changeset
136