Mercurial > repos > xilinxu > xilinxu
view fastx_toolkit-0.0.6/src/fastq_to_fasta/fastq_to_fasta.c @ 3:997f5136985f draft default tip
Uploaded
author | xilinxu |
---|---|
date | Thu, 14 Aug 2014 04:52:17 -0400 |
parents | |
children |
line wrap: on
line source
/* FASTX-toolkit - FASTA/FASTQ preprocessing tools. Copyright (C) 2009 A. Gordon (gordon@cshl.edu) This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more details. You should have received a copy of the GNU Affero General Public License along with this program. If not, see <http://www.gnu.org/licenses/>. */ #include <limits.h> #include <stdio.h> #include <stdlib.h> #include <string.h> #include <getopt.h> #include <errno.h> #include <err.h> #include <config.h> #include "fastx.h" #include "fastx_args.h" const char* usage= "usage: fastq_to_fasta [-h] [-r] [-n] [-v] [-z] [-i INFILE] [-o OUTFILE]\n" \ "\n" \ "version " VERSION "\n" \ " [-h] = This helpful help screen.\n" \ " [-r] = Rename sequence identifiers to numbers.\n" \ " [-n] = keep sequences with unknown (N) nucleotides.\n" \ " Default is to discard such sequences.\n" \ " [-v] = Verbose - report number of sequences.\n" \ " If [-o] is specified, report will be printed to STDOUT.\n" \ " If [-o] is not specified (and output goes to STDOUT),\n" \ " report will be printed to STDERR.\n" \ " [-z] = Compress output with GZIP.\n" \ " [-i INFILE] = FASTA/Q input file. default is STDIN.\n" \ " [-o OUTFILE] = FASTA output file. default is STDOUT.\n" \ "\n"; FASTX fastx; int flag_rename_seqid = 0; int flag_discard_N = 1 ; int parse_program_args(int __attribute__((unused)) optind, int optc, char __attribute__((unused)) *optarg) { switch(optc) { case 'n': flag_discard_N = 0 ; break; case 'r': flag_rename_seqid = 1; break; default: errx(1, __FILE__ ":%d: Unknown argument (%c)", __LINE__, optc ) ; } return 1; } int main(int argc, char* argv[]) { fastx_parse_cmdline(argc, argv, "rn", parse_program_args); fastx_init_reader(&fastx, get_input_filename(), FASTQ_ONLY, ALLOW_N, REQUIRE_UPPERCASE); fastx_init_writer(&fastx, get_output_filename(), OUTPUT_FASTA, compress_output_flag()); while ( fastx_read_next_record(&fastx) ) { //See if the input sequence contained 'N' nucleotides if ( flag_discard_N && (strchr(fastx.nucleotides,'N') != NULL)) continue; if ( flag_rename_seqid ) snprintf(fastx.name, sizeof(fastx.name), "%zu", num_output_reads(&fastx)+1) ; fastx_write_record(&fastx); } //Print verbose report if ( verbose_flag() ) { fprintf(get_report_file(), "Input: %zu reads.\n", num_input_reads(&fastx) ) ; fprintf(get_report_file(), "Output: %zu reads.\n", num_output_reads(&fastx) ) ; if ( flag_discard_N ) { size_t discarded = num_input_reads(&fastx) - num_output_reads(&fastx) ; fprintf(get_report_file(), "discarded %zu (%zu%%) low-quality reads.\n", discarded, (discarded*100)/( num_input_reads(&fastx) ) ) ; } } return 0; }