Mercurial > repos > alvarofaure > bitlab
comparison chromeister/src/reverseComplement.c @ 0:7fdf47a0bae8 draft
Uploaded
author | alvarofaure |
---|---|
date | Wed, 12 Dec 2018 07:18:40 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:7fdf47a0bae8 |
---|---|
1 /* reverseComplement.c | |
2 Program reading a FASTA file as input (one or multiple sequence) | |
3 and then returning the reverse complementary sequence in the output file. | |
4 It is important to note that for multiple sequence file the first input | |
5 sequence is the last one at the output file and the last one in the input | |
6 is the first one in the output. | |
7 oscart@uma.es | |
8 */ | |
9 | |
10 #include <stdio.h> | |
11 #include <stdlib.h> | |
12 #include <string.h> | |
13 #include <ctype.h> | |
14 #include <inttypes.h> | |
15 #include "commonFunctions.h" | |
16 | |
17 #define SEQSIZE 2000000000 | |
18 #define READINPUT 10000 | |
19 #define WSIZE 32 | |
20 #define NREADS 1000000 | |
21 | |
22 int main(int ac, char** av) { | |
23 FILE *fIn, *fOut; | |
24 int64_t i, j, nR, seqLen = 0; | |
25 char *seq, c, toW; | |
26 long *offset = NULL; | |
27 | |
28 if (ac != 3) | |
29 terror("USE: reverseComplement seqFile.IN reverseComplementarySeq.OUT"); | |
30 | |
31 /** | |
32 * Allocating memory for the sequence | |
33 * Fixed amount of memory, change it to loadSeqDB? | |
34 */ | |
35 if ((seq = (char*) malloc(SEQSIZE)) == NULL) | |
36 terror("memory for Seq"); | |
37 | |
38 if ((fIn = fopen(av[1], "rt")) == NULL) | |
39 terror("opening IN sequence FASTA file"); | |
40 | |
41 if ((fOut = fopen(av[2], "wt")) == NULL) | |
42 terror("opening OUT sequence Words file"); | |
43 | |
44 if ((offset = (long *) malloc(sizeof(long)*NREADS)) == NULL) | |
45 terror("memory for offsets"); | |
46 | |
47 for(i=0;i<NREADS;i++){ | |
48 offset[i]=0; | |
49 } | |
50 | |
51 nR = 0; | |
52 c = fgetc(fIn); | |
53 while(!feof(fIn)){ | |
54 if(c == '>'){ | |
55 offset[nR++] = ftell(fIn)-1; | |
56 } | |
57 c = fgetc(fIn); | |
58 } | |
59 | |
60 for(i=nR-1; i>=0; i--){ | |
61 fseek(fIn, offset[i], SEEK_SET); | |
62 //READ and write header | |
63 if(fgets(seq, READINPUT, fIn)==NULL){ | |
64 terror("Empty file"); | |
65 } | |
66 fprintf(fOut, "%s", seq); | |
67 //READ and write sequence | |
68 c = fgetc(fIn); | |
69 while(c != '>' && !feof(fIn)){ | |
70 if(isupper(c) || islower(c)){ | |
71 seq[seqLen++]=c; | |
72 } | |
73 c = fgetc(fIn); | |
74 } | |
75 for(j=seqLen-1; j >= 0; j--){ | |
76 switch(seq[j]){ | |
77 case 'A': | |
78 toW = 'T'; | |
79 break; | |
80 case 'C': | |
81 toW = 'G'; | |
82 break; | |
83 case 'G': | |
84 toW = 'C'; | |
85 break; | |
86 case 'T': | |
87 toW = 'A'; | |
88 break; | |
89 case 'U': | |
90 toW = 'A'; | |
91 break; | |
92 case 'a': | |
93 toW = 't'; | |
94 break; | |
95 case 'c': | |
96 toW = 'g'; | |
97 break; | |
98 case 'g': | |
99 toW = 'c'; | |
100 break; | |
101 case 't': | |
102 toW = 'a'; | |
103 break; | |
104 case 'u': | |
105 toW = 'a'; | |
106 break; | |
107 default: | |
108 toW = seq[j]; | |
109 break; | |
110 } | |
111 fwrite(&toW, sizeof(char), 1, fOut); | |
112 } | |
113 toW='\n'; | |
114 fwrite(&toW, sizeof(char), 1, fOut); | |
115 seqLen=0; | |
116 } | |
117 | |
118 fclose(fIn); | |
119 fclose(fOut); | |
120 | |
121 return 0; | |
122 } | |
123 | |
124 |