Mercurial > repos > bitlab > imsame
comparison IMSAME/src/reverseComplement.c @ 0:762009a91895 draft
Uploaded
author | bitlab |
---|---|
date | Sat, 15 Dec 2018 18:04:10 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:762009a91895 |
---|---|
1 /* reverseComplement.c | |
2 Program reading a FASTA file as input (one or multiple sequence) | |
3 and then returning the reverse complementary sequence in the output file. | |
4 It is important to note that for multiple sequence file the first input | |
5 sequence is the last one at the output file and the last one in the input | |
6 is the first one in the output. | |
7 oscart@uma.es | |
8 */ | |
9 | |
10 #include <stdio.h> | |
11 #include <stdlib.h> | |
12 #include <string.h> | |
13 #include <ctype.h> | |
14 #include <inttypes.h> | |
15 #include "commonFunctions.h" | |
16 | |
17 #define SEQSIZE 2000000000 | |
18 #define READINPUT 10000 | |
19 #define WSIZE 32 | |
20 #define NREADS 1000000 | |
21 | |
22 int main(int ac, char** av) { | |
23 FILE *fIn, *fOut; | |
24 int64_t i, j, nR, seqLen = 0; | |
25 char *seq, c, toW; | |
26 long *offset = NULL; | |
27 | |
28 if (ac != 3) | |
29 terror("USE: reverseComplement seqFile.IN reverseComplementarySeq.OUT"); | |
30 | |
31 /** | |
32 * Allocating memory for the sequence | |
33 * Fixed amount of memory, change it to loadSeqDB? | |
34 */ | |
35 if ((seq = (char*) malloc(SEQSIZE)) == NULL) | |
36 terror("memory for Seq"); | |
37 | |
38 if ((fIn = fopen(av[1], "rt")) == NULL) | |
39 terror("opening IN sequence FASTA file"); | |
40 | |
41 if ((fOut = fopen(av[2], "wt")) == NULL) | |
42 terror("opening OUT sequence Words file"); | |
43 | |
44 if ((offset = (long *) malloc(sizeof(long)*NREADS)) == NULL) | |
45 terror("memory for offsets"); | |
46 | |
47 for(i=0;i<NREADS;i++){ | |
48 offset[i]=0; | |
49 } | |
50 | |
51 nR = 0; | |
52 c = fgetc(fIn); | |
53 while(!feof(fIn)){ | |
54 if(c == '>'){ | |
55 offset[nR++] = ftell(fIn)-1; | |
56 } | |
57 c = fgetc(fIn); | |
58 } | |
59 | |
60 for(i=nR-1; i>=0; i--){ | |
61 fseek(fIn, offset[i], SEEK_SET); | |
62 //READ and write header | |
63 fgets(seq, READINPUT, fIn); | |
64 | |
65 fprintf(fOut, "%s", seq); | |
66 //READ and write sequence | |
67 c = fgetc(fIn); | |
68 while(c != '>' && !feof(fIn)){ | |
69 if(isupper(c) || islower(c)){ | |
70 seq[seqLen++]=c; | |
71 } | |
72 c = fgetc(fIn); | |
73 } | |
74 for(j=seqLen-1; j >= 0; j--){ | |
75 switch(seq[j]){ | |
76 case 'A': | |
77 toW = 'T'; | |
78 break; | |
79 case 'C': | |
80 toW = 'G'; | |
81 break; | |
82 case 'G': | |
83 toW = 'C'; | |
84 break; | |
85 case 'T': | |
86 toW = 'A'; | |
87 break; | |
88 case 'U': | |
89 toW = 'A'; | |
90 break; | |
91 case 'a': | |
92 toW = 't'; | |
93 break; | |
94 case 'c': | |
95 toW = 'g'; | |
96 break; | |
97 case 'g': | |
98 toW = 'c'; | |
99 break; | |
100 case 't': | |
101 toW = 'a'; | |
102 break; | |
103 case 'u': | |
104 toW = 'a'; | |
105 break; | |
106 default: | |
107 toW = seq[j]; | |
108 break; | |
109 } | |
110 fwrite(&toW, sizeof(char), 1, fOut); | |
111 } | |
112 toW='\n'; | |
113 fwrite(&toW, sizeof(char), 1, fOut); | |
114 seqLen=0; | |
115 } | |
116 | |
117 fclose(fIn); | |
118 fclose(fOut); | |
119 | |
120 return 0; | |
121 } | |
122 | |
123 |