Mercurial > repos > portiahollyoak > fastuniq
comparison source/fastq.c @ 0:816cb55b5a2d draft default tip
planemo upload for repository https://github.com/portiahollyoak/Tools commit c4769fd68ad9583d4b9dbdf212e4ecb5968cef1c-dirty
| author | portiahollyoak |
|---|---|
| date | Thu, 02 Jun 2016 11:34:51 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:816cb55b5a2d |
|---|---|
| 1 /**************************************************************************** | |
| 2 * The 'FASTQ_ALL' structure stores a nucleotide sequence in FASTQ | |
| 3 * format; this file implements the basic operations on it. | |
| 4 * | |
| 5 * This file was written by Haibin Xu, December 2011. | |
| 6 ****************************************************************************/ | |
| 7 | |
| 8 #include "fastq.h" | |
| 9 | |
| 10 FASTQ_ALL *fastq_create() | |
| 11 { | |
| 12 /* create a FASTQ_ALL sequence. If successful, return the point to it, | |
| 13 * otherwise, return NULL/. | |
| 14 */ | |
| 15 FASTQ_ALL *fq; | |
| 16 | |
| 17 if((fq=(FASTQ_ALL *)malloc(sizeof(FASTQ_ALL)))==NULL) | |
| 18 return NULL; | |
| 19 | |
| 20 fq->description_1=NULL; | |
| 21 fq->sequence=NULL; | |
| 22 fq->description_2=NULL; | |
| 23 fq->quality=NULL; | |
| 24 | |
| 25 return fq; | |
| 26 } | |
| 27 | |
| 28 int fastq_remove(FASTQ_ALL *fq) | |
| 29 { | |
| 30 /* free the FASTQ sequence. If successful, return 0, otherwise return 1. | |
| 31 */ | |
| 32 if(fq==NULL) | |
| 33 return 1; | |
| 34 | |
| 35 if(fq->description_1!=NULL) | |
| 36 free(fq->description_1); | |
| 37 if(fq->sequence!=NULL) | |
| 38 free(fq->sequence); | |
| 39 if(fq->description_2!=NULL) | |
| 40 free(fq->description_2); | |
| 41 if(fq->quality!=NULL) | |
| 42 free(fq->quality); | |
| 43 | |
| 44 free(fq); | |
| 45 | |
| 46 return 0; | |
| 47 } | |
| 48 | |
| 49 int fastq_clear(FASTQ_ALL *fq) | |
| 50 { | |
| 51 /* clear the FASTQ sequence. If successful, return 0, otherwise return 1. | |
| 52 */ | |
| 53 if(fq==NULL) | |
| 54 return 1; | |
| 55 | |
| 56 if(fq->description_1!=NULL) | |
| 57 { | |
| 58 free(fq->description_1); | |
| 59 fq->description_1=NULL; | |
| 60 } | |
| 61 if(fq->sequence!=NULL) | |
| 62 { | |
| 63 free(fq->sequence); | |
| 64 fq->sequence=NULL; | |
| 65 } | |
| 66 if(fq->description_2!=NULL) | |
| 67 { | |
| 68 free(fq->description_2); | |
| 69 fq->description_2=NULL; | |
| 70 } | |
| 71 if(fq->quality!=NULL) | |
| 72 { | |
| 73 free(fq->quality); | |
| 74 fq->quality=NULL; | |
| 75 } | |
| 76 | |
| 77 return 0; | |
| 78 } | |
| 79 | |
| 80 long fastq_get_serial(FASTQ_ALL *fq) | |
| 81 { | |
| 82 /* get sequence serial from FASTQ description in format '@serial_number'. | |
| 83 * If successful return the serial, otherwise return -1. | |
| 84 */ | |
| 85 long serial; | |
| 86 | |
| 87 if(fq==NULL || fq->description_1==NULL || fq->description_1[0]=='\0') | |
| 88 return -1; | |
| 89 | |
| 90 if((sscanf(fq->description_1, "@%ld", &serial))!=1) | |
| 91 return -1; | |
| 92 | |
| 93 return serial; | |
| 94 } | |
| 95 | |
| 96 int fastq_scanf(FASTQ_ALL *fq, FILE *fp_in, | |
| 97 int whether_append_description, int whether_append_quality) | |
| 98 { | |
| 99 /* read a FASTQ sequence from input file, including description (whether_append_description=1) | |
| 100 * or not (whether_append_description=0), including quality (whether_append_quality=1) or not | |
| 101 * (whether_append_quality=0). If successful, return 0, otherwise, clear fq | |
| 102 * and return 1. | |
| 103 */ | |
| 104 char description_1[FASTQ_DESCRIPTION_MAX_LENGTH], sequence[FASTQ_SEQUENCE_MAX_LENGTH]; | |
| 105 char description_2[FASTQ_DESCRIPTION_MAX_LENGTH], quality[FASTQ_SEQUENCE_MAX_LENGTH]; | |
| 106 | |
| 107 char *p_description_1, *p_sequence, *p_description_2, *p_quality; | |
| 108 | |
| 109 if(fp_in==NULL || fq==NULL) | |
| 110 return 1; | |
| 111 | |
| 112 fastq_clear(fq); | |
| 113 | |
| 114 /* read the FASTQ sequence */ | |
| 115 fgets(description_1, FASTQ_DESCRIPTION_MAX_LENGTH, fp_in); | |
| 116 fgets(sequence, FASTQ_SEQUENCE_MAX_LENGTH, fp_in); | |
| 117 fgets(description_2, FASTQ_DESCRIPTION_MAX_LENGTH, fp_in); | |
| 118 fgets(quality, FASTQ_SEQUENCE_MAX_LENGTH, fp_in); | |
| 119 | |
| 120 /* check whether integrity of the FASTQ sequence */ | |
| 121 if(description_1[0]=='\0' || sequence[0]=='\0' || description_2[0]=='\0' || | |
| 122 quality[0]=='\0' || description_1[0]!='@'|| description_2[0]!='+' || | |
| 123 description_1[strlen(description_1)-1]!='\n' || | |
| 124 sequence[strlen(sequence)-1]!='\n' || | |
| 125 description_2[strlen(description_2)-1]!='\n') | |
| 126 return 1; | |
| 127 | |
| 128 /* remove return character at the end */ | |
| 129 if(description_1[strlen(description_1)-1]=='\n') | |
| 130 description_1[strlen(description_1)-1]='\0'; | |
| 131 if(sequence[strlen(sequence)-1]=='\n') | |
| 132 sequence[strlen(sequence)-1]='\0'; | |
| 133 if(description_2[strlen(description_2)-1]=='\n') | |
| 134 description_2[strlen(description_2)-1]='\0'; | |
| 135 if(quality[strlen(quality)-1]=='\n') | |
| 136 quality[strlen(quality)-1]='\0'; | |
| 137 | |
| 138 /* append the sequence information to fq */ | |
| 139 if((p_sequence=(char *)malloc(strlen(sequence)+1))==NULL) | |
| 140 return 1; | |
| 141 strcpy(p_sequence, sequence); | |
| 142 fq->sequence=p_sequence; | |
| 143 | |
| 144 if(whether_append_quality==1) | |
| 145 { | |
| 146 if((p_quality=(char *)malloc(strlen(quality)+1))==NULL) | |
| 147 { | |
| 148 fastq_clear(fq); | |
| 149 return 1; | |
| 150 } | |
| 151 strcpy(p_quality, quality); | |
| 152 fq->quality=p_quality; | |
| 153 } | |
| 154 if(whether_append_description==1) | |
| 155 { | |
| 156 if((p_description_1=(char *)malloc(strlen(description_1)+1))==NULL) | |
| 157 { | |
| 158 fastq_clear(fq); | |
| 159 return 1; | |
| 160 } | |
| 161 strcpy(p_description_1, description_1); | |
| 162 fq->description_1=p_description_1; | |
| 163 | |
| 164 if((p_description_2=(char *)malloc(strlen(description_2)+1))==NULL) | |
| 165 { | |
| 166 fastq_clear(fq); | |
| 167 return 1; | |
| 168 } | |
| 169 strcpy(p_description_2, description_2); | |
| 170 fq->description_2=p_description_2; | |
| 171 } | |
| 172 | |
| 173 return 0; | |
| 174 } | |
| 175 | |
| 176 int fastq_printf(FASTQ_ALL *fq, FILE *fp_out, char *format, long serial) | |
| 177 { | |
| 178 /* write sequence into output file in FASTQ format(format='fq') or FASTA format(format='fa') | |
| 179 * using the original description (serial=-1) or the new serial. | |
| 180 * If successful, return 0, otherwise return 1. | |
| 181 */ | |
| 182 if(fp_out==NULL || fq==NULL) | |
| 183 return 1; | |
| 184 | |
| 185 if(strcmp(format, "fq")==0) /* output in FASTQ format */ | |
| 186 { | |
| 187 if(serial==-1) | |
| 188 { | |
| 189 if(fq->description_1!=NULL) | |
| 190 { | |
| 191 fputs(fq->description_1, fp_out); | |
| 192 fputc('\n', fp_out); | |
| 193 fputs(fq->sequence, fp_out); | |
| 194 fputc('\n', fp_out); | |
| 195 fputs(fq->description_2, fp_out); | |
| 196 fputc('\n', fp_out); | |
| 197 fputs(fq->quality, fp_out); | |
| 198 fputc('\n', fp_out); | |
| 199 } | |
| 200 else | |
| 201 { | |
| 202 fputc('@', fp_out); | |
| 203 fputc('\n', fp_out); | |
| 204 fputs(fq->sequence, fp_out); | |
| 205 fputc('\n', fp_out); | |
| 206 fputc('+', fp_out); | |
| 207 fputc('\n', fp_out); | |
| 208 fputs(fq->quality, fp_out); | |
| 209 fputc('\n', fp_out); | |
| 210 } | |
| 211 } | |
| 212 else | |
| 213 { | |
| 214 fprintf(fp_out, "@%ld\n", serial); | |
| 215 fputs(fq->sequence, fp_out); | |
| 216 fputc('\n', fp_out); | |
| 217 fprintf(fp_out, "+%ld\n", serial); | |
| 218 fputs(fq->quality, fp_out); | |
| 219 fputc('\n', fp_out); | |
| 220 } | |
| 221 } | |
| 222 else if(strcmp(format, "fa")==0) /* output in FASTQ format */ | |
| 223 { | |
| 224 if(serial==-1) | |
| 225 { | |
| 226 if(fq->description_1!=NULL) | |
| 227 { | |
| 228 fputc('>', fp_out); | |
| 229 fputs(&(fq->description_1[1]), fp_out); | |
| 230 fputc('\n', fp_out); | |
| 231 fputs(fq->sequence, fp_out); | |
| 232 fputc('\n', fp_out); | |
| 233 } | |
| 234 else | |
| 235 { | |
| 236 fputc('>', fp_out); | |
| 237 fputc('\n', fp_out); | |
| 238 fputs(fq->sequence, fp_out); | |
| 239 fputc('\n', fp_out); | |
| 240 } | |
| 241 } | |
| 242 else | |
| 243 { | |
| 244 fprintf(fp_out, ">%ld\n", serial); | |
| 245 fputs(fq->sequence, fp_out); | |
| 246 fputc('\n', fp_out); | |
| 247 } | |
| 248 } | |
| 249 else | |
| 250 return 1; | |
| 251 | |
| 252 return 0; | |
| 253 } | |
| 254 | |
| 255 long fastq_get_length(FASTQ_ALL *fq) | |
| 256 { | |
| 257 /* return the length of FASTQ sequence, is any error, return -1 | |
| 258 */ | |
| 259 | |
| 260 if(fq==NULL) | |
| 261 return -1; | |
| 262 if(fq->sequence==NULL) | |
| 263 return 0; | |
| 264 return strlen(fq->sequence); | |
| 265 } | |
| 266 | |
| 267 | |
| 268 | |
| 269 | |
| 270 | |
| 271 | |
| 272 | |
| 273 |
