Mercurial > repos > portiahollyoak > fastuniq
comparison source/fastq_pair.c @ 0:816cb55b5a2d draft default tip
planemo upload for repository https://github.com/portiahollyoak/Tools commit c4769fd68ad9583d4b9dbdf212e4ecb5968cef1c-dirty
| author | portiahollyoak |
|---|---|
| date | Thu, 02 Jun 2016 11:34:51 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:816cb55b5a2d |
|---|---|
| 1 /**************************************************************************** | |
| 2 * The 'FASTQ_PAIR' structure group is used to store paired reads and | |
| 3 * qualities, together with the basic functions that operate on them. | |
| 4 * | |
| 5 * This file was written by Haibin Xu, December 2011. | |
| 6 ****************************************************************************/ | |
| 7 | |
| 8 #include "fastq_pair.h" | |
| 9 | |
| 10 FASTQ_PAIR *fastq_pair_create() | |
| 11 { | |
| 12 /* create a FASTQ pair. If successful, return the point to it, | |
| 13 * otherwise, return NULL. | |
| 14 */ | |
| 15 FASTQ_PAIR *fq_pair; | |
| 16 | |
| 17 if((fq_pair=(FASTQ_PAIR *)malloc(sizeof(FASTQ_PAIR)))==NULL) | |
| 18 return NULL; | |
| 19 | |
| 20 fq_pair->seq_left=NULL; | |
| 21 fq_pair->seq_right=NULL; | |
| 22 | |
| 23 return fq_pair; | |
| 24 } | |
| 25 | |
| 26 int fastq_pair_remove(FASTQ_PAIR *fq_pair) | |
| 27 { | |
| 28 /* free the FASTQ pair. If successful, return 0, otherwise return 1. | |
| 29 */ | |
| 30 if(fq_pair==NULL) | |
| 31 return 1; | |
| 32 | |
| 33 fastq_pair_clear(fq_pair); | |
| 34 free(fq_pair); | |
| 35 | |
| 36 return 0; | |
| 37 } | |
| 38 | |
| 39 int fastq_pair_clear(FASTQ_PAIR *fq_pair) | |
| 40 { | |
| 41 /* clear the FASTQ pair. If successful, return 0, otherwise return 1. | |
| 42 */ | |
| 43 if(fq_pair==NULL) | |
| 44 return 1; | |
| 45 | |
| 46 if(fq_pair->seq_left!=NULL) | |
| 47 { | |
| 48 fastq_remove(fq_pair->seq_left); | |
| 49 fq_pair->seq_left=NULL; | |
| 50 } | |
| 51 if(fq_pair->seq_right!=NULL) | |
| 52 { | |
| 53 fastq_remove(fq_pair->seq_right); | |
| 54 fq_pair->seq_right=NULL; | |
| 55 } | |
| 56 return 0; | |
| 57 } | |
| 58 | |
| 59 int fastq_pair_scanf(FASTQ_PAIR *fq_pair, FILE *fp_left_in, FILE *fp_right_in, | |
| 60 int whether_append_description, int whether_append_quality) | |
| 61 { | |
| 62 /* load the left and right reads and qualities for FASTQ pair from file, including description | |
| 63 * (whether_append_description=1) or not (whether_append_description=0), including quality | |
| 64 * (whether_append_quality=1) or not (whether_append_quality=0). If successful, return 0, | |
| 65 * otherwise, clear FASTQ pair and return 1. | |
| 66 */ | |
| 67 FASTQ_ALL *fq_left, *fq_right; | |
| 68 | |
| 69 if(fq_pair==NULL || fp_left_in==NULL || fp_right_in==NULL) | |
| 70 return 1; | |
| 71 | |
| 72 /* clear the FASTQ_PAIR */ | |
| 73 fastq_pair_clear(fq_pair); | |
| 74 | |
| 75 /* create the FASTQ_ALL structure */ | |
| 76 if((fq_left=fastq_create())==NULL) | |
| 77 return 1; | |
| 78 if((fq_right=fastq_create())==NULL) | |
| 79 { | |
| 80 fastq_remove(fq_left); | |
| 81 return 1; | |
| 82 } | |
| 83 | |
| 84 if(fastq_scanf(fq_left, fp_left_in, whether_append_description, whether_append_quality)!=0 || | |
| 85 fastq_scanf(fq_right, fp_right_in, whether_append_description, whether_append_quality)!=0) | |
| 86 { | |
| 87 fastq_remove(fq_left); | |
| 88 fastq_remove(fq_right); | |
| 89 return 1; | |
| 90 } | |
| 91 | |
| 92 fq_pair->seq_left=fq_left; | |
| 93 fq_pair->seq_right=fq_right; | |
| 94 | |
| 95 return 0; | |
| 96 } | |
| 97 | |
| 98 int fastq_pair_printf(FASTQ_PAIR *fq_pair, FILE *fp_out1, FILE *fp_out2, | |
| 99 char *format, long serial) | |
| 100 { | |
| 101 /* write the pair-end reads in FASTA or FASTQ format into two output files(format='fa' or 'fq') | |
| 102 * or in FASTA format into a single output file(format="fa" and fp_out2==NULL) using the original | |
| 103 * description (serial=-1) or the new serial. If successful, return 0, otherwise, return 1. | |
| 104 */ | |
| 105 if(fq_pair==NULL || fp_out1==NULL) | |
| 106 return 1; | |
| 107 | |
| 108 if((strcmp(format, "fq")==0 && fp_out2!=NULL) || | |
| 109 (strcmp(format, "fa")==0 && fp_out2!=NULL)) | |
| 110 { | |
| 111 fastq_printf(fq_pair->seq_left, fp_out1, format, serial); | |
| 112 fastq_printf(fq_pair->seq_right, fp_out2, format, serial); | |
| 113 } | |
| 114 else if(strcmp(format, "fa")==0 && fp_out2==NULL) | |
| 115 { | |
| 116 fastq_printf(fq_pair->seq_left, fp_out1, format, serial); | |
| 117 fastq_printf(fq_pair->seq_right, fp_out1, format, serial); | |
| 118 } | |
| 119 else | |
| 120 return 1; | |
| 121 | |
| 122 return 0; | |
| 123 } | |
| 124 | |
| 125 int fastq_pair_compare_tight(FASTQ_PAIR *fq_pair_a, FASTQ_PAIR *fq_pair_b) | |
| 126 { | |
| 127 /* compare the two FASTQ pairs tightly, if identical, return 0, else if a>b, | |
| 128 * return 1, else if a<b, return -1. | |
| 129 */ | |
| 130 char *a_left, *a_right, *b_left, *b_right; | |
| 131 int i, flag; | |
| 132 | |
| 133 /* check whether the sequence read exist */ | |
| 134 if(fq_pair_a==NULL || fq_pair_b==NULL || | |
| 135 fq_pair_a->seq_left==NULL || fq_pair_a->seq_left->sequence==NULL || | |
| 136 fq_pair_a->seq_right==NULL || fq_pair_a->seq_right->sequence==NULL || | |
| 137 fq_pair_b->seq_left==NULL || fq_pair_b->seq_left->sequence==NULL || | |
| 138 fq_pair_b->seq_right==NULL || fq_pair_b->seq_right->sequence==NULL) | |
| 139 return 1; | |
| 140 | |
| 141 /* obtain points to sequence */ | |
| 142 a_left=fq_pair_a->seq_left->sequence; | |
| 143 a_right=fq_pair_a->seq_right->sequence; | |
| 144 b_left=fq_pair_b->seq_left->sequence; | |
| 145 b_right=fq_pair_b->seq_right->sequence; | |
| 146 | |
| 147 flag=0; | |
| 148 for(i=0;;i++) | |
| 149 { | |
| 150 if(a_left[i]=='\0' && b_left[i]=='\0') | |
| 151 break; | |
| 152 if(a_left[i]==b_left[i]) | |
| 153 continue; | |
| 154 if(a_left[i]=='\0') | |
| 155 { | |
| 156 flag=-1; | |
| 157 break; | |
| 158 } | |
| 159 if(b_left[i]=='\0') | |
| 160 { | |
| 161 flag=1; | |
| 162 break; | |
| 163 } | |
| 164 | |
| 165 switch((int)(a_left[i]>b_left[i])) | |
| 166 { | |
| 167 case 1: | |
| 168 flag=1; | |
| 169 break; | |
| 170 case 0: | |
| 171 flag=-1; | |
| 172 break; | |
| 173 default: | |
| 174 break; | |
| 175 } | |
| 176 break; | |
| 177 } | |
| 178 | |
| 179 if(flag==0) | |
| 180 { | |
| 181 for(i=0;;i++) | |
| 182 { | |
| 183 if(a_right[i]=='\0' && b_right[i]=='\0') | |
| 184 break; | |
| 185 if(a_right[i]==b_right[i]) | |
| 186 continue; | |
| 187 if(a_right[i]=='\0') | |
| 188 { | |
| 189 flag=-1; | |
| 190 break; | |
| 191 } | |
| 192 if(b_right[i]=='\0') | |
| 193 { | |
| 194 flag=1; | |
| 195 break; | |
| 196 } | |
| 197 | |
| 198 switch((int)(a_right[i]>b_right[i])) | |
| 199 { | |
| 200 case 1: | |
| 201 flag=1; | |
| 202 break; | |
| 203 case 0: | |
| 204 flag=-1; | |
| 205 break; | |
| 206 default: | |
| 207 break; | |
| 208 } | |
| 209 break; | |
| 210 } | |
| 211 } | |
| 212 | |
| 213 return flag; | |
| 214 } | |
| 215 | |
| 216 int fastq_pair_compare_loose(FASTQ_PAIR *fq_pair_a, FASTQ_PAIR *fq_pair_b) | |
| 217 { | |
| 218 /* compare the two FASTQ pairs loosely, if identical, return 0, else if a>b, | |
| 219 * return 1, else if a<b, return -1. | |
| 220 */ | |
| 221 char *a_left, *a_right, *b_left, *b_right; | |
| 222 int i, flag; | |
| 223 | |
| 224 /* check whether the sequence read exist */ | |
| 225 if(fq_pair_a==NULL || fq_pair_b==NULL || | |
| 226 fq_pair_a->seq_left==NULL || fq_pair_a->seq_left->sequence==NULL || | |
| 227 fq_pair_a->seq_right==NULL || fq_pair_a->seq_right->sequence==NULL || | |
| 228 fq_pair_b->seq_left==NULL || fq_pair_b->seq_left->sequence==NULL || | |
| 229 fq_pair_b->seq_right==NULL || fq_pair_b->seq_right->sequence==NULL) | |
| 230 return 1; | |
| 231 | |
| 232 /* obtain points to sequence */ | |
| 233 a_left=fq_pair_a->seq_left->sequence; | |
| 234 a_right=fq_pair_a->seq_right->sequence; | |
| 235 b_left=fq_pair_b->seq_left->sequence; | |
| 236 b_right=fq_pair_b->seq_right->sequence; | |
| 237 | |
| 238 flag=0; | |
| 239 for(i=0;;i++) | |
| 240 { | |
| 241 if(a_left[i]=='\0' && b_left[i]=='\0') | |
| 242 break; | |
| 243 if(a_left[i]==b_left[i]) | |
| 244 continue; | |
| 245 if(a_left[i]=='\0' || b_left[i]=='\0') | |
| 246 break; | |
| 247 | |
| 248 switch((int)(a_left[i]>b_left[i])) | |
| 249 { | |
| 250 case 1: | |
| 251 flag=1; | |
| 252 break; | |
| 253 case 0: | |
| 254 flag=-1; | |
| 255 break; | |
| 256 default: | |
| 257 break; | |
| 258 } | |
| 259 break; | |
| 260 } | |
| 261 | |
| 262 if(flag==0) | |
| 263 { | |
| 264 for(i=0;;i++) | |
| 265 { | |
| 266 if(a_right[i]=='\0' && b_right[i]=='\0') | |
| 267 break; | |
| 268 if(a_right[i]==b_right[i]) | |
| 269 continue; | |
| 270 if(a_right[i]=='\0' || b_right[i]=='\0') | |
| 271 break; | |
| 272 | |
| 273 switch((int)(a_right[i]>b_right[i])) | |
| 274 { | |
| 275 case 1: | |
| 276 flag=1; | |
| 277 break; | |
| 278 case 0: | |
| 279 flag=-1; | |
| 280 break; | |
| 281 default: | |
| 282 break; | |
| 283 } | |
| 284 break; | |
| 285 } | |
| 286 } | |
| 287 | |
| 288 return flag; | |
| 289 } | |
| 290 | |
| 291 long fastq_pair_get_left_length(FASTQ_PAIR *fq_pair) | |
| 292 { | |
| 293 /* return the length of left FASTQ sequence in pair, if any error, return -1. | |
| 294 */ | |
| 295 if(fq_pair==NULL) | |
| 296 return -1; | |
| 297 return fastq_get_length(fq_pair->seq_left); | |
| 298 } | |
| 299 | |
| 300 long fastq_pair_get_right_length(FASTQ_PAIR *fq_pair) | |
| 301 { | |
| 302 /* return the length of right FASTQ sequence in pair, if any error, return -1. | |
| 303 */ | |
| 304 if(fq_pair==NULL) | |
| 305 return -1; | |
| 306 return fastq_get_length(fq_pair->seq_right); | |
| 307 } | |
| 308 | |
| 309 long fastq_pair_get_total_length(FASTQ_PAIR *fq_pair) | |
| 310 { | |
| 311 /* return the length of both left and right FASTQ sequence in pair, | |
| 312 * if any error, return -1. | |
| 313 */ | |
| 314 long left_length, right_length; | |
| 315 | |
| 316 if(fq_pair==NULL) | |
| 317 return -1; | |
| 318 left_length=fastq_pair_get_left_length(fq_pair); | |
| 319 right_length=fastq_pair_get_right_length(fq_pair); | |
| 320 | |
| 321 if(left_length==-1 || right_length==-1) | |
| 322 return -1; | |
| 323 | |
| 324 return left_length+right_length; | |
| 325 } |
