Mercurial > repos > portiahollyoak > fastuniq
comparison source/fastq_pair_array.c @ 0:816cb55b5a2d draft default tip
planemo upload for repository https://github.com/portiahollyoak/Tools commit c4769fd68ad9583d4b9dbdf212e4ecb5968cef1c-dirty
| author | portiahollyoak |
|---|---|
| date | Thu, 02 Jun 2016 11:34:51 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:816cb55b5a2d |
|---|---|
| 1 /**************************************************************************** | |
| 2 * The 'FASTQ_PAIR_ARRAY' structure group is used to store an array of | |
| 3 * paired FASTQ reads, together with the basic operations on it. | |
| 4 * | |
| 5 * This file was written by Haibin Xu, December 2011. | |
| 6 ****************************************************************************/ | |
| 7 | |
| 8 #include "fastq_pair_array.h" | |
| 9 | |
| 10 FASTQ_PAIR_ARRAY *fastq_pair_array_create() | |
| 11 { | |
| 12 /* create a FASTQ pair array. If successful, return the point to it, | |
| 13 * otherwise, return NULL. | |
| 14 */ | |
| 15 FASTQ_PAIR_ARRAY *fq_pair_array; | |
| 16 | |
| 17 if((fq_pair_array=(FASTQ_PAIR_ARRAY *)malloc(sizeof(FASTQ_PAIR_ARRAY)))==NULL) | |
| 18 return NULL; | |
| 19 | |
| 20 if((fq_pair_array->array= | |
| 21 (FASTQ_PAIR_ARRAY_BLOCK *)malloc(sizeof(FASTQ_PAIR_ARRAY_BLOCK)))==NULL) | |
| 22 { | |
| 23 free(fq_pair_array); | |
| 24 return NULL; | |
| 25 } | |
| 26 | |
| 27 fq_pair_array->last=fq_pair_array->array; | |
| 28 fq_pair_array->block_num=1; | |
| 29 fq_pair_array->fastq_pair_num=0; | |
| 30 | |
| 31 fq_pair_array->array->previous=NULL; | |
| 32 fq_pair_array->array->next=NULL; | |
| 33 fq_pair_array->array->num=0; | |
| 34 | |
| 35 fq_pair_array->index=NULL; | |
| 36 | |
| 37 return fq_pair_array; | |
| 38 } | |
| 39 | |
| 40 int fastq_pair_array_remove(FASTQ_PAIR_ARRAY *fq_pair_array) | |
| 41 { | |
| 42 /* free the FASTQ pair array. If successful, return 0, otherwise | |
| 43 * return 1. | |
| 44 */ | |
| 45 long i; | |
| 46 FASTQ_PAIR_ARRAY_BLOCK *fq_pair_array_block; | |
| 47 | |
| 48 if(fq_pair_array==NULL) | |
| 49 return 1; | |
| 50 | |
| 51 fq_pair_array_block=fq_pair_array->last; | |
| 52 for(;fq_pair_array_block!=NULL;) | |
| 53 { | |
| 54 for(i=0;i<fq_pair_array_block->num;i++) | |
| 55 fastq_pair_remove(fq_pair_array_block->block[i]); | |
| 56 | |
| 57 fq_pair_array_block=fq_pair_array_block->previous; | |
| 58 } | |
| 59 | |
| 60 if(fq_pair_array->index!=NULL) | |
| 61 free(fq_pair_array->index); | |
| 62 | |
| 63 return 0; | |
| 64 } | |
| 65 | |
| 66 int fastq_pair_array_append(FASTQ_PAIR *fq_pair, FASTQ_PAIR_ARRAY *fq_pair_array) | |
| 67 { | |
| 68 /* append a new FASTQ pair to the array. if successful, return 0, otherwise | |
| 69 * return 1. | |
| 70 */ | |
| 71 FASTQ_PAIR_ARRAY_BLOCK *block_temp; | |
| 72 | |
| 73 if(fq_pair_array==NULL || fq_pair==NULL) | |
| 74 return 1; | |
| 75 | |
| 76 if(fq_pair_array->last->num<FASTQ_PAIR_ARRAY_BLOCK_SIZE) | |
| 77 { | |
| 78 /* append to the last array_block */ | |
| 79 fq_pair_array->last->block[fq_pair_array->last->num++]=fq_pair; | |
| 80 fq_pair_array->fastq_pair_num++; | |
| 81 } | |
| 82 else | |
| 83 { | |
| 84 /* add a new array_block, amd append to it */ | |
| 85 if((block_temp= | |
| 86 (FASTQ_PAIR_ARRAY_BLOCK *)malloc(sizeof(FASTQ_PAIR_ARRAY_BLOCK)))==NULL) | |
| 87 return 0; | |
| 88 | |
| 89 fq_pair_array->last->next=block_temp; | |
| 90 block_temp->previous=fq_pair_array->last; | |
| 91 fq_pair_array->last=block_temp; | |
| 92 fq_pair_array->block_num++; | |
| 93 | |
| 94 block_temp->num=0; | |
| 95 block_temp->block[block_temp->num++]=fq_pair; | |
| 96 fq_pair_array->fastq_pair_num++; | |
| 97 } | |
| 98 | |
| 99 return 0; | |
| 100 } | |
| 101 | |
| 102 int fastq_pair_array_generate_index(FASTQ_PAIR_ARRAY *fq_pair_array) | |
| 103 { | |
| 104 /* generate the index for given FASTQ_PAIR, if successful, return 0, otherwise | |
| 105 * return 1. | |
| 106 */ | |
| 107 FASTQ_PAIR_ARRAY_BLOCK **temp_index; | |
| 108 FASTQ_PAIR_ARRAY_BLOCK *fq_array_block; | |
| 109 long i; | |
| 110 | |
| 111 if(fq_pair_array==NULL) | |
| 112 return 1; | |
| 113 | |
| 114 if(fq_pair_array->index!=NULL) | |
| 115 { | |
| 116 free(fq_pair_array->index); | |
| 117 fq_pair_array->index=NULL; | |
| 118 } | |
| 119 | |
| 120 if((temp_index=(FASTQ_PAIR_ARRAY_BLOCK **)malloc(sizeof(FASTQ_PAIR_ARRAY_BLOCK *)*(fq_pair_array->block_num)))==NULL) | |
| 121 return 1; | |
| 122 | |
| 123 fq_array_block=fq_pair_array->array; | |
| 124 for(i=0;i<fq_pair_array->block_num;i++) | |
| 125 { | |
| 126 temp_index[i]=fq_array_block; | |
| 127 fq_array_block=fq_array_block->next; | |
| 128 } | |
| 129 | |
| 130 fq_pair_array->index=temp_index; | |
| 131 | |
| 132 return 0; | |
| 133 | |
| 134 } | |
| 135 | |
| 136 FASTQ_PAIR **fastq_pair_array_get_pointer(FASTQ_PAIR_ARRAY *fq_pair_array, long position) | |
| 137 { | |
| 138 /* get double pointer to individual fastq_pair member at specific position | |
| 139 * in the array, if successful, return the double pointer, otherwise | |
| 140 * return NULL | |
| 141 */ | |
| 142 FASTQ_PAIR_ARRAY_BLOCK *fq_array_block; | |
| 143 long block_num, num; | |
| 144 long i; | |
| 145 | |
| 146 if(fq_pair_array==NULL || position<=0 || position>fq_pair_array->fastq_pair_num) | |
| 147 return NULL; | |
| 148 | |
| 149 block_num=position/FASTQ_PAIR_ARRAY_BLOCK_SIZE; | |
| 150 num=position%FASTQ_PAIR_ARRAY_BLOCK_SIZE; | |
| 151 | |
| 152 if(num==0) | |
| 153 num=FASTQ_PAIR_ARRAY_BLOCK_SIZE; | |
| 154 else | |
| 155 block_num++; | |
| 156 | |
| 157 if(fq_pair_array->index==NULL) | |
| 158 { | |
| 159 fq_array_block=fq_pair_array->array; | |
| 160 for(i=1;i<block_num;i++) | |
| 161 fq_array_block=fq_array_block->next; | |
| 162 | |
| 163 return &fq_array_block->block[num-1]; | |
| 164 } | |
| 165 else | |
| 166 return &fq_pair_array->index[block_num-1]->block[num-1]; | |
| 167 | |
| 168 return NULL; | |
| 169 } | |
| 170 | |
| 171 int fastq_pair_array_merge(FASTQ_PAIR_ARRAY *fq_pair_array, | |
| 172 FASTQ_PAIR_ARRAY *temp_fq_pair_array, | |
| 173 long low, long middle, long high) | |
| 174 { | |
| 175 /* merge the two sorted part in array, low-middle and middle-high, into a | |
| 176 * single sorted order. If successful, return 0, otherwise return 1. | |
| 177 */ | |
| 178 long i, begin1, end1, begin2, end2; | |
| 179 FASTQ_PAIR **fq_pair_current1, **fq_pair_current2; | |
| 180 FASTQ_PAIR **temp_fq_pair_current; | |
| 181 | |
| 182 if(fq_pair_array==NULL || temp_fq_pair_array==NULL || | |
| 183 low > middle || middle > high || | |
| 184 fq_pair_array->fastq_pair_num!=temp_fq_pair_array->fastq_pair_num) | |
| 185 return 1; | |
| 186 | |
| 187 begin1=low; | |
| 188 end1=middle; | |
| 189 begin2=middle+1; | |
| 190 end2=high; | |
| 191 | |
| 192 /* merge processing */ | |
| 193 for(i = low; begin1 <= end1 && begin2 <= end2;i++) | |
| 194 { | |
| 195 fq_pair_current1=fastq_pair_array_get_pointer(fq_pair_array, begin1); | |
| 196 fq_pair_current2=fastq_pair_array_get_pointer(fq_pair_array, begin2); | |
| 197 | |
| 198 temp_fq_pair_current=fastq_pair_array_get_pointer(temp_fq_pair_array, i); | |
| 199 | |
| 200 if(fastq_pair_compare_tight(*fq_pair_current1, *fq_pair_current2)<=0) | |
| 201 { | |
| 202 *temp_fq_pair_current=*fq_pair_current1; | |
| 203 begin1++; | |
| 204 } | |
| 205 else | |
| 206 { | |
| 207 *temp_fq_pair_current=*fq_pair_current2; | |
| 208 begin2++; | |
| 209 } | |
| 210 } | |
| 211 | |
| 212 /* moving the remaining data to temp_fq_pair_array */ | |
| 213 if(begin1<=end1) | |
| 214 { | |
| 215 for(;begin1<=end1;) | |
| 216 { | |
| 217 temp_fq_pair_current=fastq_pair_array_get_pointer(temp_fq_pair_array, i++); | |
| 218 fq_pair_current1=fastq_pair_array_get_pointer(fq_pair_array, begin1++); | |
| 219 *temp_fq_pair_current=*fq_pair_current1; | |
| 220 } | |
| 221 } | |
| 222 if(begin2<=end2) | |
| 223 { | |
| 224 for(;begin2<=end2;) | |
| 225 { | |
| 226 temp_fq_pair_current=fastq_pair_array_get_pointer(temp_fq_pair_array, i++); | |
| 227 fq_pair_current2=fastq_pair_array_get_pointer(fq_pair_array, begin2++); | |
| 228 *temp_fq_pair_current=*fq_pair_current2; | |
| 229 } | |
| 230 } | |
| 231 | |
| 232 /* moving the merged data to original position 'fq_pair_array' */ | |
| 233 for(i=low;i<=high;i++) | |
| 234 { | |
| 235 fq_pair_current1=fastq_pair_array_get_pointer(fq_pair_array, i); | |
| 236 temp_fq_pair_current=fastq_pair_array_get_pointer(temp_fq_pair_array, i); | |
| 237 *fq_pair_current1=*temp_fq_pair_current; | |
| 238 } | |
| 239 | |
| 240 return 0; | |
| 241 } | |
| 242 | |
| 243 int fastq_pair_array_sort(FASTQ_PAIR_ARRAY *fq_pair_array, FASTQ_PAIR_ARRAY *temp_fq_pair_array, | |
| 244 long first, long last) | |
| 245 { | |
| 246 /* sort the FASTQ pair array. If successful, return 0, otherwise | |
| 247 * return 1 | |
| 248 */ | |
| 249 long mid; | |
| 250 | |
| 251 if(first<last) | |
| 252 { | |
| 253 mid=(first+last)/2; | |
| 254 fastq_pair_array_sort(fq_pair_array, temp_fq_pair_array, first, mid); | |
| 255 fastq_pair_array_sort(fq_pair_array, temp_fq_pair_array, mid+1, last); | |
| 256 fastq_pair_array_merge(fq_pair_array, temp_fq_pair_array, first, mid, last); | |
| 257 } | |
| 258 | |
| 259 return 0; | |
| 260 } | |
| 261 | |
| 262 int fastq_pair_array_printf(FASTQ_PAIR_ARRAY *fq_pair_array, FILE *fp_out1, FILE *fp_out2, | |
| 263 char *format, int serial_flag, int flag_uniq) | |
| 264 { | |
| 265 /* write the pair-end reads in the array in FASTA or FASTQ format into two | |
| 266 * output files(format='fa' or 'fq') or in FASTA format into a single output | |
| 267 * file(format="fa" and fp_out2==NULL) using the original description | |
| 268 * (serial_flag=0) or a new serial number(serial_flag=1). Output all sequences | |
| 269 * (flag_uniq==0), or unique ones(flag_uniq==1). If successful, return 0, | |
| 270 * otherwise return 1. | |
| 271 */ | |
| 272 long i, k; | |
| 273 FASTQ_PAIR **temp_fq_pair, **temp_fq_pair_old; | |
| 274 | |
| 275 if(flag_uniq==0) | |
| 276 { | |
| 277 for(i=1;i<=fq_pair_array->fastq_pair_num;i++) | |
| 278 { | |
| 279 temp_fq_pair=fastq_pair_array_get_pointer(fq_pair_array, i); | |
| 280 | |
| 281 if(serial_flag==0) | |
| 282 fastq_pair_printf(*temp_fq_pair, fp_out1, fp_out2, format, -1); | |
| 283 else | |
| 284 fastq_pair_printf(*temp_fq_pair, fp_out1, fp_out2, format, i); | |
| 285 } | |
| 286 } | |
| 287 else | |
| 288 { | |
| 289 temp_fq_pair_old=fastq_pair_array_get_pointer(fq_pair_array, 1); | |
| 290 | |
| 291 /* the fastq_pair_array contain only one read-pair, output it */ | |
| 292 if(fq_pair_array->fastq_pair_num==1) | |
| 293 { | |
| 294 if(serial_flag==0) | |
| 295 fastq_pair_printf(*temp_fq_pair_old, fp_out1, fp_out2, | |
| 296 format, -1); | |
| 297 else | |
| 298 fastq_pair_printf(*temp_fq_pair_old, fp_out1, fp_out2, | |
| 299 format, k++); | |
| 300 } | |
| 301 | |
| 302 /* compare and output */ | |
| 303 for(i=2, k=1;i<=fq_pair_array->fastq_pair_num;i++) | |
| 304 { | |
| 305 temp_fq_pair=fastq_pair_array_get_pointer(fq_pair_array, i); | |
| 306 if(fastq_pair_compare_loose(*temp_fq_pair_old, *temp_fq_pair)!=0) | |
| 307 { | |
| 308 if(serial_flag==0) | |
| 309 fastq_pair_printf(*temp_fq_pair_old, fp_out1, fp_out2, | |
| 310 format, -1); | |
| 311 else | |
| 312 fastq_pair_printf(*temp_fq_pair_old, fp_out1, fp_out2, | |
| 313 format, k++); | |
| 314 | |
| 315 temp_fq_pair_old=temp_fq_pair; | |
| 316 | |
| 317 if(i==fq_pair_array->fastq_pair_num) | |
| 318 { | |
| 319 if(serial_flag==0) | |
| 320 fastq_pair_printf(*temp_fq_pair, fp_out1, fp_out2, | |
| 321 format, -1); | |
| 322 else | |
| 323 fastq_pair_printf(*temp_fq_pair, fp_out1, fp_out2, | |
| 324 format, k++); | |
| 325 } | |
| 326 } | |
| 327 else | |
| 328 { | |
| 329 if(fastq_pair_get_left_length(*temp_fq_pair_old) <= fastq_pair_get_left_length(*temp_fq_pair) && | |
| 330 fastq_pair_get_right_length(*temp_fq_pair_old) <= fastq_pair_get_right_length(*temp_fq_pair)) | |
| 331 { | |
| 332 temp_fq_pair_old=temp_fq_pair; | |
| 333 | |
| 334 if(i==fq_pair_array->fastq_pair_num) | |
| 335 { | |
| 336 if(serial_flag==0) | |
| 337 fastq_pair_printf(*temp_fq_pair, fp_out1, fp_out2, | |
| 338 format, -1); | |
| 339 else | |
| 340 fastq_pair_printf(*temp_fq_pair, fp_out1, fp_out2, | |
| 341 format, k++); | |
| 342 } | |
| 343 } | |
| 344 else | |
| 345 { | |
| 346 if(serial_flag==0) | |
| 347 fastq_pair_printf(*temp_fq_pair_old, fp_out1, fp_out2, | |
| 348 format, -1); | |
| 349 else | |
| 350 fastq_pair_printf(*temp_fq_pair_old, fp_out1, fp_out2, | |
| 351 format, k++); | |
| 352 | |
| 353 temp_fq_pair_old=temp_fq_pair; | |
| 354 } | |
| 355 } | |
| 356 } | |
| 357 } | |
| 358 return 0; | |
| 359 } | |
| 360 | |
| 361 | |
| 362 | |
| 363 | |
| 364 | |
| 365 | |
| 366 | |
| 367 | |
| 368 | |
| 369 | |
| 370 | |
| 371 | |
| 372 | |
| 373 | |
| 374 | |
| 375 | |
| 376 | |
| 377 | |
| 378 |
