Mercurial > repos > arkarachai-fungtammasan > microsatellite_ngs
comparison test-data/pair_fetch_DNA_ff.py @ 4:ecfc9041bcc5
Deleted selected files
| author | arkarachai-fungtammasan |
|---|---|
| date | Wed, 01 Apr 2015 14:05:54 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 3:7cc220e8c2ac | 4:ecfc9041bcc5 |
|---|---|
| 1 #!/usr/bin/env python | |
| 2 # pair_fetch_DNA_ff.py | |
| 3 # Function: filter microsatellites and their flanking regions by quality score; | |
| 4 # remove any read with a base whose quality score is lower than "quality_require" within the microsatellite or within "flanking_base" bases on either side of it, and convert from snoope format to fastq | |
| 5 # Note that the required flanking length needs to be screened by Bob's snoope script first | |
| 6 | |
| 7 # Author: Arkarachai Fungtammasan | |
| 8 # Version 1.0.0 (15 July 2012) | |
| 9 # Input format: length_of_repeat[0] left_flank_length[1] right_flank_length[2] repeat_motif[3] hamming_distance[4] read_name[5] read_sequence[6] read_quality[7] | |
| 10 # Output format: two fastq files. The first file contains the left flanks. The second file contains the right flanks. | |
| 11 # Command: python pair_fetch_DNA_ff.py input.txt left_flank_output.fastq right_flank_output.fastq quality_require flanking_base | |
| 12 | |
| 13 import sys | |
| 14 from galaxy import eggs | |
| 15 | |
| 16 def stop_err(msg): | |
| 17 sys.stderr.write(msg) | |
| 18 sys.exit() | |
| 19 | |
| 20 # read file name | |
| 21 | |
| 22 | |
| 23 | |
| 24 filename=sys.argv[1] | |
| 25 L_filename=sys.argv[2] | |
| 26 R_filename=sys.argv[3] | |
| 27 quality_require=sys.argv[4] | |
| 28 flanking_base=sys.argv[5] | |
| 29 try: | |
| 30 quality_require=int(quality_require) | |
| 31 flanking_base=int(flanking_base) | |
| 32 except Exception, eee: | |
| 33 print eee | |
| 34 stop_err("Quality score cutoff and Length of flanking regions that require quality screening must be integer") | |
| 35 | |
| 36 fd=open(filename) | |
| 37 fdd1=open(L_filename,'w') | |
| 38 fdd2=open(R_filename,'w') | |
| 39 lines=fd.xreadlines() | |
| 40 for line in lines: | |
| 41 temp=line.strip().split('\t') | |
| 42 temp=filter(None,temp) | |
| 43 #get index | |
| 44 left_flank=(0,int(temp[1])) | |
| 45 microsat=(int(temp[1]),int(temp[1])+int(temp[0])) | |
| 46 right_flank=(int(temp[1])+int(temp[0]),int(temp[1])+int(temp[0])+int(temp[2])) | |
| 47 flag=0 | |
| 48 #filter length of left and right flank | |
| 49 if (right_flank[1]-right_flank[0])<flanking_base: | |
| 50 continue | |
| 51 if (left_flank[1]-left_flank[0])<flanking_base: | |
| 52 continue | |
| 53 #filter quality score | |
| 54 for i in temp[7][microsat[0]-flanking_base:microsat[1]+flanking_base]: | |
| 55 if ord(i)<(quality_require+33): | |
| 56 flag=1 | |
| 57 else: | |
| 58 flag=flag | |
| 59 #print out to separate files | |
| 60 if flag ==0: | |
| 61 newname= temp[5]##+'_'+temp[3]+'_'+temp[0] | |
| 62 fdd1.writelines('@'+newname+'\n') | |
| 63 fdd2.writelines('@'+newname+'\n') | |
| 64 fdd1.writelines(temp[6][left_flank[0]:left_flank[1]]+'\n') | |
| 65 fdd2.writelines(temp[6][right_flank[0]:right_flank[1]]+'\n') | |
| 66 fdd1.writelines('+'+newname+'\n') | |
| 67 fdd2.writelines('+'+newname+'\n') | |
| 68 fdd1.writelines(temp[7][left_flank[0]:left_flank[1]]+'\n') | |
| 69 fdd2.writelines(temp[7][right_flank[0]:right_flank[1]]+'\n') | |
| 70 | |
| 71 fd.close() | |
| 72 fdd1.close() | |
| 73 fdd2.close() | |
| 74 | |
| 75 |
