annotate test-data/pair_fetch_DNA_ff.py @ 5:b27006b0a953

update to latest version
author devteam@galaxyproject.org
date Wed, 22 Apr 2015 12:19:28 -0400
parents ecfc9041bcc5
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
4
ecfc9041bcc5 Deleted selected files
arkarachai-fungtammasan
parents:
diff changeset
1 #!/usr/bin/env python
ecfc9041bcc5 Deleted selected files
arkarachai-fungtammasan
parents:
diff changeset
2 # pair_fetch_DNA_ff.py
ecfc9041bcc5 Deleted selected files
arkarachai-fungtammasan
parents:
diff changeset
3 # Function: filter microsat and flanking region by quality score;
ecfc9041bcc5 Deleted selected files
arkarachai-fungtammasan
parents:
diff changeset
4 # remove reads in which any base of the microsatellite or of the "flanking_base" bases on either side of it has a quality score lower than "quality_require", and convert from snoope format to fastq
ecfc9041bcc5 Deleted selected files
arkarachai-fungtammasan
parents:
diff changeset
5 # Note that the required flanking length needs to be screened by Bob's snoope script first
ecfc9041bcc5 Deleted selected files
arkarachai-fungtammasan
parents:
diff changeset
6
ecfc9041bcc5 Deleted selected files
arkarachai-fungtammasan
parents:
diff changeset
7 # Author: Arkarachai Fungtammasan
ecfc9041bcc5 Deleted selected files
arkarachai-fungtammasan
parents:
diff changeset
8 # Version 1.0.0 (15 July 2012)
ecfc9041bcc5 Deleted selected files
arkarachai-fungtammasan
parents:
diff changeset
9 # Input format: length_of_repeat[0] left_flank_length[1] right_flank_length[2] repeat_motif[3] hamming_distance[4] read_name[5] read_sequence[6] read_quality[7]
ecfc9041bcc5 Deleted selected files
arkarachai-fungtammasan
parents:
diff changeset
10 # Output format: two fastq files. The first file contains the left flank; the second file contains the right flank.
ecfc9041bcc5 Deleted selected files
arkarachai-fungtammasan
parents:
diff changeset
11 # Command: python pair_fetch_DNA_ff.py input.txt left_flank.fastq right_flank.fastq quality_require flanking_base
ecfc9041bcc5 Deleted selected files
arkarachai-fungtammasan
parents:
diff changeset
12
ecfc9041bcc5 Deleted selected files
arkarachai-fungtammasan
parents:
diff changeset
13 import sys
ecfc9041bcc5 Deleted selected files
arkarachai-fungtammasan
parents:
diff changeset
14 from galaxy import eggs
ecfc9041bcc5 Deleted selected files
arkarachai-fungtammasan
parents:
diff changeset
15
ecfc9041bcc5 Deleted selected files
arkarachai-fungtammasan
parents:
diff changeset
16 def stop_err(msg):
ecfc9041bcc5 Deleted selected files
arkarachai-fungtammasan
parents:
diff changeset
17 sys.stderr.write(msg)
ecfc9041bcc5 Deleted selected files
arkarachai-fungtammasan
parents:
diff changeset
18 sys.exit()
ecfc9041bcc5 Deleted selected files
arkarachai-fungtammasan
parents:
diff changeset
19
ecfc9041bcc5 Deleted selected files
arkarachai-fungtammasan
parents:
diff changeset
20 # read file name
ecfc9041bcc5 Deleted selected files
arkarachai-fungtammasan
parents:
diff changeset
21
ecfc9041bcc5 Deleted selected files
arkarachai-fungtammasan
parents:
diff changeset
22
ecfc9041bcc5 Deleted selected files
arkarachai-fungtammasan
parents:
diff changeset
23
ecfc9041bcc5 Deleted selected files
arkarachai-fungtammasan
parents:
diff changeset
24 filename=sys.argv[1]
ecfc9041bcc5 Deleted selected files
arkarachai-fungtammasan
parents:
diff changeset
25 L_filename=sys.argv[2]
ecfc9041bcc5 Deleted selected files
arkarachai-fungtammasan
parents:
diff changeset
26 R_filename=sys.argv[3]
ecfc9041bcc5 Deleted selected files
arkarachai-fungtammasan
parents:
diff changeset
27 quality_require=sys.argv[4]
ecfc9041bcc5 Deleted selected files
arkarachai-fungtammasan
parents:
diff changeset
28 flanking_base=sys.argv[5]
ecfc9041bcc5 Deleted selected files
arkarachai-fungtammasan
parents:
diff changeset
29 try:
ecfc9041bcc5 Deleted selected files
arkarachai-fungtammasan
parents:
diff changeset
30 quality_require=int(quality_require)
ecfc9041bcc5 Deleted selected files
arkarachai-fungtammasan
parents:
diff changeset
31 flanking_base=int(flanking_base)
ecfc9041bcc5 Deleted selected files
arkarachai-fungtammasan
parents:
diff changeset
32 except Exception, eee:
ecfc9041bcc5 Deleted selected files
arkarachai-fungtammasan
parents:
diff changeset
33 print eee
ecfc9041bcc5 Deleted selected files
arkarachai-fungtammasan
parents:
diff changeset
34 stop_err("Quality score cutoff and Length of flanking regions that require quality screening must be integer")
ecfc9041bcc5 Deleted selected files
arkarachai-fungtammasan
parents:
diff changeset
35
ecfc9041bcc5 Deleted selected files
arkarachai-fungtammasan
parents:
diff changeset
36 fd=open(filename)
ecfc9041bcc5 Deleted selected files
arkarachai-fungtammasan
parents:
diff changeset
37 fdd1=open(L_filename,'w')
ecfc9041bcc5 Deleted selected files
arkarachai-fungtammasan
parents:
diff changeset
38 fdd2=open(R_filename,'w')
ecfc9041bcc5 Deleted selected files
arkarachai-fungtammasan
parents:
diff changeset
39 lines=fd.xreadlines()
ecfc9041bcc5 Deleted selected files
arkarachai-fungtammasan
parents:
diff changeset
40 for line in lines:
ecfc9041bcc5 Deleted selected files
arkarachai-fungtammasan
parents:
diff changeset
41 temp=line.strip().split('\t')
ecfc9041bcc5 Deleted selected files
arkarachai-fungtammasan
parents:
diff changeset
42 temp=filter(None,temp)
ecfc9041bcc5 Deleted selected files
arkarachai-fungtammasan
parents:
diff changeset
43 #get index
ecfc9041bcc5 Deleted selected files
arkarachai-fungtammasan
parents:
diff changeset
44 left_flank=(0,int(temp[1]))
ecfc9041bcc5 Deleted selected files
arkarachai-fungtammasan
parents:
diff changeset
45 microsat=(int(temp[1]),int(temp[1])+int(temp[0]))
ecfc9041bcc5 Deleted selected files
arkarachai-fungtammasan
parents:
diff changeset
46 right_flank=(int(temp[1])+int(temp[0]),int(temp[1])+int(temp[0])+int(temp[2]))
ecfc9041bcc5 Deleted selected files
arkarachai-fungtammasan
parents:
diff changeset
47 flag=0
ecfc9041bcc5 Deleted selected files
arkarachai-fungtammasan
parents:
diff changeset
48 #filter length of left and right flank
ecfc9041bcc5 Deleted selected files
arkarachai-fungtammasan
parents:
diff changeset
49 if (right_flank[1]-right_flank[0])<flanking_base:
ecfc9041bcc5 Deleted selected files
arkarachai-fungtammasan
parents:
diff changeset
50 continue
ecfc9041bcc5 Deleted selected files
arkarachai-fungtammasan
parents:
diff changeset
51 if (left_flank[1]-left_flank[0])<flanking_base:
ecfc9041bcc5 Deleted selected files
arkarachai-fungtammasan
parents:
diff changeset
52 continue
ecfc9041bcc5 Deleted selected files
arkarachai-fungtammasan
parents:
diff changeset
53 #filter quality score
ecfc9041bcc5 Deleted selected files
arkarachai-fungtammasan
parents:
diff changeset
54 for i in temp[7][microsat[0]-flanking_base:microsat[1]+flanking_base]:
ecfc9041bcc5 Deleted selected files
arkarachai-fungtammasan
parents:
diff changeset
55 if ord(i)<(quality_require+33):
ecfc9041bcc5 Deleted selected files
arkarachai-fungtammasan
parents:
diff changeset
56 flag=1
ecfc9041bcc5 Deleted selected files
arkarachai-fungtammasan
parents:
diff changeset
57 else:
ecfc9041bcc5 Deleted selected files
arkarachai-fungtammasan
parents:
diff changeset
58 flag=flag
ecfc9041bcc5 Deleted selected files
arkarachai-fungtammasan
parents:
diff changeset
59 #print out to separate files
ecfc9041bcc5 Deleted selected files
arkarachai-fungtammasan
parents:
diff changeset
60 if flag ==0:
ecfc9041bcc5 Deleted selected files
arkarachai-fungtammasan
parents:
diff changeset
61 newname= temp[5]##+'_'+temp[3]+'_'+temp[0]
ecfc9041bcc5 Deleted selected files
arkarachai-fungtammasan
parents:
diff changeset
62 fdd1.writelines('@'+newname+'\n')
ecfc9041bcc5 Deleted selected files
arkarachai-fungtammasan
parents:
diff changeset
63 fdd2.writelines('@'+newname+'\n')
ecfc9041bcc5 Deleted selected files
arkarachai-fungtammasan
parents:
diff changeset
64 fdd1.writelines(temp[6][left_flank[0]:left_flank[1]]+'\n')
ecfc9041bcc5 Deleted selected files
arkarachai-fungtammasan
parents:
diff changeset
65 fdd2.writelines(temp[6][right_flank[0]:right_flank[1]]+'\n')
ecfc9041bcc5 Deleted selected files
arkarachai-fungtammasan
parents:
diff changeset
66 fdd1.writelines('+'+newname+'\n')
ecfc9041bcc5 Deleted selected files
arkarachai-fungtammasan
parents:
diff changeset
67 fdd2.writelines('+'+newname+'\n')
ecfc9041bcc5 Deleted selected files
arkarachai-fungtammasan
parents:
diff changeset
68 fdd1.writelines(temp[7][left_flank[0]:left_flank[1]]+'\n')
ecfc9041bcc5 Deleted selected files
arkarachai-fungtammasan
parents:
diff changeset
69 fdd2.writelines(temp[7][right_flank[0]:right_flank[1]]+'\n')
ecfc9041bcc5 Deleted selected files
arkarachai-fungtammasan
parents:
diff changeset
70
ecfc9041bcc5 Deleted selected files
arkarachai-fungtammasan
parents:
diff changeset
71 fd.close()
ecfc9041bcc5 Deleted selected files
arkarachai-fungtammasan
parents:
diff changeset
72 fdd1.close()
ecfc9041bcc5 Deleted selected files
arkarachai-fungtammasan
parents:
diff changeset
73 fdd2.close()
ecfc9041bcc5 Deleted selected files
arkarachai-fungtammasan
parents:
diff changeset
74
ecfc9041bcc5 Deleted selected files
arkarachai-fungtammasan
parents:
diff changeset
75