annotate SeqSero/libs/Initial_functions.py @ 4:ab0802d77891 draft default tip

Uploaded
author estrain
date Thu, 12 Sep 2019 06:46:00 -0400
parents c577b57b7c74
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
1 #!/usr/bin/env python
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
2
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
3
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
def To_list(L):
    """Convert a sequence of strings to a list of integers.

    The last character of every item is dropped before conversion
    (presumably a trailing newline from a file read -- confirm with callers).

    L: iterable of strings, each an integer literal plus one trailing character.
    Returns: a new list of ints, in the same order as L.
    Raises: ValueError if an item, minus its last character, is not a valid
    base-10 integer.
    """
    # int() replaces string.atoi(), which was deprecated in Python 2 and
    # removed in Python 3; base 10 is forced to match atoi's default.
    return [int(x[:-1], 10) for x in L]
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
12
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
13
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
def Uniq(L): #return the uniq list and the count number
    """Return the distinct values of L and how many times each occurs.

    L: a list of mutually comparable items. It is sorted IN PLACE, as the
       original implementation did, so the caller's list is reordered.
    Returns: a tuple (uniq, count) where uniq holds the distinct values in
    ascending order and count[i] is the number of occurrences of uniq[i].
    """
    import itertools

    L.sort()  # side effect kept on purpose: callers may rely on the sorted list
    uniq = []
    count = []
    # After sorting, equal values are adjacent, so a single groupby pass
    # replaces the former quadratic "not in L[:i]" scan and nested counting
    # loop. groupby compares with ==, so unhashable items still work.
    for value, run in itertools.groupby(L):
        uniq.append(value)
        count.append(sum(1 for _ in run))
    return (uniq, count)
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
26
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
27
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
def Parse_seros_in_genome_trakr(L): #return the sero names in the sra_result.xlsx, the next step is usually "Uniq" in above
    """Extract serovar names from underscore-separated organism strings.

    For every item containing "serovar", the text after the first "serovar"
    is split on "_" and the first token after it is taken as the name.
    "str." and "group" tokens are not names and are ignored. If the token is
    a subspecies/group marker (I, II, III, IIIa, IIIb, IV, VI, B), the name is
    the token after it instead, unless that token is "str.".

    L: iterable of strings.
    Returns: a list of the names found, in input order (duplicates kept).

    Entries with no token where the name is expected (e.g. ending in
    "serovar" or "serovar_II") are skipped rather than raising IndexError.
    """
    # Markers after which the real serovar name follows.
    subspecies_markers = ("I", "II", "III", "IIIa", "IIIb", "IV", "VI", "B")
    names2 = []
    for x in L:
        if "serovar" not in x:
            continue
        # Split once; fields[0] is whatever sits between "serovar" and the
        # first "_" (normally empty), fields[1] is the candidate name.
        fields = x.split("serovar")[1].split("_")
        if len(fields) < 2:
            continue  # nothing follows "serovar"
        key_word = fields[1]  # the seronames
        if key_word == "str." or key_word == "group":
            continue  # "serovar_str." / "serovar_group" carry no serovar name
        if key_word in subspecies_markers:
            # the serovar is behind those letters
            if len(fields) > 2 and fields[2] != "str.":
                names2.append(fields[2])
        else:
            names2.append(key_word)
    return names2
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
40
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
41
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
42