annotate armdb_mirgene.py @ 9:43cecced90f5 draft

Uploaded
author glogobyte
date Wed, 20 Oct 2021 15:17:00 +0000
parents 41f5a0616dbb
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
d133caf020a7 Uploaded
glogobyte
parents:
diff changeset
import argparse
import os
import subprocess
import time
import urllib.request
from multiprocessing import Process, Queue
d133caf020a7 Uploaded
glogobyte
parents:
diff changeset
6
d133caf020a7 Uploaded
glogobyte
parents:
diff changeset
7 #--------------------------------------------Arguments-----------------------------------------------
d133caf020a7 Uploaded
glogobyte
parents:
diff changeset
# Create the output directory in-process. `exist_ok=True` keeps re-runs quiet,
# whereas shelling out to `mkdir` reported an error whenever `out` already
# existed.
os.makedirs('out', exist_ok=True)

# Command-line interface. `--positions` and `--symbol` are consumed by
# custom_arms_mirgene(); they are required because the script cannot produce
# any output without them (omitting them previously surfaced as a TypeError
# only after all downloads had finished). `--tool_directory` is parsed but not
# read anywhere in this script.
parser = argparse.ArgumentParser()
parser.add_argument("-pos", "--positions", help="number of additional nucleotides", action="store", type=int, required=True)
parser.add_argument("-tool_dir", "--tool_directory", help="tool directory path", action="store")
parser.add_argument("-sym", "--symbol", help="organism symbol", action="store", required=True)
args = parser.parse_args()
d133caf020a7 Uploaded
glogobyte
parents:
diff changeset
14
d133caf020a7 Uploaded
glogobyte
parents:
diff changeset
15 #------------------------------------Read Fasta files from MirGene-----------------------------------
d133caf020a7 Uploaded
glogobyte
parents:
diff changeset
16
d133caf020a7 Uploaded
glogobyte
parents:
diff changeset
def read_url(path, out_queue=None):
    """Download a text resource and enqueue its content as a list of lines.

    Args:
        path: URL to fetch; anything ``urllib.request.urlopen`` accepts.
        out_queue: Object with a ``put`` method that receives the list of
            lines. When omitted, the module-level queue ``q`` is used, which
            is how the script's main section calls this function.

    Returns:
        None. The lines are delivered through the queue only; callers rely on
        this function not returning a value.
    """
    # Close the response deterministically instead of waiting for garbage
    # collection.
    with urllib.request.urlopen(path) as response:
        text = response.read().decode('utf-8')

    # splitlines() yields no phantom empty entry for a trailing newline, so
    # nothing has to be deleted afterwards. The previous unconditional
    # `del lines[-1]` discarded a real record whenever the payload did not end
    # with a newline. It also strips "\r" from CRLF-terminated payloads.
    lines = text.splitlines()

    target = q if out_queue is None else out_queue
    target.put(lines)
d133caf020a7 Uploaded
glogobyte
parents:
diff changeset
24
d133caf020a7 Uploaded
glogobyte
parents:
diff changeset
25 #-------------------------------------Generation of Custom Arms--------------------------------------
d133caf020a7 Uploaded
glogobyte
parents:
diff changeset
26
d133caf020a7 Uploaded
glogobyte
parents:
diff changeset
def custom_arms_mirgene(mat_mirna, pri_mirna, symbol=None, positions=None):
    """Extend mature miRNA sequences with flanking bases from their precursors.

    Both inputs are flat lists alternating header and sequence
    (``[header0, seq0, header1, seq1, ...]``); a dangling header without a
    sequence is ignored. A mature record is paired with every precursor whose
    header shares the same text before the first underscore.

    Args:
        mat_mirna: Alternating mature headers and sequences.
        pri_mirna: Alternating precursor headers and sequences.
        symbol: Only mature headers containing this substring are processed.
            Defaults to ``args.symbol``.
        positions: Number of precursor bases to add on each side of the
            mature sequence (int or numeric string). Defaults to
            ``args.positions``.

    Returns:
        A list of ``[mature_header, extended_sequence]`` pairs, in mature
        order and, within one mature record, in precursor order.

    Raises:
        ValueError: If ``positions`` is negative or not an integer.
    """
    if symbol is None:
        symbol = args.symbol
    if positions is None:
        positions = args.positions

    # Convert once instead of on every matching record.
    flank = int(positions)
    if flank < 0:
        raise ValueError("positions must be a non-negative integer")

    # Index precursor sequences by gene prefix once, so each mature record is
    # a dictionary lookup rather than a scan of the whole precursor list.
    pri_by_gene = {}
    for pri_header, pri_seq in zip(pri_mirna[0::2], pri_mirna[1::2]):
        pri_by_gene.setdefault(pri_header.split("_")[0], []).append(pri_seq)

    mat_ext = []
    for mat_header, mat_seq in zip(mat_mirna[0::2], mat_mirna[1::2]):
        if not mat_seq or symbol not in mat_header:
            continue

        for pri_seq in pri_by_gene.get(mat_header.split("_")[0], ()):
            # partition() cuts at the first occurrence only, so the 3' flank
            # stays intact even when the mature sequence recurs downstream
            # (split() truncated it at the second occurrence).
            upstream, found, downstream = pri_seq.partition(mat_seq)
            if not found:
                # Mature sequence absent from this precursor: skip the pair
                # instead of failing the whole run with an IndexError.
                continue

            # `upstream[-0:]` would return the entire flank, so a zero-width
            # extension has to be handled explicitly.
            left = upstream[-flank:] if flank else ""
            mat_ext.append([mat_header, left + mat_seq + downstream[:flank]])

    print(str(positions) + " positions shifted")
    return mat_ext
d133caf020a7 Uploaded
glogobyte
parents:
diff changeset
40
d133caf020a7 Uploaded
glogobyte
parents:
diff changeset
41 #----------------------------------------Export of Fasta files ---------------------------------------
d133caf020a7 Uploaded
glogobyte
parents:
diff changeset
42
d133caf020a7 Uploaded
glogobyte
parents:
diff changeset
def write_custom_arms(list, name, c):
    """Write records to a text file, one string per line.

    Args:
        list: Records to write. When ``c == 1`` each record is an indexable
            pair whose first two items are written on consecutive lines;
            otherwise each record is a single string. (The name shadows the
            builtin; it is kept so existing keyword callers keep working.)
        name: Path of the output file; it is created or truncated.
        c: Layout selector: ``1`` for two-item records, anything else for
            plain strings.
    """
    # The context manager guarantees the data is flushed and the handle is
    # closed even if a record has the wrong type; the file was previously
    # left open.
    with open(name, "w") as handle:
        if c == 1:
            handle.writelines(x[0] + '\n' + x[1] + '\n' for x in list)
        else:
            handle.writelines(x + '\n' for x in list)
d133caf020a7 Uploaded
glogobyte
parents:
diff changeset
52
d133caf020a7 Uploaded
glogobyte
parents:
diff changeset
53 #==================================================================================================================================
d133caf020a7 Uploaded
glogobyte
parents:
diff changeset
54
d133caf020a7 Uploaded
glogobyte
parents:
diff changeset
if __name__ == '__main__':

    starttime = time.time()
    print(args.symbol)
    q = Queue()

    # Read of all Fasta files.
    # The earlier `Process(target=read_url(url))` form ran read_url() right
    # here in the parent and then started a process whose target was its
    # return value (None), so the work was already sequential. Calling the
    # function directly does the same work without the no-op child processes.
    # Sequential calls also keep the queue order fixed, which the three
    # q.get() calls below depend on.
    mat_url = 'https://mirgenedb.org/fasta/ALL?mat=1'
    star_url = 'https://mirgenedb.org/fasta/ALL?star=1'
    pri_url = 'https://mirgenedb.org/static/data/ALL/ALL--pri-30-30.fas'
    for url in (mat_url, star_url, pri_url):
        read_url(url)

    mat_mirna = q.get()
    star_mirna = q.get()
    pri_mirna = q.get()

    # Merge the star records into the mature list, then replace U with T in
    # the sequences (odd indices; even indices hold the headers).
    mat_mirna.extend(star_mirna)
    mat_mirna[1::2] = [seq.replace("U", "T") for seq in mat_mirna[1::2]]

    # Generation of Custom Arms
    mat_ext = custom_arms_mirgene(mat_mirna, pri_mirna)

    # Export of all Fasta files (direct calls, for the same reason as the
    # downloads above).
    write_custom_arms(mat_mirna, "shifted_mirnas.bed", 0)
    write_custom_arms(pri_mirna, "original_mirnas.bed", 0)
    write_custom_arms(mat_ext, "new_ref.fa", 1)

    print('Runtime: {} seconds'.format(round(time.time() - starttime, 2)))