Mercurial > repos > glogobyte > armdb
view armdb_mirgene.py @ 13:f5b6cad56b62 draft
Uploaded
author | glogobyte |
---|---|
date | Sun, 08 May 2022 09:25:08 +0000 |
parents | 43cecced90f5 |
children |
line wrap: on
line source
"""Build extended ("custom arm") miRNA references from MirGeneDB FASTA files.

The script downloads the mature, star and precursor FASTA files from
MirGeneDB, converts the mature/star sequences from RNA (U) to DNA (T)
letters, and, for every mature/star entry whose header contains the
requested organism symbol, extends the sequence by ``--positions``
nucleotides on each side using the matching precursor sequence.

Files written to the current working directory:

* ``shifted_mirnas.bed``  - mature + star records (U replaced with T)
* ``original_mirnas.bed`` - precursor records exactly as downloaded
* ``new_ref.fa``          - extended records for the selected organism
"""
import argparse
import os
import subprocess
import time
import urllib.request
from concurrent.futures import ThreadPoolExecutor
from multiprocessing import Process, Queue

# NOTE: ``subprocess`` and ``multiprocessing`` are no longer used by this
# script; the imports are retained so nothing relying on them breaks.

#--------------------------------------------Arguments-----------------------------------------------

MAT_URL = 'https://mirgenedb.org/fasta/ALL?mat=1'
STAR_URL = 'https://mirgenedb.org/fasta/ALL?star=1'
PRI_URL = 'https://mirgenedb.org/static/data/ALL/ALL--pri-30-30.fas'

parser = argparse.ArgumentParser()
parser.add_argument("-pos", "--positions", help="number of additional nucleotides", action="store")
parser.add_argument("-tool_dir", "--tool_directory", help="tool directory path", action="store")
parser.add_argument("-sym", "--symbol", help="organism symbol", action="store")

# Parsed command-line arguments. Populated by main() so that importing this
# module neither consumes sys.argv nor touches the file system.
args = None


#------------------------------------Read Fasta files from MirGene-----------------------------------

def _fasta_lines(text):
    """Split FASTA text into its non-empty lines.

    ``splitlines`` copes with a missing trailing newline and with CRLF line
    endings; blank lines are dropped so header/sequence pairs stay aligned.
    """
    return [line for line in text.splitlines() if line]


def read_url(path):
    """Download *path* and return its content as a list of text lines.

    Args:
        path: URL of a UTF-8 encoded FASTA file.

    Returns:
        The non-empty lines of the downloaded document.
    """
    # The context manager closes the HTTP response even if decoding fails.
    with urllib.request.urlopen(path) as response:
        text = response.read().decode('utf-8')
    return _fasta_lines(text)


#-------------------------------------Generation of Custom Arms--------------------------------------

def custom_arms_mirgene(mat_mirna, pri_mirna, symbol=None, positions=None):
    """Extend mature/star sequences with flanking precursor nucleotides.

    Both inputs are flat lists alternating header and sequence lines. A
    mature entry is paired with every precursor whose header shares the same
    text before the first ``_``.

    Args:
        mat_mirna: header/sequence lines of the mature and star miRNAs.
        pri_mirna: header/sequence lines of the precursors.
        symbol: text a mature header must contain to be processed; defaults
            to the ``--symbol`` command-line value.
        positions: number of nucleotides to add on each side (int or numeric
            string); defaults to the ``--positions`` command-line value.

    Returns:
        A list of ``[header, extended_sequence]`` pairs, ordered like
        ``mat_mirna``. Entries whose sequence does not occur in the matching
        precursor are reported on stdout and skipped.
    """
    if symbol is None:
        symbol = args.symbol
    if positions is None:
        positions = args.positions
    flank = int(positions)

    # Index precursors once instead of rescanning the list per mature entry.
    pri_by_id = {}
    for pri_header, pri_seq in zip(pri_mirna[0::2], pri_mirna[1::2]):
        pri_by_id.setdefault(pri_header.split("_")[0], []).append(pri_seq)

    mat_ext = []
    for header, mat_seq in zip(mat_mirna[0::2], mat_mirna[1::2]):
        if symbol not in header:
            continue
        for pri_seq in pri_by_id.get(header.split("_")[0], ()):
            start = pri_seq.find(mat_seq)
            if start == -1:
                print("Warning: sequence of " + header + " not found in its precursor, skipped")
                continue
            end = start + len(mat_seq)
            # Explicit bounds: a "[-flank:]" slice would return the whole
            # upstream flank when flank is 0.
            mat_ext.append([header, pri_seq[max(0, start - flank):end + flank]])

    print(str(positions) + " positions shifted")
    return mat_ext


#----------------------------------------Export of Fasta files ---------------------------------------

def write_custom_arms(list, name, c):
    """Write records to the text file *name*, one item per line.

    Args:
        list: records to write (parameter name kept for compatibility even
            though it shadows the builtin).
        name: path of the output file; overwritten if it exists.
        c: when ``1`` each record is a ``[header, sequence]`` pair written
            as two lines; otherwise each record is a single line of text.
    """
    # The context manager flushes and closes the file.
    with open(name, "w") as f:
        for x in list:
            if c == 1:
                f.write(x[0] + '\n')
                f.write(x[1] + '\n')
            else:
                f.write(x + '\n')


#==================================================================================================================================

def main():
    """Command-line entry point: download, extend and export the references."""
    global args
    args = parser.parse_args()

    # Silent when the directory already exists.
    os.makedirs('out', exist_ok=True)

    starttime = time.time()
    print(args.symbol)

    # Read of all Fasta files. The downloads are I/O bound, so threads give
    # real overlap, and map() returns the results in the order of the URLs.
    with ThreadPoolExecutor(max_workers=3) as pool:
        mat_mirna, star_mirna, pri_mirna = pool.map(read_url, (MAT_URL, STAR_URL, PRI_URL))

    # Replace U with T in sequences (odd indexes hold the sequence lines)
    mat_mirna.extend(star_mirna)
    for i in range(1, len(mat_mirna), 2):
        mat_mirna[i] = mat_mirna[i].replace("U", "T")

    # Generation of Custom Arms
    mat_ext = custom_arms_mirgene(mat_mirna, pri_mirna)

    # Export of all Fasta files
    write_custom_arms(mat_mirna, "shifted_mirnas.bed", 0)
    write_custom_arms(pri_mirna, "original_mirnas.bed", 0)
    write_custom_arms(mat_ext, "new_ref.fa", 1)

    print('Runtime: {} seconds'.format(round(time.time() - starttime, 2)))


if __name__ == '__main__':
    main()