view armdb_mirgene.py @ 9:43cecced90f5 draft

Uploaded
author glogobyte
date Wed, 20 Oct 2021 15:17:00 +0000
parents 41f5a0616dbb
children
line wrap: on
line source

import argparse
import os
import subprocess
import time
import urllib.request
from multiprocessing import Process, Queue

#--------------------------------------------Arguments-----------------------------------------------
# Make sure the "out" directory exists in the current working directory.
# os.makedirs is used instead of spawning `mkdir` so that this works without
# an external command and stays silent when the directory is already there.
# NOTE(review): nothing in this file writes into "out" -- presumably the
# calling tool wrapper expects it; confirm before removing.
os.makedirs('out', exist_ok=True)

# Command-line interface.  The parsed namespace `args` is read as a
# module-level global by the functions below.
parser = argparse.ArgumentParser()
# Parsed as a string; consumers convert it with int().
parser.add_argument("-pos", "--positions", help="number of additional nucleotides", action="store")
# Accepted for the caller's benefit; not referenced by the code in this file.
parser.add_argument("-tool_dir", "--tool_directory", help="tool directory path", action="store")
# Substring looked up in the FASTA headers to select the entries to process.
parser.add_argument("-sym", "--symbol", help="organism symbol",action="store")
args = parser.parse_args()

#------------------------------------Read Fasta files from MirGene-----------------------------------

def read_url(path):
    """Download *path* and put its lines on the module-level queue ``q``.

    The response body is decoded as UTF-8 and split on ``"\\n"``.  The
    resulting list of lines is not returned; it is delivered with
    ``q.put`` so the caller has to fetch it from the queue.

    Args:
        path: URL understood by ``urllib.request.urlopen``.
    """
    # The context manager closes the connection even if read() fails.
    with urllib.request.urlopen(path) as response:
        text = response.read().decode('utf-8')

    lines = text.split("\n")
    # A body that ends with a newline produces one trailing empty string.
    # Drop only that artefact, so a body without a final newline keeps its
    # last line.  (split() always returns at least one element.)
    if lines[-1] == "":
        lines.pop()
    q.put(lines)

#-------------------------------------Generation of Custom Arms--------------------------------------

def custom_arms_mirgene(mat_mirna,pri_mirna):
    """Extend the selected arms with flanking bases taken from their precursor.

    Both arguments are flat FASTA lists: a header at every even index,
    immediately followed by its sequence.  An arm is selected when
    ``args.symbol`` occurs in its header, and it is paired with every
    precursor whose header has the same text before the first underscore.

    Args:
        mat_mirna: Alternating header/sequence lines of the arms.
        pri_mirna: Alternating header/sequence lines of the precursors.

    Returns:
        A list of ``[header, extended_sequence]`` pairs.  The extended
        sequence is the arm padded on each side with up to
        ``args.positions`` bases that surround its first occurrence in the
        precursor (fewer when the precursor ends earlier).  An arm that does
        not occur in a matching precursor contributes no pair for it.
    """
    # Convert once; a negative request is treated as "no extension".
    flank = max(int(args.positions), 0)

    # Index precursor sequences by header prefix so each arm is resolved with
    # one lookup instead of a scan over every precursor.  Lists keep the
    # precursors in file order.
    precursors = {}
    for k in range(0, len(pri_mirna) - 1, 2):
        precursors.setdefault(pri_mirna[k].split("_")[0], []).append(pri_mirna[k + 1])

    mat_ext = []
    for i in range(0, len(mat_mirna) - 1, 2):
        header = mat_mirna[i]
        if args.symbol not in header:
            continue
        mat_seq = mat_mirna[i + 1]

        for pri_seq in precursors.get(header.split("_")[0], ()):
            # partition() cuts at the first occurrence only, so `after` is the
            # complete downstream sequence even if the arm is repeated.
            before, found, after = pri_seq.partition(mat_seq)
            if not found:
                # Arm absent from this precursor: nothing to extend from.
                continue
            # Counting from the front avoids the `[-0:]` pitfall, which would
            # return the whole upstream sequence when flank is 0.
            upstream = before[max(len(before) - flank, 0):]
            mat_ext.append([header, upstream + mat_seq + after[:flank]])

    print(str(args.positions)+" positions shifted")
    return mat_ext

#----------------------------------------Export of Fasta files ---------------------------------------

def write_custom_arms(list,name,c):
    """Write FASTA-style lines to the file *name*, replacing its content.

    Args:
        list: Items to write.  (The name shadows the builtin; it is kept so
            existing keyword callers continue to work.)
        name: Path of the file to create or overwrite.
        c: Layout selector.  With ``c == 1`` every item is an indexable
            ``[header, sequence]`` pair written on two lines; with any other
            value every item is a string written on a line of its own.
    """
    # `with` guarantees the handle is flushed and closed, also on error.
    with open(name, "w") as f:
        for x in list:
            if c==1:
                f.write(x[0]+'\n')
                f.write(x[1]+'\n')
            else:
                f.write(x+'\n')

#==================================================================================================================================

if __name__=='__main__':
    # Script entry point: download the MirGeneDB FASTA files, build the
    # extended arms for the requested organism and write three output files
    # into the current working directory.

    starttime = time.time()
    print(args.symbol)
    # read_url() delivers every downloaded file through this module-level queue.
    q = Queue()

    # Read of all Fasta files
    mat_url = 'https://mirgenedb.org/fasta/ALL?mat=1'
    star_url = 'https://mirgenedb.org/fasta/ALL?star=1'
    pri_url = 'https://mirgenedb.org/static/data/ALL/ALL--pri-30-30.fas'

    # The downloads run one after another in this process and the queue is
    # drained right after each of them, so every q.get() is guaranteed to
    # return the file that was just requested (mature, star, precursor).
    downloads = []
    for url in (mat_url, star_url, pri_url):
        read_url(url)
        downloads.append(q.get())
    mat_mirna, star_mirna, pri_mirna = downloads

    # Replace U with T in sequences
    # Sequences sit at the odd indices of the combined mature + star list.
    mat_mirna.extend(star_mirna)
    mat_mirna[1::2] = [seq.replace("U","T") for seq in mat_mirna[1::2]]

    #Generation of Custom Arms
    mat_ext=custom_arms_mirgene(mat_mirna,pri_mirna)

    #Export of all Fasta files
    write_custom_arms(mat_mirna,"shifted_mirnas.bed",0)
    write_custom_arms(pri_mirna,"original_mirnas.bed",0)
    write_custom_arms(mat_ext,"new_ref.fa",1)

    print('Runtime: {} seconds'.format(round(time.time() - starttime,2)))