changeset 36:e55fa008e518 draft

Deleted selected files
author glogobyte
date Tue, 17 Oct 2023 08:21:45 +0000
parents 293aa8cbbc20
children b8de171556f0
files mirbase_functions.py
diffstat 1 files changed, 0 insertions(+), 766 deletions(-) [+]
line wrap: on
line diff
--- a/mirbase_functions.py	Mon Apr 25 07:34:04 2022 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,766 +0,0 @@
-import itertools
-import re
-import urllib.request
-import gzip
-import copy
-from collections import OrderedDict
-
-
-
-# Read a file and return it as a list
-def read(path, flag):
-    if flag == 0:
-        with open(path) as fp:
-            file=fp.readlines()
-        fp.close()
-        return file
-
-    if flag == 1:
-        with open(path) as fp:
-            file = fp.read().splitlines()
-        fp.close()
-        return file
-
-# Write a list to a txt file
-def write(path, list):
-    with open(path,'w') as fp:
-        for x in list:
-            fp.write(str("\t".join(x[1:-1])))
-    fp.close()
-
-
-#################################################################################################################>
-
-# Detect the longest common substring sequence between two mirnas
-def longestSubstring(str1, str2):
-
-    from difflib import SequenceMatcher
-    # initialize SequenceMatcher object with
-    # input string
-    seqMatch = SequenceMatcher(None, str1, str2)
-
-    # find match of longest sub-string
-    # output will be like Match(a=0, b=0, size=5)
-    match = seqMatch.find_longest_match(0, len(str1), 0, len(str2))
-
-    # print longest substring
-    if (match.size != 0):
-        return str1[match.a: match.a + match.size]
-    else:
-        print('No longest common sub-string found')
-
-#################################################################################################################################################################################################################
-
-"""
-
-This function concatenates miRNAs which are generated from different chromosomes
-and eliminates the duplications of miRNAs on every sample
-
-input:  detected miRNAs
-output: collpased miRNAs without duplicates
-
-"""
-
-
-def remove_duplicates(mirnas):
-
- # Detection of canonical mirRNAs whicha are generated from different chromosomes
- dupes=[[x[9],x[0],x[2]] for x in mirnas]
-
- for x in mirnas:
-     for y in dupes:
-         if x[9] == y[0] and x[0] == y[1] and x[2].split("_")[0] == y[2].split("_")[0] and x[2] != y[2]:
-            y.append(x[2])
-
- # Detection of different chromosomes for every miRNA
- chr_order = []
- for x in dupes:
-     temp = []
-     for i in range(2,len(x)):
-         if x[i].split("chr")[1].split("(")[0].isdigit():
-            temp.append(int(x[i].split("chr")[1].split("(")[1][0]+x[i].split("chr")[1].split("(")[0]))
-         else:
-            temp.append(x[i].split("chr")[1][0:4])
-
-     for z in temp:
-         if 'X(-)'==z or 'Y(-)'==z or 'X(+)'==z or 'Y(+)'==z:
-             temp = [str(j) for j in temp]
-     temp = list(set(temp))
-     temp.sort()
-     chr_order.append(temp)
-
- # Collapsing the miRNAs with the same sequence from different chromosomes
- collapsed_dupes=[]
- for i in range(len(dupes)):
-     collapsed_dupes.append([dupes[i][0],dupes[i][2].split("_")[0],dupes[i][1]])
-     for x in chr_order[i]:
-         chr_check = re.match("[-+]?\d+$", str(x))	  # check if chromosome is 'X' or 'Y'
-         if chr_check is not None:
-            if int(x)<0:                 # Check the strand (+) or (-)
-               collapsed_dupes[i][1]= collapsed_dupes[i][1]+"_chr"+str(abs(int(x)))+"(-)"
-            else:
-               collapsed_dupes[i][1] = collapsed_dupes[i][1] + "_chr" + str(abs(int(x)))+"(+)"
-         else:
-            collapsed_dupes[i][1] = collapsed_dupes[i][1] + "_chr" + str(x)
-
- # Remove duplicates from collapsed_dupes
- collapsed_dupes.sort()
- collapsed_dupes = list(collapsed_dupes for collapsed_dupes,_ in itertools.groupby(collapsed_dupes))
-
- for i in range(len(mirnas)):
-     for x in collapsed_dupes:
-
-         # Naming of template isomirs (adding positions in the names)
-         if mirnas[i][9] == x[0] and mirnas[i][0] == x[2] and len(mirnas[i][2].split("_")) >3 and mirnas[i][2].split("_")[0]==x[1].split("_")[0]:
-            gg=str("_t_"+mirnas[i][2].split("_")[-2]+"_"+mirnas[i][2].split("_")[-1])
-            mirnas[i][2] = x[1]+gg
-            break
-
-         # Naming of canonical miRNAs (collpsed names)
-         if mirnas[i][9]==x[0] and mirnas[i][0]== x[2] and len(mirnas[i][2].split("_"))==3 and mirnas[i][2].split("_")[0]==x[1].split("_")[0]:
-            mirnas[i][2] = x[1]
-            break
-
- # Remove duplicates
- mirnas.sort()
- mirnas=list(mirnas for mirnas,_ in itertools.groupby(mirnas))
-
- return mirnas
-
-#############################################################################################################################################################################################################
-
-"""
-
-This function indentifies and classifies the miRNAs which are detected from the alignment tool.
-
-"""
-
-def sam_edit(mature_mirnas,path,file,case,l,samples,data,file_order,unmap_seq,names_n_seqs,deseq,mirna_names,ini_sample,unmap_counts):
-
-    # read the sam file
-    ini_sam=read(path,0)
-    main_sam = [x.rstrip("\n").split("\t") for x in ini_sam if "@" not in x.split("\t")[0]]     # remove introduction
-    unique_seq = [x for x in main_sam if x[1] == '0' and len(x[9])>=18 and len(x[9])<=26]   # keeps only the functional miRNAs
-    filter_sam = [[x[0],x[1],x[2],len(x[9])] for x in main_sam]                             # keeps only the necessary info of miRNAs from sam files (name, sequence, counts, etc)
-
-    sorted_uni_arms = []
-
-    for i in range(0,len(mature_mirnas,),2):
-        tmp_count_reads = 0   # calculate the total number of reads
-        tmp_count_seq = 0     # calculate the total number of sequences
-        for j in range(len(unique_seq)):
-
-            if "{" in unique_seq[j][2].split("_")[0]:           # checks if a miRNA is generated from two different locis on the same chromosome
-                mirna=unique_seq[j][2].split("_")[0][:-4]
-            else:
-                mirna=unique_seq[j][2].split("_")[0]
-
-            # Detection of differences between the canonical miRNA and the detected miRNA
-            if mature_mirnas[i].split(" ")[0][1:] == mirna:
-
-                temp_mature = mature_mirnas[i+1].strip().replace("U", "T")
-                off_part = longestSubstring(temp_mature, unique_seq[j][9])
-
-                mat_diff = temp_mature.split(off_part)
-                mat_diff = [len(mat_diff[0]), len(mat_diff[1])]
-
-                unique_diff = unique_seq[j][9].split(off_part)
-                unique_diff = [len(unique_diff[0]), len(unique_diff[1])]
-
-                # Handling of some special mirnas like (hsa-miR-8485)
-                if mat_diff[1]!=0 and unique_diff[1]!=0:
-                    unique_seq[j]=1
-                    pre_pos = 0
-                    post_pos = 0
-
-                elif mat_diff[0]!=0 and unique_diff[0]!=0:
-                    unique_seq[j]=1
-                    pre_pos = 0
-                    post_pos = 0
-
-                else:
-                   # Keep the findings
-                   pre_pos = mat_diff[0]-unique_diff[0]
-                   post_pos = unique_diff[1]-mat_diff[1]
-                   tmp_count_reads = tmp_count_reads + int(unique_seq[j][0].split("-")[1])
-                   tmp_count_seq = tmp_count_seq+1
-
-                # Store the detected miRNAs with new names according to the findings
-                if pre_pos != 0 or post_pos != 0:
-                    if pre_pos == 0:
-                        unique_seq[j][2] = unique_seq[j][2].split("_")[0]+"_"+unique_seq[j][2].split("_")[2]+ "_t_" +str(pre_pos) + "_" + '{:+d}'.format(post_pos)
-                    elif post_pos == 0:
-                        unique_seq[j][2] = unique_seq[j][2].split("_")[0]+"_"+unique_seq[j][2].split("_")[2] + "_t_" + '{:+d}'.format(pre_pos) + "_" + str(post_pos)
-                    else:
-                        unique_seq[j][2] = unique_seq[j][2].split("_")[0]+"_"+unique_seq[j][2].split("_")[2]+"_t_"+'{:+d}'.format(pre_pos)+"_"+'{:+d}'.format(post_pos)
-
-        # Remove the values "1" from the handling of special mirnas (hsa-miR-8485)
-        for x in range(unique_seq.count(1)):
-            unique_seq.remove(1)
-
-        # metrics for the production of database
-        if tmp_count_reads != 0 and tmp_count_seq != 0:
-           sorted_uni_arms.append([mature_mirnas[i].split(" ")[0][1:], tmp_count_seq, tmp_count_reads])
-
-    # Sorting of the metrics for database
-    sorted_uni_arms = sorted(sorted_uni_arms, key=lambda x: x[1], reverse=True)
-
-    # Collapsing of miRNAs and removing of duplicates
-    collapsed_mirnas = remove_duplicates(unique_seq)
-
-    # Correction of metrics due to the collapsing and removing of duplicates for the production of Database
-    for y in sorted_uni_arms:
-       counts=0
-       seqs=0
-       for x in collapsed_mirnas:
-           if y[0] in x[2].split("_")[0]:
-              counts+=int(x[0].split("-")[1])
-              seqs+=1
-
-       y[1]=seqs
-       y[2]=counts
-
-
-    # Output variables
-    temp_mirna_names=[]
-
-    l.acquire()
-
-    if case == "c" or case == "t":
-       temp_mirna_names.extend(z[2] for z in collapsed_mirnas)
-       names_n_seqs.extend([[y[2],y[9]] for y in collapsed_mirnas])
-       deseq.append([[x[2], x[0].split('-')[1], x[9]] for x in collapsed_mirnas])
-       mirna_names.extend(temp_mirna_names)
-       unmap_seq.value += sum([1 for x in main_sam if x[1] == '4'])     # Keeps the unmap unique sequences for the production of a graph
-       unmap_counts.value += sum([int(x[0].split("-")[1]) for x in main_sam if x[1] == '4'])    # Keeps the unmap counts of sequences for the production of a graph
-       file_order.append(file)    #Keeps the names of SAM files with the order of reading by the fuction (avoid problems due to multiprocesssing)
-       samples.append(collapsed_mirnas)         # return the processed detected miRNAs
-       data.append([case,file,collapsed_mirnas,sorted_uni_arms])
-       ini_sample.append(filter_sam)    # returns the filtered sam file
-
-    l.release()
-
-
-######################################################################################################################################
-
-
-"""
-
-Read a sam file from Bowtie and do the followings:
-
-1) Remove reverse stranded mapped reads
-2) Remove unmapped reads 
-3) Remove all sequences with reads less than 11 reads
-4) Sort the arms with the most sequences in decreading rate
-5) Sort the sequences of every arm with the most reads in decreasing rate
-6) Calculate total number of sequences of every arm
-7) Calculate total number of reads of sequences of every arm.
-8) Store all the informations in a txt file 
-
-"""
-
-def non_sam_edit(mature_mirnas,path,file,case,l,data,file_order,n_deseq,names_n_seqs):
-
-    # read the sam file
-    ini_sam=read(path,0)
-    main_sam = [x.rstrip("\n").split("\t") for x in ini_sam if "@" not in x.split("\t")[0]]
-    unique_seq=[]
-    unique_seq = [x for x in main_sam if x[1] == '4' and len(x[9])>=18 and len(x[9])<=26]
-
-    uni_seq=[]
-
-    # Calculate the shifted positions for every non template mirna and add them to the name of it
-    sorted_uni_arms = []
-    for i in range(1,len(mature_mirnas),2):
-        tmp_count_reads = 0   # calculate the total number of reads
-        tmp_count_seq = 0     # calculate the total number of sequences
-
-        for j in range(len(unique_seq)):
-
-            temp_mature = mature_mirnas[i].strip().replace("U", "T")
-
-            # Detection of differences between the canonical miRNA and the detected non template miRNA
-            if temp_mature in unique_seq[j][9]:
-
-                off_part = longestSubstring(temp_mature, unique_seq[j][9])
-
-                mat_diff = temp_mature.split(off_part)
-                mat_diff = [len(mat_diff[0]), len(mat_diff[1])]
-
-                unique_diff = unique_seq[j][9].split(off_part)
-                if len(unique_diff)<=2:
-                   unique_diff = [len(unique_diff[0]), len(unique_diff[1])]
-
-                   pre_pos = mat_diff[0]-unique_diff[0]
-                   post_pos = unique_diff[1]-mat_diff[1]
-
-                   lengthofmir = len(off_part) + post_pos
-                   if pre_pos == 0 and post_pos<4:
-                      tmp_count_reads = tmp_count_reads + int(unique_seq[j][0].split("-")[1])
-                      tmp_count_seq = tmp_count_seq + 1
-
-                      t_name=unique_seq[j].copy()
-                      t_name[2]=mature_mirnas[i - 1].split(" ")[0][1:] + "_nont_" + str(pre_pos) + "_" + '{:+d}'.format(post_pos) + "_" + str(unique_seq[j][9][len(off_part):])
-                      uni_seq.append(t_name)
-        # metrics for the production of database
-        if tmp_count_reads != 0 and tmp_count_seq != 0:
-            sorted_uni_arms.append([mature_mirnas[i-1].split(" ")[0][1:], tmp_count_seq, tmp_count_reads])
-
-    sorted_uni_arms = sorted(sorted_uni_arms, key=lambda x: x[1], reverse=True)
-    unique_seq = list(map(list, OrderedDict.fromkeys(map(tuple,uni_seq))))
-
-    # Output variables
-    l.acquire()
-    if case=="c" or case=="t":
-       names_n_seqs.extend([[y[2],y[9]] for y in unique_seq if y[2]!="*"])
-       n_deseq.append([[x[2], x[0].split('-')[1], x[9]] for x in unique_seq if x[2]!="*"])
-       file_order.append(file)
-       data.append([case,file,unique_seq,sorted_uni_arms])
-    l.release()
-
-#################################################################################################################################################################################################################
-
-def black_white(mirna_names_1,mirna_names_2,group,manager):
-
-    add_names = [x for x in mirna_names_1 if x not in mirna_names_2]
-    add_names.sort()
-    add_names = list(add_names for add_names,_ in itertools.groupby(add_names))
-
-    group.sort()
-    group = list(group for group,_ in itertools.groupby(group))
-
-    zeros=["0"]*(len(group[0])-2)
-    [add_names[i].extend(zeros) for i,_ in enumerate(add_names)]
-    group=group+add_names
-
-    manager.extend(group)
-
-################################################################################################################################################################################################################################
-
-def merging_dupes(group,f_dupes):
-
-    dupes=[]
-    final_mat =[]
-
-    for num,_ in enumerate(group):
-
-        if group[num][1] not in final_mat and group[num][0] not in final_mat:
-           final_mat.append(group[num][1])
-           final_mat.append(group[num][0])
-        else:
-           dupes.append(group[num][1])
-
-
-    dupes=list(set(dupes))
-
-    dupes=[[x] for x in dupes]
-
-    for x in group:
-        for y in dupes:
-            if x[1]==y[0]:
-               fl=0
-               if len(y)==1:
-                  y.append(x[0])
-               else:
-                  for i in range(1,len(y)):
-                      if y[i].split("_")[0]==x[0].split("_")[0]:
-                         fl=1
-                         if len(x[0])<len(y[i]):
-                            del y[i]
-                            y.append(x[0])
-                            break
-
-                  if fl==0:
-                     y.append((x[0]))
-
-    for y in dupes:
-        if len(y)>2:
-           for i in range(len(y)-1,1,-1):
-               y[1]=y[1]+"/"+y[i]
-               del y[i]
-
-    f_dupes.extend(dupes)
-
-##########################################################################################################################################################################################################################################
-
-def apply_merging_dupes(group,dupes,managger):
-
-    for x in group:
-     for y in dupes:
-         if x[1]==y[0]:
-            x[0]=y[1]
-
-    group.sort()
-    group=list(group for group,_ in itertools.groupby(group))
-    managger.extend(group)
-
-###############################################################################################################################################################################################################################
-
-
-def filter_low_counts(c_group,t_group,fil_c_group,fil_t_group,per,counts):
-
-    t_group_new=[]
-    c_group_new=[]
-
-    percent=int(per)/100
-    c_col_filter=round(percent*(len(c_group[1])-2))
-    t_col_filter=round(percent*(len(t_group[1])-2))
-
-    for i, _ in enumerate(c_group):
-        c_cols=0
-        t_cols=0
-
-        c_cols=sum([1 for j in range(len(c_group[i])-2) if int(c_group[i][j+2])>=int(counts)])
-        t_cols=sum([1 for j in range(len(t_group[i])-2) if int(t_group[i][j+2])>=int(counts)])
-
-        if c_cols>=c_col_filter or t_cols>=t_col_filter:
-           t_group_new.append(t_group[i])
-           c_group_new.append(c_group[i])
-
-    fil_c_group.extend(c_group_new)
-    fil_t_group.extend(t_group_new)
-
-##################################################################################################################################################################################################################
-
-
-def write_main(raw_con, raw_tre, fil_con, fil_tre, con_file_order, tre_file_order, flag, group_name1, group_name2, per):
-
- if flag == 1 and int(per)!=-1:
-    fp = open('Counts/Filtered '+group_name2 +' Templated Counts', 'w')
-    fp.write("Name\t")
-    fp.write("Sequence")
-    for y in tre_file_order:
-       fp.write("\t"+y)
-
-    for x in fil_tre:
-        fp.write("\n%s" % "\t".join(x))
-    fp.close()
-
-    fp = open('Counts/Filtered '+group_name1+' Templated Counts', 'w')
-    fp.write("Name\t")
-    fp.write("Sequence")
-    for y in con_file_order:
-       fp.write("\t"+y)
-
-    for x in fil_con:
-        fp.write("\n%s" % "\t".join(x))
-    fp.close()
-
-
- if flag == 2 and int(per)!=-1:
-    fp = open('Counts/Filtered '+group_name2+' Non-Templated Counts', 'w')
-    fp.write("Name\t")
-    fp.write("Sequence")
-    for y in tre_file_order:
-       fp.write("\t"+y)
-
-
-    for x in fil_tre:
-        fp.write("\n%s" % "\t".join(x))
-    fp.close()
-
-    fp = open('Counts/Filtered '+group_name1+' Non-Templated Counts', 'w')
-    fp.write("Name\t")
-    fp.write("Sequence")
-    for y in con_file_order:
-       fp.write("\t"+y)
-
-    for x in fil_con:
-        fp.write("\n%s" % "\t".join(x))
-    fp.close()
-
-
- if flag == 1:
-    fp = open('Counts/Raw '+group_name2+' Templated Counts', 'w')
-    fp.write("Name\t")
-    fp.write("Sequence")
-    for y in tre_file_order:
-       fp.write("\t"+y)
-
-    for x in raw_tre:
-        fp.write("\n%s" % "\t".join(x))
-    fp.close()
-
-    fp = open('Counts/Raw '+group_name1+' Templated Counts', 'w')
-    fp.write("Name\t")
-    fp.write("Sequence")
-    for y in con_file_order:
-       fp.write("\t"+y)
-
-    for x in raw_con:
-        fp.write("\n%s" % "\t".join(x))
-    fp.close()
-
-
- if flag == 2:
-    fp = open('Counts/Raw '+group_name2+' Non-Templated Counts', 'w')
-    fp.write("Name\t")
-    fp.write("Sequence")
-    for y in tre_file_order:
-       fp.write("\t"+y)
-
-
-    for x in raw_tre:
-        fp.write("\n%s" % "\t".join(x))
-    fp.close()
-
-    fp = open('Counts/Raw '+group_name1+' Non-Templated Counts', 'w')
-    fp.write("Name\t")
-    fp.write("Sequence")
-    for y in con_file_order:
-       fp.write("\t"+y)
-
-    for x in raw_con:
-        fp.write("\n%s" % "\t".join(x))
-    fp.close()
-
-
-#########################################################################################################################################
-
-def temp_counts_to_diff(names,samp,folder):
-
-    for i in range(2,len(samp[0])):
-
-       fp = open(folder+names[i-2]+'.txt','w')
-       fp.write("miRNA id"+"\t"+names[i-2]+"\n")
-
-       for x in samp:
-           fp.write("%s" % "\t".join([x[0],x[i]])+"\n")
-       fp.close()
-
-##################################################################################################################
-
-def DB_write(con,name,unique_seq,sorted_uni_arms,f):
-
- if f==1:
-    # Write a txt file with all the information
-    if con=="c":
-       fp = open('split1/'+name, 'w')
-
-       fp.write("%s\t%-42s\t%s\n\n" % ("Number of Reads","Name of isomir","Sequence"))
-    if con=="t":
-       fp = open('split2/'+name, 'w')
-       fp.write("%s\t%-42s\t%s\n\n" % ("Number of Reads","Name of isomir","Sequence"))
-
-
-    for i in range(len(sorted_uni_arms)):
-        temp = []
-        for j in range(len(unique_seq)):
-
-            if sorted_uni_arms[i][0] in unique_seq[j][2].split("_")[0]:
-
-                temp.append(unique_seq[j])
-
-        temp = sorted(temp, key=lambda x: int(x[0].split('-')[1]), reverse=True)
-        fp.write("*********************************************************************************************************\n")
-        fp.write("%-8s\t%-22s\t%-25s\t%-30s\t%s\n" % ("|",str(sorted_uni_arms[i][0]),"Sequence count = "+str(sorted_uni_arms[i][1]),"Total reads = "+str(sorted_uni_arms[i][2]),"|"))
-        fp.write("*********************************************************************************************************\n\n")
-        [fp.write("%-8s\t%-40s\t%s\n" % (x[0].split("-")[1], x[2],x[9])) for x in temp]
-        fp.write("\n" + "\n")
-    fp.close()
-
- if f==2:
-
-    if con=="c":
-       fp = open('split3/'+name, 'w')
-       fp.write("%s\t%-42s\t%s\n\n" % ("Number of Reads","Name of isomir","Sequence"))
-    if con=="t":
-       fp = open('split4/'+name, 'w')
-       fp.write("%s\t%-42s\t%s\n\n" % ("Number of Reads","Name of isomir","Sequence"))
-
-
-    for i in range(len(sorted_uni_arms)):
-        temp = []
-        for j in range(len(unique_seq)):
-               if sorted_uni_arms[i][0]==unique_seq[j][2].split("_nont_")[0]:
-                  temp.append(unique_seq[j])
-        if temp!=[]:
-           temp = sorted(temp, key=lambda x: int(x[0].split('-')[1]), reverse=True)
-           fp.write("*********************************************************************************************************\n")
-           fp.write("%-8s\t%-22s\t%-25s\t%-30s\t%s\n" % ("|",str(sorted_uni_arms[i][0]),"Sequence count = "+str(sorted_uni_arms[i][1]),"Total reads = "+str(sorted_uni_arms[i][2]),"|"))
-           fp.write("*********************************************************************************************************\n\n")
-           [fp.write("%-8s\t%-40s\t%s\n" % (x[0].split("-")[1], x[2],x[9])) for x in temp]
-           fp.write("\n" + "\n")
-    fp.close()
-
-
-##########################################################################################################################
-
-def new_mat_seq(pre_unique_seq,mat_mirnas,l):
-
-    unique_iso = []
-    for x in pre_unique_seq:
-       if len(x[2].split("_"))==3:
-          for y in pre_unique_seq:
-              if x[2] in y[2] and int(x[0].split("-")[1])<int(y[0].split("-")[1]):
-                 if any(y[2] in lst2 for lst2 in unique_iso)==False:
-                    y[2]=">"+y[2]
-                    unique_iso.append(y)
-    l.acquire()
-    for x in unique_iso:
-        mat_mirnas.append(x[2])
-        mat_mirnas.append(x[9])
-    l.release()
-
-#########################################################################################################################
-
-def merging_names(ini_mat,new):
-
-    dupes=[]
-    final_mat =[]
-
-    for num in range(len(ini_mat)):
-
-        if ini_mat[num][1] not in final_mat and ini_mat[num][0] not in final_mat:
-           final_mat.append(ini_mat[num][1])
-           final_mat.append(ini_mat[num][0])
-        else:
-           dupes.append(ini_mat[num][1])
-
-    dupes=list(set(dupes))
-
-    for i in range(len(dupes)):
-        dupes[i]=[dupes[i]]
-
-    for x in ini_mat:
-        for y in dupes:
-            if x[1]==y[0]:
-               fl=0
-               if len(y)==1:
-                  y.append(x[0])
-               else:
-                  for i in range(1,len(y)):
-                      if y[i].split("_")[0]==x[0].split("_")[0]:
-                         fl=1
-                         if len(x[0])<len(y[i]):
-                            del y[i]
-                            y.append(x[0])
-                            break
-
-                  if fl==0:
-                     y.append((x[0]))
-
-    for y in dupes:
-        if len(y)>2:
-           for i in range(len(y)-1,1,-1):
-               y[1]=y[1]+"/"+y[i]
-               del y[i]
-
-
-    for x in ini_mat:
-        for y in dupes:
-            if x[1]==y[0]:
-               x[0]=y[1]
-
-    ini_mat.sort()
-    ini_mat=list(ini_mat for ini_mat,_ in itertools.groupby(ini_mat))
-
-    new.extend(ini_mat)
-
-
-######################################################################################################################################################
-
-def nontemp_counts_to_diff(tem_names,tem_samp,non_names,non_samp,folder):
-
-    for i in range(2,len(tem_samp[0])):
-
-       fp = open(folder+tem_names[i-2]+'.txt','w')
-       fp.write("miRNA id"+"\t"+tem_names[i-2]+"\n")
-
-       for x in tem_samp:
-           fp.write("%s" % "\t".join([x[0],x[i]])+"\n")
-
-       for j in range(len(non_names)):
-           if non_names[j]==tem_names[i-2]:
-              for x in non_samp:
-                  fp.write("%s" % "\t".join([x[0],x[j+2]])+"\n")
-       fp.close()
-
-###################################################################################################################################################################################################################
-
-"""
-
-This function downloads all the miRNAs of all the species from MirBase
-and filters them by the requested organism
-
-input : Organism
-output: A list with the miRNA sequences in fasta format
-
-"""
-
-def download_matures(matures,org_name):
-
-    url = 'https://www.mirbase.org/ftp/CURRENT/mature.fa.gz'
-    data = urllib.request.urlopen(url).read()
-    file_mirna = gzip.decompress(data).decode('utf-8')
-    file_mirna = file_mirna.split("\n")
-
-    for i in range(0,len(file_mirna)-1,2):
-
-        if org_name in file_mirna[i]:
-           matures.append(file_mirna[i])
-           matures.append(file_mirna[i+1])
-
-###################################################################################################################################################################################################################
-
-
-"""
-
-This function keeps all mirna isoforms which are detected on SAM files from the first part of the analysis
-These isoforms will be used as refence sequences with canonical (ref) mirnas for the detection of non-template
-mirnas
-
-"""
-
-
-def non_template_ref(c_samples,t_samples,all_isoforms):
-
-  pre_uni_seq_con = list(c_samples)
-  pre_uni_seq_tre = list(t_samples)
-
-  for x in pre_uni_seq_con:
-      for y in x:
-          #if ">"+y[2] not in all_isoforms and ")_" in y[2] :
-           if ">"+y[2] not in all_isoforms and "_t_" in y[2] :
-             all_isoforms.append(">"+y[2])
-             all_isoforms.append(y[9])
-
-  for x in pre_uni_seq_tre:
-      for y in x:
-          #if ">"+y[2] not in all_isoforms and ")_" in y[2]:
-           if ">"+y[2] not in all_isoforms and "_t_" in y[2] :
-             all_isoforms.append(">"+y[2])
-             all_isoforms.append(y[9])
-
-################################################################################################################################################################################################
-
-"""
-
-This function adds the uncommon detected miRNAs among samples.
-As a result all samples will have the same length.
-
-"""
-
-def uncommon_mirnas(sample,mir_names,l,new_d,sample_name,sample_order):
-
-    for y in mir_names:
-        flag=0
-        for x in sample:
-            if y[0]==x[0]: # check if miRNA exists in the sample
-               flag=1
-               break
-        if flag==0:
-           sample.append([y[0],"0",y[1]]) # add the name of mirna to the sample with zero counts and its sequence
-
-    # sorting and remove duplicates
-    sample.sort(key=lambda x: x[0])
-    sample=list(sample for sample,_ in itertools.groupby(sample))
-
-    # Return the updated sample
-    l.acquire()
-    new_d.append(sample)
-    sample_order.append(sample_name)
-    l.release()
-
-###############################################################################################################################################################################################
-