annotate MolD_v1.4.py @ 0:4e8e2f836d0f draft default tip

planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
author itaxotools
date Sun, 29 Jan 2023 16:25:48 +0000
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
1 """
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
2 This script compiles rDNC-based DNA diagnoses for a pre-defined taxa in a dataset. This is the MAIN WORKING VERSION v1.4
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
3 This version already implements the new functionalities:
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
4 - automatic trimming of the alignment to match the median sequence length (should seq len distribution and provided NumberN settings require that)
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
5 - expanded qCLADEs setting
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
6 - diagnoses in pairwise comparisons of taxa.
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
7 - selection of a reference sequence for indexing DNCs
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
8
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
9 THIS version LACKS SPART compatibility
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
10
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
11 """
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
12 import os, sys
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
13 import random
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
14 import argparse
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
15 from io import StringIO
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
16 ######################################################################## FUNCTIONS
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
17 #***STEP 1 - SORTING ENTRIES BY CLADE AND IDENTIFYING NUCLEOTIDE POSITIONS SHARED WITHIN CLADE
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
18 def Step1(raw_records):
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
19 Clades=[]
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
20 for i in range(len(raw_records)):
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
21 Clade=raw_records[i][1]
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
22 if Clade not in Clades:
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
23 Clades.append(Clade)
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
24 clade_sorted_seqs = {}
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
25 for letter in Clades:
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
26 clade_sorted_seqs[letter]=[]
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
27 for i in range(len(raw_records)):
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
28 if raw_records[i][1]==letter:
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
29 clade_sorted_seqs[letter].append(raw_records[i][2])
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
30 shared_positions={}
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
31 for key in clade_sorted_seqs:
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
32 sh_pos=[]
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
33 for i in range(len(clade_sorted_seqs[key][0])):
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
34 shared_nucleotide = True
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
35 csm = clade_sorted_seqs[key][0][i] #candidate shared nucleotide
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
36 for j in range(1, len(clade_sorted_seqs[key])):
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
37 if clade_sorted_seqs[key][j][i] != csm:
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
38 shared_nucleotide = False
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
39 break
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
40 if shared_nucleotide == True and csm != 'N':
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
41 sh_pos.append(i)
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
42 shared_positions[key]=sh_pos
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
43 return Clades, clade_sorted_seqs, shared_positions
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
44
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
45 #***STEP 2 COMPILING COMPARISON LISTS FOR CLADES AND IDENTIFYING VARIABLE POSITIONS AND N PRIORITY POSITIONS WITH LARGEST CUTOFFS
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
46 def C_VP_PP(clade_sorted_seqs, clade, shared_positions, CUTOFF):# complist_variable_positions_priority_positions; Arguments: dictionary, string, dictionary
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
47 CShN={}#a dictionary keys - clade shared positions, values - nucleotides at those positions
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
48 for pos in shared_positions[clade]:
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
49 CShN[pos] = clade_sorted_seqs[clade][0][pos]#creates a dictionary shared position : nucleotide
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
50 complist=[]
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
51 for key in clade_sorted_seqs:
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
52 if key != clade:
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
53 complist = complist + clade_sorted_seqs[key]#creates a list of all other sequences for comparison
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
54 cutoffs = {}
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
55 pures = []####! newline
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
56 for key in CShN:
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
57 newcomplist = []
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
58 for k in complist:
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
59 if k[key] == CShN[key]:
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
60 newcomplist.append(k)
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
61 else: continue
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
62 cutoffs[key] = len(complist) - len(newcomplist)
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
63 if len(newcomplist) == 0:####! newline
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
64 pures.append(key)####! newline
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
65 CPP = []
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
66 for key in sorted(cutoffs, key = cutoffs.get, reverse = True):
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
67 CPP.append(key)
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
68 if CUTOFF[0] == '>':#VERYNEW
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
69 Clade_priority_positions = {pos:CShN[pos] for pos in CPP if cutoffs[pos] > int(CUTOFF[1:])}#VERYNEW
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
70 else:#VERYNEW
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
71 Clade_priority_positions = {}
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
72 for position in CPP[:int(CUTOFF)]:#Here you define how many of the clade shared combinations are used in subsequent search
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
73 Clade_priority_positions[position] = CShN[position]
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
74 return complist, Clade_priority_positions, cutoffs, pures####! pures added
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
75
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
76 #***STEPS 3 RANDOM SEARCH ACROSS PRIORITY POSITIONS TO FIND RAW DIAGNOSTIC COMBINATIONS AND TO SUBSEQUENTLY REFINE THEM
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
77 def random_position(somelist, checklist):#gives a random index (integer) of the specified range, and returns indexed somelist element if it is not present in the checklist
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
78 while True:
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
79 i = random.randint(0, len(somelist) - 1)
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
80 if somelist[i] not in checklist:
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
81 return somelist[i]
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
82 break
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
83 else:
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
84 continue
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
85
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
86 def step_reduction_complist(clade, complist, CPP, checked_ind):#checks randomly selected positions of CladeSharedNucleotides with sequences of other clades, until a diagnostic combination of nucleotides for a selected clade is found.
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
87 if len(complist) == 0:
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
88 return checked_ind
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
89 elif len(checked_ind) == len(CPP):
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
90 return checked_ind
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
91 else:
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
92 newcomplist = []
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
93 pos = random_position(list(CPP.keys()), checked_ind)
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
94 for j in complist:
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
95 if j[pos] == CPP[pos] or j[pos] == 'N':#VERYNEW
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
96 newcomplist.append(j)
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
97 else: continue
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
98 new_checked_ind = checked_ind + [pos]
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
99 return step_reduction_complist(clade, newcomplist, CPP, new_checked_ind)
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
100
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
101 def ConditionD(newcomb, complist, CPP):#The function checks the 'Condition D' - i.e. whither any given combination of nucleotide positions is diagnostic for the selected clade
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
102 ContD = False
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
103 for i in newcomb:
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
104 newcomplist = []
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
105 for m in complist:
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
106 if m[i] == CPP[i]:
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
107 newcomplist.append(m)
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
108 else: continue
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
109 complist = newcomplist
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
110 if len(complist) == 0:
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
111 ContD = True
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
112 return ContD
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
113
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
114 def RemoveRedundantPositions(raw_comb, complist, CPP):# The function removes positions from the raw combinations one by one, and then checks whether new combination fulfills the condition D, thus recursively reducing the diagnostic combination.
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
115 red_possible = False
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
116 for j in raw_comb:
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
117 newcomb = [k for k in raw_comb if k != j]
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
118 if ConditionD(newcomb, complist, CPP) == True:
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
119 red_possible = True
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
120 return RemoveRedundantPositions(newcomb, complist, CPP)
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
121 else: pass
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
122 if red_possible == False:
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
123 return raw_comb
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
124
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
125 #PUTS EVERYTHING TOGETHER - 20000 ROUNDS OF RANDOM SEARCH FOLLOWED BY REFINING OF 500 SHORTEST COMBINATIONS
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
126 def Diagnostic_combinations(qCLADE, complist, CPP, n1, maxlen1, maxlen2):
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
127 Achecked_ind = []
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
128 bestlists = []
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
129 n = n1
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
130 while n>0:#STEP3 proposes raw diagnostic combinations
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
131 raw_comb = step_reduction_complist(qCLADE, complist, CPP, Achecked_ind)
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
132 if len(raw_comb) <= maxlen1:
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
133 refined_comb = RemoveRedundantPositions(raw_comb, complist, CPP)
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
134 if len(refined_comb) <= maxlen2 and sorted(refined_comb) not in bestlists:
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
135 bestlists.append(sorted(refined_comb))
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
136 n=n-1
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
137 bestlists.sort(key=len)
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
138 return bestlists
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
139
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
140 #***STEP 4 ANALYSIS OF OUTPUT rDNCs
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
141 def IndependentKey(diagnostic_combinations):#PRESENTLY NOT INVOLVED - returns independent diagnostic nucleotide combinations, and identifies key nucleotide positions
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
142 independent_combinations = []
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
143 selected_positions = []
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
144 for i in range(len(diagnostic_combinations)):
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
145 if len(selected_positions) == 0:
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
146 for j in range(0, i):
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
147 if len(set(diagnostic_combinations[i]) & set(diagnostic_combinations[j])) == 0 and len(set(diagnostic_combinations[i]) & set(selected_positions)) == 0:
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
148 independent_combinations.append(diagnostic_combinations[i])
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
149 independent_combinations.append(diagnostic_combinations[j])
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
150 for k in range(len(diagnostic_combinations[i])):
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
151 selected_positions.append(diagnostic_combinations[i][k])
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
152 for l in range(len(diagnostic_combinations[j])):
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
153 selected_positions.append(diagnostic_combinations[j][l])
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
154 else:
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
155 if len(set(diagnostic_combinations[i]) & set(selected_positions)) == 0:
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
156 independent_combinations.append(diagnostic_combinations[i])
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
157 for k in range(len(diagnostic_combinations[i])):
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
158 selected_positions.append(diagnostic_combinations[i][k])
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
159 independent_combinations.sort(key=len)
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
160 key_positions = []
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
161 for pos in diagnostic_combinations[0]:
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
162 KP = True
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
163 for combination in diagnostic_combinations[1:]:
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
164 if pos not in combination:
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
165 KP = False
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
166 break
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
167 else: continue
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
168 if KP == True:
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
169 key_positions.append(pos)
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
170 return independent_combinations, key_positions
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
171
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
172 #SPECIFIC FUNCTIONS FOR THE rDNCs
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
173 def PositionArrays(Motifs):#VERYNEW ALL FUNCTION NEW
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
174 PositionArrays = []
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
175 VarPosList = []
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
176 for i in range(len(Motifs[0])):
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
177 Const = True
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
178 array = [Motifs[0][i]]
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
179 for j in range(len(Motifs[1:])):
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
180 if Motifs[j][i] != 'N':
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
181 if Motifs[j][i] != array[-1]:
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
182 Const = False
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
183 array.append(Motifs[j][i])
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
184 PositionArrays.append(array)
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
185 if Const == False:
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
186 VarPosList.append(i)
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
187 return PositionArrays, VarPosList
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
188
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
189 def random_sequence_new(SEQ, PositionArrays, VarPosList, Pdiff):#VERYNEW FUNCTION REVISED
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
190 #print(["ROUND", len(SEQ)*Pdiff/100, round(len(SEQ)*Pdiff/100)])
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
191 n = round(len(SEQ)*Pdiff/100)
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
192 N = random.sample(list(range(1, n)), 1)[0]
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
193 PosToChange = random.sample([p for p in VarPosList if SEQ[p] != 'D'], N)#this is a new definition to keep alignment gaps unchanged
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
194 NEWSEQ = ''
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
195 for i in range(len(SEQ)):
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
196 if i not in PosToChange:
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
197 NEWSEQ = NEWSEQ + SEQ[i]
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
198 else:
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
199 newarray = [j for j in PositionArrays[i] if j != SEQ[i]]
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
200 newbase = random.sample(newarray, 1)[0]
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
201 NEWSEQ = NEWSEQ + newbase
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
202 return NEWSEQ
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
203
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
204 def GenerateBarcode_new(Diagnostic_combinations, length):#VERYNEW FUNCTION REVISED - This function calculates diagnostic combinations and assembles a barcode of desired length for a query taxon. First all single position DNCs are added, then based on the frequency of a nucleotide position in the DNCs of the 2 positions, and then based on the frequency of a position in longer DNCs
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
205 len1 = []
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
206 len2 = []
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
207 lenmore = []
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
208 for comb in Diagnostic_combinations:
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
209 if len(comb) == len(Diagnostic_combinations[0]):
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
210 for i in comb:
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
211 len1.append(i)
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
212 elif len(comb) == len(Diagnostic_combinations[0])+1:
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
213 for j in comb:
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
214 len2.append(j)
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
215 else:
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
216 for k in comb:
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
217 lenmore.append(k)
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
218 if len(Diagnostic_combinations[0]) == 1:
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
219 Setin = len1
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
220 else:
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
221 Setin = []
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
222 for pos in sorted(len1, key=len1.count, reverse = True):
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
223 if not pos in Setin:
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
224 Setin.append(pos)
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
225 for pos1 in sorted(len2, key=len2.count, reverse = True):
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
226 if not pos1 in Setin:
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
227 Setin.append(pos1)
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
228 for pos2 in sorted(lenmore, key=lenmore.count, reverse = True):
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
229 if not pos2 in Setin:
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
230 Setin.append(pos2)
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
231 return Setin[:length]
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
232
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
233 def Screwed_dataset_new(raw_records, nseq_per_clade_to_screw, PositionArrays, VarPosList, Percent_difference, Taxon, Cutoff):#VERYNEW FUNCTION REVISED
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
234 clades=[]
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
235 for i in range(len(raw_records)):
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
236 Clade=raw_records[i][1]
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
237 if Clade not in clades:
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
238 clades.append(Clade)
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
239 clade_sorted_seqs = {}
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
240 for letter in clades:
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
241 clade_sorted_seqs[letter]=[]
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
242 for i in range(len(raw_records)):
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
243 if raw_records[i][1]==letter:
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
244 clade_sorted_seqs[letter].append(raw_records[i][2])
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
245
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
246 for clade in clades:
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
247 seqlist = clade_sorted_seqs[clade]
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
248 newseqs = []
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
249 if len(seqlist) > nseq_per_clade_to_screw:
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
250 iSTS = random.sample(list(range(len(seqlist))), nseq_per_clade_to_screw)
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
251 for k in range(len(seqlist)):
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
252 if k in iSTS:
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
253 newseq = random_sequence_new(seqlist[k], PositionArrays, VarPosList, Percent_difference)
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
254 else:
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
255 newseq = seqlist[k]
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
256 newseqs.append(newseq)
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
257 elif len(clade_sorted_seqs[clade]) == nseq_per_clade_to_screw:
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
258 for k in range(len(seqlist)):
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
259 newseq = random_sequence_new(seqlist[k], PositionArrays, VarPosList, Percent_difference)
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
260 newseqs.append(newseq)
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
261 else:
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
262 for i in range(nseq_per_clade_to_screw):
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
263 seq = random.sample(seqlist, 1)[0]
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
264 newseq = random_sequence_new(seq, PositionArrays, VarPosList, Percent_difference)
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
265 newseqs.append(newseq)
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
266 clade_sorted_seqs[clade] = newseqs
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
267
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
268 shared_positions={}
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
269 for key in clade_sorted_seqs:
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
270 sh_pos=[]
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
271 for i in range(len(clade_sorted_seqs[key][0])):
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
272 shared_nucleotide = True
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
273 csm = clade_sorted_seqs[key][0][i] #candidate shared nucleotide
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
274 for j in range(1, len(clade_sorted_seqs[key])):
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
275 if clade_sorted_seqs[key][j][i] != csm:
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
276 shared_nucleotide = False
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
277 break
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
278 if shared_nucleotide == True and csm != 'N':
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
279 sh_pos.append(i)
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
280 shared_positions[key]=sh_pos
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
281
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
282 x,y,z,pures = C_VP_PP(clade_sorted_seqs, Taxon, shared_positions, Cutoff)#STEP2####
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
283 return x, y
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
284
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
285 #NEWFUNCYIONS
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
286 def medianSeqLen(listofseqs):#OCT2022
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
287 seqlens = [i.count('A')+i.count('C')+i.count('G')+i.count('T') for i in listofseqs]
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
288 medlen = sorted(seqlens)[int(len(seqlens)/2)]
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
289 medseq = listofseqs[seqlens.index(medlen)]
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
290 start = min([medseq.find('A'),medseq.find('C'),medseq.find('G'),medseq.count('T')])
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
291 if not 'N' in medseq[start:]:
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
292 end = len(medseq)
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
293 else:
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
294 for i in range(start, len(medseq), 1):
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
295 if medseq[i:].count('N') == len(medseq[i:]):
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
296 end = i
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
297 break
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
298 return medlen, start, end
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
299
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
300 def getAllPairs(taxalist):
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
301 uniquetaxapairs = []
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
302 stl = sorted(list(set(taxalist)))
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
303 for i in range(len(stl)):
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
304 for j in range(i+1, len(stl)):
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
305 uniquetaxapairs.append([stl[i],stl[j]])
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
306 return uniquetaxapairs
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
307
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
308
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
309 ################################################READ IN PARAMETER FILE AND DATA FILE
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
310
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
311 def get_args(): #arguments needed to give to this script
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
312 parser = argparse.ArgumentParser(description="run MolD")
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
313 required = parser.add_argument_group("required arguments")
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
314 required.add_argument("-i", help="textfile with parameters of the analysis", required=True)
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
315 return parser.parse_args()
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
316
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
317 def mainprocessing(gapsaschars=None, taxalist=None, taxonrank=None, cutoff=None, numnucl=None, numiter=None, maxlenraw=None, maxlenrefined=None, iref=None, pdiff=None, nmax=None, thresh=None, tmpfname=None, origfname=None):
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
318 ParDict = {}
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
319 if not(all([gapsaschars, taxalist, taxonrank, cutoff, numnucl, numiter, maxlenraw, maxlenrefined, iref, pdiff, nmax, thresh, tmpfname])):
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
320 args = get_args()
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
321 with open(args.i) as params:
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
322 for line in params:
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
323 line = line.strip()
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
324 if line.startswith('#'):
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
325 pass
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
326 else:
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
327 if len(line.split('=')) == 2 and len(line.split('=')[1]) != 0:
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
328 ParDict[line.split('=')[0]] = line.split('=')[1].replace(' ', '')#VERYNEW
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
329 else:
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
330 ParDict['Gaps_as_chars'] = gapsaschars
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
331 ParDict['qTAXA'] = taxalist
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
332 ParDict['Taxon_rank'] = taxonrank
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
333 ParDict['INPUT_FILE'] = tmpfname
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
334 ParDict['ORIG_FNAME'] = origfname# I do not understand this ORIG_FNAME, it throws an error with the command line
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
335 ParDict['Cutoff'] = cutoff
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
336 ParDict['NumberN'] = numnucl
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
337 ParDict['Number_of_iterations'] = numiter
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
338 ParDict['MaxLen1'] = maxlenraw
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
339 ParDict['MaxLen2'] = maxlenrefined
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
340 ParDict['Iref'] = iref
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
341 ParDict['Pdiff'] = pdiff
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
342 #ParDict['PrSeq'] = prseq
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
343 ParDict['NMaxSeq'] = nmax
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
344 ParDict['Scoring'] = thresh
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
345 ParDict['OUTPUT_FILE'] = "str"
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
346 print(ParDict)
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
347 ############################################# #VERYNEW HOW GAPS ARE TREATED
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
348 #REQUIRES A NEW FIELD IN THE GUI
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
349 if ParDict['Gaps_as_chars'] == 'yes':
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
350 gaps2D = True#VERYNEW
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
351 else:#VERYNEW
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
352 gaps2D = False#VERYNEW
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
353
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
354 ############################################ DATA FILE
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
355 checkread = open(ParDict['INPUT_FILE'], 'r')
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
356 firstline = checkread.readline()
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
357 checkread.close()
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
358 imported=[]#set up a new list with species and identifiers
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
359 if '>' in firstline:
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
360 f = open(ParDict['INPUT_FILE'], 'r')
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
361 for line in f:#VERYNEW - THE DATA READING FROM ALIGNMENT FILE IS ALL REVISED UNTIL f.close()
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
362 line=line.rstrip()
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
363 if line.startswith('>'):
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
364 data = line[1:].split('|')
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
365 if len(data) != 2:
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
366 print('Check number of entries in', data[0])
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
367 #break
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
368 data.append('')
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
369 imported.append(data)
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
370 else:
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
371 if gaps2D == True:#
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
372 DNA = line.upper().replace('-', 'D').replace('R', 'N').replace('Y', 'N').replace('S','N').replace('W','N').replace('M','N').replace('K','N')#VERYNEW
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
373 else:
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
374 DNA = line.upper().replace('-', 'N').replace('R', 'N').replace('Y', 'N').replace('S','N').replace('W','N').replace('M','N').replace('K','N')#VERYNEW
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
375 imported[-1][-1] = imported[-1][-1]+DNA
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
376 f.close()
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
377 else:
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
378 f = open(ParDict['INPUT_FILE'], 'r')
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
379 for line in f:#VERYNEW - THE DATA READING FROM ALIGNMENT FILE IS ALL REVISED UNTIL f.close()
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
380 line=line.rstrip()
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
381 data = line.split('\t')
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
382 if len(data) != 3:
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
383 print('Check number of entries in', data[0])
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
384 ID = data[0]
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
385 thetax = data[1]
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
386 if gaps2D == True:#
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
387 DNA = data[2].upper().replace('-', 'D').replace('R', 'N').replace('Y', 'N').replace('S','N').replace('W','N').replace('M','N').replace('K','N')#VERYNEW
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
388 else:
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
389 DNA = data[2].upper().replace('-', 'N').replace('R', 'N').replace('Y', 'N').replace('S','N').replace('W','N').replace('M','N').replace('K','N')#VERYNEW
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
390 imported.append([ID, thetax, DNA])
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
391 f.close()
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
392
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
393 if 'NumberN' in list(ParDict.keys()):#How many ambiguously called nucleotides are allowed
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
394 NumberN = int(ParDict['NumberN'])
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
395 else:
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
396 NumberN = 5
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
397
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
398 if len(set([len(i[2]) for i in imported])) != 1:
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
399 print('Alignment contains sequences of different lengths:', set([len(i[2]) for i in imported]))
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
400 else:
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
401 mlen, sstart, send = medianSeqLen([i[2] for i in imported])#OCT2022 - start
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
402 if mlen + NumberN < len(imported[0][2]):
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
403 Slice = True
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
404 FragmentLen = mlen
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
405 corr = sstart
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
406 else:
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
407 Slice = False
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
408 FragmentLen = len(imported[0][2])
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
409 sstart = 0
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
410 send = FragmentLen+1
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
411 corr = 0
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
412 raw_records=[]
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
413 for i in imported:
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
414 if ParDict['Iref'] != 'NO' and ParDict['Iref'].split(',')[0] == i[0] and ParDict['Iref'].split(',')[1] in ['ex', 'excl', 'out']:
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
415 continue
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
416 else:
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
417 if i[2][sstart:send].count('N') < NumberN and len(i[2][sstart:send]) == FragmentLen:
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
418 newi = [i[0], i[1], i[2][sstart:send]]
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
419 raw_records.append(newi)#OCT2022 - end
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
420 print('\n########################## PARAMETERS ######################\n')#VERYNEW
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
421 #print('input file:', ParDict['ORIG_FNAME']) #Outcommented ORIG_FNAME
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
422 print('input file:', ParDict['INPUT_FILE']) #Replacement of the line above
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
423 print('Coding gaps as characters:', gaps2D)
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
424 print('Maximum undetermined nucleotides allowed:', NumberN)
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
425 print('Length of the alignment:', len(imported[0][2]),'->', FragmentLen)
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
426 print('Indexing reference:', ParDict['Iref'].replace('NO', 'Not set').replace('in', 'included').replace('ex', 'excluded'))
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
427 print('Read in', len(raw_records), 'sequences')
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
428
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
429 PosArrays, VarPosList = PositionArrays([i[2] for i in raw_records])#VERYNEW
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
430
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
431 #############################################READ IN OTHER ANALYSIS PARAMETERS
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
432 ##OCT2022 - start
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
433 withplus = []
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
434 P2 = []
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
435 shift = True
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
436 if ParDict['qTAXA'][0] == '>':#THIS OPTION DIAGNOSES ALL TAXA WITH MORE THAN USER-DEFINED NUMBER OF SEQUENCES AVAILABLE
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
437 NumSeq = int(ParDict['qTAXA'][1:])
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
438 Taxarecords = [i[1] for i in raw_records]
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
439 qCLADEs = []
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
440 for j in set(Taxarecords):
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
441 if Taxarecords.count(j) >= NumSeq:
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
442 qCLADEs.append(j)
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
443 elif ParDict['qTAXA'].startswith('P:'):#THIS OPTION DIAGNOSES ALL TAXA CONTAINING A USER-DEFINED PATTERN IN THE NAME
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
444 pattern = ParDict['qTAXA'].split(':')[1]
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
445 Taxarecords = [i[1] for i in raw_records]
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
446 qCLADEs = []
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
447 for j in set(Taxarecords):
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
448 if pattern in j:
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
449 qCLADEs.append(j)
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
450 elif ParDict['qTAXA'].startswith('P+:'):#THIS OPTION POOLS ALL TAXA CONTAINING A USER-DEFINED PATTERN IN THE NAME IN ONE TAXON AND DIAGNOSES IT
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
451 pattern = ParDict['qTAXA'].split(':')[1]
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
452 Taxarecords = set([i[1] for i in raw_records if pattern in i[1]])
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
453 spp = '+'.join(sorted(list(Taxarecords)))
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
454 qCLADEs = [spp]
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
455 nrecords = []
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
456 for rec in raw_records:
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
457 if rec[1] in Taxarecords:
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
458 nrecords.append([rec[0], spp, rec[2]])
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
459 else:
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
460 nrecords.append(rec)
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
461 raw_records = nrecords
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
462 else:#THIS OPTION DIAGNOSES ALL TAXA FROM A USER-DEFINED LIST; TAXA MAY BE COMBINED BY USING '+'
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
463 qCLADEs = []
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
464 allrecs = ParDict['qTAXA'].split(',')
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
465 for item in allrecs:
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
466 if item in ['ALL', 'All', 'all']:#THIS OPTION DIAGNOSES ALL TAXA IN THE DATASET
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
467 qCLADEs = list(set([i[1] for i in raw_records]))
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
468 elif item in [i[1] for i in raw_records]:
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
469 qCLADEs.append(item)
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
470 elif '+' in item:
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
471 withplus.append(item)
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
472 elif 'VS' in item:
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
473 P2.append(item.split('VS'))
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
474 else:
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
475 print('UNRECOGNIZED TAXON', item)
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
476 #OCT2022 - end
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
477 print('query taxa:', len(qCLADEs+withplus), '-', str(sorted(qCLADEs)+sorted(withplus)).replace('[','').replace(']','').replace("'", ''))#1.3
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
478
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
479 if 'Cutoff' in list(ParDict.keys()):#CUTOFF Number of the informative positions to be considered, default 100
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
480 Cutoff = ParDict['Cutoff']#VERYNEW
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
481 else:
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
482 Cutoff = 100
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
483 print('Cutoff set as:', Cutoff)
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
484 if 'Number_of_iterations' in list(ParDict.keys()):#Number iterations of MolD
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
485 N1 = int(ParDict['Number_of_iterations'])
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
486 else:
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
487 N1 = 10000
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
488 print('Number iterations of MolD set as:', N1)
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
489
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
490 if 'MaxLen1' in list(ParDict.keys()):#Maximum length for the raw mDNCs
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
491 MaxLen1 = int(ParDict['MaxLen1'])
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
492 else:
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
493 MaxLen1 = 12
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
494 print('Maximum length of raw mDNCs set as:', MaxLen1)
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
495
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
496 if 'MaxLen2' in list(ParDict.keys()):#Maximum length for the refined mDNCs
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
497 MaxLen2 = int(ParDict['MaxLen2'])
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
498 else:
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
499 MaxLen2 = 7
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
500 print('Maximum length of refined mDNCs set as:', MaxLen2)
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
501
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
502 if 'Pdiff' in list(ParDict.keys()):#Percent difference
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
503 Percent_difference = float(ParDict['Pdiff'])
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
504 else:
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
505 if int(ParDict['Taxon_rank']) == 1:#read in taxon rank to configure Pdiff parameter of artificial dataset
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
506 Percent_difference = 1
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
507 else:
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
508 Percent_difference = 5
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
509 print('simulated sequences up to', Percent_difference, 'percent divergent from original ones')
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
510
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
511 if 'NMaxSeq' in list(ParDict.keys()):#Maximum number of sequences per taxon to be modified
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
512 Seq_per_clade_to_screw = int(ParDict['NMaxSeq'])
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
513 else:
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
514 Seq_per_clade_to_screw = 10####!changed value
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
515 print('Maximum number of sequences modified per clade', Seq_per_clade_to_screw)
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
516
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
517 if 'Scoring' in list(ParDict.keys()):
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
518 if ParDict['Scoring'] == 'lousy':
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
519 threshold = 66####!changed value
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
520 elif ParDict['Scoring'] == 'moderate':
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
521 threshold = 75####!changed value
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
522 elif ParDict['Scoring'] == 'stringent':
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
523 threshold = 90####!changed value
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
524 elif ParDict['Scoring'] == 'very_stringent':
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
525 threshold = 95####!changed value
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
526 else:
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
527 threshold = 75####!changed value
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
528 else:
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
529 threshold = 75####!changed value
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
530 #print(ParDict['Scoring'], 'scoring of the rDNCs; threshold in two consequtive runs:', threshold)
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
531 print('scoring of the rDNCs; threshold in two consequtive runs:', threshold)
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
532 #OCT2022 - start
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
533 if corr > 1 and len(ParDict['Iref'].split(',')) == 2:
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
534 print('\nNOTE: The alignment was trimmed automatically to match median sequences length. The analysed slice starts from the site',str(sstart+1),'and ends on the site',str(send+1),'. The site indexing in the DNCs as in the provided reference.')
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
535 if corr > 1 and ParDict['Iref'] == 'NO':
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
536 print('\nNOTE: The alignment was trimmed automatically to match median sequences length. The analysed slice starts from the site',str(sstart+1),'and ends on the site',str(send+1),'. The site indexing in the rDNC as in the untrimmed alignment.')
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
537 #OCT2022 - end
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
538 thephrase = 'The DNA diagnosis for the taxon'
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
539 ###################################################IMPLEMENTATION
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
540 #Setting up a new class just for the convenient output formatting
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
541 class SortedDisplayDict(dict):#this is only to get a likable formatting of the barcode
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
542 def __str__(self):
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
543 return "[" + ", ".join("%r: %r" % (key, self[key]) for key in sorted(self)) + "]"
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
544
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
545 class SortedDisplayDictVerbose(dict):#this is only to get a likable formatting of the barcode
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
546 def __str__(self):
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
547 return ", ".join("%r %r" % (self[key],'in the site '+str(key)) for key in sorted(self)).replace("'", '').replace("A", "'A'").replace("C", "'C'").replace("G", "'G'").replace("T", "'T'")+'.'
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
548
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
549 #Calling functions and outputing results
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
550 if ParDict['OUTPUT_FILE'] == "str":
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
551 g = StringIO()
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
552 else:
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
553 g = open(ParDict['OUTPUT_FILE'], "w")#Initiating output file
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
554 #VERYNEW
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
555 print('<h4>########################## PARAMETERS ######################</h4>', file=g)
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
556 #print("<p>", 'input file:', ParDict['ORIG_FNAME'], "</p>", file=g) #outcommented ORIG_FNAME
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
557 print("<p>", 'input file:', ParDict['INPUT_FILE'], "</p>", file=g)
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
558 print("<p>", 'Coding gaps as characters:', gaps2D, "</p>", file=g)
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
559 print("<p>", 'Maximum undetermined nucleotides allowed:', NumberN, "</p>", file=g)
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
560 print("<p>", 'Length of the alignment:', len(imported[0][2]),'->', FragmentLen, "</p>", file=g)
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
561 print("<p>", 'Indexing reference:', ParDict['Iref'].replace('NO', 'Not set').replace('in', 'included').replace('ex', 'excluded'), "</p>", file=g)#OCT2022
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
562 print("<p>", 'Read in', len(raw_records), 'sequences', "</p>", file=g)
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
563 print("<p>", 'query taxa:', len(qCLADEs+withplus), '-', str(sorted(qCLADEs)+sorted(withplus)).replace('[','').replace(']','').replace("'", ''), "</p>", file=g)#1.3
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
564 print("<p>", 'Cutoff set as:', Cutoff, "</p>", file=g)
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
565 print("<p>", 'Number iterations of MolD set as:', N1, "</p>", file=g)
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
566 print("<p>", 'Maximum length of raw mDNCs set as:', MaxLen1, "</p>", file=g)
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
567 print("<p>", 'Maximum length of refined mDNCs set as:', MaxLen2, "</p>", file=g)
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
568 print("<p>", 'simulated sequences up to', Percent_difference, 'percent divergent from original ones', "</p>", file=g)
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
569 print("<p>", 'Maximum number of sequences modified per clade', Seq_per_clade_to_screw, "</p>", file=g)
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
570 #print("<p>", ParDict['Scoring'], 'scoring of the rDNCs; threshold in two consequtive runs:', threshold, "</p>", file=g)
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
571 print("<p>", 'scoring of the rDNCs; threshold in two consequtive runs:', threshold, "</p>", file=g)
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
572 if corr > 1 and len(ParDict['Iref'].split(',')) == 2:
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
573 print('<h4>NOTE: The alignment was trimmed automatically to match median sequences length. The analysed slice starts from the site',str(sstart+1),'and ends on the site',str(send+1),'. The site indexing in the DNCs as in the provided reference.</h4>', file=g)
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
574 if corr > 1 and ParDict['Iref'] == 'NO':
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
575 print('<h4>NOTE: The alignment was trimmed automatically to match median sequences length. The analysed slice starts from the site',str(sstart+1),'and ends on the site',str(send+1),'. The site indexing in the rDNC as in the untrimmed alignment.</h4>', file=g)
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
576
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
577 print('<h4>########################### RESULTS ##########################</h4>', file=g)
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
578
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
579 for qCLADE in sorted(qCLADEs) + sorted(withplus):#OCT2022
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
580 if '+' in qCLADE:
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
581 if shift == True:
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
582 old_records = raw_records
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
583 shift == False
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
584 else:
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
585 raw_records = old_records
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
586 spp = qCLADE.split('+')
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
587 nrecords = []
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
588 for rec in raw_records:
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
589 if rec[1] in spp:
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
590 nrecords.append([rec[0], qCLADE, rec[2]])
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
591 else:
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
592 nrecords.append(rec)
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
593 raw_records = nrecords
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
594 print('\n**************', qCLADE, '**************')
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
595 print('<h4>**************', qCLADE, '**************</h4>', file=g)
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
596 Clades, clade_sorted_seqs, shared_positions = Step1(raw_records)#STEP1
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
597 x,y,z,pures = C_VP_PP(clade_sorted_seqs, qCLADE, shared_positions, Cutoff)#STEP2 ####! added pures
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
598 newy = {key:y[key] for key in y if not key in pures} ####! newline
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
599 print('Sequences analyzed:', len(clade_sorted_seqs[qCLADE]))
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
600 print("<p>",'Sequences analyzed:', len(clade_sorted_seqs[qCLADE]), "</p>", file=g)
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
601 ND_combinations = [[item] for item in pures] ####! before ND_combinations were initiated as an empty list
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
602 print('single nucleotide mDNCs:', len(pures), '-', str(SortedDisplayDict({pos+corr : y[pos-1] for pos in [i+1 for i in pures]}))[1:-1])#OCT2022
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
603 print("<p>",'single nucleotide mDNCs*:',len(pures), '-', str(SortedDisplayDict({pos+corr : y[pos-1] for pos in [i+1 for i in pures]}))[1:-1], "</p>", file=g)#OCT2022
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
604 N = 1 ####!
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
605 while N > 0:#STEP3
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
606 try:
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
607 q = Diagnostic_combinations(qCLADE, x, newy, N1, MaxLen1, MaxLen2) ####! newy instead of y
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
608 except IndexError:
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
609 print(N, 'IndexError')
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
610 continue
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
611 for comb in q:
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
612 if not comb in ND_combinations:
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
613 ND_combinations.append(comb)
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
614 N-=1
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
615 ND_combinations.sort(key=len)
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
616 #################################### mDNC output
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
617 try:
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
618 Nind, KeyPos = IndependentKey(ND_combinations)#STEP4
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
619 except IndexError:
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
620 print('no mDNCs recovered for', qCLADE)#VERYNEW
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
621 print("<p>", 'no mDNCs recovered for', "</p>", qCLADE, file=g)#VERYNEW
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
622 continue
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
623 Allpos = []#Create list of all positions involved in mDNCs
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
624 for comb in ND_combinations:
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
625 for pos in comb:
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
626 if not pos in Allpos:
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
627 Allpos.append(pos)
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
628 print('\nmDNCs retrieved:', str(len(ND_combinations)) + '; Sites involved:', str(len(Allpos)) + '; Independent mDNCs:', len(Nind))#VERYNEW
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
629 print("<p>", 'mDNCs* retrieved:', str(len(ND_combinations)) + '; Sites involved:', str(len(Allpos)) + '; Independent mDNCs**:', len(Nind), "</p>", file=g)#VERYNEW
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
630 print('Shortest retrieved mDNC:', SortedDisplayDict({pos+corr : y[pos-1] for pos in [i+1 for i in ND_combinations[0]]}), '\n')#OCT2022
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
631 print("<p>",'Shortest retrieved mDNC*:', SortedDisplayDict({pos+corr : y[pos-1] for pos in [i+1 for i in ND_combinations[0]]}), "</p>", file=g)#OCT2022
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
632 ######################################################## rDNC output
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
633 Barcode_scores = []#Initiate a list for rDNC scores
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
634 npos = len(ND_combinations[0])
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
635 BestBarcode = 'none'####! newline
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
636 while npos <= min([10, len(Allpos)]):#in this loop the positions are added one-by-one to a rDNC and the rDNC is then rated on the artificially generated datasets
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
637 Barcode = GenerateBarcode_new(ND_combinations, npos)#Initiate a rDNC
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
638 Barcode_score = 0#Initiate a score to rate a rDNC
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
639 N = 100
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
640 while N > 0:
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
641 NComplist, NCPP = Screwed_dataset_new(raw_records, Seq_per_clade_to_screw, PosArrays, VarPosList, Percent_difference, qCLADE, Cutoff)#Create an artificial dataset VERYNEW
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
642 NBarcode = [i for i in Barcode if i in list(NCPP.keys())]
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
643 if len(Barcode) - len(NBarcode) <= 1 and ConditionD(NBarcode, NComplist, NCPP) == True:####! new condition (first) added
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
644 Barcode_score +=1
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
645 N -=1
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
646 print(npos, 'rDNC_score (100):', [k+corr+1 for k in Barcode], '-', Barcode_score)#VERYNEW
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
647 print("<p>", npos, 'rDNC_score (100):', [k+corr+1 for k in Barcode], '-', Barcode_score, "</p>", file=g)#OCT2022
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
648 if Barcode_score >= threshold and len(Barcode_scores) == 1: ###1.3
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
649 BestBarcode = Barcode###1.3
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
650 if Barcode_score >= threshold and len(Barcode_scores) > 1 and Barcode_score >= max(Barcode_scores): ###1.3
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
651 BestBarcode = Barcode####!newline
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
652 Barcode_scores.append(Barcode_score)
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
653 if len(Barcode_scores) >= 2 and Barcode_scores[-1] >= threshold and Barcode_scores[-2] >= threshold:#Check whether the rDNC fulfills robustnes criteria 85:85:85
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
654 print('Final rDNC:', SortedDisplayDict({pos+corr : y[pos-1] for pos in [i+1 for i in BestBarcode]}))#OCT2022
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
655 print("<p>",'Final rDNC***:', SortedDisplayDict({pos+corr : y[pos-1] for pos in [i+1 for i in BestBarcode]}), "</p>", file=g)#OCT2022
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
656 print('\n',thephrase, qCLADE, 'is:', SortedDisplayDictVerbose({pos+corr : y[pos-1] for pos in [i+1 for i in BestBarcode]}))#OCT2022
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
657 print("<p>", thephrase, qCLADE, 'is:', SortedDisplayDictVerbose({pos+corr : y[pos-1] for pos in [i+1 for i in BestBarcode]}), "</p>", file=g)#OCT2022
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
658 break
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
659 else:# VERY NEW FROM HERE ONWARDS
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
660 npos += 1
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
661 if npos > min([10, len(Allpos)]):
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
662 if BestBarcode != 'none':
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
663 print('The highest scoring rDNC for taxon', qCLADE, 'is:', SortedDisplayDictVerbose({pos+corr : y[pos-1] for pos in [i+1 for i in BestBarcode]}))#OCT2022
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
664 print("<p>", 'The highest scoring rDNC*** for taxon', qCLADE, 'is:', SortedDisplayDictVerbose({pos+corr : y[pos-1] for pos in [i+1 for i in BestBarcode]}), "</p>", file=g)#OCT2022
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
665 else:
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
666 print('No sufficiently robust DNA diagnosis for taxon', qCLADE, 'was retrieved')
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
667 print("<p>", 'No sufficiently robust DNA diagnosis for taxon', qCLADE, 'was retrieved', "</p>", file=g)
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
668 #OCT2022 - start
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
669 print("<h4>", '################################# EXPLANATIONS ####################################', "</h4>", file=g)
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
670 print("<p>", ' * mDNC -(=minimal Diagnostic nucleotide combination) is a combination of nucleotides at specified sites of the alignment,', "</p>", file=g)
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
671 print("<p>", ' unique for a query taxon. Therefore it is sufficient to differentiate a query taxon from all reference taxa in a dataset.', "</p>", file=g)
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
672 print("<p>", ' Because it comprises minimal necessary number of nucleotide sites to differentiate a query, any mutation in the mDNC in' "</p>", file=g)
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
673 print("<p>", ' single specimen of a query taxon will automatically disqualify it as a diagnostic combination.', "</p>", file=g)
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
674 print("<p>", "</p>", file=g)
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
675 print("<p>", ' ** two or more mDNCs are INDEPENDENT if they constitute non-overlapping sets of nucleotide sites.', "</p>", file=g)
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
676 print("<p>", "</p>", file=g)
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
677 print("<p>", '*** rDNC -(=robust/redundant Diagnostic nucleotide combination) is a combination of nucleotides at specified sites of the alignment,', "</p>", file=g)
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
678 print("<p>", ' unique for a query taxon and (likewise mDNC) sufficient to differentiate a query taxon from all reference taxa in a dataset.', "</p>", file=g)
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
679 print("<p>", ' However, rDNC comprises more than a minimal necessary number of diagnostic sites, and therefore is robust to single nucleotide', "</p>", file=g)
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
680 print("<p>", ' replacements. Even if a mutation arises in one of the rDNC sites, the remaining ones will (with high probability) remain sufficient ', "</p>", file=g)
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
681 print("<p>", ' to diagnose the query taxon', "</p>", file=g)
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
682 print("<h4>", ' Final diagnosis corresponds to rDNC', "</h4>", file=g)
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
683 #OCT2022 - end
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
684
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
685 if ParDict['OUTPUT_FILE'] == "str":
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
686 contents = g.getvalue()
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
687 os.unlink(ParDict['INPUT_FILE'])
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
688 else:
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
689 contents = None
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
690 g.close()
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
691 #OCT2022 - start
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
692 if len(P2) != 0:
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
693 ext = '.'+ParDict['OUTPUT_FILE'].split('.')[-1]
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
694 h = open(ParDict['OUTPUT_FILE'].replace(ext, '_pairwise'+ext), "w")#Initiating output file
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
695 if len(withplus) != 0:
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
696 raw_records = old_records
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
697 taxain = [i[1] for i in raw_records]
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
698 tpairs = []
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
699 for alist in P2:
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
700 if alist.count('all')+alist.count('All')+alist.count('ALL') == 2:
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
701 tpairs = getAllPairs(taxain)
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
702 elif alist.count('all')+alist.count('All')+alist.count('ALL') == 1:
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
703 thetax = [i for i in taxain if i in alist][0]
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
704 print(thetax)
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
705 for atax in sorted(list(set(taxain))):
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
706 if atax != thetax:
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
707 tpairs.append([thetax, atax])
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
708 else:
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
709 for apair in getAllPairs(alist):
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
710 tpairs.append(apair)
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
711 for apair in tpairs:
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
712 t1 = apair[0]
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
713 t2 = apair[1]
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
714 p2records = [i for i in raw_records if i[1] in [t1, t2]]
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
715 print('\n**************', t1, 'VS', t2,'**************')
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
716 print('<h4>**************', t1, 'VS', t2, '**************</h4>', file=h)
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
717 C2, css2, sp2 = Step1(p2records)#STEP1
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
718 x2,y2,z2,pures2 = C_VP_PP(css2, t1, sp2, '>0')#STEP2 ####! added pures
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
719 Pairphrase = 'Each of the following '+ str(len(pures2))+' sites is invariant across sequences of '+ t1+ ' and differentiates it from '+ t2+': '
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
720 counterPures = {}
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
721 for site in pures2:
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
722 counterPures[site] = "'or'".join(list(set([thing[site] for thing in css2[t2] if thing[site] != 'N'])))
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
723 Pairphrase = Pairphrase + str(site+corr)+" ('"+str(y2[site])+"' vs '"+str(counterPures[site])+"'), "
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
724 print(Pairphrase[:-2])
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
725 print("<p>",Pairphrase[:-2],'</h4>', file=h)#OCT2022
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
726 x2r,y2r,z2r,pures2r = C_VP_PP(css2, t2, sp2, '>0')#STEP2 ####! added pures
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
727 Pairphraser = 'Each of the following '+ str(len(pures2r))+' sites is invariant across sequences of '+ t2+ ' and differentiates it from '+ t1+': '
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
728 counterPuresr = {}
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
729 for site in pures2r:
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
730 counterPuresr[site] = "'or'".join(list(set([thing[site] for thing in css2[t1] if thing[site] != 'N'])))
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
731 Pairphraser = Pairphraser + str(site+corr)+" ('"+str(y2r[site])+"' vs '"+str(counterPuresr[site])+"'), "
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
732 print(Pairphraser[:-2])
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
733 print("<p>",Pairphraser[:-2],'</h4>', file=h)#OCT2022
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
734 h.close()
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
735
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
736 #OCT2022 - end
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
737 return contents, qCLADEs
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
738 if __name__ == "__main__":
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
739 c, q = mainprocessing()
4e8e2f836d0f planemo upload commit 232ce39054ce38be27c436a4cabec2800e14f988-dirty
itaxotools
parents:
diff changeset
740