Mercurial > repos > abims-sbr > mutcount
annotate scripts/S01b_extract_variable_nuc.py @ 0:acc3674e515b draft default tip
planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
| author | abims-sbr |
|---|---|
| date | Fri, 01 Feb 2019 10:28:50 -0500 |
| parents | |
| children |
| rev | line source |
|---|---|
|
0
acc3674e515b
planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
1 #!/usr/bin/env python |
|
acc3674e515b
planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
2 #coding: utf-8 |
|
acc3674e515b
planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
3 |
|
acc3674e515b
planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
4 import argparse, os |
|
acc3674e515b
planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
5 from functions import dico, write_output, fill_with_NaN |
|
acc3674e515b
planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
6 |
|
acc3674e515b
planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
7 """ Functions for nucleic format """ |
|
acc3674e515b
planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
8 |
|
acc3674e515b
planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
9 def all_nuc_counts(seq): |
|
acc3674e515b
planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
10 """ Counts the number of nucleotides in a sequence |
|
acc3674e515b
planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
11 |
|
acc3674e515b
planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
12 Args : |
|
acc3674e515b
planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
13 - seq (String) : a nucleic sequence |
|
acc3674e515b
planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
14 |
|
acc3674e515b
planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
15 Return: |
|
acc3674e515b
planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
16 - nuc_counts (dict) : counts on nucleotides (key/value : nucleotide/count) |
|
acc3674e515b
planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
17 """ |
|
acc3674e515b
planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
18 nuc_counts = {} |
|
acc3674e515b
planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
19 seqU = seq.upper() |
|
acc3674e515b
planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
20 LN =['A','C','T','G'] |
|
acc3674e515b
planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
21 |
|
acc3674e515b
planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
22 for base in LN: |
|
acc3674e515b
planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
23 nuc_counts[base] = seqU.count(base) |
|
acc3674e515b
planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
24 |
|
acc3674e515b
planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
25 return nuc_counts |
|
acc3674e515b
planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
26 |
|
acc3674e515b
planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
27 def ratios(nuc_counts): |
|
acc3674e515b
planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
28 """ Compute GC and purine ratios from the counts of nucleotides in a sequence |
|
acc3674e515b
planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
29 |
|
acc3674e515b
planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
30 Args : |
|
acc3674e515b
planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
31 - nuc_counts (dict) : dictionary of nucleotides counts (output of all_nuc_counts()) |
|
acc3674e515b
planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
32 |
|
acc3674e515b
planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
33 Return : |
|
acc3674e515b
planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
34 - ratios (dict) : dictionary with ratios |
|
acc3674e515b
planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
35 """ |
|
acc3674e515b
planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
36 |
|
acc3674e515b
planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
37 # Search for compositional bias in genome as marker of thermal adaptation |
|
acc3674e515b
planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
38 ratios = {} |
|
acc3674e515b
planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
39 ratios['GC_percent'] = float(nuc_counts['C'] + nuc_counts['G'])/(sum(nuc_counts.values())*100) |
|
acc3674e515b
planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
40 ratios['purine_percent'] = float(nuc_counts['A'] + nuc_counts['G'])/(sum(nuc_counts.values())*100) |
|
acc3674e515b
planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
41 |
|
acc3674e515b
planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
42 ratios['DIFF_GC'] = nuc_counts['G'] - nuc_counts['C'] |
|
acc3674e515b
planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
43 ratios['DIFF_AT'] = nuc_counts['A'] - nuc_counts['T'] |
|
acc3674e515b
planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
44 |
|
acc3674e515b
planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
45 # Per bp |
|
acc3674e515b
planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
46 ratios['PLI_GC'] = float(ratios['DIFF_GC'])/sum(nuc_counts.values()) |
|
acc3674e515b
planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
47 ratios['PLI_AT'] = float(ratios['DIFF_AT'])/sum(nuc_counts.values()) |
|
acc3674e515b
planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
48 |
|
acc3674e515b
planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
49 # Per 1000 bp |
|
acc3674e515b
planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
50 ratios['PLI_GC_1000'] = ratios['PLI_GC']*1000 |
|
acc3674e515b
planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
51 ratios['PLI_AT_1000'] = ratios['PLI_AT']*1000 |
|
acc3674e515b
planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
52 |
|
acc3674e515b
planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
53 return ratios |
|
acc3674e515b
planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
54 |
|
acc3674e515b
planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
55 def main(): |
|
acc3674e515b
planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
56 parser = argparse.ArgumentParser() |
|
acc3674e515b
planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
57 parser.add_argument("species_list", help="List of species separated by commas") |
|
acc3674e515b
planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
58 args = parser.parse_args() |
|
acc3674e515b
planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
59 |
|
acc3674e515b
planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
60 LN =['A','C','T','G'] |
|
acc3674e515b
planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
61 Lratios = ['GC_percent', 'purine_percent', 'DIFF_GC', 'DIFF_AT', 'PLI_GC', 'PLI_AT', 'PLI_GC_1000', 'PLI_AT_1000'] |
|
acc3674e515b
planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
62 |
|
acc3674e515b
planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
63 list_inputs = [] |
|
acc3674e515b
planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
64 |
|
acc3674e515b
planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
65 path_inputs = '01_input_files' |
|
acc3674e515b
planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
66 list_inputs = os.listdir(path_inputs) |
|
acc3674e515b
planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
67 |
|
acc3674e515b
planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
68 lsp = args.species_list.split(',') |
|
acc3674e515b
planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
69 lsp = sorted(lsp) |
|
acc3674e515b
planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
70 flsp = '' |
|
acc3674e515b
planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
71 for el in lsp: |
|
acc3674e515b
planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
72 flsp += el+',' |
|
acc3674e515b
planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
73 |
|
acc3674e515b
planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
74 path_outputs_1 = '02_tables_per_nucleotide' |
|
acc3674e515b
planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
75 path_outputs_2 = '02_tables_per_nuc_variable' |
|
acc3674e515b
planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
76 os.mkdir(path_outputs_1) |
|
acc3674e515b
planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
77 os.mkdir(path_outputs_2) |
|
acc3674e515b
planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
78 |
|
acc3674e515b
planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
79 dict_for_files_nuc = {} |
|
acc3674e515b
planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
80 dict_for_file_var = {} |
|
acc3674e515b
planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
81 |
|
acc3674e515b
planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
82 # All counts |
|
acc3674e515b
planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
83 for file in list_inputs: |
|
acc3674e515b
planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
84 # iterate over input files |
|
acc3674e515b
planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
85 sequences = dico(file, path_inputs) |
|
acc3674e515b
planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
86 |
|
acc3674e515b
planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
87 # TEMPORARY CORRECTION FOR SEQUENCES CONTAINING ONLY INDELS |
|
acc3674e515b
planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
88 # It appears than CDS_Search can bug sometimes and return an alignement where a species' sequence is made of indels only |
|
acc3674e515b
planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
89 # This causes a crash here (in the ratios function). The correction skip the whole file. |
|
acc3674e515b
planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
90 # When CDS_Search is corrected, lines with 'skip' can be removed |
|
acc3674e515b
planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
91 skip = False |
|
acc3674e515b
planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
92 for key in sequences.keys(): |
|
acc3674e515b
planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
93 if all(x == '-' for x in sequences[key]): |
|
acc3674e515b
planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
94 skip = True |
|
acc3674e515b
planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
95 |
|
acc3674e515b
planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
96 if not skip: |
|
acc3674e515b
planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
97 |
|
acc3674e515b
planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
98 nuc_counts_per_seq = {} |
|
acc3674e515b
planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
99 nuc_var_per_seq = {} |
|
acc3674e515b
planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
100 |
|
acc3674e515b
planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
101 for key in sequences.keys(): |
|
acc3674e515b
planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
102 # iterate over sequences in the file |
|
acc3674e515b
planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
103 nuc_counts_per_seq[key] = all_nuc_counts(sequences[key]) |
|
acc3674e515b
planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
104 nuc_var_per_seq[key] = ratios(nuc_counts_per_seq[key]) |
|
acc3674e515b
planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
105 |
|
acc3674e515b
planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
106 # Add NaN for missing species |
|
acc3674e515b
planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
107 for key in set(lsp).difference(set(sequences.keys())): |
|
acc3674e515b
planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
108 nuc_counts_per_seq[key] = fill_with_NaN(LN) |
|
acc3674e515b
planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
109 nuc_var_per_seq[key] = fill_with_NaN(Lratios) |
|
acc3674e515b
planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
110 |
|
acc3674e515b
planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
111 # Add computations to final dicts |
|
acc3674e515b
planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
112 dict_for_files_nuc[file] = nuc_counts_per_seq |
|
acc3674e515b
planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
113 dict_for_file_var[file] = nuc_var_per_seq |
|
acc3674e515b
planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
114 |
|
acc3674e515b
planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
115 |
|
acc3674e515b
planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
116 # Try with pandas ? |
|
acc3674e515b
planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
117 write_output(LN, flsp, path_outputs_1, dict_for_files_nuc) # one file per nuc |
|
acc3674e515b
planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
118 write_output(Lratios, flsp, path_outputs_2, dict_for_file_var) # one file per nuc_variable |
|
acc3674e515b
planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
119 |
|
acc3674e515b
planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
120 if __name__ == '__main__': |
|
acc3674e515b
planemo upload for repository htpps://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
abims-sbr
parents:
diff
changeset
|
121 main() |
