Mercurial > repos > iuc > hyphy_fubar
annotate scripts/strike-ambigs.bf @ 35:1b893cdead55 draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/hyphy/ commit e7a89841d59689e87db592e112f9c8fb5331d954
author | iuc |
---|---|
date | Thu, 02 Mar 2023 15:18:36 +0000 |
parents | c5ef306c2041 |
children |
rev | line source |
---|---|
24
c5ef306c2041
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/hyphy/ commit 2742ee3b4e90f65352845265d2f85c4263e0eabb"
iuc
parents:
diff
changeset
|
1 RequireVersion ("2.5.20"); |
c5ef306c2041
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/hyphy/ commit 2742ee3b4e90f65352845265d2f85c4263e0eabb"
iuc
parents:
diff
changeset
|
2 |
c5ef306c2041
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/hyphy/ commit 2742ee3b4e90f65352845265d2f85c4263e0eabb"
iuc
parents:
diff
changeset
|
3 LoadFunctionLibrary ("libv3/tasks/alignments.bf"); |
c5ef306c2041
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/hyphy/ commit 2742ee3b4e90f65352845265d2f85c4263e0eabb"
iuc
parents:
diff
changeset
|
4 LoadFunctionLibrary ("libv3/tasks/trees.bf"); |
c5ef306c2041
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/hyphy/ commit 2742ee3b4e90f65352845265d2f85c4263e0eabb"
iuc
parents:
diff
changeset
|
5 LoadFunctionLibrary ("libv3/UtilityFunctions.bf"); |
c5ef306c2041
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/hyphy/ commit 2742ee3b4e90f65352845265d2f85c4263e0eabb"
iuc
parents:
diff
changeset
|
6 LoadFunctionLibrary ("libv3/IOFunctions.bf"); |
c5ef306c2041
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/hyphy/ commit 2742ee3b4e90f65352845265d2f85c4263e0eabb"
iuc
parents:
diff
changeset
|
7 LoadFunctionLibrary ("libv3/convenience/math.bf"); |
c5ef306c2041
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/hyphy/ commit 2742ee3b4e90f65352845265d2f85c4263e0eabb"
iuc
parents:
diff
changeset
|
8 |
c5ef306c2041
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/hyphy/ commit 2742ee3b4e90f65352845265d2f85c4263e0eabb"
iuc
parents:
diff
changeset
|
9 |
c5ef306c2041
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/hyphy/ commit 2742ee3b4e90f65352845265d2f85c4263e0eabb"
iuc
parents:
diff
changeset
|
10 |
c5ef306c2041
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/hyphy/ commit 2742ee3b4e90f65352845265d2f85c4263e0eabb"
iuc
parents:
diff
changeset
|
11 filter.analysis_description = {terms.io.info : |
c5ef306c2041
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/hyphy/ commit 2742ee3b4e90f65352845265d2f85c4263e0eabb"
iuc
parents:
diff
changeset
|
12 " |
c5ef306c2041
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/hyphy/ commit 2742ee3b4e90f65352845265d2f85c4263e0eabb"
iuc
parents:
diff
changeset
|
13 Read an alignment of coding sequences and replace any ambiguous codons with ---. Write results to a new file in FASTA format, and report changed sequences to stdout |
c5ef306c2041
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/hyphy/ commit 2742ee3b4e90f65352845265d2f85c4263e0eabb"
iuc
parents:
diff
changeset
|
14 ", |
c5ef306c2041
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/hyphy/ commit 2742ee3b4e90f65352845265d2f85c4263e0eabb"
iuc
parents:
diff
changeset
|
15 terms.io.version : "0.1", |
c5ef306c2041
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/hyphy/ commit 2742ee3b4e90f65352845265d2f85c4263e0eabb"
iuc
parents:
diff
changeset
|
16 terms.io.reference : "TBD", |
c5ef306c2041
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/hyphy/ commit 2742ee3b4e90f65352845265d2f85c4263e0eabb"
iuc
parents:
diff
changeset
|
17 terms.io.authors : "Sergei L Kosakovsky Pond", |
c5ef306c2041
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/hyphy/ commit 2742ee3b4e90f65352845265d2f85c4263e0eabb"
iuc
parents:
diff
changeset
|
18 terms.io.contact : "spond@temple.edu", |
c5ef306c2041
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/hyphy/ commit 2742ee3b4e90f65352845265d2f85c4263e0eabb"
iuc
parents:
diff
changeset
|
19 terms.io.requirements : "An MSA" |
c5ef306c2041
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/hyphy/ commit 2742ee3b4e90f65352845265d2f85c4263e0eabb"
iuc
parents:
diff
changeset
|
20 }; |
c5ef306c2041
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/hyphy/ commit 2742ee3b4e90f65352845265d2f85c4263e0eabb"
iuc
parents:
diff
changeset
|
21 |
c5ef306c2041
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/hyphy/ commit 2742ee3b4e90f65352845265d2f85c4263e0eabb"
iuc
parents:
diff
changeset
|
22 |
c5ef306c2041
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/hyphy/ commit 2742ee3b4e90f65352845265d2f85c4263e0eabb"
iuc
parents:
diff
changeset
|
23 io.DisplayAnalysisBanner (filter.analysis_description); |
c5ef306c2041
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/hyphy/ commit 2742ee3b4e90f65352845265d2f85c4263e0eabb"
iuc
parents:
diff
changeset
|
24 |
c5ef306c2041
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/hyphy/ commit 2742ee3b4e90f65352845265d2f85c4263e0eabb"
iuc
parents:
diff
changeset
|
25 utility.SetEnvVariable ("NORMALIZE_SEQUENCE_NAMES", FALSE); |
c5ef306c2041
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/hyphy/ commit 2742ee3b4e90f65352845265d2f85c4263e0eabb"
iuc
parents:
diff
changeset
|
26 |
c5ef306c2041
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/hyphy/ commit 2742ee3b4e90f65352845265d2f85c4263e0eabb"
iuc
parents:
diff
changeset
|
27 KeywordArgument ("code", "Which genetic code should be used", "Universal"); |
c5ef306c2041
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/hyphy/ commit 2742ee3b4e90f65352845265d2f85c4263e0eabb"
iuc
parents:
diff
changeset
|
28 KeywordArgument ("alignment", "An in-frame codon alignment in one of the formats supported by HyPhy"); |
c5ef306c2041
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/hyphy/ commit 2742ee3b4e90f65352845265d2f85c4263e0eabb"
iuc
parents:
diff
changeset
|
29 |
c5ef306c2041
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/hyphy/ commit 2742ee3b4e90f65352845265d2f85c4263e0eabb"
iuc
parents:
diff
changeset
|
30 filter.in = alignments.PromptForGeneticCodeAndAlignment ("filter.dataset", "filter.input"); |
c5ef306c2041
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/hyphy/ commit 2742ee3b4e90f65352845265d2f85c4263e0eabb"
iuc
parents:
diff
changeset
|
31 |
c5ef306c2041
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/hyphy/ commit 2742ee3b4e90f65352845265d2f85c4263e0eabb"
iuc
parents:
diff
changeset
|
32 KeywordArgument ("output", ".fasta for filtered data", None); // destination file keyword; description now uses the same wording as the file-path prompt that consumes it |
c5ef306c2041
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/hyphy/ commit 2742ee3b4e90f65352845265d2f85c4263e0eabb"
iuc
parents:
diff
changeset
|
33 filter.out = io.PromptUserForFilePath(".fasta for filtered data"); |
c5ef306c2041
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/hyphy/ commit 2742ee3b4e90f65352845265d2f85c4263e0eabb"
iuc
parents:
diff
changeset
|
34 fprintf (filter.out, CLEAR_FILE, KEEP_OPEN); |
c5ef306c2041
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/hyphy/ commit 2742ee3b4e90f65352845265d2f85c4263e0eabb"
iuc
parents:
diff
changeset
|
35 |
c5ef306c2041
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/hyphy/ commit 2742ee3b4e90f65352845265d2f85c4263e0eabb"
iuc
parents:
diff
changeset
|
36 GetDataInfo (filter.site_patterns, filter.input); |
c5ef306c2041
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/hyphy/ commit 2742ee3b4e90f65352845265d2f85c4263e0eabb"
iuc
parents:
diff
changeset
|
37 |
c5ef306c2041
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/hyphy/ commit 2742ee3b4e90f65352845265d2f85c4263e0eabb"
iuc
parents:
diff
changeset
|
38 filter.patter2site = {}; |
c5ef306c2041
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/hyphy/ commit 2742ee3b4e90f65352845265d2f85c4263e0eabb"
iuc
parents:
diff
changeset
|
39 |
c5ef306c2041
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/hyphy/ commit 2742ee3b4e90f65352845265d2f85c4263e0eabb"
iuc
parents:
diff
changeset
|
40 |
c5ef306c2041
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/hyphy/ commit 2742ee3b4e90f65352845265d2f85c4263e0eabb"
iuc
parents:
diff
changeset
|
41 for (i,j,v; in; filter.site_patterns) { |
c5ef306c2041
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/hyphy/ commit 2742ee3b4e90f65352845265d2f85c4263e0eabb"
iuc
parents:
diff
changeset
|
42 index = i+j; // NOTE(review): presumably filter.site_patterns is a single-row matrix, so i+j is the flat site index - confirm |
c5ef306c2041
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/hyphy/ commit 2742ee3b4e90f65352845265d2f85c4263e0eabb"
iuc
parents:
diff
changeset
|
43 if (filter.patter2site / v == FALSE ) { |
c5ef306c2041
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/hyphy/ commit 2742ee3b4e90f65352845265d2f85c4263e0eabb"
iuc
parents:
diff
changeset
|
44 filter.patter2site [v] = {}; |
c5ef306c2041
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/hyphy/ commit 2742ee3b4e90f65352845265d2f85c4263e0eabb"
iuc
parents:
diff
changeset
|
45 } |
c5ef306c2041
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/hyphy/ commit 2742ee3b4e90f65352845265d2f85c4263e0eabb"
iuc
parents:
diff
changeset
|
46 filter.patter2site [v] + index; |
c5ef306c2041
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/hyphy/ commit 2742ee3b4e90f65352845265d2f85c4263e0eabb"
iuc
parents:
diff
changeset
|
47 } |
c5ef306c2041
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/hyphy/ commit 2742ee3b4e90f65352845265d2f85c4263e0eabb"
iuc
parents:
diff
changeset
|
48 |
c5ef306c2041
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/hyphy/ commit 2742ee3b4e90f65352845265d2f85c4263e0eabb"
iuc
parents:
diff
changeset
|
49 GET_DATA_INFO_RETURNS_ONLY_THE_INDEX = TRUE; |
c5ef306c2041
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/hyphy/ commit 2742ee3b4e90f65352845265d2f85c4263e0eabb"
iuc
parents:
diff
changeset
|
50 COUNT_GAPS_IN_FREQUENCIES = FALSE; |
c5ef306c2041
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/hyphy/ commit 2742ee3b4e90f65352845265d2f85c4263e0eabb"
iuc
parents:
diff
changeset
|
51 filter.unique_patterns = utility.Array1D (filter.input.site_freqs); |
c5ef306c2041
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/hyphy/ commit 2742ee3b4e90f65352845265d2f85c4263e0eabb"
iuc
parents:
diff
changeset
|
52 |
c5ef306c2041
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/hyphy/ commit 2742ee3b4e90f65352845265d2f85c4263e0eabb"
iuc
parents:
diff
changeset
|
53 for (seq = 0; seq < filter.input.species; seq += 1) { |
c5ef306c2041
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/hyphy/ commit 2742ee3b4e90f65352845265d2f85c4263e0eabb"
iuc
parents:
diff
changeset
|
54 io.ReportProgressBar ("filter","Processing sequence " + (1+seq)); |
c5ef306c2041
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/hyphy/ commit 2742ee3b4e90f65352845265d2f85c4263e0eabb"
iuc
parents:
diff
changeset
|
55 codons = {1, filter.input.sites}; |
c5ef306c2041
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/hyphy/ commit 2742ee3b4e90f65352845265d2f85c4263e0eabb"
iuc
parents:
diff
changeset
|
56 codons [0] = ""; // NOTE(review): presumably seeds the matrix with a string so later cells can hold codon strings - confirm |
c5ef306c2041
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/hyphy/ commit 2742ee3b4e90f65352845265d2f85c4263e0eabb"
iuc
parents:
diff
changeset
|
57 GetString (seq_name, filter.input, seq); |
c5ef306c2041
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/hyphy/ commit 2742ee3b4e90f65352845265d2f85c4263e0eabb"
iuc
parents:
diff
changeset
|
58 GetDataInfo (seq_chars, filter.input, seq); |
c5ef306c2041
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/hyphy/ commit 2742ee3b4e90f65352845265d2f85c4263e0eabb"
iuc
parents:
diff
changeset
|
59 |
c5ef306c2041
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/hyphy/ commit 2742ee3b4e90f65352845265d2f85c4263e0eabb"
iuc
parents:
diff
changeset
|
60 filter.ambigs = 0; |
c5ef306c2041
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/hyphy/ commit 2742ee3b4e90f65352845265d2f85c4263e0eabb"
iuc
parents:
diff
changeset
|
61 |
c5ef306c2041
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/hyphy/ commit 2742ee3b4e90f65352845265d2f85c4263e0eabb"
iuc
parents:
diff
changeset
|
62 for (pattern = 0; pattern < filter.unique_patterns; pattern += 1) { |
c5ef306c2041
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/hyphy/ commit 2742ee3b4e90f65352845265d2f85c4263e0eabb"
iuc
parents:
diff
changeset
|
63 GetDataInfo (pattern_info, filter.input, seq, pattern); |
c5ef306c2041
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/hyphy/ commit 2742ee3b4e90f65352845265d2f85c4263e0eabb"
iuc
parents:
diff
changeset
|
64 if (pattern_info >= 0) { |
c5ef306c2041
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/hyphy/ commit 2742ee3b4e90f65352845265d2f85c4263e0eabb"
iuc
parents:
diff
changeset
|
65 codon_start = (filter.patter2site[pattern])[0] * 3; // character offset of the first site recorded for this pattern (3 characters per codon) |
c5ef306c2041
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/hyphy/ commit 2742ee3b4e90f65352845265d2f85c4263e0eabb"
iuc
parents:
diff
changeset
|
66 codon = seq_chars [codon_start][codon_start+2]; |
c5ef306c2041
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/hyphy/ commit 2742ee3b4e90f65352845265d2f85c4263e0eabb"
iuc
parents:
diff
changeset
|
67 } else { |
c5ef306c2041
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/hyphy/ commit 2742ee3b4e90f65352845265d2f85c4263e0eabb"
iuc
parents:
diff
changeset
|
68 codon = "---"; |
c5ef306c2041
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/hyphy/ commit 2742ee3b4e90f65352845265d2f85c4263e0eabb"
iuc
parents:
diff
changeset
|
69 filter.ambigs += Abs (filter.patter2site [pattern]); // add the number of site indexes recorded for this pattern; all of them get struck |
c5ef306c2041
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/hyphy/ commit 2742ee3b4e90f65352845265d2f85c4263e0eabb"
iuc
parents:
diff
changeset
|
70 } |
c5ef306c2041
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/hyphy/ commit 2742ee3b4e90f65352845265d2f85c4263e0eabb"
iuc
parents:
diff
changeset
|
71 for (c; in; filter.patter2site [pattern] ) { |
c5ef306c2041
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/hyphy/ commit 2742ee3b4e90f65352845265d2f85c4263e0eabb"
iuc
parents:
diff
changeset
|
72 codons[c] = codon; |
c5ef306c2041
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/hyphy/ commit 2742ee3b4e90f65352845265d2f85c4263e0eabb"
iuc
parents:
diff
changeset
|
73 } |
c5ef306c2041
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/hyphy/ commit 2742ee3b4e90f65352845265d2f85c4263e0eabb"
iuc
parents:
diff
changeset
|
74 } |
c5ef306c2041
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/hyphy/ commit 2742ee3b4e90f65352845265d2f85c4263e0eabb"
iuc
parents:
diff
changeset
|
75 if (filter.ambigs > 0) { |
c5ef306c2041
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/hyphy/ commit 2742ee3b4e90f65352845265d2f85c4263e0eabb"
iuc
parents:
diff
changeset
|
76 fprintf (stdout, "\nStriking ", filter.ambigs, " codons that are incompletely resolved from " + seq_name + "\n"); |
c5ef306c2041
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/hyphy/ commit 2742ee3b4e90f65352845265d2f85c4263e0eabb"
iuc
parents:
diff
changeset
|
77 } |
c5ef306c2041
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/hyphy/ commit 2742ee3b4e90f65352845265d2f85c4263e0eabb"
iuc
parents:
diff
changeset
|
78 fprintf (filter.out,">",seq_name,"\n",Join ("", codons), "\n"); |
c5ef306c2041
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/hyphy/ commit 2742ee3b4e90f65352845265d2f85c4263e0eabb"
iuc
parents:
diff
changeset
|
79 } |
c5ef306c2041
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/hyphy/ commit 2742ee3b4e90f65352845265d2f85c4263e0eabb"
iuc
parents:
diff
changeset
|
80 |
c5ef306c2041
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/hyphy/ commit 2742ee3b4e90f65352845265d2f85c4263e0eabb"
iuc
parents:
diff
changeset
|
81 fprintf (filter.out,CLOSE_FILE); |
c5ef306c2041
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/hyphy/ commit 2742ee3b4e90f65352845265d2f85c4263e0eabb"
iuc
parents:
diff
changeset
|
82 |
c5ef306c2041
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/hyphy/ commit 2742ee3b4e90f65352845265d2f85c4263e0eabb"
iuc
parents:
diff
changeset
|
83 |