Mercurial > repos > bgruening > replace_column_by_key_value_file
comparison replaceColumn.xml @ 1:d533e4b75800 draft default tip
planemo upload for repository https://github.com/bgruening/galaxytools/tree/replaceColumn/tools/replaceColumn commit 0def21576e206a0732ce63bacd18533064ddf155
author | bgruening |
---|---|
date | Sun, 23 Sep 2018 04:03:34 -0400 |
parents | cc18bac5afdb |
children |
comparison
equal
deleted
inserted
replaced
0:cc18bac5afdb | 1:d533e4b75800 |
---|---|
1 <tool id="replace_column_with_key_value_file" name="Replace column" version="0.1"> | 1 <tool id="replace_column_with_key_value_file" name="Replace column" version="0.2"> |
2 <description>by values which are defined in a convert file</description> | 2 <description>by values which are defined in a convert file</description> |
3 <command> | 3 <command> |
4 <![CDATA[ | 4 <![CDATA[ |
5 python '$replaceColumnScript' | 5 python '$replaceColumnScript' |
6 ]]> | 6 ]]> |
12 | 12 |
13 replace_file = '$replace_information' | 13 replace_file = '$replace_information' |
14 original_file = '$original_file' | 14 original_file = '$original_file' |
15 column = int("$column_replace") - 1 | 15 column = int("$column_replace") - 1 |
16 ignore_start_lines = int("$skip_lines") | 16 ignore_start_lines = int("$skip_lines") |
17 delimiter_local = "\t" if str("$delimiter") == "" else str("$delimiter") | 17 delimiter_local = "\t" if str("$delimiter") == "tab" else str("$delimiter") |
18 comment_str = str("$pass_comments") | |
19 unk_strat = str("$unknowns_strategy") | |
18 | 20 |
19 ## read conversion information to index | 21 ## read conversion information to index |
20 conversion = {} | 22 conversion = {} |
21 | 23 |
22 with open(replace_file, 'r') as conversion_file: | 24 with open(replace_file, 'r') as conversion_file: |
23 for line in conversion_file: | 25 for line in conversion_file: |
24 conv_key_value = line.strip().split() | 26 conv_key_value = line.strip().split() |
25 if len(conv_key_value) == 2: | 27 if len(conv_key_value) == 2: |
26 conversion[conv_key_value[0]] = conv_key_value[1] | 28 conversion[conv_key_value[0]] = conv_key_value[1] |
27 | 29 |
28 ## read file line by line, search for column entry if it can be replaced. Otherwise it will be skipped. | 30 ## read file line by line, search for column entry if it can be replaced. Otherwise it will be skipped. |
29 with open("output_file", 'w') as output: | 31 with open("output_file", 'w') as output: |
30 with open(original_file) as original: | 32 with open(original_file) as original: |
31 for i, line in enumerate(original): | 33 for i, line in enumerate(original): |
32 if i < ignore_start_lines: | 34 if i < ignore_start_lines or (comment_str and line.startswith(comment_str)): |
33 output.write(line) | 35 output.write(line) |
34 continue | 36 continue |
35 | 37 |
36 if str("$delimiter") == "": | 38 line_content = line.rstrip().split(delimiter_local) |
37 line_content = line.split() | |
38 else: | |
39 line_content = line.split(str("$delimiter")) | |
40 | 39 |
41 out = list() | 40 out = list() |
42 for j, line_content_column in enumerate(line_content): | 41 for j, line_content_column in enumerate(line_content): |
43 if j == column: | 42 if j == column: |
43 | |
44 if line_content_column in conversion: | 44 if line_content_column in conversion: |
45 out.append(conversion[line_content_column]) | 45 out.append(conversion[line_content_column]) |
46 elif unk_strat == "print": | |
47 out.append(line_content_column) | |
48 elif unk_strat == "error": | |
49 raise Exception('ERROR: Encountered a value [%s] in the file that is not in the replacements file and is not commented with [%s]' % (line_content_column, comment_str)) | |
46 else: | 50 else: |
47 out.append(line_content_column) | 51 out.append(line_content_column) |
48 | 52 |
49 if len(out) == len(line_content): | 53 if len(out) == len(line_content): |
50 output.write('%s\n' % delimiter_local.join(out)) | 54 output.write('%s\n' % delimiter_local.join(out)) |
61 help="This file contains in the first column the entries that should be replaced by the values of the second column." /> | 65 help="This file contains in the first column the entries that should be replaced by the values of the second column." /> |
62 <param name="column_replace" type="data_column" data_ref="original_file" multiple="false" | 66 <param name="column_replace" type="data_column" data_ref="original_file" multiple="false" |
63 label="Which column should be replaced?" /> | 67 label="Which column should be replaced?" /> |
64 <param name="skip_lines" type='integer' value='0' label="Skip this many starting lines" /> | 68 <param name="skip_lines" type='integer' value='0' label="Skip this many starting lines" /> |
65 <param name="delimiter" type="select" label="Delimited by"> | 69 <param name="delimiter" type="select" label="Delimited by"> |
66 <option value="" selected="True">Tab</option> | 70 <option value="tab" selected="True">Tab</option> |
67 <option value=" ">Whitespace</option> | 71 <option value=" ">Space</option> |
68 <option value=".">Dot</option> | 72 <option value=".">Dot</option> |
69 <option value=",">Comma</option> | 73 <option value=",">Comma</option> |
70 <option value="-">Dash</option> | 74 <option value="-">Dash</option> |
71 <option value="_">Underscore</option> | 75 <option value="_">Underscore</option> |
72 <option value="|">Pipe</option> | 76 <option value="|">Pipe</option> |
77 </param> | |
78 <param name="unknowns_strategy" type="select" label="When an unknown value is encountered"> | |
79 <option value="skip" selected="True">Skip / Do not print</option> | |
80 <option value="print">Print without modification</option> | |
81 <option value="error">Exit with an error</option> | |
82 </param> | |
83 <param name="pass_comments" type="text" value="#" label="Do not perform replacement on lines starting with"> | |
84 <sanitizer> | |
85 <valid> | |
86 <add value="#" /> | |
87 </valid> | |
88 </sanitizer> | |
73 </param> | 89 </param> |
74 </inputs> | 90 </inputs> |
75 <outputs> | 91 <outputs> |
76 <data name="outfile_replace" format="txt" from_work_dir="output_file"/> | 92 <data name="outfile_replace" format="txt" from_work_dir="output_file"/> |
77 </outputs> | 93 </outputs> |
79 <test> | 95 <test> |
80 <param name="replace_information" value="GRCh38_ensembl2UCSC.txt" ftype="tabular" /> | 96 <param name="replace_information" value="GRCh38_ensembl2UCSC.txt" ftype="tabular" /> |
81 <param name="original_file" value="original_file" ftype="tabular" /> | 97 <param name="original_file" value="original_file" ftype="tabular" /> |
82 <param name="column_replace" value="1"/> | 98 <param name="column_replace" value="1"/> |
83 <param name="skip_lines" value="1"/> | 99 <param name="skip_lines" value="1"/> |
84 <param name="delimiter" value="" /> | 100 <param name="delimiter" value="tab" /> |
101 <param name="unknowns_strategy" value="skip"/> | |
102 <param name="pass_comments" value="#"/> | |
85 <output name="outfile_replace" file="result_file"/> | 103 <output name="outfile_replace" file="result_file"/> |
86 </test> | 104 </test> |
87 <test> | 105 <test> |
88 <param name="replace_information" value="GRCh38_ensembl2UCSC.txt" ftype="tabular" /> | 106 <param name="replace_information" value="GRCh38_ensembl2UCSC.txt" ftype="tabular" /> |
89 <param name="original_file" value="empty_mapping" ftype="tabular" /> | 107 <param name="original_file" value="empty_mapping" ftype="tabular" /> |
90 <param name="column_replace" value="1"/> | 108 <param name="column_replace" value="1"/> |
91 <param name="skip_lines" value="1"/> | 109 <param name="skip_lines" value="1"/> |
92 <param name="delimiter" value="" /> | 110 <param name="delimiter" value="tab" /> |
111 <param name="unknowns_strategy" value="skip"/> | |
112 <param name="pass_comments" value="#"/> | |
93 <output name="outfile_replace" file="result_file_empty_mapping"/> | 113 <output name="outfile_replace" file="result_file_empty_mapping"/> |
114 </test> | |
115 <test expect_failure="True"> | |
116 <param name="replace_information" value="neg_test_map.txt" ftype="tabular" /> | |
117 <param name="original_file" value="neg_test_commented.txt" ftype="tabular" /> | |
118 <param name="column_replace" value="1"/> | |
119 <param name="skip_lines" value="0"/> | |
120 <param name="delimiter" value="tab" /> | |
121 <param name="unknowns_strategy" value="error"/> | |
122 <param name="pass_comments" value="#"/> | |
123 </test> | |
124 <test> | |
125 <param name="replace_information" value="neg_test_map.txt" ftype="tabular" /> | |
126 <param name="original_file" value="neg_test_commented.txt" ftype="tabular" /> | |
127 <param name="column_replace" value="1"/> | |
128 <param name="skip_lines" value="0"/> | |
129 <param name="delimiter" value="tab" /> | |
130 <param name="unknowns_strategy" value="print"/> | |
131 <param name="pass_comments" value="#"/> | |
132 <output name="outfile_replace" file="neg_test_commented.txt"/> | |
94 </test> | 133 </test> |
95 </tests> | 134 </tests> |
96 <help> | 135 <help> |
97 <![CDATA[ | 136 <![CDATA[ |
98 **What it does** | 137 **What it does** |
99 | 138 |
100 This tool replaces the entries of a defined column with entries given by a replacement file. | 139 This tool replaces the entries of a defined column with entries given by a replacement file. |
101 For example the replacement file holds the information of the naming scheme of ensembl annotated chromosomes in the first column and in the second the UCSC annotation. | 140 For example the replacement file holds the information of the naming scheme of ensembl annotated chromosomes in the first column and in the second the UCSC annotation. |
102 A file which has information about chromosomes in ensembl notation in column x can now be converted to a file which holds the same information but in UCSC annotation. | 141 A file which has information about chromosomes in ensembl notation in column x can now be converted to a file which holds the same information but in UCSC annotation. |
103 | 142 |
104 A useful repository for ensembl and UCSC chromosomes mapping is: https://github.com/dpryan79/ChromosomeMappings | 143 A useful repository for ensembl and UCSC chromosomes mapping is: https://github.com/dpryan79/ChromosomeMappings |
105 ]]> | 144 ]]> |
106 </help> | 145 </help> |