Mercurial > repos > saket-choudhary > merge_columns_with_delimiter
changeset 0:eaf7c9b0a1a4 draft default tip
Uploaded
| author | saket-choudhary | 
|---|---|
| date | Tue, 07 Oct 2014 19:38:23 -0400 | 
| parents | |
| children | |
| files | merge_columns_with_delimiter/merge_columns_with_delimiter.py merge_columns_with_delimiter/merge_columns_with_delimiter.xml merge_columns_with_delimiter/test-data/merge_columns_with_delimiter_input.tsv merge_columns_with_delimiter/test-data/merge_columns_with_delimiter_output.tsv merge_columns_with_delimiter/tool_dependecies.xml | 
| diffstat | 5 files changed, 146 insertions(+), 0 deletions(-) [+] | 
line wrap: on
 line diff
# --- /dev/null Thu Jan 01 00:00:00 1970 +0000
# +++ b/merge_columns_with_delimiter/merge_columns_with_delimiter.py Tue Oct 07 19:38:23 2014 -0400
"""Append a merged column to every row of a tab-separated file.

Usage::

    merge_columns_with_delimiter.py INPUT OUTPUT DELIMITER_CODE COL [COL ...]

Each data row of INPUT is written to OUTPUT followed by one extra
tab-separated column holding the requested fields (1-based column numbers,
in the order given) joined with the delimiter selected by DELIMITER_CODE.
"""
import re
import sys

# Delimiter codes accepted on the command line, mapped to the literal text
# placed between merged fields.  The values are plain output text, not regular
# expressions, so no regex escaping (such as '\s' or '\.') belongs here.
_DELIMITERS = {
    'T': '\t',
    's': ' ',
    'Dt': '.',
    'Sl': '\\',
    'Sr': '/',
    'C': ',',
    'D': '-',
    'U': '_',
    'P': '|',
    'Co': ':',
    'Sc': ';',
    'Ep': '',
}


def stop_err(msg):
    """Write *msg* to stderr and terminate with a non-zero exit status."""
    sys.stderr.write(msg)
    sys.exit(1)


def _parse_columns(cols):
    """Convert 1-based column arguments into 0-based list indices.

    Raises ValueError when an argument is not an integer or is smaller than 1
    (a value of 0 would otherwise silently address the last field).
    """
    indices = []
    for col in cols:
        number = int(col)
        if number < 1:
            raise ValueError('column numbers start at 1, got %r' % (col,))
        indices.append(number - 1)
    return indices


def _merge_fields(fields, indices, separator):
    """Join the selected fields with *separator*.

    Returns a ``(merged, complete)`` pair.  Columns the row does not have are
    left out of the merge, and ``complete`` is False when any were missing.
    """
    present = [fields[index] for index in indices if index < len(fields)]
    return separator.join(present), len(present) == len(indices)


def _merge_stream(infile, outfile, indices, separator):
    """Copy data rows from *infile* to *outfile* with the merged column added.

    Returns the number of rows that lacked at least one requested column.
    """
    invalid_lines = 0
    for line in infile:
        line = line.rstrip('\r\n')
        # NOTE(review): the indentation of the original output statement could
        # not be recovered from the changeset view; this assumes blank lines
        # and '#' comment lines are dropped from the output rather than copied
        # through unchanged -- confirm against the repository.
        if not line or line.startswith('#'):
            continue
        merged, complete = _merge_fields(line.split('\t'), indices, separator)
        if not complete:
            # Count each incomplete row once, however many columns it lacks.
            invalid_lines += 1
        outfile.write(line + '\t' + merged + '\n')
    return invalid_lines


def __main__():
    """Command-line entry point; reads its arguments from ``sys.argv``.

    Exits through ``stop_err`` when too few arguments are given, the delimiter
    code is unknown, a column number is invalid, or a file cannot be opened.
    Prints the number of incomplete rows to stdout when there are any.
    """
    # Validate the arguments before touching the file system so that a usage
    # error cannot leave an empty output file behind.
    if len(sys.argv) < 5:
        stop_err('No columns to merge')

    input_path, output_path, delimiter = sys.argv[1:4]
    if delimiter not in _DELIMITERS:
        stop_err('Unknown delimiter code: %s\n' % delimiter)
    try:
        indices = _parse_columns(sys.argv[4:])
    except ValueError:
        stop_err('Invalid column number in: %s\n' % ' '.join(sys.argv[4:]))

    try:
        infile = open(input_path, 'r')
    except (IOError, OSError):
        stop_err('Cannot open or create a file\n')
    with infile:
        try:
            outfile = open(output_path, 'w')
        except (IOError, OSError):
            stop_err('Cannot open or create a file\n')
        with outfile:
            skipped_lines = _merge_stream(
                infile, outfile, indices, _DELIMITERS[delimiter])

    if skipped_lines > 0:
        sys.stdout.write('Skipped %d invalid lines\n' % skipped_lines)


if __name__ == "__main__":
    __main__()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/merge_columns_with_delimiter/merge_columns_with_delimiter.xml Tue Oct 07 19:38:23 2014 -0400 @@ -0,0 +1,83 @@ +<tool id="merge_cols_with_delimiter1" name="Merge Columns with Delimiters" version="1.0.1"> + <description>together</description> + <command interpreter="python"> + merge_columns_with_delimiter.py + $input1 + $out_file1 + $delimiter + $col1 + $col2 + #for $col in $columns + ${col.datacol} + #end for + + </command> + <inputs> + <param format="tabular" name="input1" type="data" label="Select data" help="Dataset missing? See TIP below."/> + <param name="delimiter" type="select" label="Delimiter"> + <option value="s">Whitespaces</option> + <option value="T">Tabs</option> + <option value="Ep">EmptyString</option> + <option value="Dt">Dots</option> + <option value="C">Commas</option> + <option value="D">Dashes</option> + <option value="Sl">Left Slashes</option> + <option value="Sr">Right Slashes</option> + <option value="U">Underscores</option> + <option value="P">Pipes</option> + <option value="Co">Colons</option> + <option value="Sc">Semicolons</option> + </param> + <param name="col1" label="Merge column" type="data_column" data_ref="input1" /> + <param name="col2" label="with column" type="data_column" data_ref="input1" help="Need to add more columns? Use controls below."/> + + <repeat name="columns" title="Columns"> + <param name="datacol" label="Add column" type="data_column" data_ref="input1" /> + </repeat> + </inputs> + <outputs> + <data format="tabular" name="out_file1" /> + </outputs> + <tests> + <test> + <param name="input1" value="merge_columns_with_delimiter_input.tsv"/> + <param name="delimiter" value="D" /> + <param name="col1" value="5" /> + <param name="col2" value="1" /> + <param name="datacol" value="3" /> + <output name="out_file1" file="merge_columns_with_delimiter_output.tsv"/> + </test> + </tests> +<help> + +.. 
class:: infomark + +**TIP:** If your data is not TAB delimited, use *Text Manipulation->Convert* + +----- + +**What it does** + +This tool merges columns together separating them with the specified delimiter. +Any number of valid columns can be merged in any order. +Tool and code inspired from 'merge_columns' tool distributed with Galaxy. + +----- + +**Example** + +Input dataset (five columns: c1, c2, c3, c4, and c5):: + + 1 10 1000 gene1 chr + 2 100 1500 gene2 chr + +merging columns "**c5,c1**" with "-"(dash) will return:: + + 1 10 1000 gene1 chr chr-1 + 2 100 1500 gene2 chr chr-2 + +.. class:: warningmark + +Note that all original columns are preserved and the result of merge is added as the rightmost column. + </help> +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/merge_columns_with_delimiter/test-data/merge_columns_with_delimiter_input.tsv Tue Oct 07 19:38:23 2014 -0400 @@ -0,0 +1,2 @@ +1 10 1000 gene1 chr +2 100 1500 gene2 chr
