#! /usr/bin/perl -w

use strict;
use warnings;
use Scalar::Util qw(looks_like_number);

# A wrapper for converting between UCSC and Ensembl chromosome
# representations from within Galaxy.
#
# Usage:
#   convert_UCSC_ensembl.pl [input] [col] [delimiter] [map] [out_file1]
#
#   input      delimited text file whose chromosome column is rewritten
#   col        1-based index of the column holding the chromosome name
#   delimiter  one of TAB, COMMA, DASH, UNDERSCORE, PIPE, DOT, SPACE
#   map        tab-separated file: source name in column 1, replacement name
#              in column 2; lines starting with '#' are ignored, and a source
#              name with no replacement marks lines for removal
#   out_file1  file that receives the converted lines
#
# Lines that are dropped are reported on STDOUT.

# Delimiter keyword => regex source used to split an input line.
my $column_delimiters_href_split = {
    'TAB'        => q{\t},
    'COMMA'      => ",",
    'DASH'       => "-",
    'UNDERSCORE' => "_",
    'PIPE'       => q{\|},
    'DOT'        => q{\.},
    'SPACE'      => " "
};

# Delimiter keyword => literal string used to re-join the fields.
my $column_delimiters_href_join = {
    'TAB'        => qq{\t},
    'COMMA'      => ",",
    'DASH'       => "-",
    'UNDERSCORE' => "_",
    'PIPE'       => "|",
    'DOT'        => ".",
    'SPACE'      => " "
};

die "Check arguments: $0 [input] [col] [delimiter] [map] [out_file1]\n" unless @ARGV == 5;
die "No columns specified: $ARGV[1]\n" unless looks_like_number($ARGV[1]);
die "Delimeter must be one of TAB, COMMA, DASH, UNDERSCORE, PIPE, DOT, SPACE\n" unless defined $column_delimiters_href_split->{$ARGV[2]};

# process input
my $input = $ARGV[0];

# looks_like_number() also accepts 0, negative and fractional values.  A
# column of 0 would become index -1 and silently address the LAST column, so
# insist on a positive integer before converting to a 0-based index.
(my $col_arg = $ARGV[1]) =~ s/\s+//g;
die "Column must be a positive integer: $ARGV[1]\n"
    unless $col_arg =~ /\A[0-9]+\z/ && $col_arg >= 1;
my $col = $col_arg - 1;

my $delim       = $ARGV[2];
my $map_file    = $ARGV[3];
my $output      = $ARGV[4];
my $delim_split = $column_delimiters_href_split->{$delim};
my $delim_join  = $column_delimiters_href_join->{$delim};

my $chr_map = load_chromosome_map($map_file);
convert_file($input, $output, $col, $delim_split, $delim_join, $chr_map);

# Read the tab-separated map file into a hash of source name => replacement.
#
# Lines starting with '#' and blank lines are skipped.  A line with a source
# name but no replacement is stored with the sentinel value "remove".
#
# Returns a reference to the hash; dies if the file cannot be opened.
sub load_chromosome_map {
    my ($path) = @_;

    open(my $map_fh, '<', $path) or die "Cannot open map file $path:$!\n";

    my %chr_map;
    while (my $line = <$map_fh>) {
        # Strip the line terminator only (never a real character), including
        # a CR so it cannot end up inside a replacement name.
        $line =~ s/\r?\n\z//;
        next if $line =~ /^#/;

        my @map = split /\t/, $line;
        next unless @map;    # blank line: nothing to map

        $chr_map{$map[0]} = @map > 1 ? $map[1] : "remove";
    }
    close $map_fh;

    return \%chr_map;
}

# Copy $in_path to $out_path, replacing the chromosome name found in the
# 0-based column $col_index according to $map (hash ref from
# load_chromosome_map).
#
#   - mapped to a real name : the line is written with the name replaced
#   - mapped to "remove"    : the line is dropped and reported on STDOUT
#   - not in the map        : lines starting with '#' are copied unchanged,
#                             anything else is dropped and reported on STDOUT
#
# $split_src is the regex source used to split a line, $join_str the string
# used to put the fields back together.  Dies if either file cannot be
# opened or the output cannot be flushed on close.
sub convert_file {
    my ($in_path, $out_path, $col_index, $split_src, $join_str, $map) = @_;

    open(my $in_fh,  '<', $in_path)  or die "Cannot open $in_path:$!\n";
    open(my $out_fh, '>', $out_path) or die "Cannot create $out_path:$!\n";

    while (my $line = <$in_fh>) {
        chomp $line;

        # LIMIT -1 keeps trailing empty fields so the re-joined line has the
        # same number of columns as the input line.
        my @in  = split /$split_src/, $line, -1;
        my $chr = $in[$col_index];

        if (defined $chr && defined $map->{$chr}) {
            my $mapped = $map->{$chr};
            if ($mapped eq "remove") {
                print "Removed line \"$line\" as chromosome does not have a proper mapping\n";
            }
            else {
                $in[$col_index] = $mapped;
                print {$out_fh} join($join_str, @in), "\n";
            }
        }
        elsif ($line =~ /^#/) {
            # Comment/header line: pass through untouched.
            print {$out_fh} $line, "\n";
        }
        else {
            # The column may be missing entirely (blank or short line).
            my $shown = defined $chr ? $chr : '';
            print "Removed line \"$line\" as \"$shown\" is not a valid chromosome name\n";
        }
    }

    close $in_fh;
    # Buffered write errors (e.g. disk full) only surface at close.
    close $out_fh or die "Cannot close $out_path:$!\n";

    return;
}