comparison map_chromosomes.pl @ 0:e60f92a8e1c8 draft default tip

Uploaded
author pmac
date Wed, 01 Jun 2016 03:48:29 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:e60f92a8e1c8
1 #! /usr/bin/perl -w
2
3 use strict;
4 use warnings;
5 use Scalar::Util qw(looks_like_number);
6
# Field buffer kept at file scope; the input-processing loop further down
# stores the split fields of the current line in it.
my @in = ();

# Delimiter name -> text interpolated into the split pattern.  Characters
# that are regex metacharacters (pipe, dot) carry a literal backslash, and
# TAB is the two-character escape sequence rather than a real tab.
my $column_delimiters_href_split = {
    TAB        => '\t',
    COMMA      => ',',
    DASH       => '-',
    UNDERSCORE => '_',
    PIPE       => '\|',
    DOT        => '\.',
    SPACE      => ' ',
};

# Delimiter name -> literal string used to glue the fields back together.
my $column_delimiters_href_join = {
    TAB        => "\t",
    COMMA      => ',',
    DASH       => '-',
    UNDERSCORE => '_',
    PIPE       => '|',
    DOT        => '.',
    SPACE      => ' ',
};

# A wrapper for converting between UCSC and Ensembl chromosome representations
# from within Galaxy.
#
# Usage: map_chromosomes.pl [input] [col] [delimiter] [map] [out_file1]
#   input      file whose chromosome column is rewritten
#   col        1-based number of the column holding the chromosome name
#   delimiter  one of TAB, COMMA, DASH, UNDERSCORE, PIPE, DOT, SPACE
#   map        tab-separated file: source name, then replacement name
#   out_file1  file the converted lines are written to

die "Check arguments: $0 [input] [col] [delimiter] [map] [out_file1]\n" unless @ARGV == 5;
die "No columns specified: $ARGV[1]\n" if looks_like_number($ARGV[1]) == 0;

# Tolerate stray whitespace around the column number, then insist on a
# positive integer.  Without this check 0 or a negative value would silently
# become a from-the-end array index (column 0 -> index -1 -> last field) and
# a fractional value would be truncated.
$ARGV[1] =~ s/\s+//g;
die "Column must be a positive integer: $ARGV[1]\n"
    unless $ARGV[1] =~ /\A[0-9]+\z/ && $ARGV[1] >= 1;

die "Delimeter must be one of TAB, COMMA, DASH, UNDERSCORE, PIPE, DOT, SPACE\n" unless defined $column_delimiters_href_split->{$ARGV[2]};

# process input
my $input    = $ARGV[0];
my $col      = $ARGV[1] - 1;    # zero-based index into the split fields
my $delim    = $ARGV[2];
my $map_file = $ARGV[3];
my $output   = $ARGV[4];

# Pattern text for splitting and literal string for re-joining the fields.
my $delim_split = $column_delimiters_href_split->{$delim};
my $delim_join  = $column_delimiters_href_join->{$delim};

# Load the chromosome name map into %chr_map (source name => replacement).
# Each non-comment line is tab-separated: the first field is the name to look
# for, the second the name to substitute.  A line with no usable second field
# maps the name to the sentinel "remove", which tells the conversion loop to
# drop matching input lines.
open(my $map_fh, '<', $map_file) or die "Cannot open map file $map_file:$!\n";
my %chr_map;
while (my $line = <$map_fh>) {
    # chomp (not chop) so an unterminated last line keeps its final character;
    # also strip a CR so CRLF map files do not leak "\r" into the new names.
    chomp $line;
    $line =~ s/\r\z//;

    next if $line =~ /^#/;    # comment line

    my ($from, $to) = split /\t/, $line;
    next unless defined $from;    # blank or tab-only line: nothing to map

    # An absent or empty replacement means the chromosome has no proper mapping.
    $to = "remove" unless defined $to && length $to;
    $chr_map{$from} = $to;
}
close $map_fh;

# Rewrite the chromosome column of every input line.
#   - name found in %chr_map with a real replacement: substitute and write
#   - name mapped to "remove": drop the line and report it on STDOUT
#   - line starting with '#': copy through unchanged
#   - anything else: drop the line and report it on STDOUT
open(my $in_fh,  '<', $input)  or die "Cannot open $input:$!\n";
open(my $out_fh, '>', $output) or die "Cannot create $output:$!\n";
while (my $line = <$in_fh>) {
    # chomp (not chop) so an unterminated last line keeps its final character.
    chomp $line;

    # LIMIT of -1 keeps trailing empty fields; without it they are discarded
    # and the re-joined line would silently lose columns.
    @in = split /$delim_split/, $line, -1;

    my $name = $in[$col];    # undef when the line has too few columns
    if (defined $name && defined $chr_map{$name}) {
        if ($chr_map{$name} eq "remove") {
            print "Removed line \"$line\" as chromosome does not have a proper mapping\n";
        } else {
            $in[$col] = $chr_map{$name};
            print {$out_fh} join($delim_join, @in), "\n";
        }
    } elsif ($line =~ /^#/) {
        # Comment/header line: pass through verbatim.
        print {$out_fh} $line, "\n";
    } else {
        # Report an empty name rather than warning about an undefined value.
        my $shown = defined $name ? $name : '';
        print "Removed line \"$line\" as \"$shown\" is not a valid chromosome name\n";
    }
}
close $in_fh;
# Buffered write errors (e.g. a full disk) only surface when the handle closes.
close $out_fh or die "Cannot close $output:$!\n";
