annotate map_chromosomes.pl @ 0:e60f92a8e1c8 draft default tip

Uploaded
author pmac
date Wed, 01 Jun 2016 03:48:29 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
e60f92a8e1c8 Uploaded
pmac
parents:
diff changeset
#! /usr/bin/perl -w

use strict;
use warnings;
use Scalar::Util qw(looks_like_number);

# map_chromosomes.pl
#
# A wrapper for converting between UCSC and ensembl chromosome representations
# from within galaxy.
#
# Usage: map_chromosomes.pl [input] [col] [delimiter] [map] [out_file1]
#   input      delimited text file to convert
#   col        1-based number of the column holding the chromosome name
#   delimiter  one of TAB, COMMA, DASH, UNDERSCORE, PIPE, DOT, SPACE
#   map        tab-separated file: column 1 is the name as found in the input,
#              column 2 is the replacement; lines starting with '#' are
#              skipped; a line with only one column marks that chromosome
#              for removal
#   out_file1  file the converted lines are written to
#
# For every input line the selected column is looked up in the map:
#   * known name with a replacement -> column is rewritten, line is written
#   * known name marked for removal -> line is dropped, notice on STDOUT
#   * otherwise, line starts with '#' -> line is written unchanged
#   * otherwise                      -> line is dropped, notice on STDOUT

# Regex fragments used to split an input line, keyed by delimiter name.
my $column_delimiters_href_split = {
    'TAB'        => q{\t},
    'COMMA'      => ",",
    'DASH'       => "-",
    'UNDERSCORE' => "_",
    'PIPE'       => q{\|},
    'DOT'        => q{\.},
    'SPACE'      => " "
};

# Literal strings used to re-join the columns, keyed by delimiter name.
my $column_delimiters_href_join = {
    'TAB'        => qq{\t},
    'COMMA'      => ",",
    'DASH'       => "-",
    'UNDERSCORE' => "_",
    'PIPE'       => "|",
    'DOT'        => ".",
    'SPACE'      => " "
};

# Marker stored in %chr_map for chromosomes whose lines must be dropped.
# NOTE: a map line that literally maps a name to "remove" has the same effect.
my $REMOVE = "remove";

die "Check arguments: $0 [input] [col] [delimiter] [map] [out_file1]\n" unless @ARGV == 5;
die "No columns specified: $ARGV[1]\n" if looks_like_number($ARGV[1]) == 0;
die "Delimeter must be one of TAB, COMMA, DASH, UNDERSCORE, PIPE, DOT, SPACE\n" unless defined $column_delimiters_href_split->{$ARGV[2]};

# process input
my ($input, $col_arg, $delim, $map_file, $output) = @ARGV;
$col_arg =~ s/\s+//g;

# looks_like_number() also accepts 0, negative and fractional values; 0 would
# become index -1 and silently select the LAST column, so insist on a
# positive integer before converting to a 0-based index.
die "Column must be a positive integer: $ARGV[1]\n" unless $col_arg =~ /\A[1-9][0-9]*\z/;
my $col = $col_arg - 1;

my $delim_split = $column_delimiters_href_split->{$delim};
my $delim_join  = $column_delimiters_href_join->{$delim};

# Load the chromosome map: name in input => replacement (or $REMOVE).
open(my $map_fh, '<', $map_file) or die "Cannot open map file $map_file:$!\n";
my %chr_map;
while (my $line = <$map_fh>) {
    # Strip the line ending only (LF or CRLF); chop would also eat the last
    # real character of a final line that has no trailing newline.
    $line =~ s/\r?\n\z//;
    next if $line =~ /^#/;
    my @map = split /\t/, $line;
    # Blank or tabs-only lines yield no fields; skip them instead of
    # creating an undef key (and the warnings that come with it).
    next unless @map;
    $chr_map{$map[0]} = @map > 1 ? $map[1] : $REMOVE;
}
close $map_fh;

open(my $in_fh,  '<', $input)  or die "Cannot open $input:$!\n";
open(my $out_fh, '>', $output) or die "Cannot create $output:$!\n";
while (my $line = <$in_fh>) {
    chomp $line;
    # LIMIT -1 keeps trailing empty columns so that the re-joined line has
    # the same number of columns as the input line.
    my @in   = split /$delim_split/, $line, -1;
    my $name = $in[$col];
    if (defined $name && defined $chr_map{$name}) {
        if ($chr_map{$name} eq $REMOVE) {
            print "Removed line \"$line\" as chromosome does not have a proper mapping\n";
        } else {
            $in[$col] = $chr_map{$name};
            print {$out_fh} join($delim_join, @in), "\n";
        }
    } elsif ($line =~ /^#/) {
        # Comment/header line: pass through untouched.
        print {$out_fh} $line, "\n";
    } else {
        # The column may not exist on short or empty lines; report it as an
        # empty name rather than interpolating undef.
        $name = '' unless defined $name;
        print "Removed line \"$line\" as \"$name\" is not a valid chromosome name\n";
    }
}
close $in_fh;
# Buffered write errors only surface when the handle is closed.
close($out_fh) or die "Cannot close $output:$!\n";