Mercurial > repos > pmac > map_chromosomes
comparison map_chromosomes.pl @ 0:e60f92a8e1c8 draft default tip
Uploaded
author | pmac |
---|---|
date | Wed, 01 Jun 2016 03:48:29 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:e60f92a8e1c8 |
---|---|
1 #! /usr/bin/perl -w | |
2 | |
3 use strict; | |
4 use warnings; | |
5 use Scalar::Util qw(looks_like_number); | |
6 | |
# Field buffer for the input line currently being converted.
my @in;

# Delimiter keyword => pattern fragment interpolated into the split regex.
# Characters that are regex metacharacters (PIPE, DOT) are pre-escaped, and
# TAB is the two-character escape sequence rather than a literal tab.
my $column_delimiters_href_split = {
    TAB        => '\t',
    COMMA      => ',',
    DASH       => '-',
    UNDERSCORE => '_',
    PIPE       => '\|',
    DOT        => '\.',
    SPACE      => ' ',
};

# Delimiter keyword => literal string used to re-join the fields on output.
my $column_delimiters_href_join = {
    TAB        => "\t",
    COMMA      => ',',
    DASH       => '-',
    UNDERSCORE => '_',
    PIPE       => '|',
    DOT        => '.',
    SPACE      => ' ',
};
27 | |
# A wrapper for converting between chromosome naming schemes (e.g. UCSC and
# Ensembl) from within Galaxy.
#
# Usage: map_chromosomes.pl [input] [col] [delimiter] [map] [out_file1]
#   input     - delimited text file to convert
#   col       - 1-based number of the column holding the chromosome name
#   delimiter - one of TAB, COMMA, DASH, UNDERSCORE, PIPE, DOT, SPACE
#   map       - tab-separated file: source name, then replacement name
#   out_file1 - path the converted lines are written to

die "Check arguments: $0 [input] [col] [delimiter] [map] [out_file1]\n" unless @ARGV == 5;
die "No columns specified: $ARGV[1]\n" unless looks_like_number($ARGV[1]);
die "Delimeter must be one of TAB, COMMA, DASH, UNDERSCORE, PIPE, DOT, SPACE\n"
    unless defined $column_delimiters_href_split->{$ARGV[2]};

# process input
my $input = $ARGV[0];

# Work on a copy so @ARGV is left as the caller supplied it.
(my $col_number = $ARGV[1]) =~ s/\s+//g;

# The column is converted to a 0-based array index below. Anything smaller
# than 1 would become a negative index, which Perl resolves from the END of
# the field list and would silently rewrite the wrong column.
die "Column must be a positive integer: $ARGV[1]\n"
    unless $col_number =~ /\A[0-9]+\z/ && $col_number > 0;
my $col = $col_number - 1;

my $delim    = $ARGV[2];
my $map_file = $ARGV[3];
my $output   = $ARGV[4];

# Regex fragment for splitting and literal string for joining, looked up
# from the same delimiter keyword.
my $delim_split = $column_delimiters_href_split->{$delim};
my $delim_join  = $column_delimiters_href_join->{$delim};
44 | |
# Load the chromosome map. Each non-comment line is "source<TAB>replacement".
# A line with a source name but no replacement is stored with the sentinel
# value "remove", marking input lines on that chromosome for removal.
open(my $map_fh, '<', $map_file) or die "Cannot open map file $map_file:$!\n";
my %chr_map;
while (my $line = <$map_fh>) {
    # Strip only the line terminator (LF or CRLF). chop would delete a real
    # character from a final line that has no trailing newline.
    $line =~ s/[\r\n]+\z//;
    next if $line =~ /^#/;

    my @map = split /\t/, $line;

    # A blank line yields no fields; skip it instead of storing an
    # undefined key.
    next unless @map;

    $map[1] = "remove" if @map == 1;
    $chr_map{$map[0]} = $map[1];
}
close $map_fh;
55 | |
# Convert the input file line by line:
#   - lines starting with '#' are copied to the output unchanged;
#   - lines whose chromosome column has a replacement in %chr_map are written
#     with that column rewritten;
#   - every other line is dropped and reported on STDOUT.
# Output lines are always terminated with "\n", whatever the input used.
open(my $in_fh,  '<', $input)  or die "Cannot open $input:$!\n";
open(my $out_fh, '>', $output) or die "Cannot create $output:$!\n";
while (my $line = <$in_fh>) {
    # Strip only the line terminator (LF or CRLF). chop would delete a real
    # character from a final line that has no trailing newline.
    $line =~ s/[\r\n]+\z//;

    # Test for comments first so a header line is never rewritten or dropped
    # because one of its fields happens to look like a chromosome name.
    if ($line =~ /^#/) {
        print {$out_fh} $line, "\n";
        next;
    }

    # A limit of -1 keeps trailing empty columns, which split would otherwise
    # discard and so shorten the output row.
    my @fields = split /$delim_split/, $line, -1;
    my $chr    = $fields[$col];

    if (defined $chr && defined $chr_map{$chr}) {
        if ($chr_map{$chr} eq "remove") {
            print "Removed line \"$line\" as chromosome does not have a proper mapping\n";
        } else {
            $fields[$col] = $chr_map{$chr};
            print {$out_fh} join($delim_join, @fields), "\n";
        }
    } else {
        # The column may be missing entirely (blank or short line); report an
        # empty name rather than interpolating undef.
        my $shown = defined $chr ? $chr : '';
        print "Removed line \"$line\" as \"$shown\" is not a valid chromosome name\n";
    }
}
close $in_fh;

# Buffered write errors (e.g. a full disk) only surface when the handle closes.
close $out_fh or die "Cannot create $output:$!\n";
76 | |
77 |