Mercurial > repos > pmac > map_chromosomes
diff map_chromosomes.pl @ 0:e60f92a8e1c8 draft default tip
Uploaded
author | pmac |
---|---|
date | Wed, 01 Jun 2016 03:48:29 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/map_chromosomes.pl Wed Jun 01 03:48:29 2016 -0400 @@ -0,0 +1,77 @@ +#! /usr/bin/perl -w + +use strict; +use warnings; +use Scalar::Util qw(looks_like_number); + +my @in = (); +my $column_delimiters_href_split = { + 'TAB' => q{\t}, + 'COMMA' => ",", + 'DASH' => "-", + 'UNDERSCORE' => "_", + 'PIPE' => q{\|}, + 'DOT' => q{\.}, + 'SPACE' => " " +}; + +my $column_delimiters_href_join = { + 'TAB' => qq{\t}, + 'COMMA' => ",", + 'DASH' => "-", + 'UNDERSCORE' => "_", + 'PIPE' => "|", + 'DOT' => ".", + 'SPACE' => " " +}; + +# a wrapper for converting between UCSC and ensembl chromosome representations from within galaxy +# convert_UCSC_ensembl.pl [input] [col] [delimiter] [genome] [out_file1] + +die "Check arguments: $0 [input] [col] [delimiter] [map] [out_file1]\n" unless @ARGV == 5; +die "No columns specified: $ARGV[1]\n" if looks_like_number($ARGV[1]) == 0; +die "Delimeter must be one of TAB, COMMA, DASH, UNDERSCORE, PIPE, DOT, SPACE\n" unless defined $column_delimiters_href_split->{$ARGV[2]}; + +# process input +my $input = $ARGV[0]; +$ARGV[1] =~ s/\s+//g; +my $col = --$ARGV[1]; +my $delim = $ARGV[2]; +my $map_file = $ARGV[3]; +my $output = $ARGV[4]; +my $delim_split = $column_delimiters_href_split->{$delim}; +my $delim_join = $column_delimiters_href_join->{$delim}; + +open (MAP, "<$map_file") or die "Cannot open map file $map_file:$!\n"; +my %chr_map; +while(my $line = <MAP>) { + chop $line; + next if grep /^#/, $line; + my @map = split /\t/, $line; + $map[1] = "remove" unless $#map; + $chr_map{$map[0]} = $map[1]; +} +close MAP; + +open (IN, "<$input") or die "Cannot open $input:$!\n"; +open (OUT, ">$output") or die "Cannot create $output:$!\n"; +while (my $line = <IN>) { + chop $line; + @in = split /$delim_split/, $line; + if(defined $in[$col] && defined $chr_map{$in[$col]}) { + $in[$col] = $chr_map{$in[$col]}; + if($in[$col] eq "remove") { + print "Removed line \"$line\" as chromosome does not have a proper mapping\n"; + } else { + print OUT join($delim_join, @in), "\n"; + } + } elsif(grep /^#/, $in[0]) { + print OUT join($delim_join, @in), "\n"; + } else { + print "Removed line \"$line\" as \"$in[$col]\" is not a valid chromosome name\n"; + } +} +close IN; +close OUT; + +