Mercurial > repos > pmac > map_chromosomes
view map_chromosomes.pl @ 0:e60f92a8e1c8 draft default tip
Uploaded
author | pmac |
---|---|
date | Wed, 01 Jun 2016 03:48:29 -0400 |
parents | |
children |
line wrap: on
line source
#! /usr/bin/perl -w
#
# A wrapper for converting between UCSC and ensembl chromosome
# representations from within galaxy.
#
# Usage:
#   map_chromosomes.pl [input] [col] [delimiter] [map] [out_file1]
#
#   input      delimited text file whose chromosome column is rewritten
#   col        1-based number of the column holding the chromosome name
#   delimiter  one of TAB, COMMA, DASH, UNDERSCORE, PIPE, DOT, SPACE
#   map        tab-separated file: "<old name>\t<new name>" per line; lines
#              starting with '#' are ignored, and an entry with no second
#              column marks the chromosome for removal
#   out_file1  file the converted rows are written to
#
# Rows whose chromosome has a mapping are written with the mapped name.
# Rows whose first field starts with '#' are passed through. Every other
# row is dropped, and a "Removed line ..." notice is printed to STDOUT.

use strict;
use warnings;
use Scalar::Util qw(looks_like_number);

# Delimiter names -> regex source used when splitting an input line.
my $column_delimiters_href_split = {
    'TAB'        => q{\t},
    'COMMA'      => ",",
    'DASH'       => "-",
    'UNDERSCORE' => "_",
    'PIPE'       => q{\|},
    'DOT'        => q{\.},
    'SPACE'      => " "
};

# Delimiter names -> literal string used when re-joining the fields.
my $column_delimiters_href_join = {
    'TAB'        => qq{\t},
    'COMMA'      => ",",
    'DASH'       => "-",
    'UNDERSCORE' => "_",
    'PIPE'       => "|",
    'DOT'        => ".",
    'SPACE'      => " "
};

die "Check arguments: $0 [input] [col] [delimiter] [map] [out_file1]\n" unless @ARGV == 5;
die "No columns specified: $ARGV[1]\n" if looks_like_number($ARGV[1]) == 0;
die "Delimeter must be one of TAB, COMMA, DASH, UNDERSCORE, PIPE, DOT, SPACE\n"
    unless defined $column_delimiters_href_split->{$ARGV[2]};

# process input
my $input = $ARGV[0];

# Strip stray whitespace, then insist on a whole number >= 1. Without this
# check a value of 0 would become index -1 and silently address the LAST
# column, and fractional values would be truncated to some other column.
(my $col_arg = $ARGV[1]) =~ s/\s+//g;
die "Column must be a positive integer: $ARGV[1]\n"
    unless $col_arg =~ /\A[0-9]+\z/ && $col_arg >= 1;
my $col = $col_arg - 1;    # convert the 1-based column to a 0-based index

my $delim    = $ARGV[2];
my $map_file = $ARGV[3];
my $output   = $ARGV[4];

my $delim_split = $column_delimiters_href_split->{$delim};
my $delim_join  = $column_delimiters_href_join->{$delim};

# Load the chromosome map: old name -> new name (or the sentinel "remove").
open(my $map_fh, '<', $map_file) or die "Cannot open map file $map_file:$!\n";
my %chr_map;
while (my $line = <$map_fh>) {
    # chomp (not chop) so a final line without a newline keeps its last char
    chomp $line;
    next if $line =~ /^#/;

    my @map = split /\t/, $line;
    next unless @map;    # blank line: nothing to record

    # A name with no replacement column means "drop rows on this chromosome".
    $map[1] = "remove" if @map == 1;
    $chr_map{$map[0]} = $map[1];
}
close $map_fh;

open(my $in_fh,  '<', $input)  or die "Cannot open $input:$!\n";
open(my $out_fh, '>', $output) or die "Cannot create $output:$!\n";

while (my $line = <$in_fh>) {
    chomp $line;

    # LIMIT of -1 keeps trailing empty fields, so rows that end in empty
    # columns are written back with the same number of columns.
    my @in   = split /$delim_split/, $line, -1;
    my $name = $in[$col];

    if (defined $name && defined $chr_map{$name}) {
        $in[$col] = $chr_map{$name};
        if ($in[$col] eq "remove") {
            print "Removed line \"$line\" as chromosome does not have a proper mapping\n";
        }
        else {
            print {$out_fh} join($delim_join, @in), "\n";
        }
    }
    elsif (defined $in[0] && $in[0] =~ /^#/) {
        # comment / header row: pass through
        print {$out_fh} join($delim_join, @in), "\n";
    }
    else {
        # $name is undef for empty lines or rows with too few columns
        my $shown = defined $name ? $name : "";
        print "Removed line \"$line\" as \"$shown\" is not a valid chromosome name\n";
    }
}

close $in_fh;
# Buffered write errors (e.g. disk full) only surface when the handle closes.
close($out_fh) or die "Cannot close $output:$!\n";