diff map_chromosomes.pl @ 0:e60f92a8e1c8 draft default tip

Uploaded
author pmac
date Wed, 01 Jun 2016 03:48:29 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/map_chromosomes.pl	Wed Jun 01 03:48:29 2016 -0400
@@ -0,0 +1,77 @@
+#! /usr/bin/perl -w
+
+use strict;
+use warnings;
+use Scalar::Util qw(looks_like_number);
+
+my @in = ();
+my $column_delimiters_href_split = {
+	'TAB' => q{\t},
+	'COMMA' => ",",
+	'DASH' => "-",
+	'UNDERSCORE' => "_",
+	'PIPE' => q{\|},
+	'DOT' => q{\.},
+	'SPACE' => " "
+};
+
+my $column_delimiters_href_join = {
+        'TAB' => qq{\t},
+        'COMMA' => ",",
+        'DASH' => "-",
+        'UNDERSCORE' => "_",
+        'PIPE' => "|",
+        'DOT' => ".",
+        'SPACE' => " "
+};
+
+# a wrapper for converting between UCSC and ensembl chromosome representations from within galaxy
+# convert_UCSC_ensembl.pl [input] [col] [delimiter] [genome] [out_file1]
+
+die "Check arguments: $0 [input] [col] [delimiter] [map] [out_file1]\n" unless @ARGV == 5;
+die "No columns specified: $ARGV[1]\n" if looks_like_number($ARGV[1]) == 0;
+die "Delimeter must be one of TAB, COMMA, DASH, UNDERSCORE, PIPE, DOT, SPACE\n" unless defined $column_delimiters_href_split->{$ARGV[2]};
+
+# process input
+my $input = $ARGV[0];
+$ARGV[1] =~ s/\s+//g;
+my $col = --$ARGV[1];
+my $delim = $ARGV[2];
+my $map_file = $ARGV[3];
+my $output = $ARGV[4];
+my $delim_split = $column_delimiters_href_split->{$delim};
+my $delim_join = $column_delimiters_href_join->{$delim};
+
+open (MAP, "<$map_file") or die "Cannot open map file $map_file:$!\n";
+my %chr_map;
+while(my $line = <MAP>) {
+	chop $line;
+	next if grep /^#/, $line;
+	my @map = split /\t/, $line;
+	$map[1] = "remove" unless $#map;
+	$chr_map{$map[0]} = $map[1];
+}
+close MAP;
+
+open (IN,  "<$input") or die "Cannot open $input:$!\n";
+open (OUT, ">$output") or die "Cannot create $output:$!\n";
+while (my $line = <IN>) {
+	chop $line;
+	@in = split /$delim_split/, $line; 
+	if(defined $in[$col] && defined $chr_map{$in[$col]}) {
+		$in[$col] = $chr_map{$in[$col]};
+		if($in[$col] eq "remove") {
+			print "Removed line \"$line\" as chromosome does not have a proper mapping\n";
+		} else {
+			print OUT join($delim_join, @in), "\n";
+		}
+	} elsif(grep /^#/, $in[0]) {
+		print OUT join($delim_join, @in), "\n";
+	} else {
+		print "Removed line \"$line\" as \"$in[$col]\" is not a valid chromosome name\n";
+	}
+}
+close IN;
+close OUT;
+
+