diff get_genelist.pl @ 23:cad6a1dfb174 draft

Uploaded
author big-tiandm
date Wed, 05 Nov 2014 21:11:49 -0500
parents 7d12d9b130bc
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/get_genelist.pl	Wed Nov 05 21:11:49 2014 -0500
@@ -0,0 +1,62 @@
+#!/usr/bin/perl -w
+#Filename:
+#Author: chentt
+#Email:
+#Date: 2012-4-6
+#Modified:
+#Description:
+my $version=1.00;
+
+use strict;
+use Getopt::Long;
+
+# Extract every "gene" feature from a tab-separated annotation file and write
+# one line per gene to the output file:
+#   ID <tab> chr <tab> start <tab> end <tab> strand
+# Columns used from each input line: 1 (sequence name), 3 (feature type),
+# 4 (start), 5 (end), 7 (strand) and 9 (";"-separated attributes).
+# The ID comes from the first attribute ending in "Alias=<text>"; genes without
+# one are named unknown1, unknown2, ... in input order.
+#
+# Options:
+#   -i  input file   (required)
+#   -o  output file  (required)
+#   -h  print the usage text and exit
+my %opts;
+GetOptions(\%opts,"i=s","o=s","h");
+if (!(defined $opts{i} and defined $opts{o}) || defined $opts{h}) { #necessary arguments
+	usage();
+}
+
+# Three-argument open with lexical handles: a file name can no longer be
+# misread as an open mode, and a failed open stops the run with a reason
+# instead of silently producing a header-only result.
+open my $in_fh, '<', $opts{i} or die "Cannot open input file '$opts{i}': $!\n";
+open my $out_fh, '>', $opts{o} or die "Cannot open output file '$opts{o}': $!\n";
+print {$out_fh} "#ID\tchr\tstart\tend\tstrand\n";
+
+my $n=1; # suffix for the next gene that has no Alias
+while (my $aline=<$in_fh>) {
+	chomp $aline;
+	next if($aline=~/^\#/);
+	my @tmp=split/\t/,$aline;
+	# Blank or truncated lines have no feature-type column; skip them
+	# rather than comparing an undefined value.
+	next unless (defined $tmp[2] and $tmp[2] eq "gene");
+
+	$tmp[0]=~s/Chr\./Chr/; # e.g. "Chr.1" becomes "Chr1"
+	#$tmp[0]=~s/Chr/chr/;
+
+	# Use the first attribute that ends in Alias=<non-whitespace text>.
+	my $ID;
+	my @infor=defined $tmp[8] ? split(/;/,$tmp[8]) : ();
+	foreach my $attr (@infor) {
+		if ($attr=~/Alias\=(\S+)$/) {
+			$ID=$1;
+			last;
+		}
+	}
+	# Fall back to a placeholder only after every attribute has been checked,
+	# so the counter advances exactly once per unnamed gene and a gene with
+	# no attribute column still gets an ID.
+	unless (defined $ID) {
+		$ID="unknown$n";
+		$n++;
+	}
+
+	print {$out_fh} "$ID\t$tmp[0]\t$tmp[3]\t$tmp[4]\t$tmp[6]\n";
+}
+close $in_fh;
+# Buffered write errors (e.g. a full disk) only surface when the handle is closed.
+close $out_fh or die "Cannot finish writing '$opts{o}': $!\n";
+
+
+# Print the command-line help text (script version, invocation and options)
+# to standard output, then terminate the program with exit status 1.
+# Takes no arguments and never returns to the caller.
+sub usage{
+	my $help_text=<<"USAGE";
+Version $version
+Usage:
+$0 -i -o -h
+options:
+-i input cluster file
+-o output file
+-h help
+USAGE
+	print $help_text;
+	exit(1);
+}
\ No newline at end of file