view get_genelist.pl @ 25:dd21719ca6e3 draft

Uploaded
author big-tiandm
date Thu, 06 Nov 2014 01:42:55 -0500
parents 7d12d9b130bc
children
line wrap: on
line source

#!/usr/bin/perl -w
#Filename:
#Author: chentt
#Email:
#Date: 2012-4-6
#Modified:
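#Description: Extracts "gene" features from a tab-delimited, GFF-style annotation file and writes one line per gene (ID, chr, start, end, strand).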
my $version=1.00;

use strict;
use Getopt::Long;

# Main program.
#
# Reads the tab-delimited annotation file given with -i, keeps only the
# records whose third column is "gene", and writes one line per gene to the
# file given with -o:
#
#     ID <TAB> chr <TAB> start <TAB> end <TAB> strand
#
# The ID is taken from the first ";"-separated attribute in column 9 that
# matches /Alias=(\S+)$/. Genes without such an attribute get a sequential
# placeholder ID: unknown1, unknown2, ...
#
# Prints the usage text and exits when -i or -o is missing or -h is given.
# Dies with a message if either file cannot be opened or the output cannot
# be flushed on close.
my %opts;
GetOptions(\%opts,"i=s","o=s","h");
if (!(defined $opts{i} and defined $opts{o}) || defined $opts{h}) { #necessary arguments
	usage();
}

# Three-argument open with lexical handles: the file names are never
# interpreted as open modes, and failures are reported instead of ignored.
open my $in_fh, '<', $opts{i}
	or die "Cannot open input file '$opts{i}': $!\n";
open my $out_fh, '>', $opts{o}
	or die "Cannot open output file '$opts{o}': $!\n";

print {$out_fh} "#ID\tchr\tstart\tend\tstrand\n";

my $n=1; # number to use for the next placeholder ("unknownN") ID
while (my $aline=<$in_fh>) {
	chomp $aline;
	next if($aline=~/^\#/);
	my @tmp=split/\t/,$aline;

	# Blank or truncated lines have no feature-type column; skip them
	# quietly instead of comparing an undefined value.
	next unless defined $tmp[2] and $tmp[2] eq "gene";

	$tmp[0]=~s/Chr\./Chr/;
	#$tmp[0]=~s/Chr/chr/;

	# Take the first attribute that carries an alias, if there is one.
	my @infor=defined $tmp[8] ? split(/;/,$tmp[8]) : ();
	my $ID;
	for my $attribute (@infor) {
		if ($attribute=~/Alias\=(\S+)$/) {
			$ID=$1;
			last;
		}
	}

	# Assign the placeholder once per gene, and only after every attribute
	# has been checked, so the counter advances by exactly one for each
	# gene that has no alias.
	unless (defined $ID) {
		$ID="unknown$n";
		$n++;
	}

	print {$out_fh} "$ID\t$tmp[0]\t$tmp[3]\t$tmp[4]\t$tmp[6]\n";
}
close $in_fh;
# Buffered write errors (e.g. disk full) only surface when the handle is closed.
close $out_fh
	or die "Cannot close output file '$opts{o}': $!\n";


# Print the version and command-line help to the currently selected output
# handle, then terminate the program with exit status 1.
sub usage {
	my @help_lines = (
		"Version $version",
		"Usage:",
		"$0 -i -o -h",
		"options:",
		"-i input cluster file",
		"-o output file",
		"-h help",
	);
	# Every line, including the last one, ends with a newline.
	print join("\n", @help_lines), "\n";
	exit(1);
}