comparison splicetrap/bin/TXdbgen.pl @ 7:37a16ff93dd9 draft default tip

planemo upload
author bioitcore
date Thu, 12 Oct 2017 16:26:36 -0400
parents
children
comparison
equal deleted inserted replaced
6:6d54abd510d7 7:37a16ff93dd9
# this script is to generate TXdb database files from bed/gtf file
#
# Steps, in order (all intermediate files live in "<annotation basename>.cache"
# in the current working directory):
#   1. with --gtf, convert the annotation to bed format (gtf2bed.pl)
#   2. scan the bed file for AS events (scanbed2txdb.pl)
#   3. fetch sequences using the *.fa files of the genome folder (get_bed_fa_j.pl)
#   4. generate the files for parallel computing (splitdb.sh)
#   5. build a Bowtie index from the fetched sequences (bowtie-build)
#   6. move the cache folder to <script folder>/../db/<txdb name>
#
# Any failing build step aborts the script with a "TXDBGEN:" message so that a
# half-built database is never installed.

use strict;
use warnings;
use Cwd;
use File::Basename;
use FindBin;
use Getopt::Long;

# Quote a string so that the shell treats it as exactly one literal word,
# whatever spaces or metacharacters it contains.
sub shell_quote
{
    my ($word) = @_;
    $word =~ s/'/'\\''/g;
    return "'$word'";
}

# Run a shell command line that the build depends on; die with a description
# of the step if it cannot be started, is killed by a signal or exits non-zero.
sub run_step
{
    my ($what, $cmd) = @_;
    my $status = system($cmd);
    return if $status == 0;
    die "TXDBGEN: $what: could not run command: $!\n" if $status == -1;
    die sprintf("TXDBGEN: %s: killed by signal %d\n", $what, $status & 127) if $status & 127;
    die sprintf("TXDBGEN: %s: failed with exit status %d\n", $what, $status >> 8);
}

# Run a clean-up command; leftovers are harmless, so a failure only warns.
sub run_cleanup
{
    my ($cmd) = @_;
    system($cmd) == 0 or warn "TXDBGEN: warning: cleanup command failed: $cmd\n";
    return;
}

# Create a directory unless something with that name already exists.
# $label is the human-readable name used in the error message.
sub ensure_dir
{
    my ($dir, $label) = @_;
    return if -e $dir;
    mkdir $dir or die "TXDBGEN: could not create $label: $!\n";
    return;
}

# Refuse to start when an external tool is missing from the PATH.
my @programs = ('split','bowtie-build','sort', 'uniq', 'ls','bash','rm','mv','cut','grep','echo');
foreach my $program (@programs)
{
    die ("CHECK: $program not found\n") if(system("hash $program >/dev/null"));
}

# Folder holding the helper scripts (gtf2bed.pl, scanbed2txdb.pl,
# get_bed_fa_j.pl, splitdb.sh). The variable was used but never declared,
# which does not compile under "use strict".
# NOTE(review): assumes the helpers are installed next to this script - confirm.
my $SrcFolder = $FindBin::RealBin;

my $genomedir = "";

my $annofilename = "";
my $txdbname = "userdefined";
my $knownonly = 0;    # accepted on the command line but not used by this script
my $gtfinput = 0;

GetOptions (
    "g:s"=>\$genomedir,
    "a:s"=>\$annofilename,
    "n:s"=>\$txdbname,
    "gtf"=>\$gtfinput,
    "knownonly"=>\$knownonly
);

my $InputParaDes=" Usage of the script:
-g genome fasta file location
-a annotation file (bed/gtf)
-n txdb name
--gtf specify this if annotation file is in gtf format
";

if($genomedir eq "" or $annofilename eq "")
{
    print $InputParaDes;
    exit;
}

# Check the inputs before resolving them: Cwd::abs_path can return undef for a
# path that does not exist, and every later command would then be garbage.
die "TXDBGEN: genome folder $genomedir not found\n" unless -d $genomedir;
die "TXDBGEN: annotation file $annofilename not found\n" unless -e $annofilename;

$genomedir = Cwd::abs_path($genomedir);
$annofilename = Cwd::abs_path($annofilename);

my $annofilebase = basename($annofilename);

# "mv" would silently nest the cache folder inside an existing destination,
# so refuse before doing any expensive work.
my $dbfolder = "$SrcFolder/../db";
my $txdbfolder = "$dbfolder/$txdbname";
die "TXDBGEN: $txdbfolder already exists, remove it or choose another name with -n\n" if -e $txdbfolder;
ensure_dir($dbfolder, "database folder $dbfolder");

#need a cache folder to avoid mess

my $cachefolder = $annofilebase.".cache";
ensure_dir($cachefolder, "cache folder $cachefolder");

# Pre-quoted pieces shared by the command lines below. Helpers are run with
# the interpreter executing this script rather than the first perl on PATH.
my $sh_perl = shell_quote($^X);
my $sh_src = shell_quote($SrcFolder);
my $sh_cache = shell_quote($cachefolder);

if($gtfinput)
{
    print "TXDBGEN: converting gtf file into bed format\n";
    my $bedfile = "$cachefolder/$annofilebase.bed";
    run_step("converting gtf to bed",
        "$sh_perl $sh_src/gtf2bed.pl " . shell_quote($annofilename) . " >" . shell_quote($bedfile));
    $annofilename = $bedfile;
}


print "TXDBGEN: scan $annofilename for AS events...\n";
run_step("scanning annotation for AS events",
    "$sh_perl $sh_src/scanbed2txdb.pl " . shell_quote($annofilename) . " $sh_cache/TXdb.tmp");
print "TXDBGEN: fetch sequences from $genomedir...\n";
run_step("sorting TXdb.tmp", "sort -k1,1 $sh_cache/TXdb.tmp >$sh_cache/TXdb.tmp.sort");
#get fasta file list
# The folder is quoted but the "/*.fa" suffix is not, so the shell still
# expands the wildcard.
run_step("listing fasta files in $genomedir",
    "ls " . shell_quote($genomedir) . "/*.fa >$sh_cache/chr.list");

run_step("fetching sequences",
    "$sh_perl $sh_src/get_bed_fa_j.pl $sh_cache/TXdb.tmp.sort $sh_cache/chr.list $sh_cache/out.bed $sh_cache/TXdb.fasta");

print "TXDBGEN: generate files for parallel computing...\n";
ensure_dir("$cachefolder/parallel", "$cachefolder/parallel");

# grep exits with status 1 when no line matches; that leaves an empty
# TXdb.bed exactly as before, so it is reported but not treated as a failure.
my $grep_status = system("grep L $sh_cache/out.bed >$sh_cache/TXdb.bed");
if ($grep_status != 0)
{
    my $no_match = ($grep_status != -1 && ($grep_status & 127) == 0 && ($grep_status >> 8) == 1);
    die "TXDBGEN: filtering $cachefolder/out.bed failed\n" unless $no_match;
    warn "TXDBGEN: warning: no 'L' records found in $cachefolder/out.bed, TXdb.bed is empty\n";
}
run_cleanup("rm $sh_cache/out.bed");
run_step("sorting TXdb.tmp.evi", "sort $sh_cache/TXdb.tmp.evi >$sh_cache/TXdb.evi");
run_cleanup("rm $sh_cache/TXdb.tmp.evi");
run_step("splitting database for parallel computing", "bash $sh_src/splitdb.sh $sh_cache/parallel");
print "TXDBGEN: build Bowtie index...\n";

ensure_dir("$cachefolder/btw", "$cachefolder/btw");
run_step("building Bowtie index", "bowtie-build $sh_cache/TXdb.fasta $sh_cache/btw/TXdb");
run_cleanup("rm $sh_cache/TXdb.tmp* $sh_cache/chr.list");
print "TXDBGEN: Copy files to $SrcFolder/../db/$txdbname\n";

run_step("moving $cachefolder to $txdbfolder", "mv $sh_cache " . shell_quote($txdbfolder));
print "TXDBGEN: Done!\n";
95