Mercurial > repos > mir-bioinf > multi_join_left
comparison multi_join_serial/run-multi_join_serial.pl @ 0:1b7d0d2a3543 draft
Uploaded
| author | mir-bioinf |
|---|---|
| date | Wed, 15 Apr 2015 14:23:56 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:1b7d0d2a3543 |
|---|---|
#!/usr/bin/perl
#
# run-multi_join_serial.pl
#
# Driver for a serial multi-file join. The script is invoked once per input
# file ("iteration"). Every invocation appends "<file>\t<join column>" to
# temp_filenames.txt in the current directory. On the last iteration (and only
# when at least two files were supplied) the recorded files are joined left to
# right with addColumnsFromFile2ToFile1.pl, and the final table is moved to
# the path given by --resultsfile.
#
# STDOUT and STDERR are both appended to the file given by --log.

use strict;
use warnings;

use Getopt::Long;
use Pod::Usage;
use IO::File;
use IO::Handle;
use Data::Dumper;
use File::Copy qw(move);

#require '/opt/galaxy/galaxy-dist/tools/ngs_rna/Unreleased/multi_join_shell.pl'; ##comment this line out when finished testing
#require '/opt/galaxy/galaxy-dist/tools/ngs_rna/Unreleased/addColumnsFromFile2ToFile1.pl';
#require '/opt/galaxy/galaxy-dist/tools/ngs_rna/Unreleased/GetOptWC.pm';

# External two-file join script that does the actual work.
my $JOIN_TOOL = '/opt/galaxy/galaxy-dist/tools/ngs_rna/Unreleased/addColumnsFromFile2ToFile1.pl';

# One "<file>\t<join column>" line is appended here per invocation.
my $FILE_LIST = 'temp_filenames.txt';

# NOTE(review): the join script presumably writes its result to this fixed
# name in the current directory -- confirm against addColumnsFromFile2ToFile1.pl.
my $JOIN_OUTPUT = 'file1_file2.txt';

# Running result of all joins performed so far.
my $JOINED = 'joined.txt';

# --time, --quiet and --input_name are accepted for interface compatibility
# but are not used by this script.
my ( $log, $data_in, $coljoin, $mTime, $quiet, $I, $N, $header_yes, $in_name,
    $out_file );

GetOptions(
    "log=s"         => \$log,
    "join_file=s"   => \$data_in,
    "join_col=s"    => \$coljoin,
    "time"          => \$mTime,
    "q|quiet"       => \$quiet,
    "iteration=i"   => \$I,
    "totalfiles=i"  => \$N,
    "with_header=s" => \$header_yes,
    "input_name=s"  => \$in_name,
    "resultsfile=s" => \$out_file,
) or pod2usage( -exitval => 2, -verbose => 2 );

defined $data_in
    or die "--join_file is required\n";

# Missing numeric/string options behave as 0 / '' exactly as they did before
# strictures were enabled, but without "uninitialized" warnings.
$I          = 0  unless defined $I;
$header_yes = '' unless defined $header_yes;

# The join column is decremented before being recorded and handed to the join
# script; --totalfiles is incremented before being compared with --iteration.
$coljoin--;
$N++;

# Send everything printed from here on (including output of child processes)
# to the log file. Append mode, because every iteration writes to the same log.
open my $log_out, '>>', $log
    or die "cant open this file for OUTPUT: $log. Computer says: $!\n";
open my $log_err, '>>', $log
    or die "cant open this file for ERROR: $log. Computer says: $!\n";
STDOUT->fdopen( $log_out, 'a' ) or die "cant open file $!\n";
STDERR->fdopen( $log_err, 'a' ) or die "cant open file $!\n";

# Record this iteration's input file and join column.
open my $list_out, '>>', $FILE_LIST
    or die "cannot open the temporary file $!\n";
print {$list_out} "$data_in\t$coljoin\n";
close $list_out
    or die "cannot write the temporary file $!\n";

if ( $I == $N - 1 && $N >= 2 ) {

    # Last iteration: every input has been recorded, so run the joins.
    defined $out_file
        or die "--resultsfile is required on the last iteration\n";

    print "\nLAST ITERATION COMPLETED and at least two input files provided.\n";

    open my $list_in, '<', $FILE_LIST
        or die "Cannot open temp file: $!";
    chomp( my @entries = <$list_in> );    # chomp: keep "\n" out of the join columns
    close $list_in
        or die "Cannot close temp file: $!";

    @entries >= $N
        or die "expected $N entries in $FILE_LIST but found "
        . scalar(@entries) . "\n";

    # Each element is [ file name, join column ].
    my @inputs = map { [ split /\t/, $_ ] } @entries[ 0 .. $N - 1 ];

    ##Debug:
    print "\nFirst file is $inputs[0][0].";
    print "\nSecond file is $inputs[1][0].";
    print "\nOutput file is $out_file.";
    print "\nUse header? is $header_yes.";

    # When the inputs have no header line, each one is copied to a scratch
    # file with a generated "C1 .. Cn" header before being joined.
    my $synthesize = $header_yes eq "no";

    # First join: file 0 with file 1.
    my ( $first_file,  $first_col )  = @{ $inputs[0] };
    my ( $second_file, $second_col ) = @{ $inputs[1] };
    print "\njoin column from first line is $first_col.";

    if ($synthesize) {
        $first_file  = with_synthetic_header( $first_file,  './header1.txt' );
        $second_file = with_synthetic_header( $second_file, './header2.txt' );
    }
    join_pair( $first_file, $first_col, $second_file, $second_col );
    if ($synthesize) {
        unlink './header2.txt', './header1.txt';
    }

    # Remaining joins: running result with each further file. The running
    # result is always keyed on the first file's join column.
    for my $idx ( 2 .. $N - 1 ) {
        my ( $file, $col ) = @{ $inputs[$idx] };

        $file = with_synthetic_header( $file, './header.txt' ) if $synthesize;
        join_pair( $JOINED, $first_col, $file, $col );
        unlink './header.txt' if $synthesize;
    }

    # List-form move: no shell, so any characters in the output path are safe.
    move( $JOINED, $out_file )
        or die "cannot move $JOINED to $out_file: $!\n";

    unlink $FILE_LIST
        or warn "cannot remove $FILE_LIST: $!\n";
}
elsif ( $N < 2 ) {
    print "\n<br /><i>Only one file; not running join.</i>\n";
}

# Copy $source to $target, preceded by a generated header line "C1\tC2\t...Cn"
# where n is the number of tab-separated columns in the first line of $source
# (trailing whitespace ignored). Returns $target. Dies if either file cannot
# be opened or written.
sub with_synthetic_header {
    my ( $source, $target ) = @_;

    open my $in, '<', $source
        or die "cant open $source for reading: $!\n";
    open my $out, '>', $target
        or die "OOPIES: $!\n";

    my $first_line = <$in>;
    my $probe = defined $first_line ? $first_line : '';
    $probe =~ s/\s+$//;
    my @cols    = split /\t/, $probe;
    my $numcols = @cols;

    # An empty source yields "C0", as the original loop-based construction did.
    my $header = join( '', map { "C$_\t" } 1 .. $numcols - 1 ) . "C$numcols\n";
    print "\nheader from file $source on next line:\n$header";    ##DEBUG

    print {$out} $header;
    print {$out} $first_line if defined $first_line;
    while ( my $line = <$in> ) {
        print {$out} $line;
    }

    close $in;
    close $out
        or die "cant finish writing $target: $!\n";

    return $target;
}

# Run the external join script on ($file1, $col1) and ($file2, $col2) and
# rename its output to the running result file. The command is run in list
# form, so no shell is involved. Dies if the script cannot be started or if
# its output file is missing afterwards; a non-zero exit status is logged.
sub join_pair {
    my ( $file1, $col1, $file2, $col2 ) = @_;

    my @cmd = (
        $JOIN_TOOL,
        "-File1=$file1", "-File2=$file2",
        "-cola1=$col1",  "-cola2=$col2",
        "-colb1=$col1",  "-colb2=$col2",
    );

    print "\ncommand following:\n";
    print "@cmd";

    my $status = system(@cmd);
    die "cannot execute $JOIN_TOOL: $!\n" if $status == -1;

    # $? (returned by system) carries the child's result; $! does not.
    printf "\nJoin command exit status: %d", $status >> 8;
    printf " (killed by signal %d)", $status & 127 if $status & 127;

    rename( $JOIN_OUTPUT, $JOINED )
        or die "cannot rename $JOIN_OUTPUT to $JOINED: $!\n";

    return;
}

__END__

=head1 NAME

run-multi_join_serial.pl - record one input per call and join all inputs on the last call

=head1 SYNOPSIS

run-multi_join_serial.pl --log=FILE --join_file=FILE --join_col=N
    --iteration=I --totalfiles=N --with_header=yes|no --resultsfile=FILE
    [--input_name=NAME] [--time] [--quiet]

=cut
