Mercurial > repos > mir-bioinf > multi_join_left

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/multi_join_serial.xml	Wed Apr 15 14:43:04 2015 -0400
@@ -0,0 +1,92 @@
+<tool id="Multi_Join_serial" name="Join multiple" version="0.0.1" force_history_refresh="True">
+  <description>tab delimited files serially</description>
+  <!-- The command below runs the join script once per "Join file" entry; the last run performs the actual joins. -->
+  <command interpreter="perl">
+	#for $j, $s in enumerate( $Files )
+		#silent	$j
+	#end for
+
+	#for $i, $s in enumerate( $Files )
+	   /opt/galaxy/galaxy-dist/tools/ngs_rna/Unreleased/run-multi_join_serial.pl --join_file $s.joinMe --join_col $s.joinCol --iteration $i --totalfiles $j --with_header $headerYes --resultsfile $Joined_all --log $log
+	  ## One script call per repeat entry; --totalfiles receives the last index found by the first loop.
+          ;
+        #end for
+  </command>
+  <inputs>
+	<repeat name="Files" title="Join file">
+		<param name="joinMe" type="data" checked="yes" format="tabular" label="Join" />
+		<param name="joinCol" label="using column" type="data_column" data_ref="joinMe" />
+	</repeat>
+        <param name="headerYes" type="select" label="Treat first line as header?" help="If header starts with #, it will NOT be read, so this field should be set to no. Otherwise it can be set to yes if first line is header for ALL FILES.">
+                <option value="yes" selected="true">Yes</option>
+                <option value="no">No</option>
+        </param>
+  </inputs>
+  <outputs>
+    	<data format="tabular" name="Joined_all" label="Multi-Join result"/>
+	<data format="txt" name="log" label="debug_info"/>
+  </outputs>
+  <tests> <!-- three-file join on the gene-name columns, first line kept as header -->
+     <test>
+	<param name="Files_0|joinMe" value="multi_join_serial_in1.tab" ftype="tabular"/>
+	<param name="Files_0|joinCol" value="1"/>
+	<param name="Files_1|joinMe" value="multi_join_serial_in2.tab" ftype="tabular"/>
+	<param name="Files_1|joinCol" value="1"/>
+	<param name="Files_2|joinMe" value="multi_join_serial_in3.tab" ftype="tabular"/>
+	<param name="Files_2|joinCol" value="2"/>
+	<param name="headerYes" value="yes"/>
+	<output name="Joined_all" value="multi_join_serial_out.tab" ftype="tabular"/>
+	<output name="log" value="multi_join_serial_debug.txt" ftype="txt"/>
+     </test>
+  </tests>
+  <help>
+
+This tool performs a left-outer join on multiple (at least two) files using a perl script that Ron wrote (thanks, Ron!). The resulting joined file will have the same number of rows as the first file chosen and subsequent files' matches will be shown if present. Rows in the first file without matches in the other files will have empty cells. If none of the input files have a header present, a simple column number header will be added to the output file to denote the start of each set of matches (from each file, start denoted by "C1").
+
+To convert from left-outer join result to inner join result (only include rows in common to all datasets), run Filter out rows and columns with non-numeric values tool with the following options selected (last 3 options, all are drop-down select menus):
+	1. Replace/remove: Empty only
+	2. Remove entire column or row (leave default)
+	3. Remove non-numeric/empty cell-containing ROWS from dataset
+
+
+.. class:: warningmark
+
+This tool may fail because the system runs out of memory, depending on the number and size of the input files and the number of matching lines. The higher all of these are, the more likely the tool is to fail. A red output dataset saying "Job killed" typically means the system ran out of memory and the job was killed as a result. This issue has not been addressed yet.
+
+
+**Steps:**
+	1. Click Add new File for each tab-delimited file you'd like to add and the column you want to join on.
+	2. After adding all files to join, select whether the headers should all be preserved (this should be Yes if all input datasets have headers).
+	3. Click Execute.
+	4. Please report any issues and/or suggestions to Christy.
+
+-----
+
+**Example**
+
+Dataset1::
+
+  chr1 10 20 geneA
+  chr1 50 80 geneB
+  chr5 10 40 geneL
+
+Dataset2::
+
+  geneA tumor-suppressor
+  geneB Foxp2
+  geneC Gnas1
+  geneE INK4a
+
+Joining the 4th column of Dataset1 with the 1st column of Dataset2, no header, will yield::
+
+  C1   C2 C3 C4    C1    C2
+  chr1 10 20 geneA geneA tumor-suppressor
+  chr1 50 80 geneB geneB Foxp2
+  chr5 10 40 geneL
+
+
+</help>
+
+
+</tool>
+
--- a/multi_join_serial/multi_join_serial.xml	Wed Apr 15 14:33:47 2015 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,92 +0,0 @@
-<tool id="Multi_Join_serial" name="Join multiple" version="0.0.1" force_history_refresh="True">
-  <description>tab delimited files serially</description>
-  <!-- cms commenting out to troubleshoot -->
-  <command interpreter="perl">
-	#for $j, $s in enumerate( $Files )
-		#silent	$j
-	#end for
-
-	#for $i, $s in enumerate( $Files )
-	   /opt/galaxy/galaxy-dist/tools/ngs_rna/Unreleased/run-multi_join_serial.pl --join_file $s.joinMe --join_col $s.joinCol --iteration $i --totalfiles $j --with_header $headerYes --resultsfile $Joined_all --log $log
-	  ##print "loop iteration $i.\n";
-          ;
-        #end for
-  </command>
-  <inputs>
-	<repeat name="Files" title="Join file">
-		<param name="joinMe" type="data" checked="yes" format="tabular" label="Join" />
-		<param name="joinCol" label="using column" type="data_column" data_ref="joinMe" />
-	</repeat>
-        <param name="headerYes" type="select" label="Treat first line as header?" help="If header starts with #, it will NOT be read, so this field should be set to no. Otherwise it can be set to yes if first line is header for ALL FILES.">
-                <option value="yes" selected="true">Yes</option>
-                <option value="no">No</option>
-        </param>
-  </inputs>
-  <outputs>
-    	<data format="tabular" name="Joined_all" label="Multi-Join result"/>
-	<data format="txt" name="log" label="debug_info"/>
-  </outputs>
-  <tests>
-     <test>
-	<param name="Files_0|joinMe" value="multi_join_serial_in1.tab" ftype="tabular"/>
-	<param name="Files_0|joinCol" value="1"/>
-	<param name="Files_1|joinMe" value="multi_join_serial_in2.tab" ftype="tabular"/>
-	<param name="Files_1|joinCol" value="1"/>
-	<param name="Files_2|joinMe" value="multi_join_serial_in3.tab" ftype="tabular"/>
-	<param name="Files_2joinCol" value="2"/>
-	<param name="headerYes" value="yes"/>
-	<output name="Joined_all" value="multi_join_serial_out.tab" ftype="tabular"/>
-	<output name="log" value="multi_join_serial_debug.txt" ftype="tabular"/>
-     <test/>
-  <tests/>
-  <help>
-
-This tool performs a left-outer join on multiple (at least two) files using a perl script that Ron wrote (thanks, Ron!). The resulting joined file will have the same number of rows as the first file chosen and subsequent files' matches will be shown if present. Rows in the first file without matches in the other files will have empty cells. If none of the input files have a header present, a simple column number header will be added to the output file to denote the start of each set of matches (from each file, start denoted by "C1").
-
-To convert from left-outer join result to inner join result (only include rows in common to all datasets), run Filter out rows and columns with non-numeric values tool with the following options selected (last 3 options, all are drop-down select menus):
-	1. Replace/remove: Empty only
-	2. Remove entire column or row (leave default)
-	3. Remove non-numeric/empty cell-containing ROWS from dataset
-
-
-.. class:: warningmark
-
-This tool may fail due to the system running out of memory depending on the number and size of input files and number of matching lines. The higher all of these are, the more likely the tool is to fail. A red output dataset saying "Job killed" typically means the system ran into an out of memory error and as a result the job was killed. This issue has yet to be addressed at the moment...
-
-
-**Steps:**
-	1. Click Add new File for each tab-delimited file you'd like to add and the column you want to join on.
-	2. After adding all files to join, select whether the headers should all be preserved (this should be Yes if all input datasets have headers).
-	3. Click Execute.
-	4. Please report any issues and/or suggestions to Christy.
-
------
-
-**Example**
-
-Dataset1::
-
-  chr1 10 20 geneA
-  chr1 50 80 geneB
-  chr5 10 40 geneL
-
-Dataset2::
-
-  geneA tumor-supressor
-  geneB Foxp2
-  geneC Gnas1
-  geneE INK4a
-
-Joining the 4th column of Dataset1 with the 1st column of Dataset2, no header, will yield::
-
-  C1   C2 C3 C4    C1    C2
-  chr1 10 20 geneA geneA tumor-suppressor
-  chr1 50 80 geneB geneB Foxp2
-  chr5 10 40 geneL
-
-
-</help>
-
-
-</tool>
-
--- a/multi_join_serial/run-multi_join_serial.pl	Wed Apr 15 14:33:47 2015 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,196 +0,0 @@
-#!/usr/bin/perl
-
-
-use Getopt::Long;
-use Pod::Usage;
-use IO::File;
-use Data::Dumper;
-
-#require '/opt/galaxy/galaxy-dist/tools/ngs_rna/Unreleased/multi_join_shell.pl';  ##comment this line out when finished testing
-#require '/opt/galaxy/galaxy-dist/tools/ngs_rna/Unreleased/addColumnsFromFile2ToFile1.pl';
-#require '/opt/galaxy/galaxy-dist/tools/ngs_rna/Unreleased/GetOptWC.pm';
-
-GetOptions(
-    "log=s"                     => \$log,
-    "join_file=s"             => \$data_in,
-    "join_col=s"	     => \$coljoin,
-    "time"                   => \$mTime,
-    "q|quiet"                => \$quiet,
-    "iteration=i"	     => \$I,
-    "totalfiles=i"	     => \$N,
-    "with_header=s"	     => \$header_yes,
-    "input_name=s"	     => \$in_name,
-    "resultsfile=s"	     => \$out_file,
-#    "h|help"                 => \$help
-) or pod2usage( -exitval => 2, -verbose => 2 );
-
-
-#check parameters and options
-my $debug = scalar(@ARGV);
-
-$coljoin--;
-#pod2usage(-msg => "To troubleshoot. ARGV should be @ARGV with $debug arguments in it.");
-pod2usage(-msg => "Forward probability should be in [0, 1]!", -exitval => 2, -verbose => 2) if ($probF < 0 || $probF > 1);
-
-$N++;
-
-# #
-use IO::Handle;
-open OUTPUT, '>>',$log or die "cant open this file for OUTPUT: $log. Computer says: $!\n";;
-open ERROR,  '>>', $log  or die "cant open this file for ERROR: $log. Computer says: $!\n";
-STDOUT->fdopen( \*OUTPUT, 'a' ) or die "cant open file $!\n";   #cms changing mode from 'w' to 'a' for multiple files in one run
-STDERR->fdopen( \*ERROR,  'a' ) or die "cant open file $!\n";   #cms changing mode from 'w' to 'a' for multiple files in one run
-# # #
-
-my @options;
-
-my $fileno = $I + 1;
-
-##Keeping track of the input files (one per iteration of this script) in an external file:
-open $Filenames, '>>', "temp_filenames.txt" or die "cannot open the temporary file $!\n";
-print $Filenames "$data_in\t";
-print $Filenames "$coljoin\n";
-
-if (($I==$N-1)&&($N>=2)) {
-        ## At the end of the last iteration
-	close($Filenames);
-
-	print "\nLAST ITERATION COMPLETED and at least two input files provided.\n";
-
-	##Read in file temp_filenames.txt
-	open(my $tmpfile, "<", "temp_filenames.txt") or die "Cannot open temp file: $!";
-	my @fileArray = <$tmpfile>;
-	#unshift @fileArray,$conditions; ##don't need to do this since conditions aren't used here
-	close($tmpfile) or die "what is that??!!! $!";
-
-
-	##Need to send output file name to shell script:
-	push @fileArray, $out_file;  ##adds out_file to the end of fileArray
-	##Also need to send yes/no for keeping header:
-	push @fileArray, $header_yes;
-
-	##Debug:
-	print "\nFirst file fileArray[0] is $fileArray[0].";
-	print "\nOutput file is next-to-last val in fileArray, $fileArray[-2].";
-	print "\nUse header? is last val in fileArray, $fileArray[-1].";
-	print "\nSecond file now is fileArray[2], $fileArray[2].";
-
-	##@fileArray has one file per line,output,header_yes, so $N+1 rows
-	my $f=0;
-	my @first;
-	my @second;
-	do {
-
-		@first = split('\t',$fileArray[$f]);  ##was filename\tJoinCol
-		print "\njoin column from first line is $first[1].";
-
-		##CMS DEALING WITH HEADER OR NOT:
-                if ($header_yes eq "no") {
-			my $fh1;
-                	$fh1 = IO::File->new("<$first[0]");
-			my $line1file1 = $fh1->getline();
-			$line1file1 =~ s/\s+$//;
-			#print "\nline1file1 is $line1file1\n"; ##DEBUG
-			@cols = split "\t",$line1file1;
-			my $numcols1 = @cols;
-			my $head1;
-			for (my $i=1; $i<$numcols1; $i++) {
-				$head1.="C$i\t";
-			}
-			$head1.="C$numcols1\n";
-			open(my $fh_sub, '>', './header1.txt') or die "OOPIES: $!\n";
-			print "\nheader first file $first[0] on next line:\n$head1"; ##DEBUG
-			print $fh_sub $head1;
-			close $fh_sub;
-			system("cat $first[0] >> ./header1.txt");  ##put header in front of file
-			##now want to use ./header1.txt instead of what was in $first[0] earlier
-			$first[0] = "./header1.txt";
-		}
-
-
-
-		@second = split('\t',$fileArray[$f+1]);
-
-		if ($header_yes eq "no") {
-                        my $fh2;
-                        $fh2 = IO::File->new("<$second[0]");
-                        my $line1file2 = $fh2->getline();
-                        $line1file2 =~ s/\s+$//;
-                        @cols = split "\t",$line1file2;
-                        my $numcols2 = @cols;
-                        my $head2;
-                        for (my $i=1; $i<$numcols2; $i++) {
-                                $head2.="C$i\t";
-                        }
-                        $head2.="C$numcols2\n";
-                        open(my $fh_sub, '>', './header2.txt') or die "OOPIES: $!\n";
-                        print "\nheader from $second[0] on next line:\n$head2";  ##DEBUG
-			print $fh_sub $head2;
-                        close $fh_sub;
-			system("cat $second[0] >> ./header2.txt");
-			$second[0]="./header2.txt";
-                }
-
-		print "\ncommand following:\n";
-		print "/opt/galaxy/galaxy-dist/tools/ngs_rna/Unreleased/addColumnsFromFile2ToFile1.pl -File1=$first[0] -File2=$second[0] -cola1=$first[1] -cola2=$second[1] -colb1=$first[1] -colb2=$second[1]";
-		system("/opt/galaxy/galaxy-dist/tools/ngs_rna/Unreleased/addColumnsFromFile2ToFile1.pl", "-File1=$first[0]", "-File2=$second[0]", "-cola1=$first[1]", "-cola2=$second[1]", "-colb1=$first[1]", "-colb2=$second[1]");
-		print "\nOut from system call on next line:\n$!";
-		$f+=2;
-		system("mv file1_file2.txt joined.txt");
-		if ($header_yes eq "no") {
-			system("rm ./header2.txt");
-			system("rm ./header1.txt");
-		}
-	} while ($f < 2);  ##FIRST TWO ONLY!!!
-
-	for ($f; $f<$N; $f++) {
-                my @current = split('\t',$fileArray[$f]);  ##was filename\tJoinCol
-                print "\njoin column from first line is $first[1].";
-
-		if ($header_yes eq "no") {
-                        my $fh;
-                        $fh = IO::File->new("<$current[0]");
-                        my $line1file = $fh->getline();
-                        $line1file =~ s/\s+$//;
-                        @cols = split "\t",$line1file;
-                        my $numcols = @cols;
-                        my $head;
-                        for (my $i=1; $i<$numcols; $i++) {
-                                $head.="C$i\t";
-                        }
-                        $head.="C$numcols\n";
-                        open(my $fh_sub, '>', './header.txt') or die "OOPIES: $!\n";
-			print "\nheader from file $current[0] on next line:\n$head";  ##DEBUG
-                        print $fh_sub $head;
-                        close $fh_sub;
-                        system("cat $current[0] >> ./header.txt");
-                        $current[0]="./header.txt";
-                }
-
-		print "\ncommand following:\n";
-		print "/opt/galaxy/galaxy-dist/tools/ngs_rna/Unreleased/addColumnsFromFile2ToFile1.pl -File1=joined.txt -File2=$current[0] -cola1=$first[1] -cola2=$current[1] -colb1=$first[1] -colb2=$current[1]";
-                system("/opt/galaxy/galaxy-dist/tools/ngs_rna/Unreleased/addColumnsFromFile2ToFile1.pl","-File1=joined.txt", "-File2=$current[0]", "-cola1=$first[1]", "-cola2=$current[1]", "-colb1=$first[1]", "-colb2=$current[1]");
-                print "\nOut from system call on next line:\n$!";
-		system("mv file1_file2.txt joined.txt");
-		if ($header_yes eq "no") {
-			system("rm ./header.txt");
-		}
-        }
-
-	system("mv joined.txt $fileArray[-2]");
-
-	##NOT SURE WHAT TO DO WITH THIS FOR THE MULTI-JOIN TOOL:
-	##Now, make the EC files from the genes-results files (extract appropriate columns):
-	#my $condStr =  multi_join_shell(@fileArray);  ##RSEMgetTPMs needs to take care of carriage returns
-	##NEED TO MODIFY RSEMTOEBSEQ_SHELL SO IT TAKES THE OUTPUT FILENAME AS WELL
-
-	system("rm temp_filenames.txt");
-
-}
-elsif ($N<2) {
-	print "\n<br /><i>Only one file; not running join.</i>\n";
-}
-
-
-
-#print "LOG $mv\n";
--- a/multi_join_serial/test-data/multi_join_serial_in1.tab	Wed Apr 15 14:33:47 2015 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,7 +0,0 @@
-symbol	sample1	sample2	sample3
-CSNK2A1P	12345	1.2345	8.90
-APOM	0.0	0.0	0.0
-HIST1H2AJ	89.5	75.6	32.3
-ASPHD1	0.001	6.98	2.33
-STBD1	0.2545	600.7	0.05
-
--- a/multi_join_serial/test-data/multi_join_serial_in2.tab	Wed Apr 15 14:33:47 2015 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,7 +0,0 @@
-Gene	Sample_4	Sample_5	Sample_6	Sample_7
-NOP10	11.1	22.2	33.3	0.0
-COX7B	0.05	0.0	6.75	0.0
-ASPHD1	10	0.075	3.57	1.14
-TSTD1	1.14	0.788	99.06	94.57
-APOM	123	123	123	123
-
--- a/multi_join_serial/test-data/multi_join_serial_in3.tab	Wed Apr 15 14:33:47 2015 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,7 +0,0 @@
-Experiment	GENE	SampleX	SampleY	SampleZ
-1	HIST1H2AJ	0	0	0
-1	COX7B	1.1100	59.142	16602
-1	ZYX	1.04	1.04	1.04
-1	APOM	132	85	97
-2	PFDN6	0.06	1.87	99.96
-2	NPIPL3	1234	80.14	0.075
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/run-multi_join_serial.pl	Wed Apr 15 14:43:04 2015 -0400
@@ -0,0 +1,196 @@
+#!/usr/bin/perl
+
+
+use Getopt::Long;
+use Pod::Usage;
+use IO::File;
+use Data::Dumper;
+
+#require '/opt/galaxy/galaxy-dist/tools/ngs_rna/Unreleased/multi_join_shell.pl';  ##comment this line out when finished testing
+#require '/opt/galaxy/galaxy-dist/tools/ngs_rna/Unreleased/addColumnsFromFile2ToFile1.pl';
+#require '/opt/galaxy/galaxy-dist/tools/ngs_rna/Unreleased/GetOptWC.pm';
+
+# Command-line options.  The Galaxy wrapper calls this script once per input
+# file: --iteration is the 0-based index of the call and --totalfiles is the
+# index of the last call.  --time, --quiet and --input_name are still accepted
+# but nothing in this script reads them.
+my ($log, $data_in, $coljoin, $mTime, $quiet, $I, $N, $header_yes, $in_name, $out_file);
+GetOptions(
+    "log=s"         => \$log,
+    "join_file=s"   => \$data_in,
+    "join_col=s"    => \$coljoin,
+    "time"          => \$mTime,
+    "q|quiet"       => \$quiet,
+    "iteration=i"   => \$I,
+    "totalfiles=i"  => \$N,
+    "with_header=s" => \$header_yes,
+    "input_name=s"  => \$in_name,
+    "resultsfile=s" => \$out_file,
+) or pod2usage( -exitval => 2, -verbose => 2 );
+
+# Fail early on missing or malformed options instead of appending an empty or
+# bogus row to the bookkeeping file.
+for my $required ( [ log => $log ], [ join_file => $data_in ], [ join_col => $coljoin ],
+                   [ iteration => $I ], [ totalfiles => $N ] ) {
+    die "Missing required option --$required->[0]\n" unless defined $required->[1];
+}
+die "--join_col must be a positive column number, got '$coljoin'\n" unless $coljoin =~ /\A[1-9]\d*\z/;
+
+$coljoin--;    # recorded one lower than given (presumably 1-based -> 0-based; confirm against the join script)
+$N++;          # index of the last call -> total number of files
+
+# Send everything this script prints (STDOUT and STDERR) to the log file;
+# append mode is needed because every call of one run shares the same log.
+use IO::Handle;
+open my $log_out, '>>', $log or die "cant open this file for OUTPUT: $log. Computer says: $!\n";
+open my $log_err, '>>', $log or die "cant open this file for ERROR: $log. Computer says: $!\n";
+STDOUT->fdopen( $log_out, 'a' ) or die "cant open file $!\n";
+STDERR->fdopen( $log_err, 'a' ) or die "cant open file $!\n";
+
+# Record this call's input file and join column, one "file<TAB>column" row per call.
+open my $Filenames, '>>', "temp_filenames.txt" or die "cannot open the temporary file $!\n";
+print $Filenames "$data_in\t$coljoin\n";
+
+## ---------------------------------------------------------------------------
+## Final call only: join all recorded inputs.
+##   1. read temp_filenames.txt (one "file<TAB>column" row per call so far),
+##   2. join input 1 with input 2 into joined.txt,
+##   3. join every further input onto joined.txt,
+##   4. move joined.txt to --resultsfile and remove temp_filenames.txt.
+## Earlier calls of a multi-file run skip both branches and simply end.
+## ---------------------------------------------------------------------------
+if (($I==$N-1)&&($N>=2)) {
+	close($Filenames) or die "cannot close the temporary file $!\n";
+
+	print "\nLAST ITERATION COMPLETED and at least two input files provided.\n";
+
+	## chomp matters: without it each join column keeps its trailing newline,
+	## which then ends up inside the -cola/-colb arguments and the log lines.
+	open(my $tmpfile, "<", "temp_filenames.txt") or die "Cannot open temp file: $!";
+	chomp(my @rows = <$tmpfile>);
+	close($tmpfile) or die "what is that??!!! $!";
+	if (@rows < $N) {
+		die "Expected $N rows in temp_filenames.txt but found " . scalar(@rows) . "\n";
+	}
+
+	## Each entry is [ file name, join column ]; only the first $N rows are used.
+	my @inputs = map { [ split /\t/, $_ ] } @rows[0 .. $N-1];
+	my ($first, $second, @others) = @inputs;
+
+	## "no" means the inputs have no header row, so one is generated per file.
+	my $add_header = ($header_yes eq "no");
+
+	##Debug:
+	print "\nFirst file is $first->[0].";
+	print "\nSecond file is $second->[0].";
+	print "\nOutput file is $out_file.";
+	print "\nUse header? is $header_yes.";
+
+	## Step 2: the first two inputs.  Without real headers each file is copied
+	## behind a generated C1..Cn row and the copy is joined instead.
+	print "\njoin column from first line is $first->[1].";
+	my $left  = $first->[0];
+	my $right = $second->[0];
+	if ($add_header) {
+		$left  = add_generated_header($left,  './header1.txt');
+		$right = add_generated_header($right, './header2.txt');
+	}
+	join_pair($left, $right, $first->[1], $second->[1]);
+	if ($add_header) {
+		unlink('./header2.txt', './header1.txt');
+	}
+
+	## Step 3: fold every remaining input into the running result.  joined.txt
+	## is matched on the first file's join column, exactly as in step 2.
+	for my $current (@others) {
+		print "\njoin column from current file is $current->[1].";
+		my $next = $current->[0];
+		$next = add_generated_header($next, './header.txt') if $add_header;
+		join_pair('joined.txt', $next, $first->[1], $current->[1]);
+		unlink('./header.txt') if $add_header;
+	}
+
+	## Step 4: mv (not rename) because the result file may be on another
+	## filesystem; the list form keeps the shell away from the file name.
+	system("mv", "joined.txt", $out_file) == 0
+		or die "\nCould not move joined.txt to $out_file (status $?)\n";
+
+	## The bookkeeping file is no longer needed once the result is in place.
+	unlink("temp_filenames.txt");
+
+}
+elsif ($N<2) {
+	## A single input: there is nothing to join, so only note that in the log.
+	print "\n<br /><i>Only one file; not running join.</i>\n";
+}
+
+
+## add_generated_header($path, $copy)
+##   Writes $copy as a header row "C1<TAB>C2<TAB>...<TAB>Cn" followed by the
+##   full contents of $path, where n is the number of tab-separated fields on
+##   the first line of $path once trailing whitespace is removed.
+##   Returns $copy so the caller can join the copy instead of the original.
+##   Dies if $path cannot be read or $copy cannot be written.
+sub add_generated_header {
+	my ($path, $copy) = @_;
+
+	open(my $in, '<', $path) or die "Cannot read $path: $!\n";
+	my $first_line = <$in>;
+	$first_line = '' unless defined $first_line;   ## empty input file
+
+	(my $trimmed = $first_line) =~ s/\s+$//;
+	my @cols = split "\t", $trimmed;
+	my $numcols = @cols || 1;   ## a blank first line still counts as one column
+	my $head = join("\t", map { "C$_" } 1 .. $numcols) . "\n";
+	print "\nheader from file $path on next line:\n$head";  ##DEBUG
+
+	## Copy in Perl rather than through "cat $path >> ..." so that a file name
+	## with spaces or shell metacharacters cannot break or alter a command.
+	open(my $out, '>', $copy) or die "OOPIES: $!\n";
+	print {$out} $head, $first_line;
+	while (my $line = <$in>) {
+		print {$out} $line;
+	}
+	close($in);
+	close($out) or die "Cannot finish writing $copy: $!\n";
+
+	return $copy;
+}
+
+
+## join_pair($left, $right, $left_col, $right_col)
+##   Runs addColumnsFromFile2ToFile1.pl on the two files, passing $left_col as
+##   -cola1/-colb1 and $right_col as -cola2/-colb2, then renames file1_file2.txt
+##   (the file that script is expected to leave behind) to joined.txt.
+##   Dies when the script exits non-zero or the rename fails, so a broken join
+##   stops the run instead of silently yielding an incomplete result.
+sub join_pair {
+	my ($left, $right, $left_col, $right_col) = @_;
+
+	my @command = (
+		"/opt/galaxy/galaxy-dist/tools/ngs_rna/Unreleased/addColumnsFromFile2ToFile1.pl",
+		"-File1=$left",
+		"-File2=$right",
+		"-cola1=$left_col",
+		"-cola2=$right_col",
+		"-colb1=$left_col",
+		"-colb2=$right_col",
+	);
+	print "\ncommand following:\n";
+	print join(" ", @command);
+
+	## system() reports the child's status in its return value and $?, not $!.
+	my $status = system(@command);
+	print "\nExit status from system call on next line:\n$status";
+	if ($status != 0) {
+		die "\nJoin of $left and $right failed (status $status)\n";
+	}
+
+	rename("file1_file2.txt", "joined.txt")
+		or die "\nCannot rename file1_file2.txt to joined.txt: $!\n";
+	return;
+}
+
+
+
+#print "LOG $mv\n";
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/multi_join_serial_in1.tab	Wed Apr 15 14:43:04 2015 -0400
@@ -0,0 +1,7 @@
+symbol	sample1	sample2	sample3
+CSNK2A1P	12345	1.2345	8.90
+APOM	0.0	0.0	0.0
+HIST1H2AJ	89.5	75.6	32.3
+ASPHD1	0.001	6.98	2.33
+STBD1	0.2545	600.7	0.05
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/multi_join_serial_in2.tab	Wed Apr 15 14:43:04 2015 -0400
@@ -0,0 +1,7 @@
+Gene	Sample_4	Sample_5	Sample_6	Sample_7
+NOP10	11.1	22.2	33.3	0.0
+COX7B	0.05	0.0	6.75	0.0
+ASPHD1	10	0.075	3.57	1.14
+TSTD1	1.14	0.788	99.06	94.57
+APOM	123	123	123	123
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/multi_join_serial_in3.tab	Wed Apr 15 14:43:04 2015 -0400
@@ -0,0 +1,7 @@
+Experiment	GENE	SampleX	SampleY	SampleZ
+1	HIST1H2AJ	0	0	0
+1	COX7B	1.1100	59.142	16602
+1	ZYX	1.04	1.04	1.04
+1	APOM	132	85	97
+2	PFDN6	0.06	1.87	99.96
+2	NPIPL3	1234	80.14	0.075