Mercurial > repos > basfplant > interproscan_to_excel

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/export_iprscan_to_Excel/Galaxy-Workflow-Export_xml_and_raw_output_from_iprscan_to_Excel.ga	Tue Mar 05 04:00:19 2013 -0500
@@ -0,0 +1,89 @@
+{
+    "a_galaxy_workflow": "true",
+    "annotation": "",
+    "format-version": "0.1",
+    "name": "Export xml and raw output from iprscan to Excel",
+    "steps": {
+        "0": {
+            "annotation": "",
+            "id": 0,
+            "input_connections": {},
+            "inputs": [],
+            "name": "Interproscan functional predictions of ORFs",
+            "outputs": [
+                {
+                    "name": "output",
+                    "type": "txt"
+                }
+            ],
+            "position": {
+                "left": 200,
+                "top": 169
+            },
+            "post_job_actions": {},
+            "tool_errors": null,
+            "tool_id": "interproscan",
+            "tool_state": "{\"__page__\": 0, \"input\": \"null\", \"appl\": \"[\\\"seg\\\", \\\"profilescan\\\", \\\"fprintscan\\\", \\\"patternscan\\\", \\\"superfamily\\\", \\\"hmmpir\\\", \\\"hmmpfam\\\", \\\"hmmsmart\\\", \\\"hmmtigr\\\", \\\"hmmpanther\\\", \\\"hamap\\\", \\\"gene3d\\\", \\\"coils\\\", \\\"blastprodom\\\"]\", \"chromInfo\": \"\\\"/home/katrien/galaxy-dist/tool-data/shared/ucsc/chrom/?.len\\\"\", \"oformat\": \"\\\"xml\\\"\"}",
+            "tool_version": "1.1",
+            "type": "tool",
+            "user_outputs": []
+        },
+        "1": {
+            "annotation": "",
+            "id": 1,
+            "input_connections": {},
+            "inputs": [],
+            "name": "Interproscan functional predictions of ORFs",
+            "outputs": [
+                {
+                    "name": "output",
+                    "type": "txt"
+                }
+            ],
+            "position": {
+                "left": 195,
+                "top": 277
+            },
+            "post_job_actions": {},
+            "tool_errors": null,
+            "tool_id": "interproscan",
+            "tool_state": "{\"__page__\": 0, \"input\": \"null\", \"appl\": \"[\\\"seg\\\", \\\"profilescan\\\", \\\"fprintscan\\\", \\\"patternscan\\\", \\\"superfamily\\\", \\\"hmmpir\\\", \\\"hmmpfam\\\", \\\"hmmsmart\\\", \\\"hmmtigr\\\", \\\"hmmpanther\\\", \\\"hamap\\\", \\\"gene3d\\\", \\\"coils\\\", \\\"blastprodom\\\"]\", \"chromInfo\": \"\\\"/home/katrien/galaxy-dist/tool-data/shared/ucsc/chrom/?.len\\\"\", \"oformat\": \"\\\"raw\\\"\"}",
+            "tool_version": "1.1",
+            "type": "tool",
+            "user_outputs": []
+        },
+        "2": {
+            "annotation": "",
+            "id": 2,
+            "input_connections": {
+                "input|input1": {
+                    "id": 0,
+                    "output_name": "output"
+                },
+                "input|input2": {
+                    "id": 1,
+                    "output_name": "output"
+                }
+            },
+            "inputs": [],
+            "name": "Export iprscan output to Excel",
+            "outputs": [
+                {
+                    "name": "output",
+                    "type": "xlsx"
+                }
+            ],
+            "position": {
+                "left": 547,
+                "top": 229
+            },
+            "post_job_actions": {},
+            "tool_errors": null,
+            "tool_id": "iprscanToExcel",
+            "tool_state": "{\"input\": \"{\\\"input2\\\": null, \\\"input1\\\": null, \\\"select\\\": \\\"3\\\", \\\"__current_case__\\\": 2}\", \"chromInfo\": \"\\\"/home/katrien/galaxy-dist/tool-data/shared/ucsc/chrom/?.len\\\"\", \"__page__\": 0}",
+            "tool_version": "1.0",
+            "type": "tool",
+            "user_outputs": []
+        }
+    }
+}
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/export_iprscan_to_Excel/iprscanToExcel.props	Tue Mar 05 04:00:19 2013 -0500
@@ -0,0 +1,14 @@
+# iprscanToExcel.PROPS
+#
+# author: Katrien Bernaerts and Domantas Motiejunas
+# corresponding author: domantas dot motiejunas at cropdesign dot com
+# affiliation: CropDesign N.V., a BASF Plant Science Company - Technologiepark 3, 9052 Zwijnaarde - Belgium
+
+# Date: 20/4/2012
+#
+# Properties file for iprscanToExcel
+# Use a comma as delimiter between the different flags. A flag should always start with -
+#
+delimiter=,
+flags=-xml, -raw, -excel
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/export_iprscan_to_Excel/iprscanToExcel.xml	Tue Mar 05 04:00:19 2013 -0500
@@ -0,0 +1,66 @@
+<tool id="iprscanToExcel" name="Export iprscan output to Excel"  version="1.0">
+	<description>Export iprscan output to Excel</description>
+    <command>
+#if $input.select == '1':
+	java -jar ${GALAXY_ROOT_DIR}/tools/iprscan/iprscanToExcel_v20.jar -xml $xmlfile -excel $output
+#else if $input.select == '2':
+	java -jar ${GALAXY_ROOT_DIR}/tools/iprscan/iprscanToExcel_v20.jar -raw $rawfile -excel $output
+#else:
+	java -jar ${GALAXY_ROOT_DIR}/tools/iprscan/iprscanToExcel_v20.jar -xml $input1 -raw $input2 -excel $output
+#end if
+	</command>
+        <inputs>
+		<conditional name="input">
+    			<param name="select" type="select" label="Mode">
+        			<option value="1">convert XML file to Excel to get the iprscan summary tables for each protein</option>
+        			<option value="2">convert raw file to Excel to get all the iprscan results in one table</option>
+				<option value="3" selected="true">convert both XML and raw file to Excel</option>
+    			</param>
+    			<when value="1">
+        			<param name="xmlfile" type="data" format="xml" label="Select the desired XML file from history" />
+    			</when>
+    			<when value="2">
+        			<param name="rawfile" type="data" format="raw" label="Select the desired raw data file from history" />
+    			</when>
+    			<when value="3">
+        			<param name="input1" type="data" format="xml" label="Select the desired XML file from history" />
+        			<param name="input2" type="data" format="raw" label="Select the desired raw data file from history" />
+    			</when>
+		</conditional>
+
+        </inputs>
+    <outputs>
+        <data format="xlsx" name="output"></data>
+    </outputs>
+    <requirements>
+    </requirements>
+    <help>
+**What it does**
+
+iprscanToExcel converts raw and/or xml output files from the interproscan program to Excel format (xlsx), which is more user-friendly and convenient than e.g. xml. Three modes of operation are available: convert both XML and raw iprscan output files to Excel, convert only the xml output file to Excel or convert only the raw file to Excel.
+
+The xml output file of the interproscan program contains the source data for the Excel tabsheet "summary tables". Those summary tables give for each protein family information concerning the detailed matches, the parent, the child_list, where they are found_in, the GO-terms, ...
+
+The raw output file of the interproscan program contains the source data for the Excel tabsheet "iprscan results", containing an overview table with proteinID, protein crc64, protein length, match dbname, classification id, classification description, start, end, score, status, date, interproID, interpro name, (title, GO number, description)n. The columns can be sorted and filtered via the filters present in the headers of the columns.
+
+The program requires the availability of raw and/or xml files in the Galaxy history. The files can be generated via the application "Interproscan functional predictions" (under the header Sequence Annotation).
+
+**Author and affiliation**
+
+Katrien Bernaerts and Domantas Motiejunas, 10/06/2012
+
+Corresponding author: domantas dot motiejunas at cropdesign dot com
+
+
+
+Affiliation: CropDesign N.V., a BASF Plant Science Company - Technologiepark 3, 9052 Zwijnaarde - Belgium
+
+**Terms of use**
+
+iprscanToExcel - Copyright (C) 2012 CropDesign N.V. - this software may be used, copied and redistributed, with or without modification freely, without advance permission, provided that the above Copyright statement is reproduced with each copy.
+THIS SOFTWARE IS PROVIDED "AS IS" WITHOUT ANY EXPRESS OR IMPLIED WARRANTIES. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE (INCLUDING NEGLIGENCE OR OTHERWISE).
+
+(R)Excel is a registered trademark of Microsoft Corporation in the United States and/or other countries
+
+	</help>
+</tool>
Binary file export_iprscan_to_Excel/iprscanToExcel_v20.jar has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/export_iprscan_to_Excel/readme.txt	Tue Mar 05 04:00:19 2013 -0500
@@ -0,0 +1,58 @@
+Installation of iprscanToExcel
+------------------------------
+
+1) The program iprscanToExcel does not work if InterProScan and the corresponding Galaxy wrapper are not present.
+
+2) If required, change the path in the <command> part of the Galaxy wrapper "iprscanToExcel.xml" to the corresponding path on your system:
+
+	${GALAXY_ROOT_DIR}/tools/iprscan/iprscanToExcel_v20.jar
+
+3) Installation of iprscanToExcel_v20.jar, iprscanToExcel.props and the Galaxy XML wrapper iprscanToExcel.xml
+
+- The wrapper file "iprscanToExcel.xml", the program "iprscanToExcel_v20.jar" and its corresponding properties file "iprscanToExcel.props" should all be copied to the same directory, namely Galaxy tools directory "iprscan", {GALAXY_ROOT_DIR}/tools/iprscan
+- Make GALAXY aware of the new tool: GALAXY knows about installed tools (and also what to display on the left pane) from the file {GALAXY_ROOT_DIR}/tool_conf.xml
+Use a text editor to add a line for the iprscanToExcel.xml wrapper (next to the line for the interproscan.xml wrapper) to e.g. the Sequence Annotation section.
+
+	<label text="My Tools" id="My tools" />
+	<section name="Sequence Annotation" id="sequence_annotation" >
+	<tool file="iprscan/interproscan.xml" />
+	<tool file="iprscan/iprscanToExcel.xml" />
+	</section>
+
+- start up GALAXY again, open it in the web browser and test
+
+
+iprscanToExcel functionality
+----------------------------
+
+iprscanToExcel is a Java program that converts raw and/or xml output files from the interproscan program to Excel format (xlsx). Three modes of operation are available: convert both XML and raw iprscan output files to Excel, convert only the xml output file to Excel or convert only the raw file to Excel.
+
+The xml output file of the interproscan program contains the source data for the Excel tabsheet "summary tables". Those summary tables give for each protein family information concerning the detailed matches, the parent, the child_list, where they are found_in, the GO-terms, ...
+
+The raw output file of the interproscan program contains the source data for the Excel tabsheet "iprscan results", containing an overview table with proteinID, protein crc64, protein length, match dbname, classification id, classification description, start, end, score, status, date, interproID, interpro name, (title, GO number, description)n. The columns can be sorted and filtered via the filters present in the headers of the columns.
+
+The program requires the availability of raw and/or xml files in the Galaxy history. The files can be generated via the application "Interproscan functional predictions" (under the header Sequence Annotation).
+
+
+Galaxy workflow example
+-----------------------
+
+The file "Galaxy-Workflow-Export_xml_and_raw_output_from_iprscan_to_Excel.ga" stores a workflow. In the first two sections, a sequence file (fasta) can be uploaded and all InterProScan applications will be executed to generate an xml and a raw InterProScan output file. In the third section of the workflow, those two InterProScan output files will be used as input for the iprscanToExcel program, resulting in an Excel file (.xlsx) with two tab pages.
+
+
+Author and affiliation
+----------------------
+
+Katrien Bernaerts and Domantas Motiejunas
+corresponding author: gb-ctk-open-source-support@basf.com
+10/06/2012
+
+CropDesign N.V., a BASF Plant Science Company - Technologiepark 3, 9052 Zwijnaarde - Belgium
+
+
+Terms of use
+--------------------------
+iprscanToExcel - Copyright (C) 2012 CropDesign N.V. - this software may be used, copied and redistributed, with or without modification freely, without advance permission, provided that the above Copyright statement is reproduced with each copy.
+THIS SOFTWARE IS PROVIDED "AS IS" WITHOUT ANY EXPRESS OR IMPLIED WARRANTIES. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE (INCLUDING NEGLIGENCE OR OTHERWISE).
+
+(R)Excel is a registered trademark of Microsoft Corporation in the United States and/or other countries.
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/export_iprscan_to_Excel/source_files/iprscanToExcel_v20/.classpath	Tue Mar 05 04:00:19 2013 -0500
@@ -0,0 +1,22 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<classpath>
+	<classpathentry kind="src" path="src"/>
+	<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/JavaSE-1.6"/>
+	<classpathentry kind="lib" path="/home/katrien/workspace/poi-3.8-beta5/poi-scratchpad-3.8-beta5-20111217.jar"/>
+	<classpathentry kind="lib" path="/home/katrien/workspace/poi-3.8-beta5/poi-ooxml-schemas-3.8-beta5-20111217.jar"/>
+	<classpathentry kind="lib" path="/home/katrien/workspace/poi-3.8-beta5/poi-ooxml-3.8-beta5-20111217.jar"/>
+	<classpathentry kind="lib" path="/home/katrien/workspace/poi-3.8-beta5/poi-excelant-3.8-beta5-20111217.jar"/>
+	<classpathentry kind="lib" path="/home/katrien/workspace/poi-3.8-beta5/poi-examples-3.8-beta5-20111217.jar"/>
+	<classpathentry kind="lib" path="/home/katrien/workspace/poi-3.8-beta5/poi-3.8-beta5-20111217.jar">
+		<attributes>
+			<attribute name="javadoc_location" value="file:/home/katrien/workspace/poi-3.8-beta5/docs/apidocs/"/>
+		</attributes>
+	</classpathentry>
+	<classpathentry kind="lib" path="/home/katrien/workspace/poi-3.8-beta5/lib/log4j-1.2.13.jar"/>
+	<classpathentry kind="lib" path="/home/katrien/workspace/poi-3.8-beta5/lib/junit-3.8.1.jar"/>
+	<classpathentry kind="lib" path="/home/katrien/workspace/poi-3.8-beta5/lib/commons-logging-1.1.jar"/>
+	<classpathentry kind="lib" path="/home/katrien/workspace/poi-3.8-beta5/ooxml-lib/xmlbeans-2.3.0.jar"/>
+	<classpathentry kind="lib" path="/home/katrien/workspace/poi-3.8-beta5/ooxml-lib/stax-api-1.0.1.jar"/>
+	<classpathentry kind="lib" path="/home/katrien/workspace/poi-3.8-beta5/ooxml-lib/dom4j-1.6.1.jar"/>
+	<classpathentry kind="output" path="bin"/>
+</classpath>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/export_iprscan_to_Excel/source_files/iprscanToExcel_v20/.project	Tue Mar 05 04:00:19 2013 -0500
@@ -0,0 +1,17 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<projectDescription>
+	<name>iprscanToExcel_v20_toolshed</name>
+	<comment></comment>
+	<projects>
+	</projects>
+	<buildSpec>
+		<buildCommand>
+			<name>org.eclipse.jdt.core.javabuilder</name>
+			<arguments>
+			</arguments>
+		</buildCommand>
+	</buildSpec>
+	<natures>
+		<nature>org.eclipse.jdt.core.javanature</nature>
+	</natures>
+</projectDescription>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/export_iprscan_to_Excel/source_files/iprscanToExcel_v20/.settings/org.eclipse.jdt.core.prefs	Tue Mar 05 04:00:19 2013 -0500
@@ -0,0 +1,12 @@
+#Fri Mar 23 16:49:12 CET 2012
+eclipse.preferences.version=1
+org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode=enabled
+org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.6
+org.eclipse.jdt.core.compiler.codegen.unusedLocal=preserve
+org.eclipse.jdt.core.compiler.compliance=1.6
+org.eclipse.jdt.core.compiler.debug.lineNumber=generate
+org.eclipse.jdt.core.compiler.debug.localVariable=generate
+org.eclipse.jdt.core.compiler.debug.sourceFile=generate
+org.eclipse.jdt.core.compiler.problem.assertIdentifier=error
+org.eclipse.jdt.core.compiler.problem.enumIdentifier=error
+org.eclipse.jdt.core.compiler.source=1.6
Binary file export_iprscan_to_Excel/source_files/iprscanToExcel_v20/bin/be/cropdesign/iprscan/EnumFlags.class has changed
Binary file export_iprscan_to_Excel/source_files/iprscanToExcel_v20/bin/be/cropdesign/iprscan/Main.class has changed
Binary file export_iprscan_to_Excel/source_files/iprscanToExcel_v20/bin/be/cropdesign/iprscan/RawToExcel.class has changed
Binary file export_iprscan_to_Excel/source_files/iprscanToExcel_v20/bin/be/cropdesign/iprscan/XMLToExcel.class has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/export_iprscan_to_Excel/source_files/iprscanToExcel_v20/iprscanToExcel.props	Tue Mar 05 04:00:19 2013 -0500
@@ -0,0 +1,13 @@
+# iprscanToExcel.PROPS
+#
+# author: Katrien Bernaerts and Domantas Motiejunas
+# affiliation: CropDesign N.V., a BASF Plant Science Company - Technologiepark 3, 9052 Zwijnaarde - Belgium
+
+# Date: 20/4/2012
+#
+# Properties file for iprscanToExcel
+# Use a comma as delimiter between the different flags. A flag should always start with -
+#
+delimiter=,
+flags=-xml, -raw, -excel
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/export_iprscan_to_Excel/source_files/iprscanToExcel_v20/src/be/cropdesign/iprscan/EnumFlags.java	Tue Mar 05 04:00:19 2013 -0500
@@ -0,0 +1,37 @@
+package be.cropdesign.iprscan;
+
+/**
+ * Command-line flags understood by iprscanToExcel. Every constant carries the
+ * literal text of its flag as it is typed on the command line.
+ * @author: Katrien Bernaerts and Domantas Motiejunas
+ * @date: 21/06/2012
+ * @affiliation: CropDesign N.V., a BASF Plant Science Company - Technologiepark 3, 9052 Zwijnaarde - Belgium
+ */
+public enum EnumFlags {
+	XML("-xml"),
+	RAW("-raw"),
+	EXCEL("-excel");
+
+	// literal command-line text of this flag
+	private String flag;
+
+	/**
+	 * Binds a constant to its command-line text.
+	 * @param text the flag as it appears on the command line
+	 */
+	EnumFlags(String text){
+		flag = text;
+	}
+
+	/**
+	 * @return the command-line text currently associated with this constant
+	 */
+	public String getFlag() {
+		return this.flag;
+	}
+
+	/**
+	 * Replaces the command-line text associated with this constant.
+	 * @param flag the flag to set
+	 */
+	public void setFlag(String flag) {
+		this.flag = flag;
+	}
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/export_iprscan_to_Excel/source_files/iprscanToExcel_v20/src/be/cropdesign/iprscan/Main.java	Tue Mar 05 04:00:19 2013 -0500
@@ -0,0 +1,105 @@
+package be.cropdesign.iprscan;
+
+import java.io.FileNotFoundException;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.util.HashMap;
+
+//include POI 3.8-beta5 jars in classpath
+import org.apache.poi.xssf.usermodel.XSSFCell;
+import org.apache.poi.xssf.usermodel.XSSFRow;
+import org.apache.poi.xssf.usermodel.XSSFSheet;
+import org.apache.poi.xssf.usermodel.XSSFWorkbook;
+
+/**
+ * the .jar needs as parameters the path to the XML file with the summary tables and
+ * the .raw file with the iprscan results.
+ * Further the path and filename of the exported Excel file (.xlsx) is given as a parameter.
+ * java -jar iprscan.jar -xml $XML -raw $raw -excel $excel
+ * @author: Katrien Bernaerts and Domantas Motiejunas
+ * @date: 21/06/2012
+ * @affiliation: CropDesign N.V., a BASF Plant Science Company - Technologiepark 3, 9052 Zwijnaarde - Belgium
+ */
+public class Main {
+
+	/**
+	 * @param args
+	 */
+	public static void main(String[] args) {
+		/*
+		 * store the arguments and their corresponding flags in a HashMap.
+		 * The availability of a HashMap with key (-flag) / value (argument) pairs makes it easier
+		 * to handle the arguments and their flags.
+		 */
+		HashMap<String, String> arguments = new HashMap<String, String>();
+		if (args.length == 0){
+			System.err.println("No arguments were found at the command line.");
+		} else {
+			for(int i = 0; i < args.length; i++){
+				// for the even args. args[0] is a flag, args[1] is the corresponding argument etc.
+				if (i % 2 == 0){
+					arguments.put(args[i], args[i+1]); // define key (-flag) - value (argument) pairs
+				}
+			}
+		}
+
+
+		/*
+		 * fetch the argument values for each flag in the FLAGS enumeration
+		 */
+		String XMLFile = arguments.get(EnumFlags.XML.getFlag());
+		String rawFile = arguments.get(EnumFlags.RAW.getFlag());
+		String xlsxName = arguments.get(EnumFlags.EXCEL.getFlag());
+
+		/*
+		 * create an Excel workbook using the Apache POI library
+		 * ref: http://sanjaal.com/java/105/java-file/writing-to-excel-file-using-apache-poi/
+		 */
+		XSSFWorkbook wb = new XSSFWorkbook();
+		XSSFRow myRow = null;
+		XSSFCell myCell = null;
+
+		/*
+		 * determine which arguments (-xml or -raw or (-xml && -raw)) were used at the command line,
+		 * and execute the methods corresponding with those arguments
+		 */
+		if(arguments.containsKey(EnumFlags.XML.getFlag()) && arguments.containsKey(EnumFlags.RAW.getFlag())){
+			parseXML(wb, myRow, myCell, XMLFile);
+			parseRaw(wb, myRow, myCell, rawFile);
+		} else if (arguments.containsKey(EnumFlags.XML.getFlag())) {
+			parseXML(wb, myRow, myCell, XMLFile);
+		} else if (arguments.containsKey(EnumFlags.RAW.getFlag())) {
+			parseRaw(wb, myRow, myCell, rawFile);
+		}
+
+		/*
+		 * write the parsed results to Excel
+		 */
+		FileOutputStream fileOut;
+		try {
+			fileOut = new FileOutputStream(xlsxName);
+			wb.write(fileOut);
+			fileOut.close();
+		} catch (FileNotFoundException fnf) {
+			System.err.println("Filenot found: " + fnf);
+		} catch (IOException io) {
+			System.err.println("Input/output error: " + io);
+		}
+	}
+
+	/**
+	 * generate Excel tabsheet 2 containing the summary tables obtained by parsing the XML file
+	 */
+	public static void parseXML(XSSFWorkbook wb, XSSFRow myRow, XSSFCell myCell, String XMLFile){
+		XSSFSheet sheet2 = wb.createSheet("summary tables");
+		new XMLToExcel(wb, sheet2, myRow, myCell, XMLFile);
+	}
+
+	/**
+	 * generate tabsheet3: write the iprscan results in the .raw file to Excel, tabsheet "iprscan results"
+	 */
+	public static void parseRaw(XSSFWorkbook wb, XSSFRow myRow, XSSFCell myCell, String rawFile){
+		XSSFSheet sheet3 = wb.createSheet("iprscan results");
+		new RawToExcel(wb, sheet3,myRow, myCell, rawFile);
+	}
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/export_iprscan_to_Excel/source_files/iprscanToExcel_v20/src/be/cropdesign/iprscan/RawToExcel.java	Tue Mar 05 04:00:19 2013 -0500
@@ -0,0 +1,403 @@
+package be.cropdesign.iprscan;
+/**
+ * Converts the *.raw output file of the InterProScan program to an Excel file (*.xlsx)
+ * @author: Katrien Bernaerts and Domantas Motiejunas
+ * @date: 21/06/2012
+ * @affiliation: CropDesign N.V., a BASF Plant Science Company - Technologiepark 3, 9052 Zwijnaarde - Belgium
+ */
+import java.awt.Color;
+import java.io.BufferedReader;
+import java.io.FileNotFoundException;
+import java.io.FileReader;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.apache.poi.xssf.usermodel.XSSFCell;
+import org.apache.poi.xssf.usermodel.XSSFCellStyle;
+import org.apache.poi.xssf.usermodel.XSSFColor;
+import org.apache.poi.xssf.usermodel.XSSFDataFormat;
+import org.apache.poi.xssf.usermodel.XSSFFont;
+import org.apache.poi.xssf.usermodel.XSSFRow;
+import org.apache.poi.xssf.usermodel.XSSFSheet;
+import org.apache.poi.xssf.usermodel.XSSFWorkbook;
+
+public class RawToExcel {
+
+	private int colnr; // index of the column the next cell is written to; reset to 0 for every row in parseRaw()
+	private int maxNr; // column count used for the autofilter range; starts at 0 - presumably updated by getMax(), TODO confirm
+	private int rownr; // index of the row being written; parseRaw() starts at 1 because row 0 holds the headers
+	private int maxNrOfGOTerms; // largest number of GO terms found in a single field so far
+
+	/**
+	 * fields needed to generate Excel
+	 */
+	private XSSFWorkbook wb;
+	private XSSFSheet sheet;
+	private XSSFRow myRow;
+	private XSSFCell myCell;
+	private String rawFile;
+	private boolean interproInfo; // this info is not standard in every .raw file, but can occur
+	private boolean GOInfo; // this info is not standard in every .raw file, but can occur
+
+	private ArrayList<String> titles; // header titles, filled by addHeaderTitles()
+
+	/**
+	 * Builds the sheet with the iprscan results: stores the POI objects and the
+	 * file name, resets the bookkeeping fields and then parses the .raw file,
+	 * writes the header row and applies the sheet formatting.
+	 * @param wb workbook the sheet belongs to
+	 * @param sheet3 sheet that receives the .raw data
+	 * @param myRow initial row reference
+	 * @param myCell initial cell reference
+	 * @param rawFile path of the .raw file to parse
+	 */
+	public RawToExcel(XSSFWorkbook wb, XSSFSheet sheet3,XSSFRow myRow, XSSFCell myCell, String rawFile){
+		// bookkeeping starts from a clean slate
+		this.titles = new ArrayList<String>();
+		this.GOInfo = false;
+		this.interproInfo = false;
+		this.maxNrOfGOTerms = 0;
+		this.maxNr = 0;
+
+		// objects handed in by the caller
+		this.rawFile = rawFile;
+		this.myCell = myCell;
+		this.myRow = myRow;
+		this.sheet = sheet3;
+		this.wb = wb;
+
+		// content first, then headers, then layout
+		parseRaw();
+		addHeaderTitles();
+		formatStyle();
+	}
+
+	/**********************************
+	 * parsing of the .raw file content
+	 **********************************/
+	/**
+	 * method to parse the data in the .raw file with a BufferedReader/FileReader. The parsed data
+	 * are written to Excel using the Apache POI library: every tab-delimited field becomes one cell,
+	 * integer fields are stored as numbers, fields in exponential notation are passed to
+	 * formatExponential() and fields with GO information are split over several cells.
+	 * Read errors are reported on System.err; the rows written so far are kept.
+	 */
+	public void parseRaw() {
+		BufferedReader reader = null;
+		try {
+			reader = new BufferedReader(new FileReader(rawFile));
+
+			String line = reader.readLine();
+
+			/*
+			 * start with rownr = 1 to write content because an empty row has to be reserved for the titles
+			 * The title row cannot be filled yet at this point because we first have to determine how many
+			 * columns are needed, and by consequence, how many titles. The problem is that different .raw files
+			 * can have a different amount of columns.
+			 * Not all lines of the same file have the same number of columns, e.g. because the
+			 * last column is empty. However, if for some rows the last column is empty, a column title is still
+			 * needed for the other rows. Therefore, the method getMax(colnr) is called for each field.
+			 */
+			rownr = 1;
+			while(line != null) {
+				String[] splits = line.split("\t"); // tab delimited file
+				myRow = sheet.createRow(rownr);
+				colnr = 0;
+				for (String field : splits){
+					myCell = myRow.createCell(colnr);
+					myCell.setCellValue(field);
+
+					/*
+					 * interProScan info is not present in all raw files. For the creation of the right
+					 * header titles, it is important to know whether the parsed raw file contained
+					 * interProScan info
+					 */
+					if (field.contains("IPR")){
+						interproInfo = true;
+					}
+
+					/*
+					 * store fields that consist of digits only (optionally followed by whitespace)
+					 * as numbers, e.g. protein length, start and end. If the cell content is not
+					 * stored as number, sorting etc. via the filters in the headers does not work correctly.
+					 * The pattern accepts a single run of digits, so that the text handed to the
+					 * parser is always a valid number; a value with more digits than a long can
+					 * hold stays in the cell as text.
+					 */
+					if (checkRegex("^[0-9]+\\s*$", field)){
+						try {
+							myCell.setCellValue(Long.parseLong(field.trim()));
+						} catch (NumberFormatException tooLarge) {
+							// keep the text value that was written above
+						}
+					}
+
+					/*
+					 * apply the scientific notation (exponential) format to fields such as the score.
+					 * Exactly one mantissa/exponent pair is required: a lone sign or a repeated
+					 * group such as "1e51e5" is not a number and is left as text.
+					 */
+					if (checkRegex("^[-+]?[0-9]*\\.?[0-9]+[eE][-+]?[0-9]+$", field)){
+						formatExponential(field);
+					}
+
+					/*
+					 * split up the field with GO classification information further such
+					 * that title, GO number and description are stored in different
+					 * Excel cells instead of all information concatenated into one cell (like it is
+					 * in the original .raw file generated by iprscan). splitGOTerms() writes
+					 * its cells starting at the current column and advances colnr.
+					 */
+					if (field.contains("GO:")){
+						GOInfo = true;
+						splitGOTerms(field);
+					}
+					getMax(colnr);
+					colnr++;
+				}
+				line = reader.readLine();
+				rownr++;
+				// kept from the original implementation: the row for the next line is created
+				// up front, which leaves one empty row object after the last data row
+				myRow = sheet.createRow(rownr);
+			}
+		} catch (FileNotFoundException e) {
+			System.err.println("The .raw file cannot be found.");
+		} catch (IOException e) {
+			System.err.println("An input/output exception occurred while reading the .raw file.");
+		}
+		finally {
+			if (reader != null) {
+				try {
+					reader.close();
+				}
+				catch (IOException e){
+					// best effort: nothing useful can be done when closing fails
+				}
+			}
+		}
+	}
+
+	/**
+	 * Helper method for parseRaw()
+	 * Split a string containing GO information. A typical string looks like:
+	 * "Molecular Function: sequence-specific DNA binding transcription factor activity (GO:0003700), Cellular Component: nucleus (GO:0005634), Biological Process: regulation of transcription, DNA-dependent (GO:0006355), Molecular Function: sequence-specific DNA binding (GO:0043565)"
+	 * or in more general terms:
+	 * "Title1: description1 (GO number1), Title2: description2 (GO number2), Title3: description3 (GO number3)"
+	 * Every term is written to three consecutive cells of the current row, starting at the
+	 * current column: title, GO number and description. colnr is advanced by three per term.
+	 * The string is comma delimited, but the split may not happen at every comma,
+	 * only at a comma that directly follows a closing parenthesis.
+	 * The split may for example not happen at the comma in "Molecular Function: transferase activity,
+	 * transferring phosphorus-containing groups (GO:0016772)".
+	 * The GO number is taken from the LAST pair of parentheses of a term, because a description
+	 * can contain parentheses itself, e.g. "oxidoreductase activity, acting on NAD(P)H (GO:0016651)".
+	 * A term that does not have the form "title: description (GO number)" is written unchanged
+	 * to the title cell and the two other cells stay empty.
+	 * @param string the field with the GO information; null or blank input is ignored
+	 */
+	public void splitGOTerms(String string){
+		if (string == null || string.trim().isEmpty()){
+			return;
+		}
+
+		// the lookbehind keeps the ")" with its term and removes only the comma and the whitespace after it
+		String[] splitsClassification = string.split("(?<=\\)),\\s*");
+		getMaxNrOfGOTerms(splitsClassification.length);
+
+		for (int i = 0; i < splitsClassification.length; i++){
+			String term = splitsClassification[i].trim();
+			int colon = term.indexOf(':');
+			int open = term.lastIndexOf('(');
+			int close = term.lastIndexOf(')');
+
+			String title = term;
+			String goNumber = "";
+			String description = "";
+			if (colon >= 0 && open > colon && close > open){
+				title = term.substring(0, colon);
+				goNumber = term.substring(open + 1, close);
+				description = term.substring(colon + 1, open).trim();
+			}
+
+			// same column order as the header titles: title, GO number, description
+			String[] values = { title, goNumber, description };
+			for (String value : values){
+				myCell = myRow.createCell(colnr);
+				myCell.setCellValue(value);
+				colnr++;
+			}
+		}
+	}
+
+	/**
+	 * Helper method for parseRaw()
+	 * Check whether a certain input string (stringToMatch) matches a certain regular expression.
+	 * The whole string has to match. A null, empty or whitespace-only string never matches.
+	 * @param regex the regular expression
+	 * @param stringToMatch the string to test; may be null
+	 * @return true if stringToMatch is not blank and matches regex completely
+	 */
+	public boolean checkRegex(String regex, String stringToMatch){
+		// test for null before creating the matcher: Pattern.matcher(null) throws a NullPointerException
+		if (stringToMatch == null || stringToMatch.trim().isEmpty()){
+			return false;
+		}
+		Pattern p = Pattern.compile(regex);
+		Matcher m = p.matcher(stringToMatch);
+		return m.matches();
+	}
+
+	/**********************************
+	 * header titles of the Excel sheet
+	 *********************************/
+	/**
+	 * the header titles are generated in the first row (index 0) of the spreadsheet
+	 * All the potential column titles are added to the titles ArrayList. The .raw file always contains
+	 * some fixed part (standard titles), but can also contain more information (titles which are not
+	 * required for every .raw file).
+	 * This last category of headers is only added in case the information occurs in the .raw file.
+	 */
+	public void addHeaderTitles(){
+		/*
+		 *  standard titles
+		 */
+		titles.add("protein ID");
+		titles.add("protein crc64");
+		titles.add("protein length");
+		titles.add("match dbname");
+		titles.add("classification id");
+		titles.add("classification description");
+		titles.add("start");
+		titles.add("end");
+		titles.add("score");
+		titles.add("status");
+		titles.add("date");
+		/*
+		 * titles which are not required for every .raw file
+		 */
+		if (interproInfo){ // only if the .raw file contains "IPR" boolean interproInfo becomes true
+			titles.add("interpro ID");
+			titles.add("interpro name");
+		}
+
+		if (GOInfo){
+			for (int i = 0; i < maxNrOfGOTerms; i++){
+				titles.add("title");
+				titles.add("GO number");
+				titles.add("description");
+			}
+		}
+
+		myRow = sheet.createRow(0);
+		// show the headers in the table
+		for (int i = 0; i < titles.size() ; i ++){
+			myCell = myRow.createCell(i);
+			myCell.setCellValue(titles.get(i)); // content of the headercell
+			formatHeader(); // color of the headercell
+		}
+	}
+
+	/**
+	 * Keep track of the largest number of GO terms seen so far.
+	 * Despite its name this method returns nothing: it raises the field maxNrOfGOTerms to nr
+	 * when nr is larger and leaves it unchanged otherwise. addHeaderTitles() uses the field to
+	 * decide how many groups of GO header titles have to be generated.
+	 * @param nr number of GO terms to compare with the current maximum
+	 */
+	public void getMaxNrOfGOTerms(int nr){
+		maxNrOfGOTerms = Math.max(maxNrOfGOTerms, nr);
+	}
+
+
+	/*******************************
+	 * formatting of the Excel sheet
+	 ******************************/
+	/**
+	 * method to apply all formatting to the Excel tabsheet containing the .raw data:
+	 * autofilters on the header row, automatic column widths and a frozen header row
+	 * (in that order).
+	 */
+	public void formatStyle(){
+		setAutoFilters();
+		autoSizeColumns();
+		freezeRow();
+	}
+
+	/**
+	 * make autofilters of the column headers in Excel
+	 * ref: http://stackoverflow.com/questions/3114220/poi-auto-filter
+	 */
+	public void setAutoFilters(){
+		if (GOInfo){
+			/*
+			 * for some reason, the first empty column contains an autofilter in the case there is
+			 * GO information. However, only the columns which are not empty should have and autofilter.
+			 * In order to avoid this small bug, the autofilter method was changed
+			 * slightly: maxNr -1 instead of maxNr
+			 * method: public XSSFAutoFilter setAutoFilter(CellRangeAddress, range);
+			 */
+			sheet.setAutoFilter(org.apache.poi.ss.util.CellRangeAddress.valueOf("A1:"+ (Character.toString((char)( 65+maxNr-1)))+"1"));
+		} else {
+			sheet.setAutoFilter(org.apache.poi.ss.util.CellRangeAddress.valueOf("A1:"+ (Character.toString((char)( 65+maxNr)))+"1"));
+		}
+	}
+
+	/**
+	 * set the column width automatically to the width of the content
+	 */
+	public void autoSizeColumns(){
+		for(int column = 0; column < maxNr; column++){
+			sheet.autoSizeColumn(column);
+		}
+	}
+
+	/**
+	 * Helper for setAutoFilters() and autoSizeColumns(): remembers the largest column count
+	 * reported so far in the field maxNr. Rows can differ in the number of filled columns,
+	 * so the formatting has to cover the widest one.
+	 * Despite its name the method returns nothing; it only updates maxNr.
+	 * @param nr column count to compare with the current maximum
+	 */
+	public void getMax(int nr){
+		maxNr = Math.max(maxNr, nr);
+	}
+
+	/**
+	 * Give the current cell (myCell) a header look: blue, bold text.
+	 * A new font and a new cell style are created in the workbook on every call.
+	 */
+	public void formatHeader(){
+		XSSFFont headerFont = wb.createFont();
+		headerFont.setBold(true);
+		headerFont.setColor(new XSSFColor(Color.BLUE));
+
+		XSSFCellStyle headerStyle = wb.createCellStyle();
+		headerStyle.setFont(headerFont);
+		myCell.setCellStyle(headerStyle);
+	}
+
+	/**
+	 * freeze the header row, so that it stays visible while scrolling
+	 * method: public void createFreezePane(int colSplit, int rowSplit, int leftmostColumn, int topRow)
+	 * Here: no frozen columns (colSplit 0), one frozen row (rowSplit 1).
+	 */
+	public void freezeRow(){
+		sheet.createFreezePane(0, 1, 0, 1);
+	}
+
+	/** cell style with the scientific number format, created on first use by formatExponential() */
+	private XSSFCellStyle exponentialCellStyle;
+
+	/**
+	 * Write the score to the current cell (myCell), formatted in scientific notation (exponential).
+	 * Differentiate the text content (NA) from the exponential values via an if ... else
+	 * because otherwise the formatting as exponential value is not OK: "NA" is written as plain text,
+	 * every other value is parsed as a double and written as a number with format "0.0E+0".
+	 *
+	 * The cell style is created only once and shared by all score cells: a workbook can only hold
+	 * a limited number of cell styles, so creating one style per cell fails on large .raw files.
+	 * @param s the score as text, either "NA" or a value accepted by Double.parseDouble()
+	 * @throws NumberFormatException if s is neither "NA" nor a parsable number
+	 */
+	public void formatExponential(String s){
+		if (s.equals("NA")){
+			myCell.setCellValue("NA");
+		}
+		else {
+			if (exponentialCellStyle == null){
+				XSSFCellStyle cs = wb.createCellStyle();
+				XSSFDataFormat df = wb.createDataFormat();
+				cs.setDataFormat(df.getFormat("0.0E+0"));
+				exponentialCellStyle = cs;
+			}
+			myCell.setCellValue(Double.parseDouble(s));
+			myCell.setCellStyle(exponentialCellStyle);
+		}
+	}
+
+	/*********************
+	 * getters and setters
+	 *********************/
+	/**
+	 * @param colnr the value to store in the field colnr
+	 */
+	public void setColnr(int colnr) {
+		this.colnr = colnr;
+	}
+
+	/**
+	 * @return the current value of the field colnr
+	 */
+	public int getColnr() {
+		return colnr;
+	}
+
+	/**
+	 * Overwrites the field maxNrOfGOTerms unconditionally (unlike getMaxNrOfGOTerms(int),
+	 * which only raises it).
+	 * @param nrOfGOTerms the value to store in maxNrOfGOTerms
+	 */
+	public void setNrOfGOTerms(int nrOfGOTerms) {
+		this.maxNrOfGOTerms = nrOfGOTerms;
+	}
+
+	/**
+	 * @return the current value of the field maxNrOfGOTerms
+	 */
+	public int getNrOfGOTerms() {
+		return maxNrOfGOTerms;
+	}
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/export_iprscan_to_Excel/source_files/iprscanToExcel_v20/src/be/cropdesign/iprscan/XMLToExcel.java	Tue Mar 05 04:00:19 2013 -0500
@@ -0,0 +1,488 @@
+package be.cropdesign.iprscan;
+/**
+ * Converts the *.xml output file of the InterProScan program to an Excel file (*.xlsx)
+ * @author: Katrien Bernaerts and Domantas Motiejunas
+ * @date: 21/06/2012
+ * @affiliation: CropDesign N.V., a BASF Plant Science Company - Technologiepark 3, 9052 Zwijnaarde - Belgium
+ */
+import java.awt.Color;
+import java.io.File;
+import java.io.IOException;
+
+import javax.xml.parsers.DocumentBuilder;
+import javax.xml.parsers.DocumentBuilderFactory;
+import javax.xml.parsers.FactoryConfigurationError;
+import javax.xml.parsers.ParserConfigurationException;
+
+import org.apache.poi.ss.usermodel.CellStyle;
+import org.apache.poi.xssf.usermodel.XSSFCell;
+import org.apache.poi.xssf.usermodel.XSSFCellStyle;
+import org.apache.poi.xssf.usermodel.XSSFColor;
+import org.apache.poi.xssf.usermodel.XSSFFont;
+import org.apache.poi.xssf.usermodel.XSSFRow;
+import org.apache.poi.xssf.usermodel.XSSFSheet;
+import org.apache.poi.xssf.usermodel.XSSFWorkbook;
+import org.w3c.dom.Attr;
+import org.w3c.dom.Document;
+import org.w3c.dom.Element;
+import org.w3c.dom.Node;
+import org.w3c.dom.NodeList;
+import org.xml.sax.SAXException;
+
+public class XMLToExcel {
+	/**
+	 * fields needed to generate Excel
+	 */
+	private XSSFWorkbook wb;
+	private XSSFSheet sheet;
+	private XSSFRow myRow;
+	private XSSFCell myCell;
+	private String XMLFile;
+
+	/**
+	 * Fields/counters
+	 */
+	private int rownr;
+	private int colnr;
+	private int maxColnr;
+
+	/**
+	 * Constructor. Stores the POI handles and the path of the XML file, resets the row and column
+	 * counters and then does all the work immediately: the XML file is parsed and written to the
+	 * sheet (makeDOMParser()) and the column widths are set (setColumnWidth()).
+	 * @param wb workbook used to create fonts and cell styles
+	 * @param sheet sheet that receives the rows
+	 * @param myRow initial value of the "current row" field
+	 * @param myCell initial value of the "current cell" field
+	 * @param XMLFile path of the InterProScan XML file to convert
+	 */
+	public XMLToExcel(XSSFWorkbook wb, XSSFSheet sheet,XSSFRow myRow, XSSFCell myCell, String XMLFile){
+		this.XMLFile = XMLFile;
+		this.myCell = myCell;
+		this.myRow = myRow;
+		this.sheet = sheet;
+		this.wb = wb;
+		this.colnr = 0;
+		this.rownr = 0;
+
+		makeDOMParser();
+		setColumnWidth();
+	}
+
+	/**
+	 * Parse the XML file (field XMLFile) with a DOM parser and hand the resulting document to
+	 * parseProteins(), which parses the protein nodes and, through further methods, the deeper nodes.
+	 * Nothing is thrown to the caller: a missing file and all parser/IO problems are only reported
+	 * on System.err.
+	 */
+	public void makeDOMParser(){
+		try {
+			File xmlInput = new File(XMLFile);
+			if (!xmlInput.exists()) {
+				System.err.println("The XML file you try to convert to Excel does not exist.");
+				return;
+			}
+			DocumentBuilder builder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
+			Document document = builder.parse(xmlInput);
+			document.getDocumentElement().normalize();
+
+			// parse protein nodes, the first level subnodes of root node interpro_matches
+			parseProteins(document);
+		} catch (IOException ioe) {
+			System.err.println("Input/output exception: " + ioe);
+		} catch (SAXException saxe) {
+			System.err.println("SAX parsing exception: " + saxe);
+		} catch (FactoryConfigurationError fce) {
+			System.err.println("Factory configuration error: " + fce);
+		} catch (ParserConfigurationException pce) {
+			System.err.println("Parser configuration exception: " + pce);
+		}
+	}
+
+	/**************************
+	 * parse nodes and subnodes
+	 *************************/
+	/**
+	 * method parseProteins to parse the protein nodes, the first level subnodes of root node interpro_matches.
+	 * For every protein element a title row ("PROTEIN" + the value of its id attribute) is written,
+	 * followed by the rows generated by parseInterpro() for that protein.
+	 * If the document contains no protein element, "XML file empty." is printed on System.out.
+	 * @param doc the parsed XML document
+	 */
+	public void parseProteins(Document doc){
+		// parse through the protein nodes
+		NodeList proteinLst = doc.getElementsByTagName("protein");
+		if (proteinLst != null && proteinLst.getLength() > 0) {
+			for (int i = 0; i < proteinLst.getLength(); i++) {
+				Node proteinNode = proteinLst.item(i);
+				if (proteinNode.getNodeType() == Node.ELEMENT_NODE ) {
+					Element proteinElement = (Element) proteinNode;
+					Attr proteinId = proteinElement.getAttributeNode("id");
+
+					// the row index is passed as int: a cast to short turns negative after
+					// row 32767 and makes createRow() fail on larger result files
+					myRow = sheet.createRow(rownr);
+					makeCellText(colnr, "PROTEIN");
+					formatTitle();
+					makeCellText(colnr+1, proteinId.getValue());
+					formatTitle();
+					makeRow();
+
+					// parse the child nodes of the protein nodes, namely interpro
+					parseInterpro(proteinNode);
+				}// end if proteinNode
+			}// end for proteinLst
+		} else {
+			System.out.print("XML file empty.");
+		}
+	}
+
+	/**
+	 * parse interpro nodes, which are child nodes of the protein nodes.
+	 * For every interpro element the following rows are written, in this order:
+	 * a bold row with the id and name attributes, the matches table (parseMatchInterproNode()),
+	 * a "parent" row (parent_id attribute or "No parent"), the child_list, found_in and contains
+	 * rows (parseInterproChild()) and the GO terms (parseGOterms()).
+	 * After the last child node of the protein one extra row is started.
+	 * NOTE(review): the id and name attributes are dereferenced without a null check, so an
+	 * interpro element lacking one of them presumably ends in a NullPointerException - confirm.
+	 * @param proteinNode the protein node whose interpro children have to be written
+	 */
+	public void parseInterpro(Node proteinNode){
+		// parse through the interpro nodes, which are subnodes of the protein node
+		NodeList interproLst = proteinNode.getChildNodes();
+		if (interproLst != null	&& interproLst.getLength() > 0) {
+			for (int j = 0; j < interproLst.getLength(); j++) {
+				Node interproNode = interproLst.item(j);
+				if (interproNode.getNodeType() == Node.ELEMENT_NODE && interproNode.getNodeName().equals("interpro")) {
+					Element interproElement = (Element) interproNode;
+					Attr interproId = interproElement.getAttributeNode("id");
+					makeCellText(colnr, interproId.getValue());
+					makeBold();
+					Attr interproName = interproElement.getAttributeNode("name");
+					makeCellText(colnr+1, interproName.getValue());
+					makeBold();
+					Attr interproParentId = interproElement.getAttributeNode("parent_id"); // optional attribute, used further down after the matches table
+					makeRow();
+
+					/*
+					 *  parse match node, a child node of the interpro node
+					 */
+					parseMatchInterproNode(interproNode);
+
+					/*
+					 * parent information from interpro attribute
+					 * (written in the row on which parseMatchInterproNode() drew the closing border)
+					 */
+					makeCellText(colnr, "parent ");
+					makeItalicRight();
+					if (interproParentId != null) {
+						makeCellText(colnr+1, interproParentId.getValue());
+						setUpperBorder();
+					}
+					else {
+						makeCellText(colnr+1, "No parent");
+						setUpperBorder();
+					}
+					makeRow();
+
+					/*
+					 * child_list subnode from interpro
+					 */
+					parseInterproChild("child_list ", "No children", interproNode);
+
+					/*
+					 *  found_in subnode of interpro
+					 */
+					parseInterproChild("found_in ", "No entries", interproNode);
+
+					/*
+					 *  contains subnode of interpro
+					 */
+					parseInterproChild("contains ", "No entries", interproNode);
+
+					/*
+					 *  GO-terms subnode of interpro
+					 */
+					parseGOterms(interproNode);
+
+				}// end if interproNode
+			}// end for interproLst, counter j
+			makeRow();
+		}// end if interproLst
+	}
+
+	/**
+	 * method parseMatch to parse through the match nodes.
+	 * The match nodes are child nodes of the interpro nodes
+	 */
+	public void parseMatchInterproNode(Node interproNode){
+		makeTableHeaders();
+		// parse through the child nodes of a node
+		NodeList interproChildLst = interproNode.getChildNodes();
+		if (interproChildLst != null && interproChildLst.getLength() > 0) {
+			for (int k = 0; k < interproChildLst.getLength(); k++) {
+				Node interproChildNode = interproChildLst.item(k);
+				if (interproChildNode.getNodeType() == Node.ELEMENT_NODE && interproChildNode.getNodeName().equals("match")) {
+					Element matchElement = (Element) interproChildNode;
+					getMatchAttributes(matchElement);
+					// parse the location information of the matches. Location nodes are subnodes of match nodes.
+					parseLocationMatch(interproChildNode);
+				}// end if interproChildNode match
+			}// end for interproChildLst, counter k
+		}// end if interproChildLst
+		/*
+		 * border line under table
+		 */
+		for (int i = 1; i < 8; i++){
+			makeCellText(i, " ");
+			setUpperBorder();
+		}
+	}
+
+	/**
+	 * Walk through the child nodes of a match node and write every child element
+	 * (the location elements) with getLocationAttributes().
+	 * @param node the match node whose location children have to be written
+	 */
+	public void parseLocationMatch(Node node){
+		NodeList children = node.getChildNodes();
+		if (children == null) {
+			return;
+		}
+		for (int pos = 0; pos < children.getLength(); pos++) {
+			Node child = children.item(pos);
+			if (child.getNodeType() != Node.ELEMENT_NODE) {
+				continue; // skip text nodes between the elements
+			}
+			getLocationAttributes((Element) child);
+		}
+	}
+
+	/**
+	 * method parseInterProChild to write one row for a child node of the interpro node whose
+	 * children carry an ipr_ref attribute (called for child_list, found_in and contains).
+	 * Column 0 gets the label (nodeName), column 1 the placeholder text (emptyNode). If the interpro
+	 * node has a child element named nodeName.trim(), the ipr_ref values of its child elements are
+	 * written from column 1 onwards, so the first value overwrites the placeholder.
+	 * The method always ends by starting a new row.
+	 * @param nodeName label for column 0; trimmed, it is also the element name that is looked up
+	 * @param emptyNode text shown in column 1 when no reference is found
+	 * @param interproNode the interpro node to search in
+	 */
+	public void parseInterproChild(String nodeName, String emptyNode, Node interproNode){
+		makeCellText(0, nodeName);
+		makeItalicRight();
+		makeCellText(1, emptyNode);
+
+		// parse through the child nodes of an interpro node
+		NodeList interproChildLst = interproNode.getChildNodes();
+		if (interproChildLst != null && interproChildLst.getLength() > 0) {
+			for (int k = 0; k < interproChildLst.getLength(); k++) {
+				Node interproChildNode = interproChildLst.item(k);
+				// parse the child node of the interpro node with as name nodeName (parameter of the method)
+				if (interproChildNode.getNodeType() == Node.ELEMENT_NODE && interproChildNode.getNodeName().equals(nodeName.trim())) {
+					// parse through the rel_ref nodes which are childnodes of contains, found_in and child_list
+					NodeList rel_refLst = interproChildNode.getChildNodes();
+					if (rel_refLst != null	&& rel_refLst.getLength() > 0) {
+						for (int s = 0; s < rel_refLst.getLength(); s++) {
+							Node ref_relNode = rel_refLst.item(s);
+							if (ref_relNode.getNodeType() == Node.ELEMENT_NODE) {
+								Element ref_relElement = (Element) ref_relNode;
+								Attr ipr_ref = ref_relElement.getAttributeNode("ipr_ref");
+								colnr++; // the field colnr is used as write position within this row
+								makeCellText(colnr, ipr_ref.getValue());
+							}// end if ref_relNode
+						}//end for ref_relLst
+						getMaxColumns(colnr + rel_refLst.getLength() + 1); // getLength() also counts non-element nodes, so this is an upper bound
+						colnr = 0;//reset colnr to 0 in order to let the other items start in the column with id 0
+					}//end if relf_refLst
+				} // end if interproChildNode childnode
+			}// end for interproChildLst childnode
+		}// end if interproChildLst childnode
+		makeRow();
+	}
+
+	/**
+	 * parse classification nodes (the GO terms), which are subnodes of the interpro node.
+	 * Column 0 of the current row gets the label "GO terms " and column 1 the placeholder
+	 * "no GO terms". Every classification element then fills one row: its category text in column
+	 * colnr+1 (overwriting the placeholder in the first GO row), its id attribute in column colnr+2
+	 * and its description text in column colnr+3.
+	 * Afterwards new rows are started; one more when the placeholder is still present in the row
+	 * where the GO block started.
+	 * @param interproNode the interpro node whose classification children have to be written
+	 */
+	public void parseGOterms(Node interproNode){
+		makeCellText(0, "GO terms ");
+		makeItalicRight();
+		makeCellText(1, "no GO terms");
+		int rowGO = rownr; // remember the row that holds the placeholder
+
+		// parse through the child nodes of an interpro node
+		NodeList interproChildLst = interproNode.getChildNodes();
+		if (interproChildLst != null && interproChildLst.getLength() > 0) {
+			for (int k = 0; k < interproChildLst.getLength(); k++) {
+				Node interproChildNode = interproChildLst.item(k);
+				if (interproChildNode.getNodeType() == Node.ELEMENT_NODE && interproChildNode.getNodeName().equals("classification")) {
+					Element classificationElement = (Element) interproChildNode;
+					Attr classificationId = classificationElement.getAttributeNode("id");
+					makeCellText(colnr+2, classificationId.getValue());
+					// parse through the child nodes of a classification node
+					NodeList classificationLst = classificationElement.getChildNodes();
+					if (classificationLst != null && classificationLst.getLength() > 0) {
+						for (int l=0; l < classificationLst.getLength(); l++) {
+							Node classificationChildNode = classificationLst.item(l);
+							if (classificationChildNode.getNodeType() == Node.ELEMENT_NODE && classificationChildNode.getNodeName().equals("category")) {
+								Element categoryElement = (Element) classificationChildNode;
+								makeCellText(colnr+1, categoryElement.getTextContent());
+							}
+							if (classificationChildNode.getNodeType() == Node.ELEMENT_NODE && classificationChildNode.getNodeName().equals("description")) {
+								Element descriptionElement = (Element) classificationChildNode;
+								makeCellText(colnr+3, descriptionElement.getTextContent());
+							}
+						}
+					}// end if classificationLst
+					makeRow();
+				}// end if interproChildNode classification
+			}// end for interproChildLst, counter k
+			getMaxColumns(colnr + interproChildLst.getLength() + 1);
+		}// end if interproChildLst
+		/*
+		 * insert empty row after the row GO term
+		 * (the placeholder is still in place, so no classification row was started above)
+		 */
+		if (sheet.getRow(rowGO).getCell(1).toString().equals("no GO terms")){
+			makeRow();
+		}
+		makeRow();
+	}
+
+	/**
+	 * Write the dbname, id and name attributes of a match element to the columns colnr+1,
+	 * colnr+2 and colnr+3 of the current row. The name cell (the last one written) gets text wrapping.
+	 * @param matchElement the match element to read the attributes from
+	 */
+	public void getMatchAttributes(Element matchElement){
+		makeCellText(colnr+1, matchElement.getAttributeNode("dbname").getValue());
+		makeCellText(colnr+2, matchElement.getAttributeNode("id").getValue());
+		makeCellText(colnr+3, matchElement.getAttributeNode("name").getValue());
+		// wrap the content of the name cell if it does not fit the width of the column
+		wrapText();
+	}
+
+	/**
+	 * Get the attribute values of the location elements and fill them in in Excel
+	 * @param locationElement
+	 */
+	public void getLocationAttributes(Element locationElement){
+		// get the desired attribute values of the location node in the desired order
+		Attr locationScore = locationElement.getAttributeNode("score");
+		Attr locationStart = locationElement.getAttributeNode("start");
+		Attr locationEnd = locationElement.getAttributeNode("end");
+		Attr locationStatus = locationElement.getAttributeNode("status");
+		makeCellText(colnr+4, locationScore.getValue());
+		makeCellNumber(colnr+5, Integer.parseInt(locationStart.getValue()));
+		makeCellNumber(colnr+6, Integer.parseInt(locationEnd.getValue()));
+		makeCellText(colnr+7, locationStatus.getValue());
+		makeRow();
+	}
+
+	/******************************
+	 * make rows and cells in Excel
+	 ******************************/
+	/**
+	 * method to fill cells of a row in an Excel sheet with text content.
+	 * Creates the cell in the current row (myRow) and makes it the current cell (myCell), so that
+	 * a formatting method called right afterwards applies to this cell.
+	 * @param colnr column index of the cell (this parameter hides the field colnr)
+	 * @param content text to put in the cell
+	 */
+	public void makeCellText(int colnr, String content){
+		myCell = myRow.createCell((short)colnr);
+		myCell.setCellValue(content);
+	}
+
+	/**
+	 * method to fill cells of a row in an Excel sheet with number formatted content.
+	 * Creates the cell in the current row (myRow) and makes it the current cell (myCell).
+	 * @param colnr column index of the cell (this parameter hides the field colnr)
+	 * @param content number to put in the cell
+	 */
+	public void makeCellNumber(int colnr, int content){
+		myCell = myRow.createCell((short)colnr);
+		myCell.setCellValue(content);
+	}
+
+	/**
+	 * method to make a new row in an Excel sheet: increments the row counter (rownr) and makes
+	 * the newly created row the current row (myRow).
+	 * The row index is passed as int; a cast to short turns negative after row 32767 and makes
+	 * createRow() fail on larger result files.
+	 */
+	public void makeRow(){
+		rownr++;
+		myRow = sheet.createRow(rownr);
+	}
+
+	/***********************************
+	 * table headers and titles in Excel
+	 ***********************************/
+	/**
+	 * Write the header row of the matches table: the label "matches " in column 0, followed by
+	 * the seven column titles (each with a title border) from column colnr+1 onwards.
+	 * Reports the number of columns used to getMaxColumns() and starts a new row.
+	 */
+	public void makeTableHeaders(){
+		makeCellText(0, "matches ");
+		makeItalicRight();
+
+		String[] columnTitles = {"Method", "Identifier", "Description", "Score", "Start", "End", "Status"};
+		int offset = 1;
+		for (String columnTitle : columnTitles){
+			makeCellText(colnr + offset, columnTitle);
+			setTitleBorder();
+			offset++;
+		}
+
+		// needed for method setColumnWidth()
+		getMaxColumns(colnr + columnTitles.length + 1);
+		makeRow();
+	}
+
+	/** style of the protein title cells, created on first use by formatTitle() */
+	private XSSFCellStyle titleStyle;
+
+	/**
+	 * format the protein titles: gives the current cell (myCell) a blue, bold, 12 pt font.
+	 * The style and its font are created only once and shared by all title cells, because a
+	 * workbook can only hold a limited number of cell styles and fonts; creating new ones for
+	 * every protein fails on large XML files.
+	 */
+	public void formatTitle(){
+		if (titleStyle == null){
+			XSSFCellStyle style = wb.createCellStyle();
+			XSSFFont font = wb.createFont();
+			font.setColor(new XSSFColor(Color.BLUE));
+			font.setBold(true);
+			font.setFontHeightInPoints((short)12);
+			style.setFont(font);
+			titleStyle = style;
+		}
+		myCell.setCellStyle(titleStyle);
+	}
+
+	/** style of the matches table headers, created on first use by setTitleBorder() */
+	private XSSFCellStyle titleBorderStyle;
+
+	/**
+	 * provide the table headers of the matches part with border lines: a thin line above and a
+	 * double line below the current cell (myCell).
+	 * The style is created only once and shared by all header cells, because a workbook can only
+	 * hold a limited number of cell styles.
+	 */
+	public void setTitleBorder(){
+		if (titleBorderStyle == null){
+			XSSFCellStyle border = wb.createCellStyle();
+			border.setBorderBottom(XSSFCellStyle.BORDER_DOUBLE);
+			border.setBorderTop(XSSFCellStyle.BORDER_THIN);
+			titleBorderStyle = border;
+		}
+		myCell.setCellStyle(titleBorderStyle);
+	}
+
+	/** style with a thin top border, created on first use by setUpperBorder() */
+	private XSSFCellStyle upperBorderStyle;
+
+	/**
+	 * make a border: gives the current cell (myCell) a thin line at the top.
+	 * The style is created only once and shared by all cells that need it, because a workbook
+	 * can only hold a limited number of cell styles.
+	 */
+	public void setUpperBorder(){
+		if (upperBorderStyle == null){
+			XSSFCellStyle border = wb.createCellStyle();
+			border.setBorderTop(XSSFCellStyle.BORDER_THIN);
+			upperBorderStyle = border;
+		}
+		myCell.setCellStyle(upperBorderStyle);
+	}
+
+	/************
+	 * formatting
+	 ***********/
+	/** style with a bold font, created on first use by makeBold() */
+	private XSSFCellStyle boldStyle;
+
+	/**
+	 * format the text of the current cell (myCell) in bold; the alignment is not changed.
+	 * The style and its font are created only once and shared by all bold cells, because a
+	 * workbook can only hold a limited number of cell styles and fonts.
+	 */
+	public void makeBold(){
+		if (boldStyle == null){
+			XSSFCellStyle style = wb.createCellStyle();
+			XSSFFont font = wb.createFont();
+			font.setBold(true);
+			style.setFont(font);
+			boldStyle = style;
+		}
+		myCell.setCellStyle(boldStyle);
+	}
+
+	/** style with an italic font and right alignment, created on first use by makeItalicRight() */
+	private XSSFCellStyle italicRightStyle;
+
+	/**
+	 * format the text of the current cell (myCell) as italic and align it right.
+	 * The style and its font are created only once and shared by all label cells, because a
+	 * workbook can only hold a limited number of cell styles and fonts.
+	 */
+	public void makeItalicRight(){
+		if (italicRightStyle == null){
+			XSSFCellStyle style = wb.createCellStyle();
+			XSSFFont font = wb.createFont();
+			font.setItalic(true);
+			style.setFont(font);
+			style.setAlignment(CellStyle.ALIGN_RIGHT);
+			italicRightStyle = style;
+		}
+		myCell.setCellStyle(italicRightStyle);
+	}
+
+	/**
+	 * adapt column width depending on the content, except for some cases where the column width would become too big
+	 */
+	public void setColumnWidth(){
+		for (int col = 0; col < maxColnr; col++){
+			sheet.autoSizeColumn(col);
+		}
+		//for some columns, fixed size is desired above autosize
+		sheet.setColumnWidth(1, 256*15);
+		sheet.setColumnWidth(3, 256*27);
+	}
+
+	/**
+	 * Helper method for setColumnWidth()
+	 * Remembers the largest number of filled columns reported so far in the field maxColnr.
+	 * This number is needed to loop through the columns and autofit their width.
+	 * Despite its name the method returns nothing; it only updates maxColnr.
+	 * @param colnr number of columns in use (this parameter hides the field colnr)
+	 */
+	public void getMaxColumns(int colnr){
+		maxColnr = Math.max(maxColnr, colnr);
+	}
+
+	/** style with text wrapping enabled, created on first use by wrapText() */
+	private XSSFCellStyle wrapStyle;
+
+	/**
+	 * if text does not fit on one line, this method wraps the text of the current cell (myCell).
+	 * The style is created only once and shared by all wrapped cells, because a workbook can only
+	 * hold a limited number of cell styles.
+	 */
+	public void wrapText(){
+		if (wrapStyle == null){
+			XSSFCellStyle wrap=wb.createCellStyle();
+			wrap.setWrapText(true);
+			wrapStyle = wrap;
+		}
+		myCell.setCellStyle(wrapStyle);
+	}
+}
Binary file export_iprscan_to_Excel/source_files/libraries/commons-logging-1.1.jar has changed
Binary file export_iprscan_to_Excel/source_files/libraries/dom4j-1.6.1.jar has changed
Binary file export_iprscan_to_Excel/source_files/libraries/junit-3.8.1.jar has changed
Binary file export_iprscan_to_Excel/source_files/libraries/log4j-1.2.13.jar has changed
Binary file export_iprscan_to_Excel/source_files/libraries/poi-3.8-beta5-20111217.jar has changed
Binary file export_iprscan_to_Excel/source_files/libraries/poi-examples-3.8-beta5-20111217.jar has changed
Binary file export_iprscan_to_Excel/source_files/libraries/poi-excelant-3.8-beta5-20111217.jar has changed
Binary file export_iprscan_to_Excel/source_files/libraries/poi-ooxml-3.8-beta5-20111217.jar has changed
Binary file export_iprscan_to_Excel/source_files/libraries/poi-ooxml-schemas-3.8-beta5-20111217.jar has changed
Binary file export_iprscan_to_Excel/source_files/libraries/poi-scratchpad-3.8-beta5-20111217.jar has changed
Binary file export_iprscan_to_Excel/source_files/libraries/stax-api-1.0.1.jar has changed
Binary file export_iprscan_to_Excel/source_files/libraries/xmlbeans-2.3.0.jar has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/export_iprscan_to_Excel/testfiles/Galaxy14-[Interproscan_calculation_on_data_3]_input.xml	Tue Mar 05 04:00:19 2013 -0500
@@ -0,0 +1,38 @@
+<interpro_matches>
+
+   <protein id="MADS-box_protein__AGL45_" length="249" crc64="5518E4F05E8B7962" >
+	<interpro id="IPR002100" name="Transcription factor, MADS-box" type="Domain">
+	  <classification id="GO:0003677" class_type="GO">
+	    <category>Molecular Function</category>
+	    <description>DNA binding</description>
+	  </classification>
+	  <classification id="GO:0046983" class_type="GO">
+	    <category>Molecular Function</category>
+	    <description>protein dimerization activity</description>
+	  </classification>
+	  <match id="PR00404" name="MADSDOMAIN" dbname="PRINTS">
+	    <location start="3" end="23" score="1.3e-10" status="T" evidence="FPrintScan" />
+	    <location start="23" end="38" score="1.3e-10" status="T" evidence="FPrintScan" />
+	    <location start="38" end="59" score="1.3e-10" status="T" evidence="FPrintScan" />
+	  </match>
+	  <match id="PF00319" name="SRF-TF" dbname="PFAM">
+	    <location start="11" end="50" score="5.2e-15" status="T" evidence="HMMPfam" />
+	  </match>
+	  <match id="SM00432" name="no description" dbname="SMART">
+	    <location start="1" end="60" score="3.4e-24" status="T" evidence="HMMSmart" />
+	  </match>
+	  <match id="SSF55455" name="SRF-like" dbname="SUPERFAMILY">
+	    <location start="2" end="96" score="4.3e-25" status="T" evidence="superfamily" />
+	  </match>
+	</interpro>
+	<interpro id="noIPR" name="unintegrated" type="unintegrated">
+	  <match id="PTHR11945" name="FAMILY NOT NAMED" dbname="PANTHER">
+	    <location start="2" end="234" score="8.7e-189" status="T" evidence="HMMPanther" />
+	  </match>
+	  <match id="PTHR11945:SF63" name=" MADS-BOX PROTEIN (AGL45)" dbname="PANTHER">
+	    <location start="2" end="234" score="8.7e-189" status="T" evidence="HMMPanther" />
+	  </match>
+	</interpro>
+   </protein>
+
+</interpro_matches>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/export_iprscan_to_Excel/testfiles/Galaxy16-[Interproscan_calculation_on_data_3]_input.raw	Tue Mar 05 04:00:19 2013 -0500
@@ -0,0 +1,8 @@
+MADS-box_protein__AGL45_	5518E4F05E8B7962	249	superfamily	SSF55455	SRF-like	2	96	4.3e-25	T	22-jun-2012	IPR002100	Transcription factor, MADS-box	Molecular Function: DNA binding (GO:0003677), Molecular Function: protein dimerization activity (GO:0046983)
+MADS-box_protein__AGL45_	5518E4F05E8B7962	249	FPrintScan	PR00404	MADSDOMAIN	3	23	1.3e-10	T	22-jun-2012	IPR002100	Transcription factor, MADS-box	Molecular Function: DNA binding (GO:0003677), Molecular Function: protein dimerization activity (GO:0046983)
+MADS-box_protein__AGL45_	5518E4F05E8B7962	249	FPrintScan	PR00404	MADSDOMAIN	23	38	1.3e-10	T	22-jun-2012	IPR002100	Transcription factor, MADS-box	Molecular Function: DNA binding (GO:0003677), Molecular Function: protein dimerization activity (GO:0046983)
+MADS-box_protein__AGL45_	5518E4F05E8B7962	249	FPrintScan	PR00404	MADSDOMAIN	38	59	1.3e-10	T	22-jun-2012	IPR002100	Transcription factor, MADS-box	Molecular Function: DNA binding (GO:0003677), Molecular Function: protein dimerization activity (GO:0046983)
+MADS-box_protein__AGL45_	5518E4F05E8B7962	249	HMMSmart	SM00432	no description	1	60	3.4e-24	T	22-jun-2012	IPR002100	Transcription factor, MADS-box	Molecular Function: DNA binding (GO:0003677), Molecular Function: protein dimerization activity (GO:0046983)
+MADS-box_protein__AGL45_	5518E4F05E8B7962	249	HMMPanther	PTHR11945:SF63	 MADS-BOX PROTEIN (AGL45)	2	234	8.7e-189	T	22-jun-2012	NULL	NULL
+MADS-box_protein__AGL45_	5518E4F05E8B7962	249	HMMPanther	PTHR11945	FAMILY NOT NAMED	2	234	8.7e-189	T	22-jun-2012	NULL	NULL
+MADS-box_protein__AGL45_	5518E4F05E8B7962	249	HMMPfam	PF00319	SRF-TF	11	50	5.2e-15	T	22-jun-2012	IPR002100	Transcription factor, MADS-box	Molecular Function: DNA binding (GO:0003677), Molecular Function: protein dimerization activity (GO:0046983)
Binary file export_iprscan_to_Excel/testfiles/example_Excel_output_file.xlsx has changed