Mercurial > repos > proteore > proteore_venn_diagram

--- a/venn_diagram.py	Wed Aug 29 05:03:49 2018 -0400
+++ b/venn_diagram.py	Tue Dec 18 10:06:31 2018 -0500
@@ -5,7 +5,7 @@
 import json
 import operator
 import argparse
-import re
+import re, csv
 from itertools import combinations

 CURRENT_DIR = os.path.dirname(os.path.abspath(__file__))
@@ -62,6 +62,7 @@
             file_content = inputs[i][0].split()

         ids.update(file_content)
+        if 'NA' in ids : ids.remove('NA')
         comp_dict[title] = ids

     return comp_dict, title_dict
@@ -105,36 +106,37 @@

     return result

+#Write intersections of input to text output file
 def write_text_venn(json_result):
-    """
-    Write intersections of input to text output file
-    """
-    output = open("venn_diagram_text_output.txt", "w")
-    string = ""
     lines = []
     result = dict((k, v) for k, v in json_result["data"].iteritems() if v != [])
-    max_count = max(len(v) for v in result.values())
-    for i in range(max_count):
-        lines.append("")
-
-    for i in range(max_count):
-        header = ""
-        for d in range(len(result.keys())):
-            data = result.keys()[d]
-            name = "_".join([json_result["name"][x] for x in data])
-            header += name + "\t"
-            if len(result[data]) > i:
-                print("a", result[data][i])
-                lines[i] += result[data][i] + "\t"
-            else:
-                lines[i] += "\t"
-    # Strip last tab in the end of the lines
-    header = header.rstrip()
-    lines = [line.rstrip() for line in lines]
-    string += header + "\n"
-    string += "\n".join(lines)
-    output.write(string)
-    output.close()
+    for key in result :
+        if 'NA' in result[key] : result[key].remove("NA")
+    list_names = dict((k, v) for k, v in json_result["name"].iteritems() if v != [])
+    nb_lines_max = max(len(v) for v in result.values())
+
+    #get list names associated to each column
+    column_dict = {}
+    for key in result :
+        if key in list_names :
+            column_dict[key] = list_names[key]
+        else :
+            keys= list(key)
+            column_dict[key] = "_".join([list_names[k] for k in keys])
+
+    #construct tsv
+    for key in result :
+        line = [column_dict[key]]
+        line.extend(result[key])
+        if len(line) < nb_lines_max :
+            line.extend(['NA']*(nb_lines_max-len(line)))
+        lines.append(line)
+    #transpose tsv
+    lines=zip(*lines)
+
+    with open("venn_diagram_text_output.tsv", "w") as output:
+        tsv_output = csv.writer(output, delimiter='\t')
+        tsv_output.writerows(lines)

 def write_summary(summary_file, inputs):
     """
--- a/venn_diagram.xml	Wed Aug 29 05:03:49 2018 -0400
+++ b/venn_diagram.xml	Tue Dec 18 10:06:31 2018 -0500
@@ -1,6 +1,6 @@
 <?xml version="1.0"?>
-<tool id="Jvenn" name="JVenn diagram" version="1.0">
-	<description>processes lists and produces Venn diagrams
+<tool id="Jvenn" name="Venn diagram" version="2018.12.18">
+	<description>[JVenn]
 	</description>
 	<command><![CDATA[
 		python $__tool_directory__/venn_diagram.py
@@ -17,20 +17,20 @@
 	]]></command>
 	<inputs>
 		<!-- Files -->
-		<repeat name="series" title="Lists to compare" min="2" max="6" >
+		<repeat name="series" title="List to compare" min="2" max="6" >
             <conditional name="se" >
-                    <param type="select" name="input" label="Please provide your identifiers" help="Copy/paste or ID list from a file (e.g. table)" >
-                        <option value="list">Copy/paste your identifiers </option>
-                        <option value="file" selected="true">Input file containing your identifiers</option>
+                    <param type="select" name="input" label="Enter your list" help="Copy/paste or from a file (e.g. table)" >
+                        <option value="list">Copy/paste list </option>
+                        <option value="file" selected="true">Input file containing your list</option>
                     </param>
                     <when value="file">
-                        <param type="data" name="file" format="txt,tabular" label="Choose a file that contains your list of IDs" />
-                        <param name="header" type="boolean" checked="true" truevalue="true" falsevalue="false" label="Does your input file contain header?" />
-		            	<param type="text" name="ncol" value="c1" label="Please specify the column where you would like to apply the comparison" help ='For example, fill in "c1" if you want to filter the first column' />
-		            	<param type="text" name="name" value="" label="Please enter the name of this list" help="This name will be displayed on venn diagram" />
+                        <param type="data" name="file" format="txt,tabular" label="Select your file" />
+                        <param name="header" type="boolean" checked="true" truevalue="true" falsevalue="false" label="Does file contain header?" />
+		            	<param type="text" name="ncol" value="c1" label="Column number on which apply the comparison" help ='For example, fill in "c1" if you want to filter the first column' />
+		            	<param type="text" name="name" value="" label="Enter the name of this list" help="This name will be displayed on venn diagram" />
                     </when>
                     <when value="list">
-                        <param type="text" name="list" label="Copy/paste your identifiers" help='IDs must be separated by spaces into the form field, for example: P31946 P62258' >
+                        <param type="text" name="list" label="Copy/paste list" help='IDs must be separated by spaces into the form field, for example: P31946 P62258' >
                             <sanitizer>
                                 <valid initial="string.printable">
                                     <remove value="&apos;"/>
@@ -40,14 +40,14 @@
                                 </mapping>
                             </sanitizer>
                         </param>
-                        <param type="text" name="name" value="" label="Please enter the name of this list" help="This name will be displayed on venn diagram" />
+                        <param type="text" name="name" value="" label="Enter the name of this list" help="This name will be displayed on venn diagram" />
                     </when>
                 </conditional>
 		</repeat>
 	</inputs>
 	<outputs>
 		<data format="html" name="output_summary" label="Venn diagram" from_work_dir="venn_diagram_summary.html" />
-		<data format="tabular" name="output_text" label="Venn diagram text output" from_work_dir="venn_diagram_text_output.txt" />
+		<data format="tsv" name="output_text" label="Venn diagram text output" from_work_dir="venn_diagram_text_output.tsv" />
 	</outputs>
 	<tests>
 	    <test>
@@ -82,42 +82,51 @@
 	        <output name="output_text" file="Venn_text_output.txt" />
 	    </test>
 	</tests>
-	<help>
+	<help><![CDATA[
+
+**Description**

-This tool draw a venn diagram from lists/files using Jvenn plug-in (http://jvenn.toulouse.inra.fr/app/index.html). It also creates output files that contain common or specific elements between query and each compared lists/files.
+This tool cross-compares several lists sharing a common key (e.g. IDs) and draws a Venn diagram from the lists/files using the Jvenn plug-in (see Authors below).
+It also creates an output text file that contains the common or specific elements between the query and each compared list/file.
+
+-----

 **Inputs**

-* **Query file:** A file containing different information of proteins, could be output of previous components.
+The input can be either a list entered in copy/paste mode or a single- or multi-column file (txt, tsv, csv, tab, output from other tools); up to six lists/files can be compared.

-* **File of a list of IDs:** .TXT format, each line contains 1 ID
-
-  AMY1A
+* **List of IDs in a copy/paste mode:** IDs have to be separated by a space (e.g. AMY1A ALB IGKC CSTA IGHA1 ACTG1)
+
+* **ID list file:** for example a file in .txt format, with 1 ID per line

-  ALB
-
-  IGKC
+AMY1A
+
+ALB

-  CSTA
+IGKC

-  IGHA1
+CSTA

-  ACTG1
-
-* **List of IDs:** IDs separated by a space
+IGHA1
+
+If you use a file as input list, it is necessary to specify the column number on which to apply the comparison.

-  AMY1A ALB IGKC CSTA IGHA1 ACTG1
+-----

-If you choose a file, it is necessary to specify the column where you would like to perform the comparison.
+**Parameter**
+
+"Enter the name of this list": each list or set should be named; this information will be reported in both outputs (see below)

-**Outputs**
+-----
+
+**Output**

-* **Summary file** (venn_diagram_summary.html):
-    Venn diagram: Could be downloaded as image (PNG, SVG)
+Two outputs are generated:
+
+* **Graphical file**: Venn diagram that you can either display (interactive mode) or download as image (PNG, SVG format)
+
+* **Venn text output file**: a text file containing the elements specific to, or shared by, the lists/files.

-* **Venn text output file**
-    A text file containing common/specific elements among compared lists/files.
-
 -----

 .. class:: infomark
@@ -135,8 +144,7 @@
 Sandra Dérozier, Olivier Rué, Christophe Caron, Valentin Loux INRA, Paris-Saclay University, MAIAGE Unit,Migale Bioinformatics platform

 Contact support@proteore.org for any questions or concerns about the Galaxy implementation of this tool.
-
-	</help>
-	<citations>
-    </citations>
+]]></help>
+   <citations>
+   </citations>
 </tool>