annotate Gtf.py @ 21:884ee2a71680 draft

planemo upload for repository https://github.com/goeckslab/hub-archive-creator commit 6ac76e7da539ca1773fb809054679f0bf8a06972-dirty
author yating-l
date Wed, 12 Apr 2017 15:05:33 -0400
parents 0152500d9acd
children 2677f1899aa8
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
fb5e60d4d18a planemo upload for repository https://github.com/goeckslab/hub-archive-creator commit 64cfc08088d11f6818c1b4e5514ef9e67969eaff-dirty
rmarenco
parents:
diff changeset
1 #!/usr/bin/python
fb5e60d4d18a planemo upload for repository https://github.com/goeckslab/hub-archive-creator commit 64cfc08088d11f6818c1b4e5514ef9e67969eaff-dirty
rmarenco
parents:
diff changeset
2
fb5e60d4d18a planemo upload for repository https://github.com/goeckslab/hub-archive-creator commit 64cfc08088d11f6818c1b4e5514ef9e67969eaff-dirty
rmarenco
parents:
diff changeset
3 import os
fb5e60d4d18a planemo upload for repository https://github.com/goeckslab/hub-archive-creator commit 64cfc08088d11f6818c1b4e5514ef9e67969eaff-dirty
rmarenco
parents:
diff changeset
4 import tempfile
fb5e60d4d18a planemo upload for repository https://github.com/goeckslab/hub-archive-creator commit 64cfc08088d11f6818c1b4e5514ef9e67969eaff-dirty
rmarenco
parents:
diff changeset
5
fb5e60d4d18a planemo upload for repository https://github.com/goeckslab/hub-archive-creator commit 64cfc08088d11f6818c1b4e5514ef9e67969eaff-dirty
rmarenco
parents:
diff changeset
6 # Internal dependencies
fb5e60d4d18a planemo upload for repository https://github.com/goeckslab/hub-archive-creator commit 64cfc08088d11f6818c1b4e5514ef9e67969eaff-dirty
rmarenco
parents:
diff changeset
7 from Datatype import Datatype
fb5e60d4d18a planemo upload for repository https://github.com/goeckslab/hub-archive-creator commit 64cfc08088d11f6818c1b4e5514ef9e67969eaff-dirty
rmarenco
parents:
diff changeset
8 from util import subtools
fb5e60d4d18a planemo upload for repository https://github.com/goeckslab/hub-archive-creator commit 64cfc08088d11f6818c1b4e5514ef9e67969eaff-dirty
rmarenco
parents:
diff changeset
9
19
0152500d9acd Uploaded
rmarenco
parents: 17
diff changeset
class InfoModifiedGtf(object):
    """Report describing whether a GTF had to be altered, and which lines were dropped.

    Attributes:
        is_modified: True when at least one line of the input GTF was removed.
        array_modified_lines: 1-based line numbers of the removed lines.
    """

    def __init__(self, is_modified=False, array_modified_lines=None):
        """Build a report.

        :param is_modified: initial value of the "file was modified" flag.
        :param array_modified_lines: list of affected line numbers. When omitted,
            a brand new empty list is created for this instance. (A literal ``[]``
            default would be shared by every instance, so line numbers appended
            for one GTF would leak into the report of the next one.)
        """
        self.is_modified = is_modified
        if array_modified_lines is None:
            array_modified_lines = []
        self.array_modified_lines = array_modified_lines

    def get_str_modified_lines(self):
        """Return the affected line numbers as a comma-separated string."""
        return ','.join(map(str, self.array_modified_lines))
1
fb5e60d4d18a planemo upload for repository https://github.com/goeckslab/hub-archive-creator commit 64cfc08088d11f6818c1b4e5514ef9e67969eaff-dirty
rmarenco
parents:
diff changeset
17
fb5e60d4d18a planemo upload for repository https://github.com/goeckslab/hub-archive-creator commit 64cfc08088d11f6818c1b4e5514ef9e67969eaff-dirty
rmarenco
parents:
diff changeset
class Gtf( Datatype ):
    """Datatype converting a GTF file into a bigGenePred (bigBed) track.

    The conversion pipeline run by the constructor is:
    GTF -> genePred -> bigGenePred (text) -> sorted bigGenePred -> bigBed.
    """

    def __init__( self, input_gtf_false_path, data_gtf):
        """Convert the GTF and register the resulting track.

        :param input_gtf_false_path: path of the GTF file to convert.
        :param data_gtf: dictionary providing the "name", "order_index",
            "track_color" and "group_name" entries of the track.
        """
        super(Gtf, self).__init__()

        self.track = None

        self.input_gtf_false_path = input_gtf_false_path
        self.name_gtf = data_gtf["name"]
        self.priority = data_gtf["order_index"]
        self.track_color = data_gtf["track_color"]
        # TODO: Think about how to avoid repetition of the group_name everywhere
        self.group_name = data_gtf["group_name"]

        # TODO: See if we need these temporary files as part of the generated files
        # Only the *names* of these files are handed to the conversion tools;
        # nothing is written through the Python handles, so no buffering option
        # is needed (the former bufsize=0 argument only exists in Python 2).
        genePredFile = tempfile.NamedTemporaryFile(suffix=".genePred")
        unsorted_bigGenePred_file = tempfile.NamedTemporaryFile(suffix=".unsorted.bigGenePred")
        sorted_bigGenePred_file = tempfile.NamedTemporaryFile(suffix=".sortedBed.bigGenePred")

        # GtfToGenePred
        ## Checking the integrity of the inputs
        # NOTE: this replaces self.input_gtf_false_path by a filtered copy
        modified_gtf = self._checkAndFixGtf()

        ## Processing the gtf
        subtools.gtfToGenePred(self.input_gtf_false_path, genePredFile.name)

        # TODO: From there, refactor because common use with Gff3.py
        # genePredToBigGenePred processing
        subtools.genePredToBigGenePred(genePredFile.name, unsorted_bigGenePred_file.name)

        # Sort processing
        subtools.sort(unsorted_bigGenePred_file.name, sorted_bigGenePred_file.name)

        # bedToBigBed processing
        trackName = "".join( ( self.name_gtf, ".bb") )

        auto_sql_option = os.path.join(self.tool_directory, 'bigGenePred.as')

        myBigBedFilePath = os.path.join(self.myTrackFolderPath, trackName)

        # The file is opened only to create it and to obtain its name;
        # the content itself is produced by bedToBigBed.
        with open(myBigBedFilePath, 'w') as bigBedFile:
            subtools.bedToBigBed(sorted_bigGenePred_file.name,
                                 self.chromSizesFile.name,
                                 bigBedFile.name,
                                 autoSql=auto_sql_option,
                                 typeOption='bed12+8',
                                 tab=True)

        # Create the Track Object
        self.createTrack(file_path=trackName,
                         track_name=trackName,
                         long_label=self.name_gtf, track_type='bigGenePred',
                         visibility='dense', priority=self.priority,
                         track_file=myBigBedFilePath,
                         track_color=self.track_color,
                         group_name=self.group_name)

        # TODO: Use Logging instead of print
        if modified_gtf.is_modified:
            print("- Warning: Gtf %s created with a modified version of your Gtf because of start/end coordinates issues."
                  % self.name_gtf)
            print("Here are the lines removed: " + modified_gtf.get_str_modified_lines())
        else:
            print("- Gtf %s created" % self.name_gtf)

    def _checkAndFixGtf(self):
        """Check the coordinates of the GTF and write a cleaned copy of it.

        Every feature line is checked against the chrom.sizes file: it is kept
        only when its start is > 0 and its end is <= the size of its scaffold.
        Lines failing the check, or naming a scaffold absent from chrom.sizes,
        are removed. Comment lines (starting with '#') and blank lines are
        copied unchanged.

        Truncating the feature to the end of the scaffold, depending on a user
        choice, is not implemented yet: offending lines are always removed.

        Side effect: self.input_gtf_false_path is replaced by the path of the
        cleaned temporary copy.

        :return: an InfoModifiedGtf telling whether lines were removed, and
            holding the 1-based numbers of the removed lines.
        :raises IndexError: if a feature line has fewer than 5 fields.
        :raises ValueError: if a size or a coordinate is not an integer.
        """
        # A fresh list is passed explicitly so that the report never shares
        # its list of line numbers with another report.
        modified_gtf = InfoModifiedGtf(is_modified=False, array_modified_lines=[])

        # TODO: Get the user choice and use it
        # Get the chrom.sizes into a dictionary to have a faster access
        # TODO: Think about doing this in Datatype.py, so everywhere we have access to this read-only dictionary
        dict_chrom_sizes = {}
        with open(self.chromSizesFile.name, 'r') as chromSizes:
            for line in chromSizes:
                fields = line.split()
                if len(fields) < 2:
                    # Blank or incomplete line: nothing to register
                    continue
                # fields[0] should be the name of the scaffold
                # fields[1] should be the size of the scaffold
                # TODO: Ensure this is true for all lines
                # Stored as an integer: comparing the raw strings would be
                # lexicographic ("9" > "10") and give wrong results.
                dict_chrom_sizes[fields[0]] = int(fields[1])

        # Parse the GTF and check each line using the chrom sizes dictionary.
        # The temporary file is written through its own handle and closed by
        # the "with" block; delete=False keeps it on disk for the next steps.
        with tempfile.NamedTemporaryFile(mode='w', suffix=".gtf", delete=False) as temp_gtf:
            with open(self.input_gtf_false_path, 'r') as gtf:
                for index, line in enumerate(gtf):
                    fields = line.split()

                    # Comments and blank lines carry no coordinates: copy them
                    if not fields or line.startswith('#'):
                        self._writeGtfLine(temp_gtf, line)
                        continue

                    # We are interested in fields[0] => Seqname (scaffold)
                    # We are interested in fields[3] => Start of the feature
                    # We are interested in fields[4] => End of the feature
                    scaffold_size = dict_chrom_sizes.get(fields[0])
                    start_position = int(fields[3])
                    end_position = int(fields[4])

                    is_valid = (scaffold_size is not None
                                and start_position > 0
                                and end_position <= scaffold_size)

                    if is_valid:
                        # We are good, so we copy this line
                        self._writeGtfLine(temp_gtf, line)
                    else:
                        # TODO: Process the user choice
                        # By default, we are assuming the user choice is to remove the lines: We don't copy it
                        # We save the line for the feedback to the user
                        modified_gtf.array_modified_lines.append(index + 1)
                        modified_gtf.is_modified = True

        # Once the process is completed, we just replace the path of the gtf
        self.input_gtf_false_path = temp_gtf.name

        # TODO: Manage the issue with the fact the dataset is going to still exist on the disk because of delete=False

        return modified_gtf

    @staticmethod
    def _writeGtfLine(output, line):
        """Write one GTF line to output, guaranteeing a single trailing newline.

        Lines read from a file already end with a newline (except possibly
        the very last one), so a separator is only added when it is missing.
        """
        output.write(line)
        if not line.endswith('\n'):
            output.write('\n')