# HG changeset patch # User xuebing # Date 1333213697 14400 # Node ID 3ee42ab930b63877a742a722603eecc435e5af09 # Parent 8fcf33cf58be55ea87270050a4fd34a3294dce25 Deleted selected files diff -r 8fcf33cf58be -r 3ee42ab930b6 bedClean.py --- a/bedClean.py Sat Mar 31 13:03:54 2012 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,43 +0,0 @@ -import sys - -def readChrSize(filename): - f = open(filename) - chrSize = {} - for line in f: - chrom,size = line.strip().split() - chrSize[chrom]=int(size) - f.close() - return chrSize - -def cleanFile(filename,chrSize,outfile): - f = open(filename) - out = open(outfile,'w') - i = 0 - for line in f: - i = i + 1 - flds = line.strip().split('\t') - if len(flds) < 3: - print 'line',i,'incomplete line:\n',line - elif chrSize.has_key(flds[0]): - if int(flds[1]) > int(flds[2]): - tmp = flds[1] - flds[1] = flds[2] - flds[2] = tmp - if int( flds[1]) < 0 or int(flds[2]) <0: - print 'line',i,'negative coordinates:\n',line - elif int(flds[2]) > chrSize[flds[0]]: - print 'line',i,'end larger than chr size:\n',line - else: - if flds[5] == '*': - flds[5] = '+' - print 'line',i,' strand * changed to +\n', line - out.write('\t'.join(flds)+'\n') - else: - print 'line',i,'chromosome',flds[0],'not found!\n',line - f.close() - out.close() - -if len(sys.argv) < 4: - print "python bedClean.py in.bed chrsizefile out.bed" - exit() -cleanFile(sys.argv[1],readChrSize(sys.argv[2]),sys.argv[3]) diff -r 8fcf33cf58be -r 3ee42ab930b6 bedclean.xml --- a/bedclean.xml Sat Mar 31 13:03:54 2012 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,35 +0,0 @@ - - remove off-chromosome lines - bedclean.py $input $genome $output > $log - - - - - - - - - - - - - - - - - -**Description** - -remove lines that are - -1. comment or track name lines - -2. on chr*_random - -3. or have negative coordinates - -4. or the end is larger than chromosome size - - - -