Mercurial > repos > bgruening > doclayoutyolo
comparison segment_text_yolo.py @ 0:28b4dc80d58b draft default tip
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/image_processing/yolo-utils/doclayoutyolo commit 80167f52fb9bd60b57e4df9d68152876171228d6
| author | bgruening |
|---|---|
| date | Fri, 13 Jun 2025 14:54:15 +0000 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:28b4dc80d58b |
|---|---|
| 1 """ | |
| 2 Segment text using DocLayout Yolo model | |
| 3 """ | |
| 4 | |
| 5 import argparse | |
| 6 import json | |
| 7 import os | |
| 8 | |
| 9 import cv2 | |
| 10 from doclayout_yolo import YOLOv10 | |
| 11 from geojson import Feature, FeatureCollection | |
| 12 from shapely.geometry import box, mapping | |
| 13 | |
| 14 | |
def load_model_and_predict(
    model_path, input_image_path, input_confidence, image_size, output_image_path
):
    """
    Run the DocLayout Yolo model on one image and save the annotated result.

    Args:
        model_path: Path to the Yolo model file passed to ``YOLOv10``.
        input_image_path: Path of the image to run the prediction on.
        input_confidence: Confidence threshold; converted with ``float``.
        image_size: Inference image size; converted with ``int``.
        output_image_path: Path the annotated image is written to with
            ``cv2.imwrite``.

    Returns:
        The first element of the prediction results.

    Raises:
        OSError: If ``cv2.imwrite`` reports that the image was not written.
    """
    model = YOLOv10(model=model_path)

    det_res = model.predict(
        input_image_path, imgsz=int(image_size), conf=float(input_confidence)
    )
    first_result = det_res[0]
    annotated_frame = first_result.plot(pil=True, line_width=5, font_size=20)

    # cv2.imwrite reports a failed write through its boolean return value
    # rather than by raising, so check it to avoid silently producing no
    # output image.
    if not cv2.imwrite(output_image_path, annotated_frame):
        raise OSError(f"Could not write annotated image to {output_image_path}")
    return first_result
| 27 | |
| 28 | |
def extract_bb_crop(results, output_segmentation_coordiates):
    """
    Write the detected bounding boxes to a GeoJSON file.

    Each row of ``results.boxes.xyxy`` is unpacked into ``x1, y1, x2, y2``,
    turned into a rectangular polygon and stored as a feature whose ``id``
    property is the position of the box in the results.

    Args:
        results: Prediction result exposing ``boxes.xyxy``.
        output_segmentation_coordiates: Path of the GeoJSON file to write.
    """
    corner_rows = results.boxes.xyxy.cpu().numpy()

    features = [
        Feature(geometry=mapping(box(x1, y1, x2, y2)), properties={"id": index})
        for index, (x1, y1, x2, y2) in enumerate(corner_rows)
    ]
    collection = FeatureCollection(features)

    with open(output_segmentation_coordiates, "w") as handle:
        json.dump(collection, handle)
| 45 | |
| 46 | |
if __name__ == "__main__":
    # (short flag, long flag, help text) for every required command line option,
    # registered in this order.
    cli_options = (
        ("-im", "--yolo_model", "Input Yolo model"),
        ("-ii", "--input_image", "Input image file"),
        ("-ie", "--input_image_ext", "Input image file extension"),
        ("-ic", "--input_confidence", "Input confidence"),
        ("-is", "--input_image_size", "Input image size"),
        ("-oi", "--output_image", "Output image"),
        ("-ogj", "--output_geojson", "Output segmented coordinates"),
    )
    arg_parser = argparse.ArgumentParser()
    for short_flag, long_flag, help_text in cli_options:
        arg_parser.add_argument(short_flag, long_flag, required=True, help=help_text)
    options = arg_parser.parse_args()

    input_ext = options.input_image_ext

    # Fixed local names that are symlinked to the paths given on the command
    # line; the image links carry the input file extension.
    # NOTE(review): presumably done so the libraries see a usable file
    # extension - confirm against the tool wrapper.
    model_link = "yolo_model.pt"
    input_image = f"input_image.{input_ext}"
    output_image = f"output_image.{input_ext}"

    link_pairs = (
        (options.yolo_model, model_link),
        (options.input_image, input_image),
        (options.output_image, output_image),
    )
    for target_path, link_name in link_pairs:
        os.symlink(target_path, link_name)

    # Predict on the linked image, then export the detected boxes as GeoJSON.
    segmented_image = load_model_and_predict(
        model_link,
        input_image,
        options.input_confidence,
        options.input_image_size,
        output_image,
    )
    extract_bb_crop(segmented_image, options.output_geojson)
