Spaces:

CatmanJr
/

YOLO11-earth

Sleeping

App Files Community

CatmanJr committed on Mar 16

Commit

903ab5f

verified ·

1 Parent(s): 6aee1db

Update app.py

Browse files

Files changed (1) hide show

app.py +19 -88

app.py CHANGED Viewed

@@ -3,15 +3,10 @@ import cv2
 import numpy as np
 from ultralytics import YOLO
-# 加载YOLO模型import gradio as gr
-import cv2
-import numpy as np
-from ultralytics import YOLO
-# 加载YOLO模型
-model = YOLO('yolo11s-earth.pt')  # 加载你的模型
-# 默认类别
 default_classes = [
     'airplane', 'airport', 'baseballfield', 'basketballcourt', 'bridge',
     'chimney', 'dam', 'Expressway-Service-area', 'Expressway-toll-station',
@@ -21,29 +16,29 @@ default_classes = [
 ]
 def process_frame(frame, classes_input):
-    # 处理用户输入的类别
     if classes_input and classes_input.strip():
         classes_list = [cls.strip() for cls in classes_input.split(',')]
-        model.set_classes(classes_list)  # 设置模型的类别
     else:
-        # 如果没有输入或输入为空，则使用默认类别
         model.set_classes(default_classes)
-    # 复制帧为可写数组
     frame = frame.copy()
-    # 调整图像大小以加快处理速度（可选）
     h, w = frame.shape[:2]
-    new_size = (1024, int(h * (1024 / w))) if w > h else (int(w * (1024 / h)), 1024)
     resized_frame = cv2.resize(frame, new_size)
-    # 转换图像格式
     rgb_frame = cv2.cvtColor(resized_frame, cv2.COLOR_BGR2RGB)
-    # 使用模型进行检测
     results = model.predict(rgb_frame)
-    # 绘制检测结果
     for result in results:
         boxes = result.boxes
         for box in boxes:
@@ -52,90 +47,26 @@ def process_frame(frame, classes_input):
             cls = box.cls[0]
             class_name = model.names[int(cls)]
-            # 调整坐标到原始图像大小
             x1 = int(x1 * w / new_size[0])
             y1 = int(y1 * h / new_size[1])
             x2 = int(x2 * w / new_size[0])
             y2 = int(y2 * h / new_size[1])
-            # 绘制边界框和标签
             cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
             cv2.putText(frame, f'{class_name}:{conf:.2f}', (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (36, 255, 12), 2)
     return frame
 def main():
-    # 创建Gradio界面
-    with gr.Blocks() as demo:
-        gr.Markdown("# YOLO11s-Earth open vocabulary detection（DIOR finetuning）")
-        with gr.Row():
-            cam_input = gr.Image(type="numpy", sources=["webcam"], streaming=True, label="Webcam")
-            classes_input = gr.Textbox(label="new classes（逗号分隔）", placeholder="exp：airplane, airport, tennis court")
-        output = gr.Image(label="results", type="numpy")
-        cam_input.stream(
-            process_frame,
-            inputs=[cam_input, classes_input],
-            outputs=output
-        )
-    # 启动Gradio应用
-    demo.launch()
-if __name__ == "__main__":
-    main()
-model = YOLO('yolo11s-earth.pt')  # 加载你的模型
-# 默认类别
-default_classes = [
-    'airplane', 'airport', 'baseballfield', 'basketballcourt', 'bridge',
-    'chimney', 'dam', 'Expressway-Service-area', 'Expressway-toll-station',
-    'golffield', 'groundtrackfield', 'harbor', 'overpass', 'ship',
-    'stadium', 'storagetank', 'tenniscourt', 'trainstation', 'vehicle',
-    'windmill'
-]
-def process_frame(frame, classes_input):
-    # 将输入的类别字符串转为列表
-    if classes_input:
-        classes_list = [cls.strip() for cls in classes_input.split(',')]
-        model.set_classes(classes_list)  # 设置模型的类别
-    else:
-        # 如果没有输入，则使用默认类别
-        model.set_classes(default_classes)
-    # 复制帧为可写数组
-    frame = frame.copy()
-    # 转换图像格式
-    rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
-    # 使用模型进行检测
-    results = model.predict(rgb_frame)
-    # 绘制检测结果
-    for result in results:
-        boxes = result.boxes
-        for box in boxes:
-            x1, y1, x2, y2 = box.xyxy[0]
-            conf = box.conf[0]
-            cls = box.cls[0]
-            class_name = model.names[int(cls)]
-            # 绘制边界框和标签
-            cv2.rectangle(frame, (int(x1), int(y1)), (int(x2), int(y2)), (0, 255, 0), 2)
-            cv2.putText(frame, f'{class_name}:{conf:.2f}', (int(x1), int(y1) - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (36, 255, 12), 2)
-    return frame
-def main():
-    # 创建Gradio界面
     with gr.Blocks() as demo:
-        gr.Markdown("# YOLO11s-Earth open vocabulary detection（DIOR finetuning）")
         with gr.Row():
             cam_input = gr.Image(type="numpy", sources=["webcam"], streaming=True, label="Webcam")
-            classes_input = gr.Textbox(label="new classes（逗号分隔）", placeholder="exp：airplane, airport, tennis court")
-        output = gr.Image(label="results", type="numpy", height=800)
         cam_input.stream(
             process_frame,
@@ -143,7 +74,7 @@ def main():
             outputs=output
         )
-    # 启动Gradio应用
     demo.launch()
 if __name__ == "__main__":

 import numpy as np
 from ultralytics import YOLO
+# Load YOLO model
+model = YOLO('yolo11s-earth.pt')  # Load your model
+# Default classes
 default_classes = [
     'airplane', 'airport', 'baseballfield', 'basketballcourt', 'bridge',
     'chimney', 'dam', 'Expressway-Service-area', 'Expressway-toll-station',
 ]
 def process_frame(frame, classes_input):
+    # Process user input classes
     if classes_input and classes_input.strip():
         classes_list = [cls.strip() for cls in classes_input.split(',')]
+        model.set_classes(classes_list)  # Set model classes
     else:
+        # Use default classes if no input or input is empty
         model.set_classes(default_classes)
+    # Copy frame to a writable array
     frame = frame.copy()
+    # Resize image to speed up processing (optional)
     h, w = frame.shape[:2]
+    new_size = (640, int(h * (640 / w))) if w > h else (int(w * (640 / h)), 640)
     resized_frame = cv2.resize(frame, new_size)
+    # Convert image format
     rgb_frame = cv2.cvtColor(resized_frame, cv2.COLOR_BGR2RGB)
+    # Use model for detection
     results = model.predict(rgb_frame)
+    # Draw detection results
     for result in results:
         boxes = result.boxes
         for box in boxes:
             cls = box.cls[0]
             class_name = model.names[int(cls)]
+            # Adjust coordinates to original image size
             x1 = int(x1 * w / new_size[0])
             y1 = int(y1 * h / new_size[1])
             x2 = int(x2 * w / new_size[0])
             y2 = int(y2 * h / new_size[1])
+            # Draw bounding box and label
             cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
             cv2.putText(frame, f'{class_name}:{conf:.2f}', (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (36, 255, 12), 2)
     return frame
 def main():
+    # Create Gradio interface
     with gr.Blocks() as demo:
+        gr.Markdown("# YOLO11s-Earth open vocabulary detection (DIOR finetuning)")
         with gr.Row():
             cam_input = gr.Image(type="numpy", sources=["webcam"], streaming=True, label="Webcam")
+            classes_input = gr.Textbox(label="New classes (comma-separated)", placeholder="e.g.: airplane, airport, tennis court")
+        output = gr.Image(label="Results", type="numpy", height=480)  # Set height to 480
         cam_input.stream(
             process_frame,
             outputs=output
         )
+    # Launch Gradio app
     demo.launch()
 if __name__ == "__main__":