image-to-data

GitHub 作者 LeoYeAI/openclaw-master-skills

Extract data from construction images using AI Vision: analyze site photos, scanned documents, and drawings.

安装 / 下载方式

TotalClaw CLI（推荐）

totalclaw install github:LeoYeAI~openclaw-master-skills~image-to-data

cURL（直接下载，无需登录）

curl -fsSL https://skills.taituai.com/api/skills/github%3ALeoYeAI~openclaw-master-skills~image-to-data/file -o image-to-data.md

# Image To Data

## Overview

Based on the DDC methodology (Chapter 2.4), this skill extracts structured data from construction images, using computer vision, OCR, and AI models to analyze site photos, scanned documents, and drawings.

**Book Reference:** "Преобразование данных в структурированную форму" / "Data Transformation to Structured Form"

## Quick Start

```python
from dataclasses import dataclass, field
from enum import Enum
from typing import List, Dict, Optional, Any, Tuple
from datetime import datetime
import json
import base64

class ImageType(Enum):
    """Category of a construction image.

    Every member's value is the lowercase string identifier of the category.
    """

    # Photos and scans
    SITE_PHOTO = "site_photo"
    SCANNED_DOCUMENT = "scanned_document"

    # Drawings
    FLOOR_PLAN = "floor_plan"
    ELEVATION = "elevation"
    DETAIL_DRAWING = "detail_drawing"

    # Purpose-specific photos
    PROGRESS_PHOTO = "progress_photo"
    SAFETY_PHOTO = "safety_photo"
    DEFECT_PHOTO = "defect_photo"
    MATERIAL_PHOTO = "material_photo"
    EQUIPMENT_PHOTO = "equipment_photo"

class ExtractionType(Enum):
    """Kind of data extraction that can be requested for an image.

    Every member's value is the lowercase string identifier of the kind.
    """

    # Text-oriented extraction
    OCR_TEXT = "ocr_text"
    TABLE = "table"

    # Vision-oriented extraction
    OBJECT_DETECTION = "object_detection"
    MEASUREMENT = "measurement"
    CLASSIFICATION = "classification"
    PROGRESS = "progress"

@dataclass
class BoundingBox:
    """Rectangular region of an image in which something was detected."""

    # Position of the box
    x: int
    y: int

    # Extent of the box
    width: int
    height: int

    # Detection confidence for the region; defaults to 1.0 when not supplied
    confidence: float = 1.0

@dataclass
class TextRegion:
    """A piece of text recovered from an image, with its location."""

    # The recognised text
    text: str
    # Where in the image the text was found
    bbox: BoundingBox
    # Confidence of the recognition
    confidence: float
    # Language code of the text; defaults to "en"
    language: str = "en"

@dataclass
class DetectedObject:
    """An object found in an image by a detector."""

    # Class label of the object
    label: str
    # Where in the image the object was found
    bbox: BoundingBox
    # Confidence of the detection
    confidence: float
    # Free-form extra information about the object; each instance gets its
    # own empty dict unless one is supplied
    attributes: Dict[str, Any] = field(default_factory=dict)

@dataclass
class ExtractedTable:
    """A table recovered from an image."""

    # Column header texts
    headers: List[str]
    # Table body: one list of cell texts per row
    rows: List[List[str]]
    # Where in the image the table was found
    bbox: BoundingBox
    # Confidence of the extraction
    confidence: float

@dataclass
class ProgressMeasurement:
    """Progress figures derived from an image for one type of element."""

    # Type of element being counted
    element_type: str
    # Number of elements in total and number already completed
    total_count: int
    completed_count: int
    # Completion percentage, supplied by the caller (not computed here)
    percent_complete: float
    # Optional quantities; None when not measured
    area_sqft: Optional[float] = None
    volume_cuft: Optional[float] = None

@dataclass
class ImageAnalysisResult:
    """Everything extracted from a single image, gathered in one record."""

    # Identification of the analysed image
    image_id: str
    image_type: ImageType

    # Extraction outputs
    text_regions: List[TextRegion]
    detected_objects: List[DetectedObject]
    tables: List[ExtractedTable]

    # Progress figures; None when no progress measurement is attached
    progress: Optional[ProgressMeasurement] = None
    # Free-form extra information; each instance gets its own empty dict
    metadata: Dict[str, Any] = field(default_factory=dict)
    # Processing time for the analysis; defaults to 0.0
    processing_time: float = 0.0


class OCREngine:
    """OCR engine for text extraction"""

    def __init__(self, engine: str = "tesseract"):
        self.engine = engine
        self.supported_languages = ["en", "ru", "de", "fr", "es"]

    def extract_text(
        self,
        image_data: bytes,
        language: str = "en"
    ) -> List[TextRegion]:
        """Extract text from image"""
        # Simulated OCR extraction (use actual OCR library in production)
        # In production: pytesseract, EasyOCR, or cloud OCR services

        regions = []

        # Simulate detecting title block in drawing
        regions.append(TextRegion(
            text="PROJECT: OFFICE BUILDING",
            bbox=BoundingBox(x=100, y=50, width=300, height=30, confidence=0.95),
            confidence=0.95,
            language=language
        ))

        regions.append(TextRegion(
            text="DRAWING: A-101",
            bbox=BoundingBox(x=100, y=90, width=200, height=25, confidence=0.92),
            confidence=0.92,
            language=language
        ))

        regions.append(TextRegion(
            text="SCALE: 1:100",
            bbox=BoundingBox(x=100, y=120, width=150, height=20, confidence=0.88),
            confidence=0.88,
            language=language
        ))

        return regions

    def extract_structured_text(
        self,
        image_data: bytes,
        template: Optional[Dict] = None
    ) -> Dict[str, str]:
        """Extract structured text using template matching"""
        # Extract text regions
        regions = self.extract_text(image_data)

        # Match to template fields
        structured = {}

        if template:
            for field_name, field_config in template.items():
                # Find matching region
                for region in regions:
                    if field_config.get("keyword") in region.text.lower():
                        structured[field_name] = region.text
                        break
        else:
            # Default extraction
            for region in regions:
                if "PROJECT:" in region.text:
                    structured["project_name"] = region.text.split(":")[-1].strip()
                elif "DRAWING:" in region.text:
                    structured["drawing_number"] = region.text.split(":")[-1].strip()
                elif "SCALE:" in region.text:
                    structured["scale"] = region.text.split(":")[-1].strip()

        return structured


class ObjectDetector:
    """Object detection for construction images"""

    def __init__(self, model: str = "yolov8"):
        self.model = model
        self.construction_classes = self._load_construction_classes()

    def _load_construction_classes(self) -> Dict[str, Dict]:
        """Load construction-specific object classes"""
        return {
            # Equipment
            "excavator": {"category": "equipment", "safety_zone": 20},
            "crane": {"category": "equipment", "safety_zone": 30},
            "forklift": {"category": "equipment", "safety_zone": 10},
            "concrete_mixer": {"category": "equipment", "safety_zone": 5},
            "scaffolding": {"category": "equipment", "safety_zone": 5},

            # Safety
            "hard_hat": {"category": "ppe", "required": True},
            "safety_vest": {"category": "ppe", "required": True},
            "safety_glasses": {"category": "ppe", "required": False},
            "harness": {"category": "ppe", "required": False},

            # Materials
            "rebar_bundle": {"category": "material", "unit": "bundle"},
            "concrete_block": {"category": "material", "unit": "pallet"},
            "lumber_stack": {"category": "material", "unit": "bundle"},
            "pipe_stack": {"category": "material", "unit": "bundle"},

            # Workers
            "worker": {"category": "person", "track": True},

            # Building elements
            "column": {"category": "structure"},
            "beam": {"category": "structure"},
            "slab": {"category": "structure"},
            "wall": {"category": "structure"},
        }

    def detect(
        self,
        image_data: bytes,
        confidence_threshold: float = 0.5
    ) -> List[DetectedObject]:
        """Detect objects in image.

        The detection is simulated: a fixed list of sample detections is
        filtered by confidence, regardless of the image bytes. A production
        implementation would run a real model (YOLO, Faster R-CNN, etc.).

        Args:
            image_data: Raw image bytes. Unused by the simulated detection.
            confidence_threshold: Minimum confidence (inclusive) a sample
                detection needs to be returned.

        Returns:
            One ``DetectedObject`` per sample detection that meets the
            threshold, in sample order. ``attributes`` is a copy of the
            label's entry in ``self.construction_classes`` (empty if the
            label is unknown).
        """
        # Simulated detection (use actual model in production)
        sample_detections = [
            ("worker", 0.92, BoundingBox(200, 300, 80, 180, 0.92)),
            ("hard_hat", 0.88, BoundingBox(210, 300, 30, 25, 0.88)),
            ("safety_vest", 0.85, BoundingBox(210, 340, 60, 80, 0.85)),
            ("scaffolding", 0.78, BoundingBox(400, 100, 200, 400, 0.78)),
            ("concrete_block", 0.72, BoundingBox(50, 450, 100, 50, 0.72)),
        ]

        return [
            DetectedObject(
                label=label,
                bbox=bbox,
                confidence=conf,
                # Copy the class entry: handing out the stored dict itself
                # would let a caller who edits one detection's attributes
                # silently change the class table and every later detection.
                attributes=dict(self.construction_classes.get(label, {})),
            )
            for label, conf, bbox in sample_detections
            if conf >= confidence_threshold
        ]

    def detect_s