image-to-data
Extract data from construction images using AI Vision. Analyze site photos, scanned documents, drawings.
安装 / 下载方式
TotalClaw CLI推荐
totalclaw install github:LeoYeAI~openclaw-master-skills~image-to-data
cURL 直接下载,无需登录
curl -fsSL https://skills.taituai.com/api/skills/github%3ALeoYeAI~openclaw-master-skills~image-to-data/file -o image-to-data.md
# Image To Data
## Overview
Based on DDC methodology (Chapter 2.4), this skill extracts structured data from construction images using computer vision, OCR, and AI models to analyze site photos, scanned documents, and drawings.
**Book Reference:** "Преобразование данных в структурированную форму" / "Data Transformation to Structured Form"
## Quick Start
```python
from dataclasses import dataclass, field
from enum import Enum
from typing import List, Dict, Optional, Any, Tuple
from datetime import datetime
import json
import base64
class ImageType(Enum):
    """Kinds of construction imagery an analysis result can be tagged with.

    Every member's value is the lowercase form of its name, so a member can
    be looked up from its string value (``ImageType("floor_plan")``).
    """

    # Photographs
    SITE_PHOTO = "site_photo"
    # Scans and drawings
    SCANNED_DOCUMENT = "scanned_document"
    FLOOR_PLAN = "floor_plan"
    ELEVATION = "elevation"
    DETAIL_DRAWING = "detail_drawing"
    # Purpose-specific photographs
    PROGRESS_PHOTO = "progress_photo"
    SAFETY_PHOTO = "safety_photo"
    DEFECT_PHOTO = "defect_photo"
    MATERIAL_PHOTO = "material_photo"
    EQUIPMENT_PHOTO = "equipment_photo"
class ExtractionType(Enum):
    """Kinds of data extraction that can be requested for an image.

    Every member's value is the lowercase form of its name.
    """

    OCR_TEXT = "ocr_text"
    TABLE = "table"
    OBJECT_DETECTION = "object_detection"
    MEASUREMENT = "measurement"
    CLASSIFICATION = "classification"
    PROGRESS = "progress"
@dataclass
class BoundingBox:
    """Rectangular region of an image in which something was detected.

    Attributes:
        x: Horizontal coordinate of the box origin (presumably the left edge
            in pixels; confirm against the producing model).
        y: Vertical coordinate of the box origin (presumably the top edge
            in pixels; confirm against the producing model).
        width: Horizontal extent of the box.
        height: Vertical extent of the box.
        confidence: Detection confidence for the region; defaults to 1.0
            when the caller does not supply one.
    """

    x: int
    y: int
    width: int
    height: int
    confidence: float = 1.0
@dataclass
class TextRegion:
    """A piece of text recognised in an image, with its location.

    Attributes:
        text: The recognised text.
        bbox: Where in the image the text was found.
        confidence: Confidence reported for the recognised text.
        language: Language code attached to the region; defaults to "en".
    """

    text: str
    bbox: BoundingBox
    confidence: float
    language: str = "en"
@dataclass
class DetectedObject:
    """An object detected in an image.

    Attributes:
        label: Class label of the detected object.
        bbox: Where in the image the object was found.
        confidence: Confidence reported for the detection.
        attributes: Free-form extra information about the object. Each
            instance gets its own empty dict unless one is supplied.
    """

    label: str
    bbox: BoundingBox
    confidence: float
    attributes: Dict[str, Any] = field(default_factory=dict)
@dataclass
class ExtractedTable:
    """A table recovered from an image.

    Attributes:
        headers: Column header strings.
        rows: Table body; each row is a list of cell strings.
        bbox: Where in the image the table was found.
        confidence: Confidence reported for the extraction.
    """

    headers: List[str]
    rows: List[List[str]]
    bbox: BoundingBox
    confidence: float
@dataclass
class ProgressMeasurement:
    """Progress figures derived from an image for one kind of element.

    Attributes:
        element_type: Name of the element kind being measured.
        total_count: Total number of elements.
        completed_count: Number of elements counted as complete.
        percent_complete: Completion percentage. Stored exactly as given;
            this class does not compute it from the two counts.
        area_sqft: Optional area figure (named for square feet).
        volume_cuft: Optional volume figure (named for cubic feet).
    """

    element_type: str
    total_count: int
    completed_count: int
    percent_complete: float
    area_sqft: Optional[float] = None
    volume_cuft: Optional[float] = None
@dataclass
class ImageAnalysisResult:
    """Everything extracted from a single image.

    Attributes:
        image_id: Identifier of the analysed image.
        image_type: Kind of image that was analysed.
        text_regions: Text regions recognised in the image.
        detected_objects: Objects detected in the image.
        tables: Tables recovered from the image.
        progress: Optional progress measurement; ``None`` when absent.
        metadata: Free-form extra information. Each instance gets its own
            empty dict unless one is supplied.
        processing_time: Time spent on the analysis; defaults to 0.0
            (unit not established here; presumably seconds).
    """

    image_id: str
    image_type: ImageType
    text_regions: List[TextRegion]
    detected_objects: List[DetectedObject]
    tables: List[ExtractedTable]
    progress: Optional[ProgressMeasurement] = None
    metadata: Dict[str, Any] = field(default_factory=dict)
    processing_time: float = 0.0
class OCREngine:
    """OCR engine for text extraction.

    The extraction is simulated: ``extract_text`` returns a fixed set of
    drawing title-block regions and never inspects the image bytes. In
    production, back it with a real OCR library or service (pytesseract,
    EasyOCR, or a cloud OCR service).
    """

    def __init__(self, engine: str = "tesseract"):
        """Record the backend name and the list of supported language codes.

        Args:
            engine: Name of the OCR backend; only stored on the instance.
        """
        self.engine = engine
        self.supported_languages = ["en", "ru", "de", "fr", "es"]

    def extract_text(
        self,
        image_data: bytes,
        language: str = "en"
    ) -> List[TextRegion]:
        """Extract text regions from an image.

        Args:
            image_data: Raw image bytes. Ignored by the simulated extraction.
            language: Language code copied onto every returned region. It is
                not checked against ``supported_languages``.

        Returns:
            A new list of three simulated title-block regions, in
            top-to-bottom order (project, drawing number, scale).
        """
        # Simulated OCR output: (text, x, y, width, height, confidence).
        # The same confidence is used for the region and its bounding box.
        simulated = (
            ("PROJECT: OFFICE BUILDING", 100, 50, 300, 30, 0.95),
            ("DRAWING: A-101", 100, 90, 200, 25, 0.92),
            ("SCALE: 1:100", 100, 120, 150, 20, 0.88),
        )
        return [
            TextRegion(
                text=text,
                bbox=BoundingBox(
                    x=x, y=y, width=width, height=height, confidence=conf
                ),
                confidence=conf,
                language=language,
            )
            for text, x, y, width, height, conf in simulated
        ]

    def extract_structured_text(
        self,
        image_data: bytes,
        template: Optional[Dict] = None
    ) -> Dict[str, str]:
        """Extract named fields from the text regions of an image.

        Args:
            image_data: Raw image bytes, passed through to ``extract_text``.
            template: Optional mapping of field name to a config dict with a
                ``"keyword"`` entry. When given (and non-empty), each field
                is set to the full text of the first region containing its
                keyword, compared case-insensitively. Entries whose keyword
                is missing, empty, or not a string are skipped.

        Returns:
            Mapping of field name to extracted text. Without a template, the
            keys are ``project_name``, ``drawing_number`` and ``scale`` for
            regions labelled ``PROJECT:``, ``DRAWING:`` and ``SCALE:``, and
            each value is the stripped text that follows the label.
        """
        regions = self.extract_text(image_data)
        structured: Dict[str, str] = {}

        if template:
            for field_name, field_config in template.items():
                keyword = field_config.get("keyword")
                # A missing keyword used to raise TypeError (None in str);
                # such entries simply cannot match anything.
                if not isinstance(keyword, str) or not keyword:
                    continue
                # Region text is lowercased for comparison, so the keyword
                # must be lowercased too or uppercase keywords never match.
                needle = keyword.lower()
                for region in regions:
                    if needle in region.text.lower():
                        structured[field_name] = region.text
                        break
            return structured

        # Default extraction: the first label found in a region wins.
        default_labels = (
            ("PROJECT:", "project_name"),
            ("DRAWING:", "drawing_number"),
            ("SCALE:", "scale"),
        )
        for region in regions:
            for label, key in default_labels:
                # Take everything after the label itself rather than after
                # the last colon, so values such as "1:100" stay intact.
                _, found, value = region.text.partition(label)
                if found:
                    structured[key] = value.strip()
                    break
        return structured
class ObjectDetector:
"""Object detection for construction images"""
def __init__(self, model: str = "yolov8"):
    """Record the model name and build the construction class table.

    Args:
        model: Name of the detection model; only stored on the instance.
    """
    self.model = model
    # Built once per instance; maps a class label to its attribute dict.
    self.construction_classes = self._load_construction_classes()
def _load_construction_classes(self) -> Dict[str, Dict]:
    """Build the table of construction-specific object classes.

    Returns:
        A new dict on every call, mapping each class label to a dict of
        attributes. Every entry has a ``"category"``; equipment entries add
        ``"safety_zone"``, PPE entries add ``"required"``, material entries
        add ``"unit"``, and the worker entry adds ``"track"``.
    """
    return {
        # Equipment (safety_zone units are not stated here; presumably a
        # clearance distance, confirm with the consumer)
        "excavator": {"category": "equipment", "safety_zone": 20},
        "crane": {"category": "equipment", "safety_zone": 30},
        "forklift": {"category": "equipment", "safety_zone": 10},
        "concrete_mixer": {"category": "equipment", "safety_zone": 5},
        "scaffolding": {"category": "equipment", "safety_zone": 5},
        # Personal protective equipment
        "hard_hat": {"category": "ppe", "required": True},
        "safety_vest": {"category": "ppe", "required": True},
        "safety_glasses": {"category": "ppe", "required": False},
        "harness": {"category": "ppe", "required": False},
        # Materials
        "rebar_bundle": {"category": "material", "unit": "bundle"},
        "concrete_block": {"category": "material", "unit": "pallet"},
        "lumber_stack": {"category": "material", "unit": "bundle"},
        "pipe_stack": {"category": "material", "unit": "bundle"},
        # Workers
        "worker": {"category": "person", "track": True},
        # Building elements
        "column": {"category": "structure"},
        "beam": {"category": "structure"},
        "slab": {"category": "structure"},
        "wall": {"category": "structure"},
    }
def detect(
    self,
    image_data: bytes,
    confidence_threshold: float = 0.5
) -> List[DetectedObject]:
    """Detect construction objects in an image.

    The detection is simulated: a fixed set of sample detections is
    filtered by the threshold and the image bytes are never inspected. In
    production, back it with a real model (YOLO, Faster R-CNN, etc.).

    Args:
        image_data: Raw image bytes. Ignored by the simulated detection.
        confidence_threshold: Minimum confidence (inclusive) a sample
            detection needs to be returned.

    Returns:
        Detections at or above the threshold, in sample order. Each one
        carries its own copy of the matching entry from
        ``self.construction_classes`` (an empty dict for unknown labels).
    """
    # Simulated detections: (label, confidence, bounding box).
    sample_detections = [
        ("worker", 0.92, BoundingBox(200, 300, 80, 180, 0.92)),
        ("hard_hat", 0.88, BoundingBox(210, 300, 30, 25, 0.88)),
        ("safety_vest", 0.85, BoundingBox(210, 340, 60, 80, 0.85)),
        ("scaffolding", 0.78, BoundingBox(400, 100, 200, 400, 0.78)),
        ("concrete_block", 0.72, BoundingBox(50, 450, 100, 50, 0.72)),
    ]
    return [
        DetectedObject(
            label=label,
            bbox=bbox,
            confidence=conf,
            # Copy the class entry: handing out the shared dict would let a
            # caller editing one detection's attributes corrupt the class
            # table and every other detection with the same label.
            attributes=dict(self.construction_classes.get(label, {})),
        )
        for label, conf, bbox in sample_detections
        if conf >= confidence_threshold
    ]
def detect_s