Data Silo Detection

ClawSkills 作者 datadrivenconstruction v2.1.0

Detect and map data silos in construction organizations. Identify disconnected data sources and integration opportunities.

源码 ↗

安装 / 下载方式

TotalClaw CLI推荐
totalclaw install clawskills:datadrivenconstruction~data-silo-detection
cURL直接下载,无需登录
curl -fsSL https://skills.taituai.com/api/skills/clawskills%3Adatadrivenconstruction~data-silo-detection/file -o data-silo-detection.md
Git 仓库获取源码
git clone https://github.com/openclaw/skills && git -C skills checkout eedb51b141f504a27c835665a978fd54e10a96d4
# Data Silo Detection

## Overview

Based on DDC methodology (Chapter 1.2), this skill detects and maps data silos in construction organizations, identifying disconnected data sources, duplicate data, and integration opportunities.

**Book Reference:** "Технологии и системы управления в современном строительстве" / "Technologies and Management Systems in Modern Construction"

## Quick Start

```python
from dataclasses import dataclass, field
from enum import Enum
from typing import List, Dict, Optional, Set, Tuple
from datetime import datetime
import json
from collections import defaultdict

class DataDomain(Enum):
    """Construction data domains.

    Closed set of business areas a data source or silo can belong to.
    Each member's value is the lowercase form of its name.
    """
    DESIGN = "design"
    COST = "cost"
    SCHEDULE = "schedule"
    QUALITY = "quality"
    SAFETY = "safety"
    PROCUREMENT = "procurement"
    SITE = "site"
    DOCUMENT = "document"
    FINANCIAL = "financial"
    HR = "hr"

class SiloSeverity(Enum):
    """Severity level of a data silo.

    Members are declared from most to least severe. The values are plain
    strings, so members do not compare by severity on their own.
    """
    CRITICAL = "critical"      # Major business impact
    HIGH = "high"              # Significant inefficiency
    MEDIUM = "medium"          # Noticeable issues
    LOW = "low"                # Minor inconvenience

class DataSourceType(Enum):
    """Types of data sources.

    Describes the kind of system or medium in which a data source lives.
    Each member's value is the lowercase form of its name.
    """
    DATABASE = "database"
    SPREADSHEET = "spreadsheet"
    FILE_SHARE = "file_share"
    CLOUD_APP = "cloud_app"
    DESKTOP_APP = "desktop_app"
    PAPER = "paper"
    EMAIL = "email"
    PERSONAL = "personal"  # NOTE(review): presumably individually held data - confirm

@dataclass
class DataSource:
    """Represents a data source in the organization.

    The first eight fields are required; the remaining fields have defaults
    and may be omitted when constructing an instance.
    """
    id: str  # used as the node key when the connectivity graph is built
    name: str
    type: DataSourceType
    domain: DataDomain
    owner: str
    department: str
    users: List[str]
    data_entities: List[str]  # presumably names of the entities stored here - verify
    connections: List[str] = field(default_factory=list)  # presumably ids of linked sources - verify
    update_frequency: str = "unknown"  # free-form string; no fixed vocabulary is defined here
    access_level: str = "department"  # personal, department, organization
    has_api: bool = False
    last_modified: Optional[datetime] = None  # None when no modification time is known

@dataclass
class DataSilo:
    """Detected data silo.

    One finding produced by the silo detector: the sources involved, how
    severe the finding is, and suggested remediation.
    """
    id: str
    sources: List[DataSource]  # the data sources that make up this silo
    domain: DataDomain
    severity: SiloSeverity
    issue_type: str  # free-form label; allowed values are not defined in this block
    description: str
    impact: str
    affected_users: int  # presumably a head count - verify against the detector
    affected_processes: List[str]
    recommendations: List[str]
    estimated_cost: Optional[float] = None  # units/currency not specified here

@dataclass
class DuplicateData:
    """Detected duplicate data across sources.

    Records one entity that is held in more than one data source.
    """
    entity_name: str
    sources: List[str]  # presumably ids or names of the sources holding the entity - verify
    discrepancy_rate: float  # 0-1
    master_source: Optional[str] = None  # None when no master source has been designated
    issues: List[str] = field(default_factory=list)  # fresh list per instance

@dataclass
class SiloAnalysis:
    """Complete silo analysis results.

    Aggregate report returned by ``DataSiloDetector.detect_silos``.
    """
    organization: str  # organization name as passed to detect_silos
    analysis_date: datetime  # set from datetime.now() (naive local time) by detect_silos
    total_sources: int  # number of data sources that were analyzed
    silos_detected: List[DataSilo]  # silos after prioritization
    duplicates: List[DuplicateData]
    connectivity_score: float  # scale is defined by the detector, not in this block
    data_flow_gaps: List[Dict]  # dict schema is defined by the detector, not in this block
    priority_actions: List[str]
    integration_roadmap: Dict  # structure is defined by the detector, not in this block


class DataSiloDetector:
    """
    Detect and analyze data silos in construction organizations.
    Based on DDC methodology Chapter 1.2.
    """

    def __init__(self):
        """Build the static lookup tables the detector works from."""
        relationships = self._define_domain_relationships()
        shared_entities = self._define_critical_entities()

        # Expected domain-to-domain links, keyed by DataDomain.
        self.domain_relationships = relationships
        # Entity name -> domains that are expected to share that entity.
        self.critical_entities = shared_entities

    def _define_domain_relationships(self) -> Dict[DataDomain, List[DataDomain]]:
        """Return the expected relationships between domains.

        Returns:
            A new dict mapping a domain to a new list of the domains it is
            expected to be related to. Key order and list order follow the
            table below.

        NOTE: DataDomain.DOCUMENT and DataDomain.HR appear only as related
        domains; they have no key of their own in the returned mapping.
        """
        d = DataDomain
        # (domain, related domains) pairs; declaration order is preserved.
        expected_links = (
            (d.DESIGN, (d.COST, d.SCHEDULE, d.PROCUREMENT, d.QUALITY)),
            (d.COST, (d.DESIGN, d.SCHEDULE, d.FINANCIAL, d.PROCUREMENT)),
            (d.SCHEDULE, (d.DESIGN, d.COST, d.SITE, d.HR)),
            (d.PROCUREMENT, (d.COST, d.DESIGN, d.SITE, d.FINANCIAL)),
            (d.SITE, (d.SCHEDULE, d.SAFETY, d.QUALITY, d.HR)),
            (d.QUALITY, (d.DESIGN, d.SITE, d.DOCUMENT)),
            (d.SAFETY, (d.SITE, d.HR, d.DOCUMENT)),
            (d.FINANCIAL, (d.COST, d.PROCUREMENT, d.HR)),
        )
        return {domain: list(related) for domain, related in expected_links}

    def _define_critical_entities(self) -> Dict[str, List[DataDomain]]:
        """Return the entities that should be shared across domains.

        Returns:
            A new dict mapping an entity name to a new list of the domains
            expected to share that entity.
        """
        d = DataDomain
        # (entity name, sharing domains) pairs; declaration order is preserved.
        shared_entities = [
            ("project", [d.DESIGN, d.COST, d.SCHEDULE]),
            ("budget", [d.COST, d.FINANCIAL, d.PROCUREMENT]),
            ("schedule", [d.SCHEDULE, d.SITE, d.PROCUREMENT]),
            ("material", [d.DESIGN, d.COST, d.PROCUREMENT]),
            ("labor", [d.HR, d.COST, d.SCHEDULE]),
            ("subcontractor", [d.PROCUREMENT, d.COST, d.SCHEDULE]),
            ("rfi", [d.DESIGN, d.DOCUMENT, d.SITE]),
            ("change_order", [d.COST, d.DESIGN, d.SCHEDULE]),
        ]
        return dict(shared_entities)

    def detect_silos(
        self,
        organization: str,
        data_sources: List[DataSource],
        process_flows: Optional[List[Dict]] = None
    ) -> SiloAnalysis:
        """
        Run the full silo analysis for an organization.

        Builds the connectivity graph, runs each detector over the sources,
        prioritizes the combined silo findings, and packages everything
        into a single report.

        Args:
            organization: Organization name, copied into the report
            data_sources: Data sources to analyze
            process_flows: Optional business process flows, forwarded
                unchanged to the flow-gap detector

        Returns:
            SiloAnalysis stamped with the current local time
        """
        graph = self._build_connectivity_graph(data_sources)

        # Silo findings: sources without connections, then per-domain silos.
        isolated = self._detect_isolated_sources(data_sources, graph)
        per_domain = self._detect_domain_silos(data_sources)

        # Supporting findings that feed the actions and the roadmap.
        duplicate_findings = self._detect_duplicates(data_sources)
        gaps = self._detect_flow_gaps(data_sources, process_flows)
        score = self._calculate_connectivity_score(data_sources, graph)

        # Isolated-source silos come before domain silos in the list
        # handed to the prioritizer.
        ranked_silos = self._prioritize_silos(isolated + per_domain)

        actions = self._generate_priority_actions(
            ranked_silos, duplicate_findings
        )
        roadmap = self._create_integration_roadmap(ranked_silos, gaps)

        report = SiloAnalysis(
            organization=organization,
            analysis_date=datetime.now(),
            total_sources=len(data_sources),
            silos_detected=ranked_silos,
            duplicates=duplicate_findings,
            connectivity_score=score,
            data_flow_gaps=gaps,
            priority_actions=actions,
            integration_roadmap=roadmap,
        )
        return report

    def _build_connectivity_graph(
        self,
        sources: List[DataSource]
    ) -> Dict[str, Set[str]]:
        """Build graph of source connections"""
        graph = defaultdict(set)

        for source in sources:
            for connection in source.connections:
                graph[source.id].a