Data Silo Detection
Detect and map data silos in construction organizations. Identify disconnected data sources and integration opportunities.
安装 / 下载方式
TotalClaw CLI(推荐)
totalclaw install clawskills:datadrivenconstruction~data-silo-detection
cURL直接下载,无需登录
curl -fsSL https://skills.taituai.com/api/skills/clawskills%3Adatadrivenconstruction~data-silo-detection/file -o data-silo-detection.md
Git 仓库获取源码
git clone https://github.com/openclaw/skills/commit/eedb51b141f504a27c835665a978fd54e10a96d4
# Data Silo Detection
## Overview
Based on DDC methodology (Chapter 1.2), this skill detects and maps data silos in construction organizations, identifying disconnected data sources, duplicate data, and integration opportunities.
**Book Reference:** "Технологии и системы управления в современном строительстве" / "Technologies and Management Systems in Modern Construction"
## Quick Start
```python
from dataclasses import dataclass, field
from enum import Enum
from typing import List, Dict, Optional, Set, Tuple
from datetime import datetime
import json
from collections import defaultdict
class DataDomain(Enum):
    """Construction data domains.

    Each member names one functional area that a data source can be
    assigned to. The value of every member is the lowercase form of its
    name, so ``DataDomain("cost") is DataDomain.COST``.
    """

    DESIGN = "design"
    COST = "cost"
    SCHEDULE = "schedule"
    QUALITY = "quality"
    SAFETY = "safety"
    PROCUREMENT = "procurement"
    SITE = "site"
    DOCUMENT = "document"
    FINANCIAL = "financial"
    HR = "hr"
class SiloSeverity(Enum):
    """Severity level of a data silo.

    Members are declared from most to least severe; the value of each
    member is the lowercase form of its name.
    """

    CRITICAL = "critical"  # Major business impact
    HIGH = "high"  # Significant inefficiency
    MEDIUM = "medium"  # Noticeable issues
    LOW = "low"  # Minor inconvenience
class DataSourceType(Enum):
    """Types of data sources.

    Describes the kind of system or medium a data source lives in. The
    value of each member is the lowercase form of its name.
    """

    DATABASE = "database"
    SPREADSHEET = "spreadsheet"
    FILE_SHARE = "file_share"
    CLOUD_APP = "cloud_app"
    DESKTOP_APP = "desktop_app"
    PAPER = "paper"
    EMAIL = "email"
    PERSONAL = "personal"
@dataclass
class DataSource:
    """Represents a data source in the organization.

    The first eight fields are required; the remaining ones have
    defaults so that a source can be described with partial knowledge.
    """

    id: str  # identifier; used as the key when the connectivity graph is built
    name: str
    type: DataSourceType
    domain: DataDomain
    owner: str
    department: str
    users: List[str]
    data_entities: List[str]
    # presumably ids of the sources this one exchanges data with -- confirm
    connections: List[str] = field(default_factory=list)
    update_frequency: str = "unknown"
    access_level: str = "department"  # personal, department, organization
    has_api: bool = False
    last_modified: Optional[datetime] = None
@dataclass
class DataSilo:
    """Detected data silo.

    Groups the sources that form the silo together with its
    classification, a description of the problem and suggested
    remediation. Only ``estimated_cost`` is optional.
    """

    id: str
    sources: List[DataSource]
    domain: DataDomain
    severity: SiloSeverity
    issue_type: str
    description: str
    impact: str
    affected_users: int
    affected_processes: List[str]
    recommendations: List[str]
    estimated_cost: Optional[float] = None  # None when no estimate is available
@dataclass
class DuplicateData:
    """Detected duplicate data across sources.

    Records one entity that is stored in more than one place, together
    with how much the copies disagree.
    """

    entity_name: str
    sources: List[str]
    discrepancy_rate: float  # 0-1
    master_source: Optional[str] = None
    issues: List[str] = field(default_factory=list)
@dataclass
class SiloAnalysis:
    """Complete silo analysis results.

    Container for everything a silo-detection run produces. All fields
    are required; none has a default.
    """

    organization: str
    analysis_date: datetime
    total_sources: int
    silos_detected: List[DataSilo]
    duplicates: List[DuplicateData]
    connectivity_score: float
    data_flow_gaps: List[Dict]
    priority_actions: List[str]
    integration_roadmap: Dict
class DataSiloDetector:
"""
Detect and analyze data silos in construction organizations.
Based on DDC methodology Chapter 1.2.
"""
def __init__(self) -> None:
    """Build the static lookup tables used by the detection passes.

    Stores the expected domain-to-domain relationships in
    ``self.domain_relationships`` and the cross-domain critical
    entities in ``self.critical_entities``.
    """
    self.domain_relationships = self._define_domain_relationships()
    self.critical_entities = self._define_critical_entities()
def _define_domain_relationships(self) -> Dict[DataDomain, List[DataDomain]]:
    """Define expected relationships between domains.

    Returns:
        A mapping from a domain to the list of domains it is expected
        to be related to.
    """
    # NOTE(review): DataDomain.DOCUMENT and DataDomain.HR appear as
    # related domains below but have no key of their own -- confirm
    # that consumers of this mapping tolerate the missing keys.
    return {
        DataDomain.DESIGN: [
            DataDomain.COST, DataDomain.SCHEDULE,
            DataDomain.PROCUREMENT, DataDomain.QUALITY,
        ],
        DataDomain.COST: [
            DataDomain.DESIGN, DataDomain.SCHEDULE,
            DataDomain.FINANCIAL, DataDomain.PROCUREMENT,
        ],
        DataDomain.SCHEDULE: [
            DataDomain.DESIGN, DataDomain.COST,
            DataDomain.SITE, DataDomain.HR,
        ],
        DataDomain.PROCUREMENT: [
            DataDomain.COST, DataDomain.DESIGN,
            DataDomain.SITE, DataDomain.FINANCIAL,
        ],
        DataDomain.SITE: [
            DataDomain.SCHEDULE, DataDomain.SAFETY,
            DataDomain.QUALITY, DataDomain.HR,
        ],
        DataDomain.QUALITY: [
            DataDomain.DESIGN, DataDomain.SITE,
            DataDomain.DOCUMENT,
        ],
        DataDomain.SAFETY: [
            DataDomain.SITE, DataDomain.HR,
            DataDomain.DOCUMENT,
        ],
        DataDomain.FINANCIAL: [
            DataDomain.COST, DataDomain.PROCUREMENT,
            DataDomain.HR,
        ],
    }
def _define_critical_entities(self) -> Dict[str, List[DataDomain]]:
    """Define entities that should be shared across domains.

    Returns:
        A mapping from an entity name to the domains that are expected
        to share that entity.
    """
    return {
        "project": [DataDomain.DESIGN, DataDomain.COST, DataDomain.SCHEDULE],
        "budget": [DataDomain.COST, DataDomain.FINANCIAL, DataDomain.PROCUREMENT],
        "schedule": [DataDomain.SCHEDULE, DataDomain.SITE, DataDomain.PROCUREMENT],
        "material": [DataDomain.DESIGN, DataDomain.COST, DataDomain.PROCUREMENT],
        "labor": [DataDomain.HR, DataDomain.COST, DataDomain.SCHEDULE],
        "subcontractor": [DataDomain.PROCUREMENT, DataDomain.COST, DataDomain.SCHEDULE],
        "rfi": [DataDomain.DESIGN, DataDomain.DOCUMENT, DataDomain.SITE],
        "change_order": [DataDomain.COST, DataDomain.DESIGN, DataDomain.SCHEDULE],
    }
def detect_silos(
    self,
    organization: str,
    data_sources: List[DataSource],
    process_flows: Optional[List[Dict]] = None,
) -> SiloAnalysis:
    """
    Detect data silos in the organization.

    Runs the individual detection passes over ``data_sources`` and
    bundles their results into a single ``SiloAnalysis``.

    Args:
        organization: Organization name, copied into the result.
        data_sources: List of data sources to analyze.
        process_flows: Optional business process flows; forwarded
            unchanged (including ``None``) to the flow-gap detection.

    Returns:
        Complete silo analysis, stamped with ``datetime.now()``.
    """
    # Build connectivity graph
    connectivity = self._build_connectivity_graph(data_sources)

    # Detect isolated sources
    isolated_silos = self._detect_isolated_sources(
        data_sources, connectivity
    )

    # Detect domain silos
    domain_silos = self._detect_domain_silos(data_sources)

    # Detect duplicate data
    duplicates = self._detect_duplicates(data_sources)

    # Detect data flow gaps
    flow_gaps = self._detect_flow_gaps(
        data_sources, process_flows
    )

    # Calculate connectivity score
    connectivity_score = self._calculate_connectivity_score(
        data_sources, connectivity
    )

    # Combine all silos (isolated ones first) before ranking them
    all_silos = isolated_silos + domain_silos

    # Prioritize silos
    prioritized_silos = self._prioritize_silos(all_silos)

    # Generate priority actions
    priority_actions = self._generate_priority_actions(
        prioritized_silos, duplicates
    )

    # Create integration roadmap
    roadmap = self._create_integration_roadmap(
        prioritized_silos, flow_gaps
    )

    return SiloAnalysis(
        organization=organization,
        analysis_date=datetime.now(),
        total_sources=len(data_sources),
        silos_detected=prioritized_silos,
        duplicates=duplicates,
        connectivity_score=connectivity_score,
        data_flow_gaps=flow_gaps,
        priority_actions=priority_actions,
        integration_roadmap=roadmap,
    )
def _build_connectivity_graph(
self,
sources: List[DataSource]
) -> Dict[str, Set[str]]:
"""Build graph of source connections"""
graph = defaultdict(set)
for source in sources:
for connection in source.connections:
graph[source.id].a