Phy Test Data Factory
Schema-driven test data factory generator. Reads your database schema or model definitions — Prisma schema, SQLAlchemy models, Django models, TypeORM entities, Zod schemas, Pydantic models, or raw SQL DDL — and generates ready-to-use factory functions with realistic fake data. Outputs TypeScript factory files using Faker.js, Python conftest.py using factory_boy + Faker, or raw SQL INSERT seed scripts. Respects foreign key relationships (seeds parents before children), handles enums, nullable fields, unique constraints, and generates edge-case variants (empty strings, max-length values, boundary dates). Zero external API — pure local file analysis + code generation. Triggers on "generate test data", "seed database", "test fixtures", "factory functions", "fake data from schema", "/test-data-factory".
安装 / 下载方式
```bash
totalclaw install clawskills:phy041~phy-test-data-factory
curl -fsSL https://skills.taituai.com/api/skills/clawskills%3Aphy041~phy-test-data-factory/file -o phy-test-data-factory.md
git clone https://github.com/openclaw/skills/commit/6d9f5f72d8553b784b186fe07d479da8a1094d4d
```
# Test Data Factory
Writing test setup is slower than writing the test itself. You have a `User` model with 12 fields, a `Post` model that requires a User, and an `Order` model that requires both. Every test file re-invents the same `createTestUser()` boilerplate — with slightly different hardcoded values that don't cover edge cases.
Paste your schema and get a complete factory module: realistic Faker-powered defaults for every field, relationship-aware ordering, and one-line overrides for specific test scenarios.
**Reads any schema format. Outputs TypeScript, Python, or SQL. Zero external APIs.**
---
## Trigger Phrases
- "generate test data", "seed my database", "test fixtures"
- "factory functions", "fake data from schema", "test data setup"
- "create test factories", "Faker from schema", "factory_boy setup"
- "generate seed data", "populate test database"
- "I need fake users/orders/products for testing"
- "/test-data-factory"
---
## How to Provide Input
```bash
# Option 1: Prisma schema
/test-data-factory schema.prisma
/test-data-factory prisma/schema.prisma
# Option 2: SQLAlchemy / Django models file
/test-data-factory models.py
/test-data-factory app/models.py
# Option 3: TypeORM entities directory
/test-data-factory src/entities/
# Option 4: Zod schemas file
/test-data-factory src/schemas/user.schema.ts
# Option 5: Raw SQL DDL
/test-data-factory --sql migrations/001_initial.sql
# Option 6: Output format override
/test-data-factory schema.prisma --output typescript
/test-data-factory models.py --output python
/test-data-factory schema.prisma --output sql
# Option 7: Include edge-case variants
/test-data-factory schema.prisma --edge-cases
# Option 8: Specific count
/test-data-factory schema.prisma --count 50
```
---
## Step 1: Detect and Parse Schema
### Prisma Schema Parser
```python
import re
from dataclasses import dataclass, field
from typing import Any
@dataclass
class PrismaField:
name: str
type: str
is_optional: bool = False
is_list: bool = False
is_id: bool = False
is_unique: bool = False
is_auto: bool = False
default: Any = None
relation: str | None = None
enum_values: list[str] = field(default_factory=list)
def parse_prisma_schema(schema_text: str) -> dict:
"""Parse Prisma schema into model definitions."""
models = {}
enums = {}
# Parse enums first
for enum_match in re.finditer(r'enum\s+(\w+)\s*\{([^}]+)\}', schema_text, re.DOTALL):
enum_name = enum_match.group(1)
values = [v.strip() for v in enum_match.group(2).split('\n')
if v.strip() and not v.strip().startswith('//')]
enums[enum_name] = values
# Parse models
for model_match in re.finditer(r'model\s+(\w+)\s*\{([^}]+)\}', schema_text, re.DOTALL):
model_name = model_match.group(1)
body = model_match.group(2)
fields = []
for line in body.split('\n'):
line = line.strip()
if not line or line.startswith('//') or line.startswith('@@'):
continue
# Parse field: name type? modifiers
parts = line.split()
if len(parts) < 2:
continue
fname = parts[0]
ftype_raw = parts[1]
is_optional = ftype_raw.endswith('?')
is_list = ftype_raw.endswith('[]')
ftype = ftype_raw.rstrip('?').rstrip('[]')
is_id = '@id' in line
is_unique = '@unique' in line
is_auto = '@default(autoincrement())' in line or '@default(auto())' in line or '@default(uuid())' in line or '@default(cuid())' in line
is_relation = '@relation' in line
default_match = re.search(r'@default\((.+?)\)', line)
default_val = default_match.group(1) if default_match else None
fields.append(PrismaField(
name=fname,
type=ftype,
is_optional=is_optional,
is_list=is_list,
is_id=is_id,
is_unique=is_unique,
is_auto=is_auto,
default=default_val,
relation=ftype if is_relation and ftype[0].isupper() else None,
enum_values=enums.get(ftype, []),
))
models[model_name] = fields
return {'models': models, 'enums': enums}
```
### SQL DDL Parser
```python
# First words that introduce a table-level constraint rather than a column.
_SQL_CONSTRAINT_KEYWORDS = frozenset(
    {'PRIMARY', 'FOREIGN', 'UNIQUE', 'INDEX', 'KEY', 'CONSTRAINT', 'CHECK'}
)


def _strip_sql_comments(sql_text: str) -> str:
    """Remove ``/* ... */`` and ``-- ...`` comments from *sql_text*.

    Comment markers inside string literals are not special-cased.
    """
    without_block = re.sub(r'/\*.*?\*/', ' ', sql_text, flags=re.DOTALL)
    return re.sub(r'--[^\n]*', '', without_block)


def _find_closing_paren(text: str, open_index: int) -> int:
    """Return the index of the ``)`` matching ``text[open_index]``, or -1."""
    depth = 0
    quote = None
    for i in range(open_index, len(text)):
        ch = text[i]
        if quote:
            if ch == quote:
                quote = None
        elif ch in '\'"`':
            quote = ch
        elif ch == '(':
            depth += 1
        elif ch == ')':
            depth -= 1
            if depth == 0:
                return i
    return -1


def _split_top_level_commas(text: str) -> list[str]:
    """Split *text* on commas that are outside parentheses and quotes."""
    pieces = []
    depth = 0
    quote = None
    start = 0
    for i, ch in enumerate(text):
        if quote:
            if ch == quote:
                quote = None
        elif ch in '\'"`':
            quote = ch
        elif ch == '(':
            depth += 1
        elif ch == ')':
            depth -= 1
        elif ch == ',' and depth == 0:
            pieces.append(text[start:i])
            start = i + 1
    pieces.append(text[start:])
    return pieces


def parse_sql_ddl(sql_text: str) -> dict:
    """Parse CREATE TABLE statements into model definitions.

    Args:
        sql_text: SQL text containing zero or more ``CREATE TABLE`` statements.

    Returns:
        ``{'models': {table_name: [PrismaField, ...]}, 'enums': {}}``. Column
        types are upper-cased; ``enums`` is always empty for SQL input.

    Notes:
        * The column list is found by matching parentheses, so statements do
          not need a terminating ``;`` and trailing table options are ignored.
        * Columns are split on top-level commas only, keeping types such as
          ``DECIMAL(10,2)`` and composite keys intact.
        * A clause is treated as a constraint only when its first word is a
          constraint keyword, so ``unique_code`` or ``key_id`` are columns.
        * Primary-key columns (inline or via a table-level ``PRIMARY KEY``
          clause) get ``is_id=True`` and are never optional.
    """
    models = {}
    # Comments may contain commas/parentheses that would confuse the scanners.
    sql_text = _strip_sql_comments(sql_text)

    header = re.compile(
        r'CREATE\s+TABLE\s+(?:IF\s+NOT\s+EXISTS\s+)?'
        r'(?:[`"]?\w+[`"]?\.)?'  # optional schema qualifier
        r'[`"]?(\w+)[`"]?\s*\(',
        re.IGNORECASE,
    )
    column_pattern = re.compile(
        r'[`"]?(\w+)[`"]?\s+(\w+)(\([^)]*\))?(.*)$',
        re.IGNORECASE | re.DOTALL,  # DOTALL: a definition may span lines
    )
    pk_clause = re.compile(r'\bPRIMARY\s+KEY\s*\(([^)]*)\)', re.IGNORECASE)

    for table_match in header.finditer(sql_text):
        open_index = table_match.end() - 1
        close_index = _find_closing_paren(sql_text, open_index)
        if close_index == -1:
            continue  # unbalanced statement; nothing reliable to parse
        table_name = table_match.group(1)
        body = sql_text[open_index + 1:close_index]

        # Pass 1: separate column clauses from constraints and collect the
        # table-level primary key, which usually comes after the columns.
        pk_columns = set()
        column_clauses = []
        for clause in _split_top_level_commas(body):
            clause = clause.strip()
            if not clause:
                continue
            first_word = re.match(r'\w+', clause)
            if first_word and first_word.group(0).upper() in _SQL_CONSTRAINT_KEYWORDS:
                pk_match = pk_clause.search(clause)
                if pk_match:
                    for part in pk_match.group(1).split(','):
                        col = re.match(r'\s*[`"]?(\w+)', part)
                        if col:
                            pk_columns.add(col.group(1).lower())
                continue
            column_clauses.append(clause)

        # Pass 2: build one field per column clause.
        fields = []
        for clause in column_clauses:
            col_match = column_pattern.match(clause)
            if not col_match:
                continue
            fname = col_match.group(1)
            ftype = col_match.group(2).upper()
            rest = col_match.group(4).upper()

            is_id = (
                fname.lower() in pk_columns
                or re.search(r'\bPRIMARY\s+KEY\b', rest) is not None
            )
            is_nullable = not is_id and re.search(r'\bNOT\s+NULL\b', rest) is None
            is_auto = (
                'SERIAL' in ftype
                or re.search(r'\bAUTO_?INCREMENT\b|\bIDENTITY\b', rest) is not None
            )
            is_unique = re.search(r'\bUNIQUE\b', rest) is not None

            fields.append(PrismaField(
                name=fname,
                type=ftype,
                is_optional=is_nullable,
                is_id=is_id,
                is_auto=is_auto,
                is_unique=is_unique,
            ))
        models[table_name] = fields

    return {'models': models, 'enums': {}}
```
---
## Step 2: Map Types to Faker Functions
```python
# Prisma/TypeScript type → Faker.js function
FAKER_JS_MAP = {
# Primitives
'String': 'faker.lorem.words(3)',
'Int': 'faker.number.int({ min: 1, max: 10000 })',
'Float': 'faker.number.float({ min: 0, max: 1000, fractionDigits: 2 })',
'Boolean': 'faker.datatype.boolean()',
'DateTime': 'faker.date.recent({ days: 30 })',
'BigInt': 'BigInt(faker.number.int({ min: 1, max: 1000000 }))',
'Json': '{}',
'Bytes': 'Buffer.from(faker.string.alphanumeric(16))',
# Semantic overrides (based on field name)
'email': 'faker.internet.email()',
'name': 'faker.person.fullName()',
'firstName': 'faker.person.firstName()',
'lastName': 'faker.person.lastName()',
'username': 'faker.internet.username()',
'password': 'faker.internet.password({ length: 12 })',
'phone': 'faker.phone.number()',
'address': 'faker.location.streetAddress()',
'city': 'faker.location.city()',
'country': 'faker.location.country()',
'zipCode': 'faker.location.zipCode()',
'url': 'faker.internet.url()',
'imageUrl': 'faker.image.url()',
'avatar': 'faker.image.avatar()',
'bio': 'faker.lorem.paragraph()',
'description': 'faker.lorem.sentences(2)',
'title': 'faker.lorem.sentence()',
'slug': 'faker.helpers.slugify(faker.lorem.words(3))',
'color': 'faker.color.human()',
'uuid': 'faker.string.uuid()',
'ip': 'faker.internet.ip()',
'createdAt': 'faker.date.past({ years: 1 })',
'updatedAt': 'new Date()',
'deletedAt': 'null',
'publishedAt': 'faker.date.recent({ days: 90 })',
'price': 'faker.number.float({ min: 0.99, max: 999.99, fractionDigits: 2 })',
'amount': 'faker.number.int({ min: 1, max: 10000 })',
'quantity': 'faker.number.int({ min: 1, max: 100 })',
'score': 'faker.number.float({ min: 0, max: 5, fractionDigits: 1 })',
'rating': 'faker.number.int({ min: 1, max: 5 })',
'status': None, # replaced by enum values
'role': None, # replaced by enum values
'type'