data-validation
使用跨语言和格式的架构验证数据。在定义 JSON 架构、使用 Zod (TypeScript) 或 Pydantic (Python)、验证 API 请求/响应形状、检查 CSV/JSON 数据完整性或在服务之间设置数据契约时使用。
安装 / 下载方式
TotalClaw CLI(推荐)
totalclaw install totalclaw:totalclaw~gitgoodordietrying-data-validation
cURL(直接下载,无需登录)
curl -fsSL https://skills.taituai.com/api/skills/totalclaw%3Atotalclaw~gitgoodordietrying-data-validation/file -o gitgoodordietrying-data-validation.md
## 概述(中文)
使用跨语言和格式的架构验证数据。在定义 JSON 架构、使用 Zod (TypeScript) 或 Pydantic (Python)、验证 API 请求/响应形状、检查 CSV/JSON 数据完整性或在服务之间设置数据契约时使用。
## 原文
# Data Validation
Schema-based data validation across languages and formats. Covers JSON Schema, Zod (TypeScript), Pydantic (Python), API boundary validation, data contracts, and integrity checking.
## When to Use
- Defining the shape of API request/response bodies
- Validating user input before processing
- Setting up data contracts between services
- Checking CSV/JSON file integrity before import
- Migrating data (did the ETL preserve everything?)
- Generating types or documentation from schemas
## JSON Schema
### Basic schema
```json
{
"$schema": "https://json-schema.org/draft/2020-12/schema",
"type": "object",
"required": ["name", "email", "age"],
"properties": {
"name": {
"type": "string",
"minLength": 1,
"maxLength": 100
},
"email": {
"type": "string",
"format": "email"
},
"age": {
"type": "integer",
"minimum": 0,
"maximum": 150
},
"role": {
"type": "string",
"enum": ["user", "admin", "moderator"],
"default": "user"
},
"tags": {
"type": "array",
"items": { "type": "string" },
"uniqueItems": true,
"maxItems": 10
},
"address": {
"type": "object",
"properties": {
"street": { "type": "string" },
"city": { "type": "string" },
"zip": { "type": "string", "pattern": "^\\d{5}(-\\d{4})?$" }
},
"required": ["street", "city"]
}
},
"additionalProperties": false
}
```
### Common patterns
```jsonc
// Nullable field
{ "type": ["string", "null"] }
// Union type (string or number)
{ "oneOf": [{ "type": "string" }, { "type": "number" }] }
// Conditional: if role is admin, require permissions
// ("required" inside "if" is needed: without it the condition also matches
// objects that have no "role" at all, and "permissions" becomes mandatory for them)
{
  "if": {
    "properties": { "role": { "const": "admin" } },
    "required": ["role"]
  },
  "then": { "required": ["permissions"] }
}
// Pattern properties (dynamic keys)
{
"type": "object",
"patternProperties": {
"^env_": { "type": "string" }
}
}
// Reusable definitions
{
"$defs": {
"address": {
"type": "object",
"properties": {
"street": { "type": "string" },
"city": { "type": "string" }
}
}
},
"properties": {
"home": { "$ref": "#/$defs/address" },
"work": { "$ref": "#/$defs/address" }
}
}
```
### Validate with command line
```bash
# Using ajv-cli (Node.js)
# --spec=draft2020 selects the 2020-12 dialect declared by the schema's "$schema";
# ajv-cli otherwise assumes draft-07. If the schema uses "format" keywords,
# also install ajv-formats and pass `-c ajv-formats`.
npx ajv-cli validate --spec=draft2020 -s schema.json -d data.json
# Using jsonschema (Python)
pip install jsonschema
python3 -c "
import json, jsonschema
schema = json.load(open('schema.json'))
data = json.load(open('data.json'))
jsonschema.validate(data, schema)
print('Valid')
"
# Validate multiple files
# --spec=draft2020 matches a schema that declares the 2020-12 dialect;
# each failing file is reported and the loop keeps going.
for f in data/*.json; do
  npx ajv-cli validate --spec=draft2020 -s schema.json -d "$f" 2>&1 || echo "INVALID: $f"
done
```
## Zod (TypeScript)
### Basic schemas
```typescript
import { z } from 'zod';
// Primitives
const nameSchema = z.string().min(1).max(100);
const ageSchema = z.number().int().min(0).max(150);
const emailSchema = z.string().email();
const urlSchema = z.string().url();
// Objects
const userSchema = z.object({
name: z.string().min(1),
email: z.string().email(),
age: z.number().int().min(0),
role: z.enum(['user', 'admin', 'moderator']).default('user'),
tags: z.array(z.string()).max(10).default([]),
createdAt: z.string().datetime(),
});
// Infer TypeScript type from schema
type User = z.infer<typeof userSchema>;
// { name: string; email: string; age: number; role: "user" | "admin" | "moderator"; ... }
// Validate
const result = userSchema.safeParse(data);
if (result.success) {
console.log(result.data); // typed as User
} else {
console.log(result.error.issues); // validation errors
}
// Parse (throws on invalid)
const user = userSchema.parse(data);
```
### Advanced patterns
```typescript
// Optional and nullable
const schema = z.object({
name: z.string(),
nickname: z.string().optional(), // string | undefined
middleName: z.string().nullable(), // string | null
suffix: z.string().nullish(), // string | null | undefined
});
// Transforms (validate then transform)
const dateSchema = z.string().datetime().transform(s => new Date(s));
const trimmed = z.string().trim().toLowerCase();
const parsed = z.string().transform(s => parseInt(s, 10)).pipe(z.number().int());
// Discriminated unions (tagged unions)
const eventSchema = z.discriminatedUnion('type', [
z.object({ type: z.literal('click'), x: z.number(), y: z.number() }),
z.object({ type: z.literal('keypress'), key: z.string() }),
z.object({ type: z.literal('scroll'), delta: z.number() }),
]);
// Recursive types
// TypeScript cannot infer a self-referential schema, so the types are declared by hand.
// Output and input differ because `.default([])` lets callers omit `children`.
type Category = { name: string; children: Category[] };
type CategoryInput = { name: string; children?: CategoryInput[] };
const categorySchema: z.ZodType<Category, z.ZodTypeDef, CategoryInput> = z.object({
  name: z.string(),
  // z.lazy defers the self-reference until categorySchema is initialized.
  children: z.lazy(() => z.array(categorySchema)).default([]),
});
// Refinements (custom validation)
const passwordSchema = z.string()
.min(8)
.refine(s => /[A-Z]/.test(s), 'Must contain uppercase')
.refine(s => /[0-9]/.test(s), 'Must contain digit')
.refine(s => /[^a-zA-Z0-9]/.test(s), 'Must contain special character');
// Extend/merge objects
const baseUser = z.object({ name: z.string(), email: z.string() });
const adminUser = baseUser.extend({ permissions: z.array(z.string()) });
// Pick/omit
const createUser = userSchema.omit({ createdAt: true });
const userSummary = userSchema.pick({ name: true, email: true });
// Passthrough (allow extra fields)
const flexible = userSchema.passthrough();
// Strict (reject unknown fields rather than stripping them)
const strict = userSchema.strict(); // Error on extra fields
```
### API validation with Zod
```typescript
// Express middleware
import { z } from 'zod';
const createUserBody = z.object({
name: z.string().min(1),
email: z.string().email(),
password: z.string().min(8),
});
app.post('/api/users', (req, res) => {
const result = createUserBody.safeParse(req.body);
if (!result.success) {
return res.status(400).json({ errors: result.error.issues });
}
const { name, email, password } = result.data;
// ... create user
});
// Query parameter validation
const listParams = z.object({
page: z.coerce.number().int().min(1).default(1),
limit: z.coerce.number().int().min(1).max(100).default(20),
sort: z.enum(['newest', 'oldest', 'name']).default('newest'),
q: z.string().optional(),
});
app.get('/api/users', (req, res) => {
  // Use safeParse, as the POST handler does: a malformed query string is a
  // client error, so answer 400 instead of letting a ZodError escape the handler.
  const result = listParams.safeParse(req.query);
  if (!result.success) {
    return res.status(400).json({ errors: result.error.issues });
  }
  const params = result.data;
  // params.page is a number, params.sort is typed
});
```
## Pydantic (Python)
### Basic models
```python
from pydantic import BaseModel, Field, EmailStr, field_validator
from typing import Optional
from datetime import datetime
from enum import Enum
# Allowed user roles; the str mixin makes each member a plain string value.
class Role(str, Enum):
    USER = "user"
    ADMIN = "admin"
    MODERATOR = "moderator"
# Postal address; zip_code must be 5 digits, optionally followed by "-" and 4 digits.
class Address(BaseModel):
    street: str
    city: str
    zip_code: str = Field(pattern=r"^\d{5}(-\d{4})?$")
# User record with field-level constraints and a custom validator on name.
class User(BaseModel):
    name: str = Field(min_length=1, max_length=100)
    email: EmailStr
    age: int = Field(ge=0, le=150)
    role: Role = Role.USER
    # default_factory gives every instance its own list / timestamp.
    tags: list[str] = Field(default_factory=list, max_length=10)
    address: Optional[Address] = None
    created_at: datetime = Field(default_factory=datetime.now)

    @field_validator("name")
    @classmethod
    def name_must_not_be_empty(cls, v: str) -> str:
        # Reject whitespace-only names and store the trimmed value.
        if not v.strip():
            raise ValueError("name cannot be blank")
        return v.strip()
# Validate
user = User(name="Alice", email="alice@example.com", age=30)
print(user.model_dump()) # dict
print(user.model_dump_json()) # JSON string
# Validation errors
try:
    User(name="", email="bad", age=-1)
except Exception as e:
    print(e)  # Detailed validation errors
```
### Advanced patterns
```python
from pydantic import BaseModel, model_validator, ConfigDict
from typing import Literal, Union, Annotated
# Discriminated union
# Click variant of the union; the Literal "type" field is the tag that identifies it.
class ClickEvent(BaseModel):
    type: Literal["click"]
    x: int
    y: int
class KeypressEvent(BaseModel):
type: Literal["keypress"]
key: s