Split View: LLM Structured Output 실전 가이드 — JSON Mode, Tool Use, Pydantic 스키마 검증
LLM Structured Output 실전 가이드 — JSON Mode, Tool Use, Pydantic 스키마 검증
- 들어가며
- 프로바이더별 Structured Output 비교
- Pydantic으로 스키마 검증 자동화
- LiteLLM으로 프로바이더 통합
- Instructor 라이브러리 활용
- 프로덕션 파이프라인 구축
- 마무리

들어가며
LLM의 출력을 프로그래밍적으로 처리하려면 **구조화된 형식(JSON, XML 등)**이 필수입니다. "응답을 JSON으로 줘"라고 프롬프트에 넣는 것만으로는 부족합니다 — 스키마 불일치, 누락 필드, 잘못된 타입 등 다양한 문제가 발생합니다.
이 글에서는 주요 LLM 프로바이더의 Structured Output 기능을 비교하고, 프로덕션에서 안정적으로 사용하는 방법을 다룹니다.
프로바이더별 Structured Output 비교
OpenAI: response_format + Structured Outputs
from openai import OpenAI
from pydantic import BaseModel
from typing import List, Optional
client = OpenAI()
# 방법 1: JSON Mode (기본)
response = client.chat.completions.create(
model="gpt-4o-2024-08-06",
messages=[
{"role": "system", "content": "응답을 JSON으로 반환하세요."},
{"role": "user", "content": "서울의 유명 맛집 3곳 추천해줘"}
],
response_format={"type": "json_object"}
)
# JSON은 보장되지만, 스키마는 보장되지 않음
# 방법 2: Structured Outputs (스키마 보장)
# Schema for one recommended restaurant. No field has a default, so all five
# are required. Plain `#` comments are used instead of docstrings so the text
# cannot end up in the JSON schema generated from these models.
class Restaurant(BaseModel):
    name: str
    cuisine: str
    price_range: str
    rating: float
    address: str


# Top-level schema handed to `response_format` in the parse() call below.
class RestaurantList(BaseModel):
    restaurants: List[Restaurant]
    total_count: int
response = client.beta.chat.completions.parse(
model="gpt-4o-2024-08-06",
messages=[
{"role": "system", "content": "서울의 유명 맛집을 추천해주세요."},
{"role": "user", "content": "한식 맛집 3곳"}
],
response_format=RestaurantList
)
result = response.choices[0].message.parsed
print(result.restaurants[0].name) # 타입 안전!
Anthropic: Tool Use로 Structured Output
import anthropic
from typing import List
client = anthropic.Anthropic()
# Anthropic은 Tool Use를 활용한 Structured Output
response = client.messages.create(
model="claude-sonnet-4-20250514",
max_tokens=1024,
tools=[
{
"name": "extract_restaurants",
"description": "맛집 정보를 구조화된 형식으로 추출",
"input_schema": {
"type": "object",
"properties": {
"restaurants": {
"type": "array",
"items": {
"type": "object",
"properties": {
"name": {"type": "string"},
"cuisine": {"type": "string"},
"price_range": {
"type": "string",
"enum": ["$", "$$", "$$$", "$$$$"]
},
"rating": {"type": "number"},
"address": {"type": "string"}
},
"required": ["name", "cuisine", "price_range"]
}
},
"total_count": {"type": "integer"}
},
"required": ["restaurants", "total_count"]
}
}
],
tool_choice={"type": "tool", "name": "extract_restaurants"},
messages=[
{"role": "user", "content": "서울 한식 맛집 3곳 추천해줘"}
]
)
# Tool Use 결과에서 구조화된 데이터 추출
tool_use = next(
block for block in response.content
if block.type == "tool_use"
)
restaurants = tool_use.input["restaurants"]
Google Gemini: responseSchema
import google.generativeai as genai
genai.configure(api_key="YOUR_API_KEY")
model = genai.GenerativeModel(
"gemini-2.0-flash",
generation_config=genai.GenerationConfig(
response_mime_type="application/json",
response_schema={
"type": "object",
"properties": {
"restaurants": {
"type": "array",
"items": {
"type": "object",
"properties": {
"name": {"type": "string"},
"cuisine": {"type": "string"},
"rating": {"type": "number"}
}
}
}
}
}
)
)
response = model.generate_content("서울 한식 맛집 3곳")
import json
data = json.loads(response.text)
Pydantic으로 스키마 검증 자동화
기본 패턴
from pydantic import BaseModel, Field, validator
from typing import List, Optional, Literal
from enum import Enum
import json
# Closed set of price tiers. Members are also `str` instances, so they compare
# equal to (and can be looked up by) the raw dollar-sign strings.
class PriceRange(str, Enum):
    CHEAP = "$"
    MODERATE = "$$"
    EXPENSIVE = "$$$"
    VERY_EXPENSIVE = "$$$$"
# Pydantic v2 replacement for the deprecated v1 `validator` decorator.
from pydantic import field_validator


# Validated restaurant record: bounded name length, enum-constrained price
# tier, rating limited to 0.0-5.0, optional address, and at most 10 tags.
class Restaurant(BaseModel):
    name: str = Field(..., min_length=1, max_length=100)
    cuisine: str = Field(..., description="음식 종류")
    price_range: PriceRange
    rating: float = Field(..., ge=0.0, le=5.0)
    address: Optional[str] = None
    tags: List[str] = Field(default_factory=list, max_length=10)

    @field_validator("rating")
    @classmethod
    def round_rating(cls, v: float) -> float:
        # Normalize the rating to one decimal place.
        return round(v, 1)
# Full response envelope: between 1 and 20 restaurants, plus the query string
# and a count field.
class RestaurantResponse(BaseModel):
    restaurants: List[Restaurant] = Field(..., min_length=1, max_length=20)
    query: str
    total_count: int
# LLM 응답 파싱 + 검증
def parse_llm_response(raw_json: str) -> RestaurantResponse:
    """Parse raw LLM output as JSON and validate it against RestaurantResponse.

    Args:
        raw_json: Text returned by the model, expected to hold a JSON object.

    Returns:
        The validated RestaurantResponse instance.

    Raises:
        ValueError: If the text is not valid JSON, or if the decoded value does
            not satisfy the schema. The underlying exception is chained as
            ``__cause__``.
    """
    from pydantic import ValidationError

    try:
        data = json.loads(raw_json)
    except json.JSONDecodeError as e:
        raise ValueError(f"Invalid JSON: {e}") from e

    try:
        # model_validate reports non-object JSON (e.g. a bare list) as a
        # ValidationError, where `RestaurantResponse(**data)` raised TypeError.
        return RestaurantResponse.model_validate(data)
    except ValidationError as e:
        raise ValueError(f"Schema validation failed: {e}") from e
재시도 패턴 (Self-Healing)
from tenacity import retry, stop_after_attempt, retry_if_exception_type
class StructuredOutputParser:
    """Request JSON from a chat model and validate it against a Pydantic schema.

    On a parse or validation failure the request is retried, with the previous
    error text appended to the conversation so the model can correct itself.
    """

    def __init__(self, client, model: str, schema: type[BaseModel]):
        """Store the chat client, the model name, and the target schema class."""
        self.client = client
        self.model = model
        self.schema = schema

    def parse(self, prompt: str) -> BaseModel:
        """Return a validated ``schema`` instance for ``prompt``.

        Up to three attempts are made. Each failed attempt's error message is
        fed back to the model on the next attempt. What is raised after the
        final failure is decided by the tenacity decorator (no ``reraise`` is
        configured) -- confirm against the tenacity docs before catching it.

        Args:
            prompt: The user request to send to the model.

        Returns:
            An instance of ``self.schema`` built from the model's JSON reply.
        """
        schema_json = self.schema.model_json_schema()
        base_messages = [
            {
                "role": "system",
                "content": f"다음 JSON 스키마에 맞게 응답하세요:\n{json.dumps(schema_json, indent=2)}"
            },
            {"role": "user", "content": prompt},
        ]

        # Error feedback is kept per call rather than on `self`, so a call that
        # exhausts its retries cannot leak its error into the next parse(), and
        # concurrent calls on one parser do not share state.
        last_error = None

        @retry(
            stop=stop_after_attempt(3),
            retry=retry_if_exception_type(ValueError)
        )
        def attempt() -> BaseModel:
            nonlocal last_error
            messages = list(base_messages)

            # Include the previous attempt's error, if there was one.
            if last_error is not None:
                messages.append({
                    "role": "user",
                    "content": f"이전 응답에서 에러가 발생했습니다: {last_error}\n올바른 JSON으로 다시 응답해주세요."
                })

            response = self.client.chat.completions.create(
                model=self.model,
                messages=messages,
                response_format={"type": "json_object"}
            )
            raw = response.choices[0].message.content

            try:
                data = json.loads(raw)
                return self.schema(**data)
            except Exception as e:
                # Deliberately broad: any decode or validation failure is
                # converted to ValueError, the only type the retry policy
                # reacts to.
                last_error = str(e)
                raise ValueError(str(e)) from e

        return attempt()
# 사용
parser = StructuredOutputParser(client, "gpt-4o", RestaurantResponse)
result = parser.parse("서울 한식 맛집 3곳 추천")
LiteLLM으로 프로바이더 통합
import litellm
from pydantic import BaseModel
# Schema passed as `response_format` to each litellm.completion() call below.
# All four fields are required (none has a default).
class ExtractedInfo(BaseModel):
    summary: str
    key_points: list[str]
    sentiment: str
    confidence: float
# OpenAI
response = litellm.completion(
model="gpt-4o",
messages=[{"role": "user", "content": "Kubernetes 1.35 릴리스 요약해줘"}],
response_format=ExtractedInfo
)
# Anthropic (자동으로 Tool Use 변환)
response = litellm.completion(
model="claude-sonnet-4-20250514",
messages=[{"role": "user", "content": "Kubernetes 1.35 릴리스 요약해줘"}],
response_format=ExtractedInfo
)
# Gemini
response = litellm.completion(
model="gemini/gemini-2.0-flash",
messages=[{"role": "user", "content": "Kubernetes 1.35 릴리스 요약해줘"}],
response_format=ExtractedInfo
)
# 동일한 코드로 3개 프로바이더 사용 가능!
Instructor 라이브러리 활용
# pip install instructor
import instructor
from openai import OpenAI
from pydantic import BaseModel
from typing import List
client = instructor.from_openai(OpenAI())
# One step of a worked solution: an explanation and that step's output.
class Step(BaseModel):
    explanation: str
    output: str


# Schema used as `response_model` in the instructor calls below: the ordered
# steps, the final answer as text, and a confidence value.
class MathSolution(BaseModel):
    steps: List[Step]
    final_answer: str
    confidence: float
# Pydantic 모델을 직접 response_model로 사용
solution = client.chat.completions.create(
model="gpt-4o",
response_model=MathSolution,
messages=[
{"role": "user", "content": "2x + 5 = 15를 풀어줘"}
],
max_retries=3 # 자동 재시도
)
print(solution.steps[0].explanation)
print(f"답: {solution.final_answer}")
# Anthropic도 동일하게 지원
import anthropic
anthropic_client = instructor.from_anthropic(anthropic.Anthropic())
solution = anthropic_client.messages.create(
model="claude-sonnet-4-20250514",
response_model=MathSolution,
max_tokens=1024,
messages=[
{"role": "user", "content": "3x - 7 = 20을 풀어줘"}
]
)
프로덕션 파이프라인 구축
FastAPI + Structured Output
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from typing import List
import instructor
from openai import OpenAI
app = FastAPI()
client = instructor.from_openai(OpenAI())
# Analysis result: used both as the instructor `response_model` and as the
# FastAPI response model of the /analyze endpoint.
class ProductReview(BaseModel):
    sentiment: str  # positive, negative, neutral
    score: float
    key_phrases: List[str]
    summary: str
    language: str


# Request body for /analyze.
# NOTE(review): `model` is forwarded unchanged to the completion call, so any
# caller can pick an arbitrary model name -- consider an allow-list.
class ReviewRequest(BaseModel):
    text: str
    model: str = "gpt-4o-mini"
@app.post("/analyze", response_model=ProductReview)
async def analyze_review(request: ReviewRequest):
    """Analyze a product review and return it as a ProductReview.

    Args:
        request: Review text plus the model name to use.

    Returns:
        The ProductReview produced by the instructor-patched client.

    Raises:
        HTTPException: Status 500 with the error text if the call fails.
    """
    import asyncio

    try:
        # `client` wraps the synchronous OpenAI SDK. Calling it directly inside
        # an `async def` endpoint would block the event loop for the whole
        # request, so the call runs in a worker thread instead.
        result = await asyncio.to_thread(
            client.chat.completions.create,
            model=request.model,
            response_model=ProductReview,
            messages=[
                {
                    "role": "system",
                    "content": "제품 리뷰를 분석하세요."
                },
                {"role": "user", "content": request.text}
            ],
            max_retries=2
        )
        return result
    except Exception as e:
        # NOTE(review): `detail=str(e)` exposes internal error text to the
        # caller; kept as-is to preserve the response contract.
        raise HTTPException(status_code=500, detail=str(e)) from e
배치 처리 파이프라인
import asyncio
from typing import List
from openai import AsyncOpenAI
import instructor
async_client = instructor.from_openai(AsyncOpenAI())
# A single extracted entity: its name, a type label, and a confidence value.
class ExtractedEntity(BaseModel):
    name: str
    entity_type: str
    confidence: float


# Schema used as `response_model` by extract_entities().
class EntityExtractionResult(BaseModel):
    entities: List[ExtractedEntity]
    text_length: int
async def extract_entities(text: str) -> EntityExtractionResult:
    """Extract entities from ``text`` with the async instructor client.

    Args:
        text: The input text, sent as the user message.

    Returns:
        The EntityExtractionResult returned by the client for ``text``.
    """
    messages = [
        {"role": "system", "content": "텍스트에서 엔티티를 추출하세요."},
        {"role": "user", "content": text},
    ]
    return await async_client.chat.completions.create(
        model="gpt-4o-mini",
        response_model=EntityExtractionResult,
        messages=messages,
    )
async def batch_extract(texts: List[str], concurrency: int = 5, extractor=None):
    """Run extraction over ``texts`` with a cap on concurrent calls.

    Args:
        texts: Input texts; one extraction call is made per item.
        concurrency: Maximum number of calls in flight at once (>= 1).
        extractor: Optional async callable taking one text. Defaults to
            ``extract_entities``.

    Returns:
        The successful results, in input order. Failed items are dropped and
        only counted in the printed summary.

    Raises:
        ValueError: If ``concurrency`` is less than 1.
    """
    # Semaphore(0) would make every task wait forever; fail fast instead.
    if concurrency < 1:
        raise ValueError("concurrency must be at least 1")
    if extractor is None:
        extractor = extract_entities

    semaphore = asyncio.Semaphore(concurrency)

    async def limited_extract(text):
        async with semaphore:
            return await extractor(text)

    results = await asyncio.gather(
        *(limited_extract(text) for text in texts),
        return_exceptions=True,
    )

    # CancelledError derives from BaseException, not Exception, so checking
    # against Exception would report a cancelled item as a success.
    successes = [r for r in results if not isinstance(r, BaseException)]
    failures = [r for r in results if isinstance(r, BaseException)]
    print(f"성공: {len(successes)}, 실패: {len(failures)}")
    return successes
# 실행
# Sample inputs; extend the list with real texts. The original trailing bare
# `...` was the Ellipsis object, which would have been submitted as a "text".
texts = ["서울에 새로운 AI 스타트업이...", "삼성전자가 반도체..."]
results = asyncio.run(batch_extract(texts))
마무리
Structured Output은 LLM을 프로덕션 시스템에 통합하는 핵심 기술입니다:
- OpenAI: `response_format` + Structured Outputs로 스키마 100% 보장
- Anthropic: Tool Use를 활용한 간접적 방식이지만 안정적
- Instructor/LiteLLM: 프로바이더 통합으로 코드 재사용
- Pydantic: 스키마 정의 + 검증의 표준
- 재시도 패턴: Self-healing으로 안정성 확보
📝 퀴즈 (6문제)
Q1. OpenAI의 JSON Mode와 Structured Outputs의 차이는? JSON Mode는 유효한 JSON만 보장, Structured Outputs는 지정한 스키마까지 보장
Q2. Anthropic에서 Structured Output을 구현하는 방식은? Tool Use (Function Calling)를 활용하여 input_schema로 구조화된 출력을 받음
Q3. Pydantic의 Field(ge=0.0, le=5.0)은 무엇을 의미하는가? 값이 0.0 이상 5.0 이하여야 한다는 검증 조건
Q4. instructor 라이브러리의 max_retries 기능은 무엇인가? 스키마 검증 실패 시 자동으로 재시도하여 올바른 형식을 얻음
Q5. 배치 처리에서 asyncio.Semaphore의 역할은? 동시 API 호출 수를 제한하여 rate limit 초과를 방지
Q6. LiteLLM을 사용하면 얻는 가장 큰 이점은? 동일한 코드로 OpenAI, Anthropic, Gemini 등 여러 프로바이더를 전환 가능
LLM Structured Output Practical Guide — JSON Mode, Tool Use, Pydantic Schema Validation
- Introduction
- Structured Output Comparison by Provider
- Automating Schema Validation with Pydantic
- Unifying Providers with LiteLLM
- Using the Instructor Library
- Building a Production Pipeline
- Conclusion
- Quiz

Introduction
To programmatically process LLM outputs, structured formats (JSON, XML, etc.) are essential. Simply adding "respond in JSON" to a prompt is not enough — various issues arise such as schema mismatches, missing fields, and incorrect types.
In this article, we compare the Structured Output features of major LLM providers and cover how to use them reliably in production.
Structured Output Comparison by Provider
OpenAI: response_format + Structured Outputs
from openai import OpenAI
from pydantic import BaseModel
from typing import List, Optional
client = OpenAI()
# Method 1: JSON Mode (basic)
response = client.chat.completions.create(
model="gpt-4o-2024-08-06",
messages=[
{"role": "system", "content": "Return your response as JSON."},
{"role": "user", "content": "Recommend 3 famous restaurants in Seoul"}
],
response_format={"type": "json_object"}
)
# JSON is guaranteed, but schema is not
# Method 2: Structured Outputs (schema guaranteed)
# Schema for one recommended restaurant. No field has a default, so all five
# are required. Plain `#` comments are used instead of docstrings so the text
# cannot end up in the JSON schema generated from these models.
class Restaurant(BaseModel):
    name: str
    cuisine: str
    price_range: str
    rating: float
    address: str


# Top-level schema handed to `response_format` in the parse() call below.
class RestaurantList(BaseModel):
    restaurants: List[Restaurant]
    total_count: int
response = client.beta.chat.completions.parse(
model="gpt-4o-2024-08-06",
messages=[
{"role": "system", "content": "Recommend famous restaurants in Seoul."},
{"role": "user", "content": "3 Korean cuisine restaurants"}
],
response_format=RestaurantList
)
result = response.choices[0].message.parsed
print(result.restaurants[0].name) # Type-safe!
Anthropic: Structured Output via Tool Use
import anthropic
from typing import List
client = anthropic.Anthropic()
# Anthropic uses Tool Use for Structured Output
response = client.messages.create(
model="claude-sonnet-4-20250514",
max_tokens=1024,
tools=[
{
"name": "extract_restaurants",
"description": "Extract restaurant information in a structured format",
"input_schema": {
"type": "object",
"properties": {
"restaurants": {
"type": "array",
"items": {
"type": "object",
"properties": {
"name": {"type": "string"},
"cuisine": {"type": "string"},
"price_range": {
"type": "string",
"enum": ["$", "$$", "$$$", "$$$$"]
},
"rating": {"type": "number"},
"address": {"type": "string"}
},
"required": ["name", "cuisine", "price_range"]
}
},
"total_count": {"type": "integer"}
},
"required": ["restaurants", "total_count"]
}
}
],
tool_choice={"type": "tool", "name": "extract_restaurants"},
messages=[
{"role": "user", "content": "Recommend 3 Korean restaurants in Seoul"}
]
)
# Extract structured data from Tool Use result
tool_use = next(
block for block in response.content
if block.type == "tool_use"
)
restaurants = tool_use.input["restaurants"]
Google Gemini: responseSchema
import google.generativeai as genai
genai.configure(api_key="YOUR_API_KEY")
model = genai.GenerativeModel(
"gemini-2.0-flash",
generation_config=genai.GenerationConfig(
response_mime_type="application/json",
response_schema={
"type": "object",
"properties": {
"restaurants": {
"type": "array",
"items": {
"type": "object",
"properties": {
"name": {"type": "string"},
"cuisine": {"type": "string"},
"rating": {"type": "number"}
}
}
}
}
}
)
)
response = model.generate_content("3 Korean restaurants in Seoul")
import json
data = json.loads(response.text)
Automating Schema Validation with Pydantic
Basic Pattern
from pydantic import BaseModel, Field, validator
from typing import List, Optional, Literal
from enum import Enum
import json
# Closed set of price tiers. Members are also `str` instances, so they compare
# equal to (and can be looked up by) the raw dollar-sign strings.
class PriceRange(str, Enum):
    CHEAP = "$"
    MODERATE = "$$"
    EXPENSIVE = "$$$"
    VERY_EXPENSIVE = "$$$$"
# Pydantic v2 replacement for the deprecated v1 `validator` decorator.
from pydantic import field_validator


# Validated restaurant record: bounded name length, enum-constrained price
# tier, rating limited to 0.0-5.0, optional address, and at most 10 tags.
class Restaurant(BaseModel):
    name: str = Field(..., min_length=1, max_length=100)
    cuisine: str = Field(..., description="Type of cuisine")
    price_range: PriceRange
    rating: float = Field(..., ge=0.0, le=5.0)
    address: Optional[str] = None
    tags: List[str] = Field(default_factory=list, max_length=10)

    @field_validator("rating")
    @classmethod
    def round_rating(cls, v: float) -> float:
        # Normalize the rating to one decimal place.
        return round(v, 1)
# Full response envelope: between 1 and 20 restaurants, plus the query string
# and a count field.
class RestaurantResponse(BaseModel):
    restaurants: List[Restaurant] = Field(..., min_length=1, max_length=20)
    query: str
    total_count: int
# Parse + validate LLM response
def parse_llm_response(raw_json: str) -> RestaurantResponse:
    """Parse raw LLM output as JSON and validate it against RestaurantResponse.

    Args:
        raw_json: Text returned by the model, expected to hold a JSON object.

    Returns:
        The validated RestaurantResponse instance.

    Raises:
        ValueError: If the text is not valid JSON, or if the decoded value does
            not satisfy the schema. The underlying exception is chained as
            ``__cause__``.
    """
    from pydantic import ValidationError

    try:
        data = json.loads(raw_json)
    except json.JSONDecodeError as e:
        raise ValueError(f"Invalid JSON: {e}") from e

    try:
        # model_validate reports non-object JSON (e.g. a bare list) as a
        # ValidationError, where `RestaurantResponse(**data)` raised TypeError.
        return RestaurantResponse.model_validate(data)
    except ValidationError as e:
        raise ValueError(f"Schema validation failed: {e}") from e
Retry Pattern (Self-Healing)
from tenacity import retry, stop_after_attempt, retry_if_exception_type
class StructuredOutputParser:
    """Request JSON from a chat model and validate it against a Pydantic schema.

    On a parse or validation failure the request is retried, with the previous
    error text appended to the conversation so the model can correct itself.
    """

    def __init__(self, client, model: str, schema: type[BaseModel]):
        """Store the chat client, the model name, and the target schema class."""
        self.client = client
        self.model = model
        self.schema = schema

    def parse(self, prompt: str) -> BaseModel:
        """Return a validated ``schema`` instance for ``prompt``.

        Up to three attempts are made. Each failed attempt's error message is
        fed back to the model on the next attempt. What is raised after the
        final failure is decided by the tenacity decorator (no ``reraise`` is
        configured) -- confirm against the tenacity docs before catching it.

        Args:
            prompt: The user request to send to the model.

        Returns:
            An instance of ``self.schema`` built from the model's JSON reply.
        """
        schema_json = self.schema.model_json_schema()
        base_messages = [
            {
                "role": "system",
                "content": f"Respond according to the following JSON schema:\n{json.dumps(schema_json, indent=2)}"
            },
            {"role": "user", "content": prompt},
        ]

        # Error feedback is kept per call rather than on `self`, so a call that
        # exhausts its retries cannot leak its error into the next parse(), and
        # concurrent calls on one parser do not share state.
        last_error = None

        @retry(
            stop=stop_after_attempt(3),
            retry=retry_if_exception_type(ValueError)
        )
        def attempt() -> BaseModel:
            nonlocal last_error
            messages = list(base_messages)

            # Include the previous attempt's error, if there was one.
            if last_error is not None:
                messages.append({
                    "role": "user",
                    "content": f"An error occurred in the previous response: {last_error}\nPlease respond again with valid JSON."
                })

            response = self.client.chat.completions.create(
                model=self.model,
                messages=messages,
                response_format={"type": "json_object"}
            )
            raw = response.choices[0].message.content

            try:
                data = json.loads(raw)
                return self.schema(**data)
            except Exception as e:
                # Deliberately broad: any decode or validation failure is
                # converted to ValueError, the only type the retry policy
                # reacts to.
                last_error = str(e)
                raise ValueError(str(e)) from e

        return attempt()
# Usage
parser = StructuredOutputParser(client, "gpt-4o", RestaurantResponse)
result = parser.parse("Recommend 3 Korean restaurants in Seoul")
Unifying Providers with LiteLLM
import litellm
from pydantic import BaseModel
# Schema passed as `response_format` to each litellm.completion() call below.
# All four fields are required (none has a default).
class ExtractedInfo(BaseModel):
    summary: str
    key_points: list[str]
    sentiment: str
    confidence: float
# OpenAI
response = litellm.completion(
model="gpt-4o",
messages=[{"role": "user", "content": "Summarize the Kubernetes 1.35 release"}],
response_format=ExtractedInfo
)
# Anthropic (automatically converts to Tool Use)
response = litellm.completion(
model="claude-sonnet-4-20250514",
messages=[{"role": "user", "content": "Summarize the Kubernetes 1.35 release"}],
response_format=ExtractedInfo
)
# Gemini
response = litellm.completion(
model="gemini/gemini-2.0-flash",
messages=[{"role": "user", "content": "Summarize the Kubernetes 1.35 release"}],
response_format=ExtractedInfo
)
# Same code works across all 3 providers!
Using the Instructor Library
# pip install instructor
import instructor
from openai import OpenAI
from pydantic import BaseModel
from typing import List
client = instructor.from_openai(OpenAI())
# One step of a worked solution: an explanation and that step's output.
class Step(BaseModel):
    explanation: str
    output: str


# Schema used as `response_model` in the instructor calls below: the ordered
# steps, the final answer as text, and a confidence value.
class MathSolution(BaseModel):
    steps: List[Step]
    final_answer: str
    confidence: float
# Use Pydantic model directly as response_model
solution = client.chat.completions.create(
model="gpt-4o",
response_model=MathSolution,
messages=[
{"role": "user", "content": "Solve 2x + 5 = 15"}
],
max_retries=3 # Automatic retries
)
print(solution.steps[0].explanation)
print(f"Answer: {solution.final_answer}")
# Anthropic is also supported the same way
import anthropic
anthropic_client = instructor.from_anthropic(anthropic.Anthropic())
solution = anthropic_client.messages.create(
model="claude-sonnet-4-20250514",
response_model=MathSolution,
max_tokens=1024,
messages=[
{"role": "user", "content": "Solve 3x - 7 = 20"}
]
)
Building a Production Pipeline
FastAPI + Structured Output
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from typing import List
import instructor
from openai import OpenAI
app = FastAPI()
client = instructor.from_openai(OpenAI())
# Analysis result: used both as the instructor `response_model` and as the
# FastAPI response model of the /analyze endpoint.
class ProductReview(BaseModel):
    sentiment: str  # positive, negative, neutral
    score: float
    key_phrases: List[str]
    summary: str
    language: str


# Request body for /analyze.
# NOTE(review): `model` is forwarded unchanged to the completion call, so any
# caller can pick an arbitrary model name -- consider an allow-list.
class ReviewRequest(BaseModel):
    text: str
    model: str = "gpt-4o-mini"
@app.post("/analyze", response_model=ProductReview)
async def analyze_review(request: ReviewRequest):
    """Analyze a product review and return it as a ProductReview.

    Args:
        request: Review text plus the model name to use.

    Returns:
        The ProductReview produced by the instructor-patched client.

    Raises:
        HTTPException: Status 500 with the error text if the call fails.
    """
    import asyncio

    try:
        # `client` wraps the synchronous OpenAI SDK. Calling it directly inside
        # an `async def` endpoint would block the event loop for the whole
        # request, so the call runs in a worker thread instead.
        result = await asyncio.to_thread(
            client.chat.completions.create,
            model=request.model,
            response_model=ProductReview,
            messages=[
                {
                    "role": "system",
                    "content": "Analyze the product review."
                },
                {"role": "user", "content": request.text}
            ],
            max_retries=2
        )
        return result
    except Exception as e:
        # NOTE(review): `detail=str(e)` exposes internal error text to the
        # caller; kept as-is to preserve the response contract.
        raise HTTPException(status_code=500, detail=str(e)) from e
Batch Processing Pipeline
import asyncio
from typing import List
from openai import AsyncOpenAI
import instructor
async_client = instructor.from_openai(AsyncOpenAI())
# A single extracted entity: its name, a type label, and a confidence value.
class ExtractedEntity(BaseModel):
    name: str
    entity_type: str
    confidence: float


# Schema used as `response_model` by extract_entities().
class EntityExtractionResult(BaseModel):
    entities: List[ExtractedEntity]
    text_length: int
async def extract_entities(text: str) -> EntityExtractionResult:
    """Extract entities from ``text`` with the async instructor client.

    Args:
        text: The input text, sent as the user message.

    Returns:
        The EntityExtractionResult returned by the client for ``text``.
    """
    messages = [
        {"role": "system", "content": "Extract entities from the text."},
        {"role": "user", "content": text},
    ]
    return await async_client.chat.completions.create(
        model="gpt-4o-mini",
        response_model=EntityExtractionResult,
        messages=messages,
    )
async def batch_extract(texts: List[str], concurrency: int = 5, extractor=None):
    """Run extraction over ``texts`` with a cap on concurrent calls.

    Args:
        texts: Input texts; one extraction call is made per item.
        concurrency: Maximum number of calls in flight at once (>= 1).
        extractor: Optional async callable taking one text. Defaults to
            ``extract_entities``.

    Returns:
        The successful results, in input order. Failed items are dropped and
        only counted in the printed summary.

    Raises:
        ValueError: If ``concurrency`` is less than 1.
    """
    # Semaphore(0) would make every task wait forever; fail fast instead.
    if concurrency < 1:
        raise ValueError("concurrency must be at least 1")
    if extractor is None:
        extractor = extract_entities

    semaphore = asyncio.Semaphore(concurrency)

    async def limited_extract(text):
        async with semaphore:
            return await extractor(text)

    results = await asyncio.gather(
        *(limited_extract(text) for text in texts),
        return_exceptions=True,
    )

    # CancelledError derives from BaseException, not Exception, so checking
    # against Exception would report a cancelled item as a success.
    successes = [r for r in results if not isinstance(r, BaseException)]
    failures = [r for r in results if isinstance(r, BaseException)]
    print(f"Successes: {len(successes)}, Failures: {len(failures)}")
    return successes
# Run
# Sample inputs; extend the list with real texts. The original trailing bare
# `...` was the Ellipsis object, which would have been submitted as a "text".
texts = ["A new AI startup in Seoul...", "Samsung Electronics semiconductor..."]
results = asyncio.run(batch_extract(texts))
Conclusion
Structured Output is a key technology for integrating LLMs into production systems:
- OpenAI: 100% schema guarantee with `response_format` + Structured Outputs
- Anthropic: An indirect approach via Tool Use, but reliable
- Instructor/LiteLLM: Code reuse through provider unification
- Pydantic: The standard for schema definition + validation
- Retry Pattern: Stability through self-healing
Quiz (6 Questions)
Q1. What is the difference between OpenAI's JSON Mode and Structured Outputs? JSON Mode only guarantees valid JSON, while Structured Outputs also guarantees conformance to the specified schema
Q2. How does Anthropic implement Structured Output? It uses Tool Use (Function Calling) with input_schema to receive structured output
Q3. What does Pydantic's Field(ge=0.0, le=5.0) mean? A validation constraint that the value must be greater than or equal to 0.0 and less than or equal to 5.0
Q4. What is the max_retries feature in the instructor library? It automatically retries on schema validation failure to obtain the correct format
Q5. What is the role of asyncio.Semaphore in batch processing? It limits the number of concurrent API calls to prevent exceeding rate limits
Q6. What is the biggest benefit of using LiteLLM? The ability to switch between multiple providers like OpenAI, Anthropic, and Gemini with the same code
Quiz
Q1: What is the main topic covered in "LLM Structured Output Practical Guide — JSON Mode, Tool Use, Pydantic Schema Validation"?
Compare Structured Output approaches across OpenAI, Anthropic, and Google, covering Pydantic schema validation to production pipeline construction with practical code examples.
Q2: What is Structured Output Comparison by Provider?
OpenAI: response_format + Structured Outputs; Anthropic: Structured Output via Tool Use; Google Gemini: responseSchema
Q3: Explain the core concept of Automating Schema Validation with Pydantic.
Basic Pattern Retry Pattern (Self-Healing)
Q4: What are the key aspects of Building a Production Pipeline?
FastAPI + Structured Output Batch Processing Pipeline