- Authors
- Name
- Introduction
- Structured Output Comparison by Provider
- Automating Schema Validation with Pydantic
- Unifying Providers with LiteLLM
- Using the Instructor Library
- Building a Production Pipeline
- Conclusion

Introduction
To programmatically process LLM outputs, structured formats (JSON, XML, etc.) are essential. Simply adding "respond in JSON" to a prompt is not enough — various issues arise such as schema mismatches, missing fields, and incorrect types.
In this article, we compare the Structured Output features of major LLM providers and cover how to use them reliably in production.
Structured Output Comparison by Provider
OpenAI: response_format + Structured Outputs
from openai import OpenAI
from pydantic import BaseModel
from typing import List, Optional

client = OpenAI()

# Method 1: JSON Mode — the weakest guarantee: output is parseable JSON.
json_mode_messages = [
    {"role": "system", "content": "Return your response as JSON."},
    {"role": "user", "content": "Recommend 3 famous restaurants in Seoul"},
]
response = client.chat.completions.create(
    model="gpt-4o-2024-08-06",
    messages=json_mode_messages,
    response_format={"type": "json_object"},
)
# NOTE: JSON Mode guarantees valid JSON syntax, but NOT any particular schema.
# Method 2: Structured Outputs (schema guaranteed)
# Schema for one recommended restaurant; field names become the JSON keys
# the model is constrained to produce.
class Restaurant(BaseModel):
    name: str
    cuisine: str
    price_range: str
    rating: float
    address: str
# Top-level wrapper model passed as `response_format` to the parse() call below.
class RestaurantList(BaseModel):
    restaurants: List[Restaurant]
    total_count: int
# Structured Outputs: the SDK parses the response into the Pydantic model.
structured_messages = [
    {"role": "system", "content": "Recommend famous restaurants in Seoul."},
    {"role": "user", "content": "3 Korean cuisine restaurants"},
]
response = client.beta.chat.completions.parse(
    model="gpt-4o-2024-08-06",
    messages=structured_messages,
    response_format=RestaurantList,
)
result = response.choices[0].message.parsed
print(result.restaurants[0].name)  # attribute access is fully type-safe
Anthropic: Structured Output via Tool Use
import anthropic
from typing import List

client = anthropic.Anthropic()

# Anthropic exposes structured output through Tool Use: the model is forced
# to "call" a tool whose input_schema defines the desired JSON shape.
restaurant_tool = {
    "name": "extract_restaurants",
    "description": "Extract restaurant information in a structured format",
    "input_schema": {
        "type": "object",
        "properties": {
            "restaurants": {
                "type": "array",
                "items": {
                    "type": "object",
                    "properties": {
                        "name": {"type": "string"},
                        "cuisine": {"type": "string"},
                        "price_range": {
                            "type": "string",
                            "enum": ["$", "$$", "$$$", "$$$$"],
                        },
                        "rating": {"type": "number"},
                        "address": {"type": "string"},
                    },
                    "required": ["name", "cuisine", "price_range"],
                },
            },
            "total_count": {"type": "integer"},
        },
        "required": ["restaurants", "total_count"],
    },
}
response = client.messages.create(
    model="claude-sonnet-4-20250514",
    max_tokens=1024,
    tools=[restaurant_tool],
    # Forcing tool_choice guarantees the model answers via the tool.
    tool_choice={"type": "tool", "name": "extract_restaurants"},
    messages=[
        {"role": "user", "content": "Recommend 3 Korean restaurants in Seoul"}
    ],
)
# Extract structured data from the Tool Use result: the tool-call "input"
# is itself the structured payload.
tool_use = next(
    filter(lambda b: b.type == "tool_use", response.content)
)
restaurants = tool_use.input["restaurants"]
Google Gemini: responseSchema
import google.generativeai as genai
import json

genai.configure(api_key="YOUR_API_KEY")

# JSON Schema that constrains the shape of the model's output.
restaurant_schema = {
    "type": "object",
    "properties": {
        "restaurants": {
            "type": "array",
            "items": {
                "type": "object",
                "properties": {
                    "name": {"type": "string"},
                    "cuisine": {"type": "string"},
                    "rating": {"type": "number"},
                },
            },
        },
    },
}
model = genai.GenerativeModel(
    "gemini-2.0-flash",
    generation_config=genai.GenerationConfig(
        response_mime_type="application/json",
        response_schema=restaurant_schema,
    ),
)
response = model.generate_content("3 Korean restaurants in Seoul")
data = json.loads(response.text)
Automating Schema Validation with Pydantic
Basic Pattern
import json
from enum import Enum
from typing import List, Literal, Optional

from pydantic import BaseModel, Field, ValidationError, field_validator, validator
# Closed set of price tiers; the str mixin makes members compare and
# serialize as their plain string values.
class PriceRange(str, Enum):
    CHEAP = "$"
    MODERATE = "$$"
    EXPENSIVE = "$$$"
    VERY_EXPENSIVE = "$$$$"
class Restaurant(BaseModel):
    """A single validated restaurant record."""

    name: str = Field(..., min_length=1, max_length=100)
    cuisine: str = Field(..., description="Type of cuisine")
    price_range: PriceRange
    rating: float = Field(..., ge=0.0, le=5.0)  # constrained to a 0-5 scale
    address: Optional[str] = None
    tags: List[str] = Field(default_factory=list, max_length=10)

    # Pydantic v2 style: the deprecated v1 `@validator` is replaced with
    # `@field_validator` for consistency with the v2 APIs already used in
    # this file (e.g. `model_json_schema()`).
    @field_validator("rating")
    @classmethod
    def round_rating(cls, v: float) -> float:
        """Normalize ratings to one decimal place."""
        return round(v, 1)
# Envelope schema the full LLM response must satisfy.
class RestaurantResponse(BaseModel):
    # Between 1 and 20 validated restaurants.
    restaurants: List[Restaurant] = Field(..., min_length=1, max_length=20)
    query: str
    total_count: int
# Parse + validate LLM response
def parse_llm_response(raw_json: str) -> RestaurantResponse:
    """Parse a raw LLM JSON string and validate it against the schema.

    Args:
        raw_json: The raw text returned by the LLM.

    Returns:
        A validated RestaurantResponse instance.

    Raises:
        ValueError: If the text is not valid JSON, or if it fails schema
            validation. The original exception is chained as the cause.
    """
    try:
        data = json.loads(raw_json)
    except json.JSONDecodeError as e:
        raise ValueError(f"Invalid JSON: {e}") from e
    try:
        # model_validate also handles non-dict JSON (e.g. a top-level array)
        # by raising ValidationError instead of an unrelated TypeError.
        return RestaurantResponse.model_validate(data)
    except ValidationError as e:
        raise ValueError(f"Schema validation failed: {e}") from e
Retry Pattern (Self-Healing)
from tenacity import retry, stop_after_attempt, retry_if_exception_type
class StructuredOutputParser:
    """Parse LLM output into a Pydantic schema, retrying on failure.

    Self-healing pattern: when a response fails JSON parsing or schema
    validation, the error text is stored and appended to the next attempt's
    messages so the model can correct itself. Retries are driven by tenacity.
    """

    def __init__(self, client, model: str, schema: type[BaseModel]):
        self.client = client  # OpenAI-compatible chat client
        self.model = model    # model name passed through to the API
        self.schema = schema  # target Pydantic model class
        # Error text from the previous failed attempt; None when clean.
        # (Replaces the original's fragile hasattr/delattr bookkeeping.)
        self._last_error = None

    @retry(
        stop=stop_after_attempt(3),
        retry=retry_if_exception_type(ValueError)
    )
    def parse(self, prompt: str) -> BaseModel:
        """Retry with error message included on schema validation failure"""
        schema_json = self.schema.model_json_schema()
        messages = [
            {
                "role": "system",
                "content": f"Respond according to the following JSON schema:\n{json.dumps(schema_json, indent=2)}"
            },
            {"role": "user", "content": prompt}
        ]
        # Include error from previous attempt if available
        if self._last_error is not None:
            messages.append({
                "role": "user",
                "content": f"An error occurred in the previous response: {self._last_error}\nPlease respond again with valid JSON."
            })
        response = self.client.chat.completions.create(
            model=self.model,
            messages=messages,
            response_format={"type": "json_object"}
        )
        raw = response.choices[0].message.content
        try:
            data = json.loads(raw)
            result = self.schema(**data)
        except Exception as e:
            self._last_error = str(e)
            # Chain the cause so tracebacks keep the original error.
            raise ValueError(str(e)) from e
        self._last_error = None  # success: clear any stale error state
        return result
# Usage: at most 3 attempts total (per stop_after_attempt) before the
# final ValueError propagates to the caller.
parser = StructuredOutputParser(client, "gpt-4o", RestaurantResponse)
result = parser.parse("Recommend 3 Korean restaurants in Seoul")
Unifying Providers with LiteLLM
import litellm
from pydantic import BaseModel
# One schema reused verbatim for all three providers below.
class ExtractedInfo(BaseModel):
    summary: str
    key_points: list[str]
    sentiment: str
    confidence: float
# The same prompt is sent to each provider with an identical call shape.
prompt_messages = [
    {"role": "user", "content": "Summarize the Kubernetes 1.35 release"}
]

# OpenAI
response = litellm.completion(
    model="gpt-4o",
    messages=prompt_messages,
    response_format=ExtractedInfo,
)
# Anthropic (LiteLLM converts response_format to Tool Use automatically)
response = litellm.completion(
    model="claude-sonnet-4-20250514",
    messages=prompt_messages,
    response_format=ExtractedInfo,
)
# Gemini
response = litellm.completion(
    model="gemini/gemini-2.0-flash",
    messages=prompt_messages,
    response_format=ExtractedInfo,
)
# Identical code works across all 3 providers.
Using the Instructor Library
# pip install instructor
import instructor
from openai import OpenAI
from pydantic import BaseModel
from typing import List

# Wrap the OpenAI client so create() accepts a `response_model` argument.
client = instructor.from_openai(OpenAI())
# One reasoning step in the worked solution.
class Step(BaseModel):
    explanation: str
    output: str
# Full structured answer: ordered steps plus the final result.
class MathSolution(BaseModel):
    steps: List[Step]
    final_answer: str
    confidence: float
# `response_model` tells instructor to parse and validate the completion
# directly into MathSolution.
solution = client.chat.completions.create(
    model="gpt-4o",
    response_model=MathSolution,
    messages=[
        {"role": "user", "content": "Solve 2x + 5 = 15"}
    ],
    max_retries=3,  # instructor re-asks the model on validation failure
)
first_step = solution.steps[0]
print(first_step.explanation)
print(f"Answer: {solution.final_answer}")
# The Anthropic client is wrapped the same way.
import anthropic

anthropic_client = instructor.from_anthropic(anthropic.Anthropic())
problem = [
    {"role": "user", "content": "Solve 3x - 7 = 20"}
]
solution = anthropic_client.messages.create(
    model="claude-sonnet-4-20250514",
    response_model=MathSolution,
    max_tokens=1024,
    messages=problem,
)
Building a Production Pipeline
FastAPI + Structured Output
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from typing import List
import instructor
from openai import OpenAI

app = FastAPI()
# instructor-wrapped client: adds `response_model` support to create().
client = instructor.from_openai(OpenAI())
# Structured analysis result; doubles as the FastAPI response model.
class ProductReview(BaseModel):
    sentiment: str # positive, negative, neutral
    # NOTE(review): score range is unconstrained — consider Field(ge=..., le=...).
    score: float
    key_phrases: List[str]
    summary: str
    language: str
# Request body: the review text plus an optional model override.
class ReviewRequest(BaseModel):
    text: str
    model: str = "gpt-4o-mini"
@app.post("/analyze", response_model=ProductReview)
async def analyze_review(request: ReviewRequest):
    """Analyze a product review with an LLM and return a structured result.

    Raises:
        HTTPException: 500 when the LLM call or schema validation fails;
            the original exception is chained as the cause.
    """
    # NOTE(review): this client is synchronous, so the call below blocks the
    # event loop; consider AsyncOpenAI for production traffic — confirm.
    try:
        result = client.chat.completions.create(
            model=request.model,
            response_model=ProductReview,
            messages=[
                {
                    "role": "system",
                    "content": "Analyze the product review."
                },
                {"role": "user", "content": request.text}
            ],
            max_retries=2  # instructor retries on schema validation failure
        )
    except Exception as e:
        # Chain the cause so server logs keep the root error.
        raise HTTPException(status_code=500, detail=str(e)) from e
    return result
Batch Processing Pipeline
import asyncio
from typing import List
from openai import AsyncOpenAI
import instructor

# Async client so many extraction calls can be awaited concurrently.
async_client = instructor.from_openai(AsyncOpenAI())
# NOTE(review): BaseModel below relies on an earlier `from pydantic import
# BaseModel` in this article; a standalone script must import it here too.
# One named entity found in the text.
class ExtractedEntity(BaseModel):
    name: str
    entity_type: str
    confidence: float
# Extraction result for a single input text.
class EntityExtractionResult(BaseModel):
    entities: List[ExtractedEntity]
    text_length: int
async def extract_entities(text: str) -> EntityExtractionResult:
    """Run one structured entity-extraction call for a single text."""
    chat = [
        {"role": "system", "content": "Extract entities from the text."},
        {"role": "user", "content": text},
    ]
    return await async_client.chat.completions.create(
        model="gpt-4o-mini",
        response_model=EntityExtractionResult,
        messages=chat,
    )
async def batch_extract(texts: List[str], concurrency: int = 5):
    """Extract entities from many texts, capping concurrent API calls.

    Exceptions from individual texts are collected rather than raised;
    only successful results are returned.
    """
    gate = asyncio.Semaphore(concurrency)

    async def limited_extract(text):
        # The semaphore caps how many extractions are in flight at once.
        async with gate:
            return await extract_entities(text)

    outcomes = await asyncio.gather(
        *(limited_extract(t) for t in texts),
        return_exceptions=True,
    )
    successes = [o for o in outcomes if not isinstance(o, Exception)]
    failures = [o for o in outcomes if isinstance(o, Exception)]
    print(f"Successes: {len(successes)}, Failures: {len(failures)}")
    return successes
# Run
# Fix: the original list contained a bare `...` (Ellipsis) element, which
# would have been passed to the LLM as a bogus "text" input.
texts = [
    "A new AI startup in Seoul...",
    "Samsung Electronics semiconductor...",
]
results = asyncio.run(batch_extract(texts))
Conclusion
Structured Output is a key technology for integrating LLMs into production systems:
- OpenAI: 100% schema guarantee with response_format + Structured Outputs
- Anthropic: An indirect approach via Tool Use, but reliable
- Instructor/LiteLLM: Code reuse through provider unification
- Pydantic: The standard for schema definition + validation
- Retry Pattern: Stability through self-healing
Quiz (6 Questions)
Q1. What is the difference between OpenAI's JSON Mode and Structured Outputs? JSON Mode only guarantees valid JSON, while Structured Outputs also guarantees conformance to the specified schema
Q2. How does Anthropic implement Structured Output? It uses Tool Use (Function Calling) with input_schema to receive structured output
Q3. What does Pydantic's Field(ge=0.0, le=5.0) mean? A validation constraint that the value must be greater than or equal to 0.0 and less than or equal to 5.0
Q4. What is the max_retries feature in the instructor library? It automatically retries on schema validation failure to obtain the correct format
Q5. What is the role of asyncio.Semaphore in batch processing? It limits the number of concurrent API calls to prevent exceeding rate limits
Q6. What is the biggest benefit of using LiteLLM? The ability to switch between multiple providers like OpenAI, Anthropic, and Gemini with the same code