MCP Servers
How GoPie exposes datasets through the Model Context Protocol
GoPie implements the Model Context Protocol (MCP) to expose datasets as context sources for AI applications. This enables seamless integration with AI assistants, IDEs, and other MCP-compatible tools, making your data available for AI-powered analysis and code generation.
What is MCP?
Overview
The Model Context Protocol is an open standard that allows AI systems to access contextual information from various sources. GoPie acts as an MCP server, exposing the following (see the client-side discovery sketch after this list):
- Dataset schemas and metadata
- Query interfaces
- Data samples
- API documentation
- Real-time data access
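To give a feel for the protocol in practice, here is a minimal client-side sketch that connects to a GoPie MCP server and discovers these resources. It uses the official MCP Python SDK (the mcp package); the gopie-mcp command and the placeholder API key are assumptions based on the configuration examples later on this page.

# Minimal resource-discovery sketch using the official MCP Python SDK.
# The gopie-mcp command and API key handling are assumptions; adapt
# them to your deployment.
import asyncio

from mcp import ClientSession, StdioServerParameters
from mcp.client.stdio import stdio_client

async def main():
    # Launch the GoPie MCP server as a stdio subprocess
    server = StdioServerParameters(
        command="gopie-mcp",
        args=["--api-key", "YOUR_GOPIE_API_KEY"]
    )
    async with stdio_client(server) as (read, write):
        async with ClientSession(read, write) as session:
            await session.initialize()
            # Enumerate the resources the server exposes
            listing = await session.list_resources()
            for resource in listing.resources:
                print(resource.uri, "-", resource.name)

asyncio.run(main())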
Benefits
For AI Applications
- Direct access to your data context
- Rich metadata for better understanding
- Type-safe query generation
- Real-time data exploration
For Developers
- IDE integration for data-aware coding
- Automatic API client generation
- Context-aware code completion
- Documentation at your fingertips
Architecture
MCP Server Implementation
Protocol Stack
class GoPieMCPServer:
    def __init__(self):
        self.protocol_version = "1.0"
        self.capabilities = {
            "resources": True,
            "tools": True,
            "prompts": True,
            "sampling": True
        }
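During initialization, client and server negotiate these capabilities over JSON-RPC 2.0. As a rough illustration (field values are examples, not GoPie's actual responses), the server's initialize result could look like the following, shown here as a Python dict:

# Illustrative MCP initialize result, shown as a Python dict.
# The capability shapes follow the MCP specification; the version
# strings and server name are examples only.
initialize_result = {
    "jsonrpc": "2.0",
    "id": 1,
    "result": {
        "protocolVersion": "2024-11-05",
        "capabilities": {
            "resources": {"subscribe": True},
            "tools": {},
            "prompts": {}
        },
        "serverInfo": {"name": "gopie-mcp", "version": "1.0.0"}
    }
}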
Resources
Dataset Resources
Schema Exposure
{
  "uri": "gopie://dataset/sales_data/schema",
  "name": "Sales Data Schema",
  "description": "Complete schema for sales dataset",
  "mimeType": "application/json",
  "content": {
    "tables": {
      "orders": {
        "columns": {
          "order_id": "INTEGER PRIMARY KEY",
          "customer_id": "INTEGER",
          "order_date": "DATE",
          "total_amount": "DECIMAL(10,2)"
        }
      }
    }
  }
}
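A handler that serves this resource might look like the sketch below. It mirrors the @mcp.resource decorator style used in the metadata example further down; the URI-template syntax and the get_dataset_schema helper are assumptions.

# Hypothetical handler for the schema resource above, mirroring the
# decorator style used elsewhere on this page.
@mcp.resource("gopie://dataset/{dataset_id}/schema")
async def dataset_schema(dataset_id: str, context: MCPContext):
    # Enforce the same per-dataset access check as the query tool
    if not await can_access_dataset(context.user_id, dataset_id):
        raise PermissionError("Access denied")
    schema = await get_dataset_schema(dataset_id)
    return {
        "uri": f"gopie://dataset/{dataset_id}/schema",
        "mimeType": "application/json",
        "content": schema.to_dict()
    }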
Sample Data
{
  "uri": "gopie://dataset/sales_data/sample",
  "name": "Sales Data Sample",
  "description": "Sample rows from sales dataset",
  "mimeType": "application/json",
  "content": {
    "rows": [
      {
        "order_id": 1001,
        "customer_id": 5432,
        "order_date": "2024-01-15",
        "total_amount": 1299.99
      }
    ]
  }
}
Metadata Resources
Dataset Information
@mcp.resource("gopie://datasets")
async def list_datasets(context: MCPContext):
    datasets = await get_user_datasets(context.user_id)
    return {
        "datasets": [
            {
                "id": ds.id,
                "name": ds.name,
                "description": ds.description,
                "created_at": ds.created_at,
                "row_count": ds.row_count,
                "columns": len(ds.schema.columns)
            }
            for ds in datasets
        ]
    }
Tools
Query Execution Tool
@mcp.tool("gopie_query")
async def execute_query(
dataset_id: str,
query: str,
limit: int = 100
) -> QueryResult:
"""Execute SQL query against a GoPie dataset"""
# Validate access
if not await can_access_dataset(context.user_id, dataset_id):
raise PermissionError("Access denied")
# Execute query
result = await query_engine.execute(
dataset_id=dataset_id,
query=query,
limit=limit
)
return {
"columns": result.columns,
"rows": result.rows,
"row_count": result.row_count,
"execution_time_ms": result.execution_time
}
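From the client side, this tool is invoked with a standard MCP call_tool request. The sketch below assumes an initialized ClientSession as in the discovery example near the top of this page:

# Calling the query tool through an initialized MCP ClientSession.
result = await session.call_tool(
    "gopie_query",
    arguments={
        "dataset_id": "sales_data",
        "query": (
            "SELECT customer_id, SUM(total_amount) AS revenue "
            "FROM orders GROUP BY customer_id ORDER BY revenue DESC"
        ),
        "limit": 10
    }
)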
Schema Search Tool
@mcp.tool("gopie_search_schema")
async def search_schema(
query: str,
dataset_id: Optional[str] = None
) -> List[SchemaMatch]:
"""Search for tables and columns using natural language"""
# Vector search for semantic matches
matches = await vector_search.search(
query=query,
dataset_id=dataset_id,
limit=10
)
return [
{
"dataset": match.dataset_name,
"table": match.table_name,
"column": match.column_name,
"description": match.description,
"relevance_score": match.score
}
for match in matches
]
Data Analysis Tool
@mcp.tool("gopie_analyze")
async def analyze_data(
dataset_id: str,
analysis_type: str = "summary"
) -> AnalysisResult:
"""Perform automated data analysis"""
if analysis_type == "summary":
return await get_data_summary(dataset_id)
elif analysis_type == "quality":
return await check_data_quality(dataset_id)
elif analysis_type == "profiling":
return await profile_dataset(dataset_id)
Prompts
Query Generation Prompts
@mcp.prompt("generate_query")
async def generate_query_prompt(
dataset_id: str,
question: str
) -> str:
"""Generate a prompt for SQL query generation"""
schema = await get_dataset_schema(dataset_id)
return f"""
Given the following database schema:
{format_schema(schema)}
Generate a SQL query to answer: {question}
Requirements:
- Use only tables and columns from the schema
- Include appropriate JOINs if needed
- Add reasonable LIMIT for large results
- Use standard SQL syntax
"""
Analysis Prompts
@mcp.prompt("analyze_results")
async def analyze_results_prompt(
query_results: dict,
analysis_goal: str
) -> str:
"""Generate a prompt for result analysis"""
return f"""
Analyze the following query results:
Columns: {', '.join(query_results['columns'])}
Row count: {query_results['row_count']}
Sample data: {format_sample(query_results['rows'][:5])}
Analysis goal: {analysis_goal}
Provide insights on:
- Key patterns or trends
- Anomalies or outliers
- Statistical summary
- Actionable recommendations
"""
Security
Authentication
API Key Authentication
class MCPAuthHandler:
    async def authenticate(self, request: MCPRequest) -> User:
        # Extract API key from headers
        api_key = request.headers.get("X-GoPie-API-Key")
        if not api_key:
            raise AuthenticationError("API key required")
        # Validate and get user
        user = await validate_api_key(api_key)
        # Check MCP permissions
        if not user.has_permission("mcp:access"):
            raise PermissionError("MCP access not allowed")
        return user
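The validate_api_key helper is left abstract above. One possible shape, with hypothetical storage helpers, is to compare a hash of the presented key against stored hashes:

# One possible validate_api_key sketch. The api_keys_table and
# load_user helpers are hypothetical.
import hashlib

async def validate_api_key(api_key: str) -> User:
    # Store and compare only hashes; never persist raw keys
    key_hash = hashlib.sha256(api_key.encode()).hexdigest()
    record = await api_keys_table.find_by_hash(key_hash)
    if record is None or record.revoked:
        raise AuthenticationError("Invalid API key")
    return await load_user(record.user_id)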
Token-Based Auth
@mcp.middleware
async def jwt_auth_middleware(request: MCPRequest, handler):
    token = request.headers.get("Authorization", "").replace("Bearer ", "")
    try:
        # PyJWT requires an explicit algorithms list
        payload = jwt.decode(token, settings.JWT_SECRET, algorithms=["HS256"])
        request.user_id = payload["sub"]
        request.org_id = payload["org"]
    except jwt.InvalidTokenError:
        raise AuthenticationError("Invalid token")
    return await handler(request)
Access Control
Resource-Level Permissions
class MCPPermissions:
    def can_access_resource(
        self,
        user: User,
        resource_uri: str
    ) -> bool:
        # Parse resource URI
        resource = parse_mcp_uri(resource_uri)
        # Check dataset access
        if resource.type == "dataset":
            return self.check_dataset_access(
                user.id,
                resource.dataset_id
            )
        # Check organization resources
        if resource.type == "organization":
            return user.organization_id == resource.org_id
        # Deny anything unrecognized by default
        return False
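The parse_mcp_uri helper, assuming the gopie:// URI layout used in the resource examples above, might be sketched as:

# Hypothetical parse_mcp_uri helper for gopie:// URIs such as
# gopie://dataset/sales_data/schema.
from dataclasses import dataclass
from urllib.parse import urlparse

@dataclass
class ParsedResource:
    type: str
    dataset_id: str | None = None
    org_id: str | None = None

def parse_mcp_uri(uri: str) -> ParsedResource:
    parsed = urlparse(uri)
    if parsed.scheme != "gopie":
        raise ValueError(f"Unsupported URI scheme: {parsed.scheme}")
    # netloc holds the resource type, the path holds its identifiers
    parts = [parsed.netloc, *parsed.path.strip("/").split("/")]
    if parts[0] == "dataset" and len(parts) > 1:
        return ParsedResource(type="dataset", dataset_id=parts[1])
    if parts[0] == "organization" and len(parts) > 1:
        return ParsedResource(type="organization", org_id=parts[1])
    return ParsedResource(type=parts[0])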
Rate Limiting
@mcp.middleware
async def rate_limit_middleware(request: MCPRequest, handler):
    # Get rate limit for user
    limit = get_rate_limit(request.user_id)
    # Count this request in the current window
    key = f"mcp:rate:{request.user_id}"
    current = await redis.incr(key)
    if current == 1:
        # Start the window only when the key is first created;
        # re-setting expiry on every request would push the window back
        await redis.expire(key, 60)
    if current > limit.requests_per_minute:
        raise RateLimitError(
            f"Rate limit exceeded: {limit.requests_per_minute}/min"
        )
    return await handler(request)
Integration Examples
IDE Integration
VS Code Extension
// GoPie MCP client for VS Code
class GoPieMCPClient {
    constructor() {
        this.client = new MCPClient({
            serverUrl: 'https://api.gopie.io/mcp',
            // Read the key from the extension's settings
            apiKey: vscode.workspace.getConfiguration('gopie').get('apiKey')
        });
    }

    async getDatasetCompletions(document: vscode.TextDocument) {
        // Get available datasets
        const datasets = await this.client.listResources('gopie://datasets');
        // Generate completions
        return datasets.map(ds => new vscode.CompletionItem(
            ds.name,
            vscode.CompletionItemKind.Reference
        ));
    }
}
AI Assistant Integration
Claude Desktop
Add GoPie to Claude Desktop's claude_desktop_config.json:

{
  "mcpServers": {
    "gopie": {
      "command": "gopie-mcp",
      "args": ["--api-key", "${GOPIE_API_KEY}"],
      "env": {
        "GOPIE_API_URL": "https://api.gopie.io"
      }
    }
  }
}
Custom AI Application
# Using GoPie MCP in an AI app
class DataAwareAssistant:
    def __init__(self):
        self.mcp_client = GoPieMCPClient(
            api_key=os.getenv("GOPIE_API_KEY")
        )

    async def answer_data_question(self, question: str):
        # Search for relevant datasets
        datasets = await self.mcp_client.search_datasets(question)
        if not datasets:
            raise LookupError("No dataset matched the question")
        # Get schema context
        schema = await self.mcp_client.get_schema(datasets[0].id)
        # Generate and execute query
        query = await self.generate_sql(question, schema)
        results = await self.mcp_client.execute_query(
            dataset_id=datasets[0].id,
            query=query
        )
        # Analyze results
        return await self.analyze_results(results, question)
Advanced Features
Streaming Support
@mcp.tool("gopie_stream_query")
async def stream_query_results(
dataset_id: str,
query: str
) -> AsyncIterator[dict]:
"""Stream query results for large datasets"""
async for batch in query_engine.stream_execute(
dataset_id=dataset_id,
query=query,
batch_size=1000
):
yield {
"batch_number": batch.number,
"rows": batch.rows,
"has_more": batch.has_more
}
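On the consuming side, a client could drain the stream batch by batch. The stream_query method here is a hypothetical wrapper around the gopie_stream_query tool:

# Draining the stream; stream_query is a hypothetical client wrapper
# around the gopie_stream_query tool above.
async def fetch_all_rows(client, dataset_id: str, query: str) -> list[dict]:
    rows: list[dict] = []
    async for batch in client.stream_query(dataset_id, query):
        rows.extend(batch["rows"])
        if not batch["has_more"]:
            break
    return rows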
Sampling Capabilities
@mcp.sampling("gopie_sample")
async def get_data_sample(
    dataset_id: str,
    sample_size: int = 1000,
    strategy: str = "random"
) -> DataSample:
    """Get a representative data sample"""
    if strategy == "random":
        return await random_sample(dataset_id, sample_size)
    elif strategy == "stratified":
        return await stratified_sample(dataset_id, sample_size)
    elif strategy == "systematic":
        return await systematic_sample(dataset_id, sample_size)
    # Reject unknown strategies instead of silently returning None
    raise ValueError(f"Unknown sampling strategy: {strategy}")
Monitoring
Usage Analytics
class MCPAnalytics:
    async def track_request(self, request: MCPRequest):
        await self.metrics.increment(
            "mcp.requests",
            tags={
                "method": request.method,
                "resource_type": request.resource_type,
                "user_id": request.user_id,
                "client": request.client_id
            }
        )
Performance Metrics
mcp_metrics:
  request_latency:
    p50: 45ms
    p90: 120ms
    p99: 500ms
  tool_execution:
    query_execution:
      avg: 230ms
      max: 5000ms
    schema_search:
      avg: 35ms
      max: 200ms
Best Practices
For MCP Server Implementation
- Efficient Resource Loading: Cache frequently accessed resources
- Clear Naming: Use descriptive resource URIs
- Rich Metadata: Include detailed descriptions
- Error Handling: Provide helpful error messages
For MCP Clients
- Resource Discovery: Use list operations to discover available data
- Caching: Cache schema and metadata locally (see the sketch after this list)
- Batch Operations: Group multiple requests when possible
- Error Recovery: Handle network and permission errors gracefully
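As a concrete example of the caching recommendation, a client can keep schemas in a small TTL cache so repeated completions do not re-fetch metadata. A sketch (the TTL value and fetch callable are illustrative):

# Client-side schema cache with a simple TTL, per the caching
# recommendation above. TTL and fetch callable are illustrative.
import time

class SchemaCache:
    def __init__(self, ttl_seconds: int = 300):
        self.ttl = ttl_seconds
        self._entries: dict[str, tuple[float, dict]] = {}

    async def get(self, dataset_id: str, fetch):
        now = time.monotonic()
        hit = self._entries.get(dataset_id)
        if hit and now - hit[0] < self.ttl:
            return hit[1]  # still fresh; skip the network round-trip
        schema = await fetch(dataset_id)  # e.g. client.get_schema
        self._entries[dataset_id] = (now, schema)
        return schema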
Future Roadmap
Planned Features
- Bi-directional Sync: Real-time data updates
- Advanced Sampling: ML-based intelligent sampling
- Query Optimization: AI-powered query suggestions
- Multi-modal Support: Images and documents in datasets
Ecosystem Integration
- LangChain Integration: Native LangChain tools
- AutoGPT Support: Autonomous data analysis
- Jupyter Integration: MCP kernel for notebooks
- BI Tool Bridges: Connect to Tableau, PowerBI
Next Steps
- Review AI Agents for query generation
- Learn about Vector Search for semantic capabilities
- Explore Database Architecture for data access