Cohere Deploy Integration
Overview
Deploy Cohere API v2 applications to Vercel, Fly.io, and Google Cloud Run with proper secrets management and health checks.
Prerequisites
- Cohere production API key (not trial)
- Platform CLI installed (
vercel,fly, orgcloud) - Application tested locally with real API calls
Instructions
Vercel Deployment
# Add Cohere API key as Vercel environment variable
vercel env add CO_API_KEY production
# Paste your production key when prompted
# Deploy
vercel --prod
vercel.json:
{
"env": {
"CO_API_KEY": "@co_api_key"
},
"functions": {
"api/**/*.ts": {
"maxDuration": 30
}
}
}
Vercel API Route (streaming chat):
// api/chat/route.ts
import { CohereClientV2 } from 'cohere-ai';
const cohere = new CohereClientV2();
export async function POST(req: Request) {
const { message } = await req.json();
const stream = await cohere.chatStream({
model: 'command-a-03-2025',
messages: [{ role: 'user', content: message }],
});
const encoder = new TextEncoder();
const readable = new ReadableStream({
async start(controller) {
for await (const event of stream) {
if (event.type === 'content-delta') {
const text = event.delta?.message?.content?.text ?? '';
controller.enqueue(encoder.encode(`data: ${JSON.stringify({ text })}\n\n`));
}
}
controller.enqueue(encoder.encode('data: [DONE]\n\n'));
controller.close();
},
});
return new Response(readable, {
headers: { 'Content-Type': 'text/event-stream', 'Cache-Control': 'no-cache' },
});
}
Fly.io Deployment
# Set Cohere API key
fly secrets set CO_API_KEY="your-production-key"
# Deploy
fly deploy
fly.toml:
app = "my-cohere-app"
primary_region = "iad"
[env]
NODE_ENV = "production"
[http_service]
internal_port = 3000
force_https = true
auto_stop_machines = true
auto_start_machines = true
min_machines_running = 1
[[services.http_checks]]
interval = 30000
timeout = 5000
path = "/api/health"
Google Cloud Run Deployment
#!/bin/bash
PROJECT_ID="${GOOGLE_CLOUD_PROJECT}"
SERVICE="cohere-app"
REGION="us-central1"
# Store key in Secret Manager
echo -n "$CO_API_KEY" | gcloud secrets create cohere-api-key --data-file=-
# Build and deploy
gcloud builds submit --tag gcr.io/$PROJECT_ID/$SERVICE
gcloud run deploy $SERVICE \
--image gcr.io/$PROJECT_ID/$SERVICE \
--region $REGION \
--platform managed \
--set-secrets=CO_API_KEY=cohere-api-key:latest \
--max-instances 10 \
--min-instances 1 \
--timeout 30
Dockerfile:
FROM node:20-slim AS builder
WORKDIR /app
COPY package*.json ./
RUN npm ci
COPY . .
RUN npm run build
FROM node:20-slim
WORKDIR /app
COPY --from=builder /app/dist ./dist
COPY --from=builder /app/node_modules ./node_modules
COPY --from=builder /app/package.json ./
EXPOSE 3000
CMD ["node", "dist/index.js"]
Health Check (All Platforms)
// api/health.ts — works on Vercel, Fly, Cloud Run
import { CohereClientV2, CohereError } from 'cohere-ai';
export async function GET() {
const start = Date.now();
let status: 'healthy' | 'degraded' | 'down';
try {
const cohere = new CohereClientV2();
await cohere.chat({
model: 'command-r7b-12-2024',
messages: [{ role: 'user', content: 'ping' }],
maxTokens: 1,
});
status = 'healthy';
} catch (err) {
status = err instanceof CohereError && err.statusCode === 429
? 'degraded'
: 'down';
}
return Response.json({
status,
cohere: { latencyMs: Date.now() - start },
timestamp: new Date().toISOString(),
});
}
Environment Configuration
// config/cohere.ts
interface CohereConfig {
model: string;
maxTokens: number;
timeout: number;
}
const configs: Record<string, CohereConfig> = {
development: {
model: 'command-r7b-12-2024', // cheap for dev
maxTokens: 500,
timeout: 30,
},
production: {
model: 'command-a-03-2025', // best for prod
maxTokens: 4096,
timeout: 60,
},
};
export function getCohereConfig(): CohereConfig {
const env = process.env.NODE_ENV ?? 'development';
return configs[env] ?? configs.development;
}
Output
- Application deployed with Cohere API key in platform secret store
- Health check endpoint verifying Cohere connectivity
- Streaming chat endpoint for user-facing applications
- Environment-specific model selection
Error Handling
| Issue | Cause | Solution |
|-------|-------|----------|
| 401 after deploy | Wrong key env var name | Verify CO_API_KEY is set |
| Timeout on Vercel | Default 10s limit | Set maxDuration: 30 in vercel.json |
| Cold start latency | Serverless spin-up | Set min-instances: 1 (Cloud Run/Fly) |
| Stream breaks | Platform timeout | Use chunked transfer encoding |
Resources
Next Steps
For structured output and connectors, see cohere-webhooks-events.