Groq Deploy Integration
Overview
Deploy applications using Groq's inference API to Vercel Edge, Cloud Run, Docker, and other platforms. Groq's sub-200ms latency makes it ideal for edge deployments and real-time applications.
Prerequisites
- Groq API key stored in `GROQ_API_KEY`
- Application using the `groq-sdk` package
- Platform CLI installed (vercel, docker, or gcloud)
Instructions
Step 1: Vercel Edge Function
// app/api/chat/route.ts (Next.js App Router)
import Groq from "groq-sdk";
// Run this route on the Vercel Edge runtime.
export const runtime = "edge";

/** Builds a JSON error response of the form `{ error: message }`. */
function jsonError(message: string, status: number): Response {
  return Response.json({ error: message }, { status });
}

/**
 * Chat completion endpoint backed by Groq.
 *
 * Expects a JSON body `{ messages, stream? }`. When `stream` is truthy the
 * reply is a Server-Sent Events stream of `{ content }` deltas terminated by
 * `data: [DONE]`; otherwise the full completion object is returned as JSON.
 *
 * Error responses:
 * - 500 when `GROQ_API_KEY` is not configured.
 * - 400 when the body is not JSON or `messages` is not a non-empty array.
 * - The upstream status (or 502) when the Groq request itself fails.
 */
export async function POST(req: Request) {
  const apiKey = process.env.GROQ_API_KEY;
  if (!apiKey) {
    return jsonError("GROQ_API_KEY is not configured", 500);
  }

  // A body that fails to parse is treated the same as a body without `messages`.
  const payload: unknown = await req.json().catch(() => null);
  const { messages, stream: useStream } = (payload ?? {}) as {
    messages?: unknown;
    stream?: unknown;
  };
  if (!Array.isArray(messages) || messages.length === 0) {
    return jsonError("Request body must be JSON with a non-empty `messages` array", 400);
  }

  const groq = new Groq({ apiKey });

  try {
    if (useStream) {
      const stream = await groq.chat.completions.create({
        model: "llama-3.3-70b-versatile",
        messages,
        stream: true,
        max_tokens: 2048,
      });

      const encoder = new TextEncoder();
      const readable = new ReadableStream<Uint8Array>({
        async start(controller) {
          try {
            for await (const chunk of stream) {
              const content = chunk.choices[0]?.delta?.content;
              if (content) {
                controller.enqueue(
                  encoder.encode(`data: ${JSON.stringify({ content })}\n\n`)
                );
              }
            }
            controller.enqueue(encoder.encode("data: [DONE]\n\n"));
            controller.close();
          } catch (err) {
            // Surface mid-stream failures to the reader instead of hanging.
            controller.error(err);
          }
        },
        cancel() {
          // The client went away: stop pulling tokens from Groq.
          stream.controller.abort();
        },
      });

      return new Response(readable, {
        headers: {
          "Content-Type": "text/event-stream",
          "Cache-Control": "no-cache",
          Connection: "keep-alive",
        },
      });
    }

    const completion = await groq.chat.completions.create({
      model: "llama-3.3-70b-versatile",
      messages,
      max_tokens: 2048,
    });
    return Response.json(completion);
  } catch (err: unknown) {
    // Preserve the upstream status (e.g. 429) when the SDK provides one.
    const status =
      err instanceof Groq.APIError && typeof err.status === "number" ? err.status : 502;
    const message = err instanceof Error ? err.message : "Groq request failed";
    return jsonError(message, status);
  }
}
Step 2: Vercel Deployment
# Abort on any error, on use of an unset variable, and on failures inside pipelines.
set -o errexit -o nounset -o pipefail

# Register the Groq API key for the production environment.
vercel env add GROQ_API_KEY production

# Ship the project to production.
vercel --prod
Step 3: Docker Container
# ---- Build stage: install all dependencies and compile the app to /app/dist ----
FROM node:20-slim AS builder
WORKDIR /app
COPY package*.json ./
RUN npm ci
COPY . .
RUN npm run build
# Drop devDependencies so only runtime packages reach the final image.
RUN npm prune --omit=dev

# ---- Runtime stage: compiled output and production dependencies only ----
FROM node:20-slim
WORKDIR /app
COPY --from=builder /app/dist ./dist
COPY --from=builder /app/node_modules ./node_modules
COPY --from=builder /app/package.json .
# Run as the unprivileged user provided by the official Node image.
USER node
EXPOSE 3000
# node:20-slim does not include curl, so probe /health with Node's built-in fetch.
HEALTHCHECK --interval=30s --timeout=5s CMD ["node", "-e", "fetch('http://localhost:3000/health').then((r) => process.exit(r.ok ? 0 : 1)).catch(() => process.exit(1))"]
CMD ["node", "dist/index.js"]
Step 4: Cloud Run Deployment
set -euo pipefail

# Fail early with a clear message if the key is not exported in this shell.
: "${GROQ_API_KEY:?GROQ_API_KEY must be set}"

# Store the API key in Secret Manager.
# `secrets create` errors when the secret already exists, so on re-runs add a
# new version instead. printf avoids the non-portable `echo -n`.
if gcloud secrets describe groq-api-key >/dev/null 2>&1; then
  printf '%s' "$GROQ_API_KEY" | gcloud secrets versions add groq-api-key --data-file=-
else
  printf '%s' "$GROQ_API_KEY" | gcloud secrets create groq-api-key --data-file=-
fi

# Deploy with streaming support.
# --port tells Cloud Run which container port to route to; the server in this
# guide uses 3000, whereas Cloud Run assumes 8080 by default.
gcloud run deploy groq-api \
  --source . \
  --region us-central1 \
  --port=3000 \
  --set-secrets=GROQ_API_KEY=groq-api-key:latest \
  --min-instances=1 \
  --max-instances=10 \
  --cpu=1 --memory=512Mi \
  --allow-unauthenticated \
  --timeout=60s
Step 5: Express Server with Health Check
import express from "express";
import Groq from "groq-sdk";
const app = express();
// No options passed: the client is expected to pick up its API key from the
// environment (GROQ_API_KEY per the prerequisites) — confirm against the SDK docs.
const groq = new Groq();
app.use(express.json());

// Health check -- uses cheapest model with minimal tokens.
// Responds 200 with the measured round-trip latency when Groq answers, or 503
// with the failure reason when the request throws.
// NOTE: every probe issues a real Groq request, so keep probe intervals modest.
app.get("/health", async (_req, res) => {
  try {
    const start = performance.now();
    await groq.chat.completions.create({
      model: "llama-3.1-8b-instant",
      messages: [{ role: "user", content: "OK" }],
      max_tokens: 1,
    });
    res.json({
      status: "healthy",
      groq: { connected: true, latencyMs: Math.round(performance.now() - start) },
    });
  } catch (err: unknown) {
    // Narrow instead of assuming the thrown value is an Error.
    const error = err instanceof Error ? err.message : String(err);
    res.status(503).json({
      status: "unhealthy",
      groq: { connected: false, error },
    });
  }
});
// Chat endpoint with streaming.
// Body: `{ messages, model? }`. When the Accept header includes
// "text/event-stream" the reply is an SSE stream of `{ content }` deltas
// terminated by `data: [DONE]`; otherwise the full completion is returned as JSON.
// Responds 400 for an invalid body and the upstream status (or 502) when the
// Groq request fails before any bytes were sent.
app.post("/api/chat", async (req, res) => {
  const { messages, model = "llama-3.3-70b-versatile" } = req.body ?? {};
  if (!Array.isArray(messages) || messages.length === 0) {
    res.status(400).json({ error: "`messages` must be a non-empty array" });
    return;
  }
  if (typeof model !== "string") {
    res.status(400).json({ error: "`model` must be a string" });
    return;
  }

  // Clients commonly send compound Accept headers (e.g. "text/event-stream, */*"),
  // so match by substring rather than strict equality.
  const wantsStream = (req.headers.accept ?? "").includes("text/event-stream");

  try {
    if (wantsStream) {
      // Open the upstream stream BEFORE committing SSE headers so that an
      // immediate failure can still be reported with a real HTTP status.
      const stream = await groq.chat.completions.create({
        model,
        messages,
        stream: true,
        max_tokens: 2048,
      });

      // Stop pulling tokens from Groq once the client connection is gone.
      res.on("close", () => stream.controller.abort());

      res.writeHead(200, {
        "Content-Type": "text/event-stream",
        "Cache-Control": "no-cache",
        Connection: "keep-alive",
      });

      for await (const chunk of stream) {
        const content = chunk.choices[0]?.delta?.content;
        if (content) {
          res.write(`data: ${JSON.stringify({ content })}\n\n`);
        }
      }

      if (!res.destroyed) {
        res.write("data: [DONE]\n\n");
        res.end();
      }
    } else {
      const completion = await groq.chat.completions.create({
        model,
        messages,
        max_tokens: 2048,
      });
      res.json(completion);
    }
  } catch (err: unknown) {
    const message = err instanceof Error ? err.message : String(err);
    if (res.headersSent) {
      // Headers are already out, so the status cannot change: report the
      // failure in-band and close the stream.
      if (!res.writableEnded && !res.destroyed) {
        res.write(`event: error\ndata: ${JSON.stringify({ error: message })}\n\n`);
        res.end();
      }
      return;
    }
    // Preserve the upstream status (e.g. 429) when the SDK provides one.
    const status =
      err instanceof Groq.APIError && typeof err.status === "number" ? err.status : 502;
    res.status(status).json({ error: message });
  }
});
// Honor the PORT variable injected by platforms such as Cloud Run; fall back
// to 3000 (the port used by the Dockerfile) when it is unset or not a number.
const port = Number(process.env.PORT) || 3000;
app.listen(port, () => console.log(`Groq API server on :${port}`));
Step 6: Vercel AI SDK Integration
// Using @ai-sdk/groq for Vercel AI SDK
import { createGroq } from "@ai-sdk/groq";
import { streamText } from "ai";
// Groq provider for the Vercel AI SDK, keyed from the environment.
const groq = createGroq({ apiKey: process.env.GROQ_API_KEY });

/**
 * Streams a chat completion for the `messages` array in the JSON request body
 * and returns it as a data-stream response.
 */
export async function POST(req: Request) {
  const body = await req.json();
  const chatModel = groq("llama-3.3-70b-versatile");
  const completionStream = streamText({
    model: chatModel,
    messages: body.messages,
  });
  return completionStream.toDataStreamResponse();
}
Environment Variable Config
| Platform | Command |
|----------|---------|
| Vercel | vercel env add GROQ_API_KEY production |
| Cloud Run | gcloud secrets create groq-api-key --data-file=- |
| Fly.io | fly secrets set GROQ_API_KEY=gsk_... |
| Railway | railway variables set GROQ_API_KEY=gsk_... |
| Docker | -e GROQ_API_KEY=gsk_... or Docker secrets |
Error Handling
| Issue | Cause | Solution |
|-------|-------|----------|
| Rate limited (429) | Too many requests | Implement request queuing with backoff |
| Edge timeout | Response not started within 25s | Use streaming so the first bytes are sent early |
| Model unavailable | Capacity or deprecation | Fall back to llama-3.1-8b-instant |
| Cold start latency | Serverless function init | Set min-instances=1 on Cloud Run |
| API key not found | Secret not configured | Check platform secret config |
Resources
Next Steps
For multi-environment setup, see groq-multi-env-setup.