Groq Multi-Environment Setup Skill

Groq Multi-Environment Setup

Overview

Configure Groq API access across development, staging, and production with the right model, rate limit strategy, and secret management per environment. Key insight: use llama-3.1-8b-instant in development (cheapest, fastest), match production model in staging, and harden production with retries and fallbacks.

Environment Strategy

| Environment | API Key Source | Default Model | Retry | Logging | |-------------|---------------|---------------|-------|---------| | Development | .env.local | llama-3.1-8b-instant | 1 | Verbose | | Staging | CI/CD secrets | llama-3.3-70b-versatile | 3 | Standard | | Production | Secret manager | llama-3.3-70b-versatile | 5 | Structured |

Instructions

Step 1: Configuration Module

// config/groq.ts
import Groq from "groq-sdk";

interface GroqEnvConfig {
  apiKey: string;
  model: string;
  maxTokens: number;
  temperature: number;
  maxRetries: number;
  timeout: number;
  logRequests: boolean;
}

const configs: Record<string, GroqEnvConfig> = {
  development: {
    apiKey: process.env.GROQ_API_KEY || "",
    model: "llama-3.1-8b-instant",       // Cheapest, fastest for iteration
    maxTokens: 512,
    temperature: 0.7,
    maxRetries: 1,
    timeout: 15_000,
    logRequests: true,                     // Verbose in dev
  },
  staging: {
    apiKey: process.env.GROQ_API_KEY_STAGING || process.env.GROQ_API_KEY || "",
    model: "llama-3.3-70b-versatile",    // Match production model
    maxTokens: 2048,
    temperature: 0.3,
    maxRetries: 3,
    timeout: 30_000,
    logRequests: false,
  },
  production: {
    apiKey: process.env.GROQ_API_KEY_PROD || process.env.GROQ_API_KEY || "",
    model: "llama-3.3-70b-versatile",    // Quality model
    maxTokens: 2048,
    temperature: 0.3,
    maxRetries: 5,                        // More retries in prod
    timeout: 30_000,
    logRequests: false,
  },
};

function getEnv(): string {
  return process.env.NODE_ENV || "development";
}

export function getGroqConfig(): GroqEnvConfig {
  const env = getEnv();
  const config = configs[env] || configs.development;

  if (!config.apiKey) {
    throw new Error(
      `GROQ_API_KEY not set for ${env}. ` +
      (env === "development"
        ? "Copy .env.example to .env.local and add your key from console.groq.com/keys"
        : `Set GROQ_API_KEY_${env.toUpperCase()} in your secret manager`)
    );
  }

  return config;
}

let _client: Groq | null = null;

export function getGroqClient(): Groq {
  if (!_client) {
    const config = getGroqConfig();
    _client = new Groq({
      apiKey: config.apiKey,
      maxRetries: config.maxRetries,
      timeout: config.timeout,
    });
  }
  return _client;
}

Step 2: Environment-Aware Service

// services/groq-service.ts
import { getGroqClient, getGroqConfig } from "../config/groq";

export async function complete(
  messages: any[],
  options?: { model?: string; maxTokens?: number }
): Promise<string> {
  const groq = getGroqClient();
  const config = getGroqConfig();

  const model = options?.model || config.model;
  const maxTokens = options?.maxTokens || config.maxTokens;

  if (config.logRequests) {
    console.log(`[groq:${model}] ${messages.length} messages, max_tokens=${maxTokens}`);
  }

  try {
    const completion = await groq.chat.completions.create({
      model,
      messages,
      max_tokens: maxTokens,
      temperature: config.temperature,
    });

    if (config.logRequests) {
      const u = completion.usage!;
      console.log(`[groq:${model}] ${u.prompt_tokens}+${u.completion_tokens} tokens, ${((u as any).total_time * 1000).toFixed(0)}ms`);
    }

    return completion.choices[0].message.content || "";
  } catch (err: any) {
    if (err.status === 429) {
      const retryAfter = err.headers?.["retry-after"] || "?";
      console.error(`[groq:${model}] Rate limited. Retry after ${retryAfter}s`);
    }
    throw err;
  }
}

Step 3: Secret Management by Platform

set -euo pipefail

# === Development ===
# .env.local (git-ignored)
cat > .env.example << 'EOF'
# Get your API key at https://console.groq.com/keys
GROQ_API_KEY=gsk_your_dev_key_here
EOF

# === Staging (GitHub Actions) ===
gh secret set GROQ_API_KEY_STAGING --body "gsk_staging_key"

# === Production (Cloud Platforms) ===
# AWS Secrets Manager
aws secretsmanager create-secret \
  --name groq/prod/api-key \
  --secret-string "gsk_prod_key"

# GCP Secret Manager
echo -n "gsk_prod_key" | gcloud secrets create groq-api-key-prod --data-file=-

# HashiCorp Vault
vault kv put secret/groq/prod api_key="gsk_prod_key"

Step 4: Docker Compose Multi-Env

# docker-compose.yml
services:
  app-dev:
    build: .
    environment:
      - NODE_ENV=development
      - GROQ_API_KEY=${GROQ_API_KEY}
    profiles: ["dev"]

  app-staging:
    build: .
    environment:
      - NODE_ENV=staging
      - GROQ_API_KEY=${GROQ_API_KEY_STAGING}
    profiles: ["staging"]

  app-prod:
    build: .
    environment:
      - NODE_ENV=production
    secrets:
      - groq_api_key
    profiles: ["prod"]

secrets:
  groq_api_key:
    external: true

Step 5: Verify Environment Config

// scripts/verify-groq-env.ts
import { getGroqConfig, getGroqClient } from "../config/groq";

async function verify() {
  const config = getGroqConfig();
  console.log(`Environment: ${process.env.NODE_ENV || "development"}`);
  console.log(`Model: ${config.model}`);
  console.log(`Max retries: ${config.maxRetries}`);
  console.log(`API key prefix: ${config.apiKey.slice(0, 8)}...`);

  const groq = getGroqClient();
  const start = performance.now();
  const result = await groq.chat.completions.create({
    model: config.model,
    messages: [{ role: "user", content: "Reply: OK" }],
    max_tokens: 5,
    temperature: 0,
  });

  console.log(`Connection: OK (${Math.round(performance.now() - start)}ms)`);
  console.log(`Model response: ${result.choices[0].message.content}`);
}

verify().catch((err) => {
  console.error(`FAILED: ${err.message}`);
  process.exit(1);
});

Step 6: Rate Limit Awareness by Environment

set -euo pipefail
# Check current rate limits for your key
curl -si https://api.groq.com/openai/v1/chat/completions \
  -H "Authorization: Bearer $GROQ_API_KEY" \
  -H "Content-Type: application/json" \
  -d '{"model":"llama-3.1-8b-instant","messages":[{"role":"user","content":"ping"}],"max_tokens":1}' \
  2>/dev/null | grep -iE "^x-ratelimit"

Error Handling

| Issue | Cause | Solution | |-------|-------|----------| | GROQ_API_KEY not set | Missing env var | Check .env.local (dev) or secret manager (prod) | | Wrong model in env | Config mismatch | Verify with verify-groq-env.ts script | | Rate limited in dev | Free tier limits | Use llama-3.1-8b-instant with low max_tokens | | Staging/prod key in dev | Key leak risk | Use separate Groq organizations per environment |

Resources

Next Steps

For deployment configuration, see groq-deploy-integration.

Agent Skills: Groq Multi-Environment Setup

Install this agent skill to your local

Skill Files