Langfuse Deploy Integration Skill

Langfuse Deploy Integration

Overview

Deploy Langfuse LLM observability alongside your application. Covers integrating the SDK for serverless (Vercel/Lambda), Docker, Cloud Run, and self-hosting the Langfuse server itself.

Prerequisites

Langfuse API keys (cloud or self-hosted)
Application using Langfuse SDK
Target platform CLI installed

Instructions

Step 1: Vercel / Next.js Deployment

set -euo pipefail
# Register every Langfuse setting as a production environment variable in Vercel
for name in LANGFUSE_PUBLIC_KEY LANGFUSE_SECRET_KEY LANGFUSE_BASE_URL; do
  vercel env add "$name" production
done

// app/api/chat/route.ts (Next.js App Router)
import { NextRequest, NextResponse } from "next/server";
import { LangfuseClient } from "@langfuse/client";
import { startActiveObservation, updateActiveObservation } from "@langfuse/tracing";
import OpenAI from "openai";

const langfuse = new LangfuseClient();
const openai = new OpenAI();

/**
 * Handles POST /api/chat: forwards the chat messages to OpenAI and records the
 * call as a Langfuse generation observation named "chat-api".
 *
 * @param req - Incoming request; its JSON body must carry a `messages` array.
 * @returns The assistant message as JSON, or a 400 response when `messages`
 *   is missing or not an array.
 * @throws Error when OpenAI returns a completion without any choice.
 */
export async function POST(req: NextRequest) {
  const { messages } = await req.json();

  // Reject malformed bodies up front instead of failing inside the OpenAI call.
  if (!Array.isArray(messages)) {
    return NextResponse.json(
      { error: "`messages` must be an array" },
      { status: 400 }
    );
  }

  const response = await startActiveObservation(
    "chat-api",
    async () => {
      updateActiveObservation(
        {
          model: "gpt-4o",
          input: messages,
          metadata: { endpoint: "/api/chat" },
        },
        { asType: "generation" }
      );

      const result = await openai.chat.completions.create({
        model: "gpt-4o",
        messages,
      });

      const message = result.choices[0]?.message;
      if (!message) {
        throw new Error("OpenAI returned no choices");
      }

      updateActiveObservation(
        {
          output: message,
          // Only report token counts when the API actually returned them.
          ...(result.usage && {
            usageDetails: {
              input: result.usage.prompt_tokens,
              output: result.usage.completion_tokens,
            },
          }),
        },
        { asType: "generation" }
      );

      return message;
    },
    // The observation type is passed as options, after the name and callback.
    { asType: "generation" }
  );

  return NextResponse.json(response);
}

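Serverless note: Langfuse SDK v4+ exports spans through OpenTelemetry in the background, but a serverless function may be frozen as soon as the response is returned, so pending spans should still be flushed before the function finishes (for example, by calling forceFlush() on the LangfuseSpanProcessor). For v3, always call await langfuse.flushAsync() before the response returns.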

Step 2: AWS Lambda / Serverless

// handler.ts
import { LangfuseSpanProcessor } from "@langfuse/otel";
import { NodeSDK } from "@opentelemetry/sdk-node";
import { startActiveObservation, updateActiveObservation } from "@langfuse/tracing";

// Initialize OUTSIDE handler for connection reuse
// Keep a reference to the span processor so the handler can flush it on demand.
const langfuseSpanProcessor = new LangfuseSpanProcessor({
  exportMode: "immediate", // Export each span as it ends instead of batching
});

const sdk = new NodeSDK({
  spanProcessors: [langfuseSpanProcessor],
});
sdk.start();

/**
 * Lambda entry point: runs `processRequest` inside a Langfuse observation and
 * flushes pending spans before the invocation ends.
 *
 * The SDK is deliberately NOT shut down here: a shutdown would stop tracing for
 * every later invocation that reuses this execution environment.
 *
 * @param event - Raw Lambda event, recorded as the observation input.
 * @returns An HTTP-style response with the JSON-serialized result.
 */
export const handler = async (event: any) => {
  try {
    return await startActiveObservation("lambda-handler", async () => {
      updateActiveObservation({ input: event });

      const result = await processRequest(event);

      updateActiveObservation({ output: result });

      return { statusCode: 200, body: JSON.stringify(result) };
    });
  } finally {
    // Flush after the observation has ended (on success and on failure) so the
    // handler's own span is exported before Lambda freezes the process.
    await langfuseSpanProcessor.forceFlush();
  }
};

Step 3: Self-Hosted Langfuse Server (Docker)

# docker-compose.yml
# Runs the Langfuse container backed by a single Postgres database.
# DB_PASSWORD, NEXTAUTH_SECRET, SALT and ENCRYPTION_KEY are filled in through
# Compose variable interpolation (${...}).
services:
  langfuse:
    # NOTE(review): `latest` is an unpinned tag. Newer Langfuse major versions may
    # require additional services (e.g. ClickHouse, Redis, blob storage) that are
    # not defined in this file — confirm against the self-hosting docs and pin a tag.
    image: langfuse/langfuse:latest
    ports:
      # Publish container port 3000 on host port 3000
      - "3000:3000"
    environment:
      # Connection string points at the `postgres` service defined below
      - DATABASE_URL=postgresql://langfuse:${DB_PASSWORD}@postgres:5432/langfuse
      - NEXTAUTH_SECRET=${NEXTAUTH_SECRET}
      # Placeholder domain — replace with the public URL of this deployment
      - NEXTAUTH_URL=https://langfuse.your-domain.com
      - SALT=${SALT}
      - ENCRYPTION_KEY=${ENCRYPTION_KEY}
      # Presumably blocks self-service sign-up — confirm in the Langfuse docs
      - AUTH_DISABLE_SIGNUP=true
      # Presumably the role assigned to new project members — confirm
      - LANGFUSE_DEFAULT_PROJECT_ROLE=VIEWER
    depends_on:
      # Start Langfuse only after the Postgres healthcheck below passes
      postgres:
        condition: service_healthy

  postgres:
    image: postgres:16-alpine
    environment:
      POSTGRES_USER: langfuse
      POSTGRES_PASSWORD: ${DB_PASSWORD}
      POSTGRES_DB: langfuse
    volumes:
      # Named volume so database files survive container re-creation
      - pgdata:/var/lib/postgresql/data
    healthcheck:
      # Reports healthy once Postgres accepts connections for the langfuse user
      test: ["CMD-SHELL", "pg_isready -U langfuse"]
      interval: 5s
      timeout: 5s
      retries: 5

volumes:
  pgdata:

set -euo pipefail

# Generate secrets ONCE and persist them in .env. Regenerating them on a later
# run would hand Postgres a password that no longer matches the one already
# stored in the pgdata volume, and would invalidate the encryption key.
if [ ! -f .env ]; then
  # Plain assignments (not `export VAR=$(...)`) so an openssl failure aborts
  # the script under `set -e`.
  db_password=$(openssl rand -hex 16)
  nextauth_secret=$(openssl rand -hex 32)
  salt=$(openssl rand -hex 16)
  encryption_key=$(openssl rand -hex 32)

  (
    umask 077 # secrets file readable by its owner only
    printf '%s\n' \
      "DB_PASSWORD=$db_password" \
      "NEXTAUTH_SECRET=$nextauth_secret" \
      "SALT=$salt" \
      "ENCRYPTION_KEY=$encryption_key" > .env
  )
fi

# Export the persisted values so `docker compose` sees them in the environment
set -a
. ./.env
set +a

# Start
docker compose up -d

# Wait and verify: poll the health endpoint instead of sleeping a fixed time;
# -f makes curl fail on HTTP error statuses.
attempts=0
until curl -fs http://localhost:3000/api/public/health; do
  attempts=$((attempts + 1))
  if [ "$attempts" -ge 30 ]; then
    echo "Langfuse did not become healthy in time" >&2
    exit 1
  fi
  sleep 2
done

Step 4: Google Cloud Run

set -euo pipefail

# Image reference shared by the build and the deploy step
IMAGE="gcr.io/$PROJECT_ID/my-llm-app"

# Build the container image and push it to the registry
gcloud builds submit --tag "$IMAGE"

# Deploy: API keys come from Secret Manager, the base URL is a plain env var
gcloud run deploy my-llm-app \
  --image "$IMAGE" \
  --set-secrets="LANGFUSE_PUBLIC_KEY=langfuse-public-key:latest" \
  --set-secrets="LANGFUSE_SECRET_KEY=langfuse-secret-key:latest" \
  --set-env-vars="LANGFUSE_BASE_URL=https://cloud.langfuse.com"

Step 5: Health Check Endpoint

// app/api/health/route.ts
import { LangfuseClient } from "@langfuse/client";

const langfuse = new LangfuseClient();

/**
 * Health probe reporting whether the Langfuse server can be reached.
 *
 * Requests the server's public health endpoint with a short timeout. Network
 * errors, timeouts and non-2xx answers all yield the degraded response; nothing
 * is swallowed silently, so the 503 branch is actually reachable.
 *
 * Note: this checks reachability only; it does not validate the API keys.
 *
 * @returns 200 `{ status: "healthy", tracing: "enabled" }` when the endpoint
 *   answers successfully, otherwise 503 `{ status: "degraded", tracing: "disabled" }`.
 */
export async function GET() {
  const baseUrl = process.env.LANGFUSE_BASE_URL ?? "https://cloud.langfuse.com";

  let reachable = false;
  try {
    // Quick connectivity check, bounded so the probe itself cannot hang
    const probe = await fetch(new URL("/api/public/health", baseUrl), {
      signal: AbortSignal.timeout(3000),
    });
    reachable = probe.ok;
  } catch {
    reachable = false;
  }

  if (!reachable) {
    return Response.json(
      { status: "degraded", tracing: "disabled" },
      { status: 503 }
    );
  }

  return Response.json({ status: "healthy", tracing: "enabled" });
}

Platform-Specific Considerations

| Platform | Key Concern | Solution |
|----------|-------------|----------|
| Vercel/Edge | Function timeout | Flush before response; use v4+ |
| AWS Lambda | Cold starts | Initialize SDK outside handler |
| Cloud Run | Concurrency | Singleton client, shared OTel SDK |
| Docker | Self-hosted networking | Ensure app can reach Langfuse host |
| Kubernetes | Pod lifecycle | Shutdown hook on SIGTERM |

Error Handling

| Issue | Cause | Solution |
|-------|-------|----------|
| Traces missing in serverless | Not flushed before freeze | Flush pending spans (e.g. forceFlush()) before the function returns |
| Auth error after deploy | Wrong env for environment | Verify secrets match deployment |
| Self-hosted 502 | DB not ready | Add healthcheck + depends_on |
| High latency in prod | Small batch size | Increase flushAt / maxExportBatchSize |

Agent Skills: Langfuse Deploy Integration

Install this agent skill to your local machine.

Skill Files