Mistral Security Basics Skill

Mistral Security Basics

Overview

Security practices for Mistral AI integrations: API key management, prompt injection defense, output sanitization, content moderation with mistral-moderation-latest, request logging without secrets, and key rotation.

Prerequisites

Mistral API key provisioned
Understanding of OWASP LLM Top 10 risks
Secret management infrastructure

Instructions

Step 1: API Key Management

import os

# NEVER: api_key = "sk-abc123"

# Development — env vars
api_key = os.environ.get("MISTRAL_API_KEY")
if not api_key:
    raise RuntimeError("MISTRAL_API_KEY not set")

# Production — secret manager
from google.cloud import secretmanager

def get_api_key() -> str:
    client = secretmanager.SecretManagerServiceClient()
    response = client.access_secret_version(
        name="projects/my-project/secrets/mistral-api-key/versions/latest"
    )
    return response.payload.data.decode("UTF-8")

Step 2: Prompt Injection Defense

function sanitizeUserInput(input: string): string {
  // Strip common injection patterns
  const patterns = [
    /ignore (?:previous|all|above) instructions/gi,
    /you are now/gi,
    /system prompt/gi,
    /\boverride\b/gi,
    /\bforget\b.*\binstructions\b/gi,
  ];

  let sanitized = input;
  for (const pattern of patterns) {
    sanitized = sanitized.replace(pattern, '[FILTERED]');
  }

  // Limit length to prevent context stuffing
  return sanitized.slice(0, 4000);
}

function buildSafeMessages(system: string, userInput: string) {
  return [
    { role: 'system', content: system },
    {
      role: 'user',
      content: `<user_query>\n${sanitizeUserInput(userInput)}\n</user_query>`,
    },
  ];
}

Step 3: Content Moderation with Mistral API

import { Mistral } from '@mistralai/mistralai';

const client = new Mistral({ apiKey: process.env.MISTRAL_API_KEY });

async function moderateContent(text: string): Promise<{ safe: boolean; flags: string[] }> {
  const result = await client.classifiers.moderate({
    model: 'mistral-moderation-latest',
    inputs: [text],
  });

  const categories = result.results[0].categories;
  const flags = Object.entries(categories)
    .filter(([, flagged]) => flagged)
    .map(([category]) => category);

  return { safe: flags.length === 0, flags };
}

// Gate user input before processing
async function safeChatFlow(userInput: string) {
  const inputCheck = await moderateContent(userInput);
  if (!inputCheck.safe) {
    throw new Error(`Input flagged: ${inputCheck.flags.join(', ')}`);
  }

  const response = await client.chat.complete({
    model: 'mistral-small-latest',
    messages: [{ role: 'user', content: userInput }],
    safePrompt: true, // Built-in safety system prompt
  });

  const output = response.choices?.[0]?.message?.content ?? '';
  const outputCheck = await moderateContent(output);
  if (!outputCheck.safe) {
    return 'I cannot provide that response.';
  }

  return output;
}

Step 4: Output Sanitization

function sanitizeOutput(response: string): string {
  let cleaned = response;

  // Remove leaked system prompts
  cleaned = cleaned.replace(/(?:system prompt|instructions):?\s*.*/gi, '[REDACTED]');

  // Remove script tags (XSS prevention)
  cleaned = cleaned.replace(/<script[^>]*>[\s\S]*?<\/script>/gi, '');

  // Remove PII patterns
  cleaned = cleaned.replace(/\b\d{3}-\d{2}-\d{4}\b/g, '[SSN]');
  cleaned = cleaned.replace(/\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z]{2,}\b/gi, '[EMAIL]');

  return cleaned;
}

Step 5: Request Logging Without Secrets

function logRequest(model: string, messages: any[], response: any): void {
  // Log metadata ONLY — never log content (may contain PII)
  console.log(JSON.stringify({
    timestamp: new Date().toISOString(),
    model,
    messageCount: messages.length,
    inputChars: messages.reduce((sum, m) => sum + (m.content?.length ?? 0), 0),
    outputChars: response.choices?.[0]?.message?.content?.length ?? 0,
    usage: {
      promptTokens: response.usage?.promptTokens,
      completionTokens: response.usage?.completionTokens,
    },
    // NEVER log: API keys, message content, user identifiers
  }));
}

Step 6: API Key Rotation

class KeyRotator {
  private keys: string[];
  private current = 0;
  private lastRotated = Date.now();
  private readonly rotationIntervalMs = 3_600_000; // 1 hour

  constructor(keys: string[]) {
    if (keys.length === 0) throw new Error('At least one API key required');
    this.keys = keys;
  }

  getKey(): string {
    if (Date.now() - this.lastRotated > this.rotationIntervalMs) {
      this.rotate();
    }
    return this.keys[this.current];
  }

  reportAuthFailure(): void {
    console.error(`Key ${this.current} failed auth, rotating`);
    this.rotate();
  }

  private rotate(): void {
    this.current = (this.current + 1) % this.keys.length;
    this.lastRotated = Date.now();
  }
}

Security Audit Checklist

def audit_mistral_security():
    checks = {
        "api_key_from_env": bool(os.environ.get("MISTRAL_API_KEY")),
        "gitignore_has_env": ".env" in open(".gitignore").read() if os.path.exists(".gitignore") else False,
        "no_hardcoded_keys": True,  # scan src/ for patterns
        "moderation_enabled": True,  # verify in code
        "output_sanitization": True,  # verify in code
        "audit_logging": True,  # verify in code
    }
    passed = all(checks.values())
    return {"passed": passed, "checks": checks}

Error Handling

| Issue | Cause | Solution | |-------|-------|----------| | Key in logs | Logging full request | Log metadata only | | Prompt injection | Unsanitized user input | Filter + XML-wrap user content | | PII in responses | Model generating PII | Sanitize output + use moderation | | Key compromise | Hardcoded or leaked | Use secret manager, rotate immediately | | XSS via output | Model generating HTML/JS | Strip script tags before rendering |

Resources

Mistral Guardrails
Mistral Moderation API
OWASP LLM Top 10

Output

API key management via secret managers
Prompt injection defense layer
Content moderation with mistral-moderation-latest
Output sanitization pipeline
Secure audit logging
Key rotation automation

Agent Skills: Mistral Security Basics

Install this agent skill to your local

Skill Files