Perplexity Reference Architecture
Overview
Production architecture for AI-powered search with Perplexity Sonar API. Three tiers: search service (model routing + caching), citation pipeline (extract, validate, store), and research orchestrator (multi-query synthesis).
Architecture
┌─────────────────────────────────────────────┐
│              Application Layer              │
│ (Search Widget, Research Agent, Fact Check) │
└──────────────────────┬──────────────────────┘
                       │
┌──────────────────────▼──────────────────────┐
│            Search Service Layer             │
│  ┌──────────┐ ┌──────────┐ ┌─────────────┐  │
│  │  Model   │ │  Query   │ │  Response   │  │
│  │  Router  │ │  Cache   │ │  Parser     │  │
│  └──────────┘ └──────────┘ └─────────────┘  │
└──────────────────────┬──────────────────────┘
                       │
┌──────────────────────▼──────────────────────┐
│     api.perplexity.ai/chat/completions      │
│   sonar | sonar-pro | sonar-reasoning-pro   │
└─────────────────────────────────────────────┘
Prerequisites
- Perplexity API key with Sonar access
- OpenAI-compatible client library (`openai` package)
- Redis for production caching (LRU for development)
Instructions
Step 1: Search Service with Model Routing
// src/perplexity/search-service.ts
import OpenAI from "openai";
import { createHash } from "crypto";
/** Depth tiers a caller may request; each one selects an entry of `MODEL_MAP`. */
type SearchDepth = "quick" | "standard" | "deep" | "reasoning";

/**
 * Request settings per search depth: the Sonar model to call, the `max_tokens`
 * cap for the completion, and a timeout budget (presumably milliseconds —
 * confirm against the code that consumes it).
 */
const MODEL_MAP: Record<
  SearchDepth,
  { model: string; maxTokens: number; timeout: number }
> = {
  quick: { model: "sonar", maxTokens: 256, timeout: 10_000 },
  standard: { model: "sonar", maxTokens: 1024, timeout: 15_000 },
  deep: { model: "sonar-pro", maxTokens: 4096, timeout: 30_000 },
  reasoning: { model: "sonar-reasoning-pro", maxTokens: 4096, timeout: 45_000 },
};
/**
 * Search client for the Perplexity chat-completions endpoint with depth-based
 * model routing and a TTL cache.
 *
 * The cache is a Map keyed by a SHA-256 hash of the normalized query, the
 * model name and the search options. An entry is removed only when it is read
 * after it has expired; there is no size bound.
 */
export class SearchService {
  /**
   * @param client OpenAI-compatible client used to issue the requests.
   * @param cache  Storage for cached results; defaults to a fresh in-memory Map.
   */
  constructor(
    private client: OpenAI,
    private cache: Map<string, { result: any; expiry: number }> = new Map()
  ) {}

  /**
   * Runs one search, serving it from the cache when a fresh entry exists.
   *
   * @param query Question sent as the user message.
   * @param depth Selects model, token cap and request timeout from `MODEL_MAP`.
   * @param opts  Optional recency filter, domain filter and system prompt.
   * @returns The answer text, citations, search results, model and usage, plus
   *          a `cached` flag telling whether the cache served the result.
   * @throws Whatever the client rejects with (including its timeout error);
   *         nothing is cached in that case.
   */
  async search(
    query: string,
    depth: SearchDepth = "standard",
    opts: {
      recencyFilter?: "hour" | "day" | "week" | "month";
      domainFilter?: string[];
      systemPrompt?: string;
    } = {}
  ) {
    const config = MODEL_MAP[depth];
    const cacheKey = this.hashQuery(query, config.model, opts);

    const cached = this.cache.get(cacheKey);
    if (cached) {
      if (cached.expiry > Date.now()) {
        return { ...cached.result, cached: true };
      }
      // Drop the stale entry right away so it does not linger if the refresh fails.
      this.cache.delete(cacheKey);
    }

    const response = await this.client.chat.completions.create(
      {
        model: config.model,
        messages: [
          ...(opts.systemPrompt ? [{ role: "system" as const, content: opts.systemPrompt }] : []),
          { role: "user" as const, content: query },
        ],
        max_tokens: config.maxTokens,
        // Perplexity-specific fields, not part of the OpenAI request type (hence the cast).
        ...(opts.recencyFilter && { search_recency_filter: opts.recencyFilter }),
        ...(opts.domainFilter && { search_domain_filter: opts.domainFilter }),
      } as any,
      // Apply the per-depth timeout budget as a per-request option.
      { timeout: config.timeout }
    );

    const result = {
      // An empty `choices` array yields an empty answer instead of a TypeError.
      answer: response.choices?.[0]?.message?.content ?? "",
      citations: (response as any).citations || [],
      searchResults: (response as any).search_results || [],
      model: response.model,
      usage: response.usage,
    };

    // Hour-scoped queries go stale quickly: 15 minutes for those, 1 hour otherwise.
    const ttl = opts.recencyFilter === "hour" ? 900_000 : 3_600_000;
    this.cache.set(cacheKey, { result, expiry: Date.now() + ttl });
    return { ...result, cached: false };
  }

  /**
   * Builds the cache key from the lower-cased, trimmed query, the model and
   * the known options. Fields are listed explicitly so the key does not depend
   * on the property order of `opts` and cannot be overwritten by stray keys.
   */
  private hashQuery(query: string, model: string, opts: any): string {
    const keyMaterial = {
      query: query.toLowerCase().trim(),
      model,
      recencyFilter: opts?.recencyFilter,
      domainFilter: opts?.domainFilter,
      systemPrompt: opts?.systemPrompt,
    };
    return createHash("sha256").update(JSON.stringify(keyMaterial)).digest("hex");
  }
}
Step 2: Citation Pipeline
// src/perplexity/citation-pipeline.ts
/** One cited source of an answer, as produced by `extractCitations`. */
export interface Citation {
/** Source URL, taken verbatim from the citation URL list. */
url: string;
/** Hostname parsed from `url`. */
domain: string;
/** 1-based position in the citation list; matches the `[n]` markers replaced by `renderCitationsAsMarkdown`. */
index: number;
}
/**
 * Turns the citation URL list returned with an answer into structured
 * entries.
 *
 * @param answer       Answer text; accepted for interface compatibility and
 *                     currently not inspected.
 * @param citationUrls Citation URLs in the order the API returned them.
 * @returns One entry per URL with its 1-based index and hostname. A URL that
 *          cannot be parsed keeps its slot (so indexes still line up with the
 *          `[n]` markers in the answer) and gets an empty `domain`.
 */
export function extractCitations(answer: string, citationUrls: string[]): Citation[] {
  return citationUrls.map((url, i) => {
    let domain = "";
    try {
      domain = new URL(url).hostname;
    } catch {
      // Malformed URL: keep the citation rather than failing the whole list.
    }
    return { url, domain, index: i + 1 };
  });
}
/**
 * Rewrites bare `[n]` citation markers in an answer as Markdown links to the
 * matching citation URL.
 *
 * The answer is scanned once, so text inserted for one citation is never
 * rescanned for another, and URLs are inserted literally (characters such as
 * `$&` are not treated as replacement patterns).
 *
 * @param answer    Answer text containing markers such as `[1]`.
 * @param citations Citations whose `index` identifies the marker to link. If
 *                  two citations share an index, the first one wins.
 * @returns The answer with every known marker linked. Markers without a
 *          matching citation, and markers already followed by `(`
 *          (i.e. already a Markdown link), are left unchanged.
 */
export function renderCitationsAsMarkdown(answer: string, citations: Citation[]): string {
  const urlByMarker = new Map<string, string>();
  for (const c of citations) {
    const key = String(c.index);
    if (!urlByMarker.has(key)) {
      urlByMarker.set(key, c.url);
    }
  }

  // (?!\() skips markers that are already the label of a Markdown link.
  return answer.replace(/\[(\d+)\](?!\()/g, (marker: string, digits: string) => {
    const url = urlByMarker.get(digits);
    return url === undefined ? marker : `${marker}(${url})`;
  });
}
/**
 * Removes citations that point at the same page, keeping the first occurrence
 * of each.
 *
 * Two URLs count as the same page when they are equal after dropping
 * everything from the first `?` onwards and then one trailing `/`.
 *
 * @param citations Citations in display order; the array is not modified.
 * @returns A new array holding the surviving citation objects in their
 *          original order.
 */
export function deduplicateCitations(citations: Citation[]): Citation[] {
  const seenKeys = new Set<string>();
  const unique: Citation[] = [];

  for (const citation of citations) {
    const queryStart = citation.url.indexOf("?");
    const withoutQuery = queryStart === -1 ? citation.url : citation.url.slice(0, queryStart);
    const key = withoutQuery.endsWith("/") ? withoutQuery.slice(0, -1) : withoutQuery;

    if (seenKeys.has(key)) {
      continue;
    }
    seenKeys.add(key);
    unique.push(citation);
  }

  return unique;
}
Step 3: Research Orchestrator
// src/perplexity/research-orchestrator.ts
/**
 * Multi-query research flow: asks the search service to break a topic into
 * questions, runs a deep search for each one and merges the citations into a
 * bibliography.
 */
export class ResearchOrchestrator {
  /**
   * @param searchService Service used for the decomposition and the deep searches.
   * @param delayMs       Pause in milliseconds between consecutive deep
   *                      searches, as rate-limit protection. Defaults to 2000.
   */
  constructor(
    private searchService: SearchService,
    private delayMs: number = 2000
  ) {}

  /**
   * Researches a topic in two phases.
   *
   * @param topic Subject to research; embedded verbatim in the prompts.
   * @returns One section per researched question (at most five) and a
   *          bibliography of the distinct citation URLs in first-seen order.
   * @throws Whatever the search service rejects with; no partial result is
   *         returned.
   */
  async research(topic: string): Promise<{
    sections: Array<{ question: string; answer: string; citations: string[] }>;
    bibliography: string[];
  }> {
    // Phase 1: decompose the topic with the fast model.
    const overview = await this.searchService.search(
      `Break "${topic}" into 4-5 key research questions. List one per line.`,
      "quick"
    );
    const lines: string[] = overview.answer.split("\n");
    // Lines of 10 characters or fewer (blank lines, stray fragments) are not questions.
    const questions = lines.filter((q) => q.trim().length > 10).slice(0, 5);

    // Phase 2: deep dive into each question, one at a time.
    const sections: Array<{ question: string; answer: string; citations: string[] }> = [];
    const allCitations = new Set<string>();
    let remaining = questions.length;

    for (const question of questions) {
      remaining -= 1;
      const result = await this.searchService.search(question, "deep", {
        systemPrompt: `Research context: ${topic}. Provide detailed, well-cited answer.`,
      });
      const citations: string[] = result.citations ?? [];

      sections.push({
        question: question.trim(),
        answer: result.answer,
        citations,
      });
      for (const url of citations) {
        allCitations.add(url);
      }

      // Rate-limit protection: only pause when another request follows and this
      // one actually reached the API (a cache hit made no request).
      if (remaining > 0 && !result.cached && this.delayMs > 0) {
        await new Promise((r) => setTimeout(r, this.delayMs));
      }
    }

    return { sections, bibliography: [...allCitations] };
  }
}
Step 4: Fact-Check Service
/**
 * Asks the search service to verify a claim and grades the result by how many
 * citations came back.
 *
 * @param claim         Statement to verify; embedded in quotes in the prompt.
 * @param searchService Service that performs the "deep" search.
 * @returns `verdict` is the full answer text, `sources` the returned
 *          citations, and `confidence` is "high" for more than three
 *          citations, "medium" for two or three, "low" otherwise.
 */
export async function factCheck(
  claim: string,
  searchService: SearchService
): Promise<{ verdict: string; confidence: string; sources: string[] }> {
  const prompt = `Verify this claim with sources. State whether it is accurate, partially accurate, or inaccurate: "${claim}"`;
  const { answer, citations } = await searchService.search(prompt, "deep", {
    systemPrompt: "You are a fact-checker. Be precise and cite sources.",
  });

  let confidence = "low";
  if (citations.length > 3) {
    confidence = "high";
  } else if (citations.length > 1) {
    confidence = "medium";
  }

  return { verdict: answer, confidence, sources: citations };
}
Error Handling
| Issue | Cause | Solution |
|-------|-------|----------|
| No citations returned | Using sonar for complex query | Upgrade to sonar-pro |
| Stale information | No recency filter | Add search_recency_filter |
| High cost | sonar-pro for simple queries | Route by depth |
| Rate limit on research | Too many sequential queries | Add 2s delay between calls |
Output
- Search service with model routing by query depth
- Citation extraction and rendering pipeline
- Multi-query research orchestrator
- Fact-checking service