ClickHouse Reference Architecture Skill

ClickHouse Reference Architecture

Overview

Production-grade architecture for ClickHouse analytics platforms covering project layout, data flow, multi-tenancy, and operational patterns.

Prerequisites

Understanding of ClickHouse fundamentals (engines, ORDER BY, partitioning)
TypeScript/Node.js project

Instructions

Step 1: Project Structure

my-analytics-platform/
├── src/
│   ├── clickhouse/
│   │   ├── client.ts           # Singleton client with health checks
│   │   ├── schemas/            # SQL DDL files (source of truth)
│   │   │   ├── 001-events.sql
│   │   │   ├── 002-users.sql
│   │   │   └── 003-materialized-views.sql
│   │   ├── queries/            # Named query functions
│   │   │   ├── events.ts
│   │   │   ├── users.ts
│   │   │   └── dashboards.ts
│   │   └── migrations/         # Schema migrations
│   │       ├── runner.ts
│   │       └── 001-add-country.sql
│   ├── ingestion/
│   │   ├── webhook-receiver.ts # HTTP webhook endpoint
│   │   ├── kafka-consumer.ts   # Kafka consumer (if applicable)
│   │   └── buffer.ts           # Insert batching buffer
│   ├── api/
│   │   ├── routes.ts           # API endpoints
│   │   └── middleware.ts       # Auth, rate limiting
│   └── jobs/
│       ├── daily-rollup.ts     # Scheduled aggregations
│       └── cleanup.ts          # TTL enforcement
├── tests/
│   ├── unit/
│   └── integration/
├── docker-compose.yml          # Local ClickHouse
├── init-db/                    # Docker init scripts
└── config/
    ├── development.env
    ├── staging.env
    └── production.env

Step 2: Data Flow Architecture

                    ┌─────────────────┐
                    │   Data Sources   │
                    │  (Webhooks, API, │
                    │   Kafka, S3)     │
                    └────────┬────────┘
                             │
                    ┌────────▼────────┐
                    │  Ingestion Layer │
                    │  (Buffer + Batch │
                    │   10K+ rows/ins) │
                    └────────┬────────┘
                             │
              ┌──────────────▼──────────────┐
              │       ClickHouse Server      │
              │                              │
              │  ┌────────────────────────┐  │
              │  │   Raw Event Tables     │  │
              │  │   (MergeTree, append)  │  │
              │  └───────────┬────────────┘  │
              │              │               │
              │  ┌───────────▼────────────┐  │
              │  │  Materialized Views    │  │
              │  │  (Auto-aggregate on    │  │
              │  │   INSERT — hourly,     │  │
              │  │   daily, tenant-level) │  │
              │  └───────────┬────────────┘  │
              │              │               │
              │  ┌───────────▼────────────┐  │
              │  │  Aggregate Tables      │  │
              │  │  (AggregatingMergeTree)│  │
              │  └────────────────────────┘  │
              └──────────────┬──────────────┘
                             │
                    ┌────────▼────────┐
                    │    API Layer     │
                    │  (Query aggregate│
                    │   tables, not    │
                    │   raw events)    │
                    └────────┬────────┘
                             │
                    ┌────────▼────────┐
                    │   Dashboards /   │
                    │   Client Apps    │
                    └─────────────────┘

Step 3: Schema Design (3-Layer Pattern)

-- Layer 1: Raw events (append-only, full fidelity)
CREATE TABLE analytics.events_raw (
    event_id    UUID DEFAULT generateUUIDv4(),
    tenant_id   UInt32,
    event_type  LowCardinality(String),
    user_id     UInt64,
    properties  String CODEC(ZSTD(3)),
    created_at  DateTime64(3) DEFAULT now64(3)
)
ENGINE = MergeTree()
ORDER BY (tenant_id, event_type, toDate(created_at), user_id)
PARTITION BY toYYYYMM(created_at)
TTL created_at + INTERVAL 90 DAY;

-- Layer 2: Hourly aggregation (auto-populated via materialized view)
CREATE TABLE analytics.events_hourly (
    hour        DateTime,
    tenant_id   UInt32,
    event_type  LowCardinality(String),
    cnt         UInt64,
    users       AggregateFunction(uniq, UInt64)
)
ENGINE = AggregatingMergeTree()
ORDER BY (tenant_id, event_type, hour);

CREATE MATERIALIZED VIEW analytics.events_hourly_mv TO analytics.events_hourly AS
SELECT toStartOfHour(created_at) AS hour, tenant_id, event_type,
       count() AS cnt, uniqState(user_id) AS users
FROM analytics.events_raw GROUP BY hour, tenant_id, event_type;

-- Layer 3: Daily rollup for dashboards
CREATE TABLE analytics.events_daily (
    date        Date,
    tenant_id   UInt32,
    total       UInt64,
    users       AggregateFunction(uniq, UInt64)
)
ENGINE = AggregatingMergeTree()
ORDER BY (tenant_id, date);

CREATE MATERIALIZED VIEW analytics.events_daily_mv TO analytics.events_daily AS
SELECT toDate(created_at) AS date, tenant_id,
       count() AS total, uniqState(user_id) AS users
FROM analytics.events_raw GROUP BY date, tenant_id;

Step 4: Multi-Tenant Patterns

Approach A: Shared table with tenant_id in ORDER BY (recommended)

-- Tenant_id first in ORDER BY = queries filter on tenant efficiently
ORDER BY (tenant_id, event_type, created_at)

-- Query: only scans data for this tenant
SELECT count() FROM events_raw WHERE tenant_id = 42;

Approach B: Database per tenant (for strict isolation)

CREATE DATABASE tenant_42;
CREATE TABLE tenant_42.events (...) ENGINE = MergeTree() ...;

-- Pros: Full isolation, easy to drop tenant
-- Cons: Schema changes need per-tenant DDL, more operational overhead

Approach C: Row-level security (ClickHouse RBAC)

CREATE ROW POLICY tenant_isolation ON analytics.events_raw
    FOR SELECT USING tenant_id = getSetting('custom_tenant_id')
    TO app_user;

Step 5: Client Module

// src/clickhouse/client.ts
import { createClient, ClickHouseClient } from '@clickhouse/client';

let instance: ClickHouseClient | null = null;

export function getClient(): ClickHouseClient {
  if (!instance) {
    instance = createClient({
      url: process.env.CLICKHOUSE_HOST!,
      username: process.env.CLICKHOUSE_USER!,
      password: process.env.CLICKHOUSE_PASSWORD!,
      database: process.env.CLICKHOUSE_DATABASE ?? 'analytics',
      max_open_connections: Number(process.env.CH_MAX_CONNECTIONS ?? 10),
      request_timeout: 30_000,
      compression: { request: true, response: true },
    });
  }
  return instance;
}

// src/clickhouse/queries/dashboards.ts
export async function getTenantDashboard(tenantId: number, days = 30) {
  const client = getClient();
  const rs = await client.query({
    query: `
      SELECT date, sum(total) AS events, uniqMerge(users) AS unique_users
      FROM analytics.events_daily
      WHERE tenant_id = {tid:UInt32} AND date >= today() - {days:UInt32}
      GROUP BY date ORDER BY date
    `,
    query_params: { tid: tenantId, days },
    format: 'JSONEachRow',
  });
  return rs.json<{ date: string; events: string; unique_users: string }>();
}

Architecture Decision Records

| Decision | Choice | Why | |----------|--------|-----| | Engine | MergeTree (raw) + AggregatingMergeTree (rollups) | Best for append + pre-agg | | Multi-tenant | Shared table + tenant_id in ORDER BY | Scales to 10K+ tenants | | Ingestion | Buffer + batch INSERT | Avoids "too many parts" | | Aggregation | Materialized views (not cron) | Real-time, zero-lag | | Format | JSONEachRow | Client support, debugging | | Compression | ZSTD(3) for strings, Delta for ints | 10-20x compression |

Error Handling

| Issue | Cause | Solution | |-------|-------|----------| | Cross-tenant data leak | Missing WHERE tenant_id | Use row policies or middleware | | Stale dashboard data | MV not created | Verify MV exists and is attached | | Schema drift | Manual DDL changes | Use migration runner | | Slow dashboard queries | Querying raw table | Query aggregate tables instead |

Resources

Next Steps

For multi-environment configuration, see clickhouse-multi-env-setup.

Agent Skills: ClickHouse Reference Architecture

Install this agent skill to your local

Skill Files