diff --git a/images/otel-clickhouse-trace.png b/images/otel-clickhouse-trace.png new file mode 100644 index 0000000..5e941ac Binary files /dev/null and b/images/otel-clickhouse-trace.png differ diff --git a/packages/otel-clickhouse/CHANGELOG.md b/packages/otel-clickhouse/CHANGELOG.md new file mode 100644 index 0000000..c6265e9 --- /dev/null +++ b/packages/otel-clickhouse/CHANGELOG.md @@ -0,0 +1,14 @@ +# @kubiks/otel-clickhouse + +## 1.0.0 + +### Major Changes + +- Initial release of ClickHouse instrumentation for OpenTelemetry +- Automatic query tracing with detailed execution metrics +- Capture read/written rows, bytes, and timing information from ClickHouse response headers +- Support for all query types (SELECT, INSERT, UPDATE, DELETE, etc.) +- Configurable query text capture with length limits +- Network metadata tracking (hostname and port) +- Full OpenTelemetry semantic conventions compliance +- Zero-overhead instrumentation with idempotent design diff --git a/packages/otel-clickhouse/LICENSE b/packages/otel-clickhouse/LICENSE new file mode 100644 index 0000000..55f20b0 --- /dev/null +++ b/packages/otel-clickhouse/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2025 Kubiks + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/packages/otel-clickhouse/README.md b/packages/otel-clickhouse/README.md new file mode 100644 index 0000000..5b806f8 --- /dev/null +++ b/packages/otel-clickhouse/README.md @@ -0,0 +1,365 @@ +# @kubiks/otel-clickhouse + +OpenTelemetry instrumentation for [ClickHouse](https://clickhouse.com/). Add distributed tracing to your database queries with detailed execution metrics including read/written rows, bytes, and timing information. + +![ClickHouse Trace Visualization](https://github.com/kubiks-inc/otel/blob/main/images/otel-clickhouse-trace.png) + +_Visualize your ClickHouse queries with detailed span information including operation type, execution metrics, and performance statistics._ + +## Features + +- ?? **Automatic Query Tracing** - All queries are automatically traced with detailed span information +- ?? **Rich Execution Metrics** - Capture read/written rows, bytes, elapsed time, and more from ClickHouse response headers +- ?? **Operation Detection** - Automatically detects query operation types (SELECT, INSERT, etc.) +- ?? **Configurable Query Capture** - Control whether to include full SQL queries in traces +- ?? **Network Metadata** - Track database server hostname and port +- ? **Zero Overhead** - Minimal performance impact with efficient instrumentation +- ?? **Idempotent** - Safe to call multiple times on the same client + +## Installation + +```bash +npm install @kubiks/otel-clickhouse +# or +pnpm add @kubiks/otel-clickhouse +# or +yarn add @kubiks/otel-clickhouse +``` + +**Peer Dependencies:** `@opentelemetry/api` >= 1.9.0, `@clickhouse/client` >= 0.2.0 + +## Supported Frameworks + +Works with any TypeScript framework and Node.js runtime including: + +- Next.js +- Express +- Fastify +- NestJS +- Nuxt +- And many more... + +## Supported Platforms + +Works with any observability platform that supports OpenTelemetry including: + +- [Kubiks](https://kubiks.ai) +- [Sentry](https://sentry.io) +- [Axiom](https://axiom.co) +- [Datadog](https://www.datadoghq.com) +- [New Relic](https://newrelic.com) +- [SigNoz](https://signoz.io) +- And others ... + +## Usage + +### Basic Usage + +```typescript +import { createClient } from '@clickhouse/client'; +import { instrumentClickHouse } from '@kubiks/otel-clickhouse'; + +// Create your ClickHouse client as usual +const client = createClient({ + host: 'http://localhost:8123', + username: 'default', + password: '', +}); + +// Add instrumentation with a single line +instrumentClickHouse(client); + +// That's it! All queries are now traced automatically +const result = await client.query({ + query: 'SELECT * FROM users WHERE id = {id:UInt32}', + query_params: { id: 1 }, +}); +``` + +### With Configuration + +```typescript +import { createClient } from '@clickhouse/client'; +import { instrumentClickHouse } from '@kubiks/otel-clickhouse'; + +const client = createClient({ + host: 'http://localhost:8123', + username: 'default', + password: '', +}); + +instrumentClickHouse(client, { + dbName: 'default', // Database name for spans + captureQueryText: true, // Include SQL in traces (default: true) + maxQueryTextLength: 1000, // Max SQL length (default: 1000) + captureExecutionStats: true, // Capture execution metrics (default: true) + peerName: 'localhost', // Database server hostname + peerPort: 8123, // Database server port +}); +``` + +### ClickHouse Cloud + +```typescript +import { createClient } from '@clickhouse/client'; +import { instrumentClickHouse } from '@kubiks/otel-clickhouse'; + +const client = createClient({ + host: 'https://your-instance.clickhouse.cloud:8443', + username: 'default', + password: 'your-password', +}); + +instrumentClickHouse(client, { + dbName: 'default', + peerName: 'your-instance.clickhouse.cloud', + peerPort: 8443, +}); + +// All queries are now traced with detailed metrics +const result = await client.query({ + query: 'SELECT count() FROM system.tables', +}); +``` + +### With Query Parameters + +```typescript +// Parameterized queries are fully supported +const result = await client.query({ + query: ` + SELECT * + FROM users + WHERE age > {minAge:UInt8} + AND city = {city:String} + `, + query_params: { + minAge: 18, + city: 'New York', + }, +}); +``` + +### Insert Operations + +```typescript +// Inserts are automatically traced +await client.insert({ + table: 'users', + values: [ + { id: 1, name: 'Alice', age: 30 }, + { id: 2, name: 'Bob', age: 25 }, + ], + format: 'JSONEachRow', +}); +``` + +## Configuration Options + +```typescript +interface InstrumentClickHouseConfig { + /** + * Custom tracer name. Defaults to "@kubiks/otel-clickhouse". + */ + tracerName?: string; + + /** + * Database name to include in spans. + */ + dbName?: string; + + /** + * Whether to capture full SQL query text in spans. + * Defaults to true. + */ + captureQueryText?: boolean; + + /** + * Maximum length for captured query text. Queries longer than this + * will be truncated. Defaults to 1000 characters. + */ + maxQueryTextLength?: number; + + /** + * Remote hostname or IP address of the ClickHouse server. + * Example: "clickhouse.example.com" or "192.168.1.100" + */ + peerName?: string; + + /** + * Remote port number of the ClickHouse server. + * Example: 8123 for HTTP, 9000 for native protocol + */ + peerPort?: number; + + /** + * Whether to capture ClickHouse execution statistics from response headers. + * This includes read/written rows, bytes, elapsed time, etc. + * Defaults to true. + */ + captureExecutionStats?: boolean; +} +``` + +## What You Get + +Each database query automatically creates a span with rich telemetry data: + +### Basic Attributes + +- **Span name**: `clickhouse.select`, `clickhouse.insert`, `clickhouse.update`, etc. +- **Operation type**: `db.operation` attribute (SELECT, INSERT, UPDATE, DELETE, ALTER, etc.) +- **SQL query text**: Full query statement captured in `db.statement` (configurable) +- **Database system**: `db.system` attribute (always "clickhouse") +- **Database name**: `db.name` attribute (if configured) +- **Network info**: `net.peer.name` and `net.peer.port` attributes (if configured) + +### ClickHouse Execution Metrics + +When `captureExecutionStats` is enabled (default), the following metrics are captured from ClickHouse response headers: + +| Attribute | Description | Example | +| -------------------------------------- | ------------------------------------------------ | --------- | +| `clickhouse.read_rows` | Number of rows read from tables | `1000` | +| `clickhouse.read_bytes` | Number of bytes read from tables | `8192` | +| `clickhouse.written_rows` | Number of rows written to tables | `100` | +| `clickhouse.written_bytes` | Number of bytes written to tables | `4096` | +| `clickhouse.total_rows_to_read` | Total number of rows to be read | `1000` | +| `clickhouse.result_rows` | Number of rows in the result set | `50` | +| `clickhouse.result_bytes` | Number of bytes in the result set | `2048` | +| `clickhouse.elapsed_ns` | Query execution time in nanoseconds | `1500000` | +| `clickhouse.real_time_microseconds` | Real execution time in microseconds (CH 24.9+) | `1500` | + +### Error Tracking + +- Exceptions are recorded with stack traces +- Proper span status codes (OK or ERROR) +- Full error context for debugging + +### Performance Metrics + +- Duration and timing information for every query +- Detailed execution statistics from ClickHouse +- Network latency insights + +## Span Attributes Reference + +The instrumentation adds the following attributes to each span following [OpenTelemetry semantic conventions](https://opentelemetry.io/docs/specs/semconv/database/): + +### Standard Database Attributes + +| Attribute | Description | Example | +| ---------------- | --------------------- | ------------------------------------------ | +| `db.system` | Database system | `clickhouse` | +| `db.operation` | SQL operation type | `SELECT` | +| `db.statement` | Full SQL query | `SELECT * FROM users WHERE id = 1` | +| `db.name` | Database name | `default` | +| `net.peer.name` | Server hostname | `clickhouse.example.com` | +| `net.peer.port` | Server port | `8123` | + +### ClickHouse-Specific Attributes + +All ClickHouse execution metrics are captured as attributes (see table above). + +## Example Trace Output + +```json +{ + "name": "clickhouse.select", + "kind": "CLIENT", + "status": "OK", + "attributes": { + "db.system": "clickhouse", + "db.operation": "SELECT", + "db.statement": "SELECT * FROM users WHERE age > 18", + "db.name": "default", + "net.peer.name": "localhost", + "net.peer.port": 8123, + "clickhouse.read_rows": 1000, + "clickhouse.read_bytes": 8192, + "clickhouse.result_rows": 50, + "clickhouse.result_bytes": 2048, + "clickhouse.elapsed_ns": 1500000 + } +} +``` + +## Best Practices + +### 1. Configure Database Name + +Always set the `dbName` option to help identify which database queries are targeting: + +```typescript +instrumentClickHouse(client, { + dbName: 'analytics', +}); +``` + +### 2. Set Network Information + +Include `peerName` and `peerPort` for better observability: + +```typescript +instrumentClickHouse(client, { + peerName: 'clickhouse.prod.example.com', + peerPort: 8123, +}); +``` + +### 3. Control Query Text Capture + +For sensitive queries, you can disable query text capture: + +```typescript +instrumentClickHouse(client, { + captureQueryText: false, +}); +``` + +Or limit the query length: + +```typescript +instrumentClickHouse(client, { + maxQueryTextLength: 500, +}); +``` + +### 4. Use with OpenTelemetry SDK + +Make sure to set up the OpenTelemetry SDK before instrumenting: + +```typescript +import { NodeTracerProvider } from '@opentelemetry/sdk-trace-node'; +import { registerInstrumentations } from '@opentelemetry/instrumentation'; + +// Set up the tracer provider +const provider = new NodeTracerProvider(); +provider.register(); + +// Then instrument your ClickHouse client +instrumentClickHouse(client); +``` + +## Troubleshooting + +### No spans are being created + +Make sure you have: +1. Set up the OpenTelemetry SDK properly +2. Registered a tracer provider +3. Configured an exporter +4. Called `instrumentClickHouse()` after creating the client + +### Execution stats are not captured + +The ClickHouse client must return response headers with the query summary. This is the default behavior for the official `@clickhouse/client` package. + +If you're not seeing execution stats: +1. Verify you're using `@clickhouse/client` >= 0.2.0 +2. Check that `captureExecutionStats` is not set to `false` +3. Ensure the query is actually executing (not cached or erroring) + +## License + +MIT diff --git a/packages/otel-clickhouse/package.json b/packages/otel-clickhouse/package.json new file mode 100644 index 0000000..08292d1 --- /dev/null +++ b/packages/otel-clickhouse/package.json @@ -0,0 +1,57 @@ +{ + "name": "@kubiks/otel-clickhouse", + "version": "1.0.0", + "private": false, + "publishConfig": { + "access": "public" + }, + "description": "OpenTelemetry instrumentation for ClickHouse - Add distributed tracing to your database queries with detailed execution metrics", + "author": "Kubiks", + "license": "MIT", + "repository": "kubiks-inc/otel", + "sideEffects": false, + "type": "module", + "exports": { + ".": { + "types": "./dist/types/index.d.ts", + "import": "./dist/index.js", + "default": "./dist/index.js" + } + }, + "main": "./dist/index.js", + "types": "./dist/types/index.d.ts", + "files": [ + "dist", + "LICENSE", + "README.md" + ], + "engines": { + "node": "^18.19.0 || >=20.6.0" + }, + "scripts": { + "build": "pnpm clean && tsc", + "clean": "rimraf dist", + "prepublishOnly": "pnpm build", + "type-check": "tsc --noEmit", + "unit-test": "vitest --run", + "unit-test-watch": "vitest" + }, + "devDependencies": { + "@clickhouse/client": "^1.12.1", + "@opentelemetry/api": "^1.9.0", + "@opentelemetry/sdk-trace-base": "^2.1.0", + "@types/node": "18.15.11", + "rimraf": "3.0.2", + "typescript": "^5", + "vitest": "0.33.0" + }, + "peerDependencies": { + "@clickhouse/client": ">=0.2.0", + "@opentelemetry/api": ">=1.9.0 <2.0.0" + }, + "peerDependenciesMeta": { + "@clickhouse/client": { + "optional": false + } + } +} diff --git a/packages/otel-clickhouse/src/index.test.ts b/packages/otel-clickhouse/src/index.test.ts new file mode 100644 index 0000000..fd1fd2f --- /dev/null +++ b/packages/otel-clickhouse/src/index.test.ts @@ -0,0 +1,288 @@ +import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; +import { SpanStatusCode, trace } from "@opentelemetry/api"; +import { + BasicTracerProvider, + InMemorySpanExporter, + SimpleSpanProcessor, +} from "@opentelemetry/sdk-trace-base"; +import { instrumentClickHouse, type InstrumentClickHouseConfig } from "./index"; + +interface MockClickHouseClient { + query: (params: any) => Promise; +} + +describe("instrumentClickHouse", () => { + let provider: BasicTracerProvider; + let exporter: InMemorySpanExporter; + + beforeEach(() => { + exporter = new InMemorySpanExporter(); + provider = new BasicTracerProvider({ + spanProcessors: [new SimpleSpanProcessor(exporter)], + }); + trace.setGlobalTracerProvider(provider); + }); + + afterEach(async () => { + await provider.shutdown(); + exporter.reset(); + trace.disable(); + }); + + it("wraps the query method only once", () => { + const client = { + query: vi.fn(), + } as unknown as MockClickHouseClient; + + const instrumented = instrumentClickHouse(client as any); + + expect(instrumented.query).not.toBeUndefined(); + const wrappedQuery = instrumented.query; + + instrumentClickHouse(client as any); + + expect(instrumented.query).toBe(wrappedQuery); + }); + + it("records a successful query", async () => { + const client = { + query: vi.fn(() => + Promise.resolve({ + response_headers: { + "x-clickhouse-summary": JSON.stringify({ + read_rows: "1000", + read_bytes: "8192", + written_rows: "0", + written_bytes: "0", + total_rows_to_read: "1000", + result_rows: "50", + result_bytes: "2048", + elapsed_ns: "1500000", + }), + }, + }) + ), + } as unknown as MockClickHouseClient; + + const instrumented = instrumentClickHouse(client as any); + + await instrumented.query({ query: "SELECT * FROM users" }); + + const spans = exporter.getFinishedSpans(); + expect(spans).toHaveLength(1); + + const span = spans[0]; + expect(span.name).toBe("clickhouse.select"); + expect(span.status.code).toBe(SpanStatusCode.OK); + expect(span.attributes["db.system"]).toBe("clickhouse"); + expect(span.attributes["db.operation"]).toBe("SELECT"); + expect(span.attributes["db.statement"]).toBe("SELECT * FROM users"); + }); + + it("captures execution statistics", async () => { + const client = { + query: vi.fn(() => + Promise.resolve({ + response_headers: { + "x-clickhouse-summary": JSON.stringify({ + read_rows: "1000", + read_bytes: "8192", + written_rows: "0", + written_bytes: "0", + total_rows_to_read: "1000", + result_rows: "50", + result_bytes: "2048", + elapsed_ns: "1500000", + }), + }, + }) + ), + } as unknown as MockClickHouseClient; + + const instrumented = instrumentClickHouse(client as any, { + captureExecutionStats: true, + }); + + await instrumented.query({ query: "SELECT * FROM users" }); + + const spans = exporter.getFinishedSpans(); + expect(spans).toHaveLength(1); + + const span = spans[0]; + expect(span.attributes["clickhouse.read_rows"]).toBe(1000); + expect(span.attributes["clickhouse.read_bytes"]).toBe(8192); + expect(span.attributes["clickhouse.result_rows"]).toBe(50); + expect(span.attributes["clickhouse.result_bytes"]).toBe(2048); + expect(span.attributes["clickhouse.elapsed_ns"]).toBe(1500000); + }); + + it("records a failed query", async () => { + const error = new Error("Query failed"); + const client = { + query: vi.fn(() => Promise.reject(error)), + } as unknown as MockClickHouseClient; + + const instrumented = instrumentClickHouse(client as any); + + await expect( + instrumented.query({ query: "SELECT * FROM users" }) + ).rejects.toThrow("Query failed"); + + const spans = exporter.getFinishedSpans(); + expect(spans).toHaveLength(1); + + const span = spans[0]; + expect(span.name).toBe("clickhouse.select"); + expect(span.status.code).toBe(SpanStatusCode.ERROR); + expect(span.events).toHaveLength(1); + expect(span.events[0].name).toBe("exception"); + }); + + it("respects captureQueryText option", async () => { + const client = { + query: vi.fn(() => Promise.resolve({ response_headers: {} })), + } as unknown as MockClickHouseClient; + + const instrumented = instrumentClickHouse(client as any, { + captureQueryText: false, + }); + + await instrumented.query({ query: "SELECT * FROM users" }); + + const spans = exporter.getFinishedSpans(); + expect(spans).toHaveLength(1); + + const span = spans[0]; + expect(span.attributes["db.statement"]).toBeUndefined(); + }); + + it("truncates long queries", async () => { + const longQuery = "SELECT * FROM users WHERE " + "a = 1 AND ".repeat(200); + const client = { + query: vi.fn(() => Promise.resolve({ response_headers: {} })), + } as unknown as MockClickHouseClient; + + const instrumented = instrumentClickHouse(client as any, { + maxQueryTextLength: 100, + }); + + await instrumented.query({ query: longQuery }); + + const spans = exporter.getFinishedSpans(); + expect(spans).toHaveLength(1); + + const span = spans[0]; + const statement = span.attributes["db.statement"] as string; + expect(statement.length).toBeLessThanOrEqual(103); // 100 + "..." + expect(statement).toContain("..."); + }); + + it("includes database name when configured", async () => { + const client = { + query: vi.fn(() => Promise.resolve({ response_headers: {} })), + } as unknown as MockClickHouseClient; + + const instrumented = instrumentClickHouse(client as any, { + dbName: "analytics", + }); + + await instrumented.query({ query: "SELECT * FROM users" }); + + const spans = exporter.getFinishedSpans(); + expect(spans).toHaveLength(1); + + const span = spans[0]; + expect(span.attributes["db.name"]).toBe("analytics"); + }); + + it("includes network metadata when configured", async () => { + const client = { + query: vi.fn(() => Promise.resolve({ response_headers: {} })), + } as unknown as MockClickHouseClient; + + const instrumented = instrumentClickHouse(client as any, { + peerName: "clickhouse.example.com", + peerPort: 8123, + }); + + await instrumented.query({ query: "SELECT * FROM users" }); + + const spans = exporter.getFinishedSpans(); + expect(spans).toHaveLength(1); + + const span = spans[0]; + expect(span.attributes["net.peer.name"]).toBe("clickhouse.example.com"); + expect(span.attributes["net.peer.port"]).toBe(8123); + }); + + it("detects different operation types", async () => { + const client = { + query: vi.fn(() => Promise.resolve({ response_headers: {} })), + } as unknown as MockClickHouseClient; + + const instrumented = instrumentClickHouse(client as any); + + await instrumented.query({ query: "INSERT INTO users VALUES (1, 'test')" }); + await instrumented.query({ query: "UPDATE users SET name = 'test'" }); + await instrumented.query({ query: "DELETE FROM users WHERE id = 1" }); + + const spans = exporter.getFinishedSpans(); + expect(spans).toHaveLength(3); + + expect(spans[0].name).toBe("clickhouse.insert"); + expect(spans[0].attributes["db.operation"]).toBe("INSERT"); + + expect(spans[1].name).toBe("clickhouse.update"); + expect(spans[1].attributes["db.operation"]).toBe("UPDATE"); + + expect(spans[2].name).toBe("clickhouse.delete"); + expect(spans[2].attributes["db.operation"]).toBe("DELETE"); + }); + + it("handles queries without execution stats", async () => { + const client = { + query: vi.fn(() => Promise.resolve({ response_headers: {} })), + } as unknown as MockClickHouseClient; + + const instrumented = instrumentClickHouse(client as any, { + captureExecutionStats: true, + }); + + await instrumented.query({ query: "SELECT * FROM users" }); + + const spans = exporter.getFinishedSpans(); + expect(spans).toHaveLength(1); + + const span = spans[0]; + expect(span.status.code).toBe(SpanStatusCode.OK); + // Stats should not be present + expect(span.attributes["clickhouse.read_rows"]).toBeUndefined(); + }); + + it("skips execution stats when disabled", async () => { + const client = { + query: vi.fn(() => + Promise.resolve({ + response_headers: { + "x-clickhouse-summary": JSON.stringify({ + read_rows: "1000", + read_bytes: "8192", + }), + }, + }) + ), + } as unknown as MockClickHouseClient; + + const instrumented = instrumentClickHouse(client as any, { + captureExecutionStats: false, + }); + + await instrumented.query({ query: "SELECT * FROM users" }); + + const spans = exporter.getFinishedSpans(); + expect(spans).toHaveLength(1); + + const span = spans[0]; + expect(span.attributes["clickhouse.read_rows"]).toBeUndefined(); + }); +}); diff --git a/packages/otel-clickhouse/src/index.ts b/packages/otel-clickhouse/src/index.ts new file mode 100644 index 0000000..0f6494c --- /dev/null +++ b/packages/otel-clickhouse/src/index.ts @@ -0,0 +1,410 @@ +import { + context, + SpanKind, + SpanStatusCode, + trace, + type Span, +} from "@opentelemetry/api"; +import type { + ClickHouseClient, + DataFormat, + QueryParams, +} from "@clickhouse/client"; + +const DEFAULT_TRACER_NAME = "@kubiks/otel-clickhouse"; +const INSTRUMENTED_FLAG = Symbol("kubiksOtelClickHouseInstrumented"); + +// Semantic conventions for database attributes +export const SEMATTRS_DB_SYSTEM = "db.system" as const; +export const SEMATTRS_DB_OPERATION = "db.operation" as const; +export const SEMATTRS_DB_STATEMENT = "db.statement" as const; +export const SEMATTRS_DB_NAME = "db.name" as const; +export const SEMATTRS_NET_PEER_NAME = "net.peer.name" as const; +export const SEMATTRS_NET_PEER_PORT = "net.peer.port" as const; + +// ClickHouse-specific attributes +export const SEMATTRS_CLICKHOUSE_READ_ROWS = "clickhouse.read_rows" as const; +export const SEMATTRS_CLICKHOUSE_READ_BYTES = "clickhouse.read_bytes" as const; +export const SEMATTRS_CLICKHOUSE_WRITTEN_ROWS = + "clickhouse.written_rows" as const; +export const SEMATTRS_CLICKHOUSE_WRITTEN_BYTES = + "clickhouse.written_bytes" as const; +export const SEMATTRS_CLICKHOUSE_TOTAL_ROWS_TO_READ = + "clickhouse.total_rows_to_read" as const; +export const SEMATTRS_CLICKHOUSE_RESULT_ROWS = + "clickhouse.result_rows" as const; +export const SEMATTRS_CLICKHOUSE_RESULT_BYTES = + "clickhouse.result_bytes" as const; +export const SEMATTRS_CLICKHOUSE_ELAPSED_NS = "clickhouse.elapsed_ns" as const; +export const SEMATTRS_CLICKHOUSE_REAL_TIME_MICROSECONDS = + "clickhouse.real_time_microseconds" as const; + +/** + * ClickHouse query summary from response headers + */ +export interface ClickHouseSummary { + read_rows: string; + read_bytes: string; + written_rows: string; + written_bytes: string; + total_rows_to_read: string; + result_rows: string; + result_bytes: string; + elapsed_ns: string; + /** Available only after ClickHouse 24.9 */ + real_time_microseconds?: string; +} + +/** + * Configuration options for ClickHouse instrumentation. + */ +export interface InstrumentClickHouseConfig { + /** + * Custom tracer name. Defaults to "@kubiks/otel-clickhouse". + */ + tracerName?: string; + + /** + * Database name to include in spans. + */ + dbName?: string; + + /** + * Whether to capture full SQL query text in spans. + * Defaults to true. + */ + captureQueryText?: boolean; + + /** + * Maximum length for captured query text. Queries longer than this + * will be truncated. Defaults to 1000 characters. + */ + maxQueryTextLength?: number; + + /** + * Remote hostname or IP address of the ClickHouse server. + * Example: "clickhouse.example.com" or "192.168.1.100" + */ + peerName?: string; + + /** + * Remote port number of the ClickHouse server. + * Example: 8123 for HTTP, 9000 for native protocol + */ + peerPort?: number; + + /** + * Whether to capture ClickHouse execution statistics from response headers. + * This includes read/written rows, bytes, elapsed time, etc. + * Defaults to true. + */ + captureExecutionStats?: boolean; +} + +interface InstrumentedClient extends ClickHouseClient { + [INSTRUMENTED_FLAG]?: true; +} + +/** + * Sanitizes and truncates query text for safe inclusion in spans. + */ +function sanitizeQueryText(queryText: string, maxLength: number): string { + if (queryText.length <= maxLength) { + return queryText; + } + return `${queryText.substring(0, maxLength)}...`; +} + +/** + * Extracts the SQL operation (SELECT, INSERT, etc.) from query text. + */ +function extractOperation(queryText: string): string | undefined { + const trimmed = queryText.trimStart(); + const match = /^(?\w+)/u.exec(trimmed); + return match?.groups?.op?.toUpperCase(); +} + +/** + * Finalizes a span with status, timing, and optional error. + */ +function finalizeSpan(span: Span, error?: unknown): void { + if (error) { + if (error instanceof Error) { + span.recordException(error); + } else { + span.recordException(new Error(String(error))); + } + span.setStatus({ code: SpanStatusCode.ERROR }); + } else { + span.setStatus({ code: SpanStatusCode.OK }); + } + span.end(); +} + +/** + * Extracts ClickHouse summary from response headers. + */ +function extractSummary(headers: any): ClickHouseSummary | undefined { + if (!headers) { + return undefined; + } + + // The ClickHouse client provides summary in the response headers + const summary = headers["x-clickhouse-summary"]; + if (summary && typeof summary === "string") { + try { + return JSON.parse(summary) as ClickHouseSummary; + } catch { + return undefined; + } + } + + // Fallback: check if headers already contain the summary fields + if ( + "read_rows" in headers || + "result_rows" in headers || + "elapsed_ns" in headers + ) { + return headers as ClickHouseSummary; + } + + return undefined; +} + +/** + * Adds ClickHouse execution statistics to span attributes. + */ +function addExecutionStats(span: Span, summary: ClickHouseSummary): void { + try { + // Add all available statistics as attributes + if (summary.read_rows !== undefined) { + const readRows = parseInt(summary.read_rows, 10); + if (!isNaN(readRows)) { + span.setAttribute(SEMATTRS_CLICKHOUSE_READ_ROWS, readRows); + } + } + + if (summary.read_bytes !== undefined) { + const readBytes = parseInt(summary.read_bytes, 10); + if (!isNaN(readBytes)) { + span.setAttribute(SEMATTRS_CLICKHOUSE_READ_BYTES, readBytes); + } + } + + if (summary.written_rows !== undefined) { + const writtenRows = parseInt(summary.written_rows, 10); + if (!isNaN(writtenRows)) { + span.setAttribute(SEMATTRS_CLICKHOUSE_WRITTEN_ROWS, writtenRows); + } + } + + if (summary.written_bytes !== undefined) { + const writtenBytes = parseInt(summary.written_bytes, 10); + if (!isNaN(writtenBytes)) { + span.setAttribute(SEMATTRS_CLICKHOUSE_WRITTEN_BYTES, writtenBytes); + } + } + + if (summary.total_rows_to_read !== undefined) { + const totalRowsToRead = parseInt(summary.total_rows_to_read, 10); + if (!isNaN(totalRowsToRead)) { + span.setAttribute( + SEMATTRS_CLICKHOUSE_TOTAL_ROWS_TO_READ, + totalRowsToRead + ); + } + } + + if (summary.result_rows !== undefined) { + const resultRows = parseInt(summary.result_rows, 10); + if (!isNaN(resultRows)) { + span.setAttribute(SEMATTRS_CLICKHOUSE_RESULT_ROWS, resultRows); + } + } + + if (summary.result_bytes !== undefined) { + const resultBytes = parseInt(summary.result_bytes, 10); + if (!isNaN(resultBytes)) { + span.setAttribute(SEMATTRS_CLICKHOUSE_RESULT_BYTES, resultBytes); + } + } + + if (summary.elapsed_ns !== undefined) { + const elapsedNs = parseInt(summary.elapsed_ns, 10); + if (!isNaN(elapsedNs)) { + span.setAttribute(SEMATTRS_CLICKHOUSE_ELAPSED_NS, elapsedNs); + } + } + + // Available only after ClickHouse 24.9 + if (summary.real_time_microseconds !== undefined) { + const realTimeMicroseconds = parseInt( + summary.real_time_microseconds, + 10 + ); + if (!isNaN(realTimeMicroseconds)) { + span.setAttribute( + SEMATTRS_CLICKHOUSE_REAL_TIME_MICROSECONDS, + realTimeMicroseconds + ); + } + } + } catch (error) { + // Silently ignore errors in stats extraction + // to avoid disrupting the application + } +} + +/** + * Instruments a ClickHouse client with OpenTelemetry tracing. + * + * This function wraps the client's `query` method to create spans for each database + * operation, including detailed execution statistics from ClickHouse response headers. + * + * The instrumentation is idempotent - calling it multiple times on the same client will only + * instrument it once. + * + * @typeParam TClient - The type of the ClickHouse client + * @param client - The ClickHouse client to instrument + * @param config - Optional configuration for instrumentation behavior + * @returns The instrumented client (same instance, modified in place) + * + * @example + * ```typescript + * import { createClient } from '@clickhouse/client'; + * import { instrumentClickHouse } from '@kubiks/otel-clickhouse'; + * + * const client = createClient({ + * host: 'http://localhost:8123', + * username: 'default', + * password: '', + * }); + * + * instrumentClickHouse(client, { + * dbName: 'default', + * captureQueryText: true, + * captureExecutionStats: true, + * peerName: 'localhost', + * peerPort: 8123, + * }); + * + * // All queries are now traced with detailed metrics + * const result = await client.query({ + * query: 'SELECT * FROM users WHERE id = {id:UInt32}', + * query_params: { id: 1 }, + * }); + * ``` + * + * @example + * ```typescript + * // With ClickHouse Cloud + * import { createClient } from '@clickhouse/client'; + * import { instrumentClickHouse } from '@kubiks/otel-clickhouse'; + * + * const client = createClient({ + * host: 'https://your-instance.clickhouse.cloud:8443', + * username: 'default', + * password: 'your-password', + * }); + * + * instrumentClickHouse(client, { + * dbName: 'default', + * peerName: 'your-instance.clickhouse.cloud', + * peerPort: 8443, + * }); + * ``` + */ +export function instrumentClickHouse( + client: ClickHouseClient, + config?: InstrumentClickHouseConfig +): ClickHouseClient { + if (!client) { + return client; + } + + // Check if already instrumented + const instrumentedClient = client as InstrumentedClient; + if (instrumentedClient[INSTRUMENTED_FLAG]) { + return client; + } + + const { + tracerName = DEFAULT_TRACER_NAME, + dbName, + captureQueryText = true, + maxQueryTextLength = 1000, + peerName, + peerPort, + captureExecutionStats = true, + } = config ?? {}; + + const tracer = trace.getTracer(tracerName); + + // Store the original query method + const originalQuery = client.query.bind(client); + + // Create instrumented query method + client.query = async function instrumentedQuery( + params: QueryParams + ): Promise { + // Extract query text from params + const queryText = params.query; + + const operation = queryText ? extractOperation(queryText) : undefined; + const spanName = operation + ? `clickhouse.${operation.toLowerCase()}` + : "clickhouse.query"; + + // Start span + const span = tracer.startSpan(spanName, { kind: SpanKind.CLIENT }); + span.setAttribute(SEMATTRS_DB_SYSTEM, "clickhouse"); + + if (operation) { + span.setAttribute(SEMATTRS_DB_OPERATION, operation); + } + + if (dbName) { + span.setAttribute(SEMATTRS_DB_NAME, dbName); + } + + if (captureQueryText && queryText !== undefined) { + const sanitized = sanitizeQueryText(queryText, maxQueryTextLength); + span.setAttribute(SEMATTRS_DB_STATEMENT, sanitized); + } + + if (peerName) { + span.setAttribute(SEMATTRS_NET_PEER_NAME, peerName); + } + + if (peerPort) { + span.setAttribute(SEMATTRS_NET_PEER_PORT, peerPort); + } + + const activeContext = trace.setSpan(context.active(), span); + + try { + const result = await context.with(activeContext, () => + originalQuery(params) + ); + + // Extract and add execution statistics from response headers + if (captureExecutionStats) { + const summary = extractSummary(result.response_headers); + if (summary) { + addExecutionStats(span, summary); + } + } + + finalizeSpan(span); + return result; + } catch (error) { + finalizeSpan(span, error); + throw error; + } + }; + + // Mark as instrumented + instrumentedClient[INSTRUMENTED_FLAG] = true; + + return client; +} diff --git a/packages/otel-clickhouse/tsconfig.json b/packages/otel-clickhouse/tsconfig.json new file mode 100644 index 0000000..b3f9d2e --- /dev/null +++ b/packages/otel-clickhouse/tsconfig.json @@ -0,0 +1,21 @@ +{ + "compilerOptions": { + "target": "ES2020", + "module": "ESNext", + "moduleResolution": "bundler", + "lib": ["ES2020"], + "outDir": "dist", + "declaration": true, + "declarationMap": true, + "sourceMap": true, + "strict": true, + "esModuleInterop": true, + "skipLibCheck": true, + "forceConsistentCasingInFileNames": true, + "resolveJsonModule": true, + "declarationDir": "dist/types", + "stripInternal": true + }, + "include": ["src/**/*"], + "exclude": ["node_modules", "dist", "**/*.test.ts"] +} diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index b4e2b6b..cdf0e74 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -72,6 +72,30 @@ importers: specifier: 0.33.0 version: 0.33.0(less@4.2.0)(sass@1.69.7)(stylus@0.59.0) + packages/otel-clickhouse: + devDependencies: + '@clickhouse/client': + specifier: ^1.12.1 + version: 1.12.1 + '@opentelemetry/api': + specifier: ^1.9.0 + version: 1.9.0 + '@opentelemetry/sdk-trace-base': + specifier: ^2.1.0 + version: 2.1.0(@opentelemetry/api@1.9.0) + '@types/node': + specifier: 18.15.11 + version: 18.15.11 + rimraf: + specifier: 3.0.2 + version: 3.0.2 + typescript: + specifier: ^5 + version: 5.3.3 + vitest: + specifier: 0.33.0 + version: 0.33.0(less@4.2.0)(sass@1.69.7)(stylus@0.59.0) + packages/otel-drizzle: devDependencies: '@opentelemetry/api': @@ -372,6 +396,13 @@ packages: '@changesets/write@0.3.0': resolution: {integrity: sha512-slGLb21fxZVUYbyea+94uFiD6ntQW0M2hIKNznFizDhZPDgn2c/fv1UzzlW43RVzh1BEDuIqW6hzlJ1OflNmcw==} + '@clickhouse/client-common@1.12.1': + resolution: {integrity: sha512-ccw1N6hB4+MyaAHIaWBwGZ6O2GgMlO99FlMj0B0UEGfjxM9v5dYVYql6FpP19rMwrVAroYs/IgX2vyZEBvzQLg==} + + '@clickhouse/client@1.12.1': + resolution: {integrity: sha512-7ORY85rphRazqHzImNXMrh4vsaPrpetFoTWpZYueCO2bbO6PXYDXp/GQ4DgxnGIqbWB/Di1Ai+Xuwq2o7DJ36A==} + engines: {node: '>=16'} + '@connectrpc/connect-web@2.0.0-rc.3': resolution: {integrity: sha512-w88P8Lsn5CCsA7MFRl2e6oLY4J/5toiNtJns/YJrlyQaWOy3RO8pDgkz+iIkG98RPMhj2thuBvsd3Cn4DKKCkw==} peerDependencies: @@ -3088,6 +3119,12 @@ snapshots: human-id: 1.0.2 prettier: 2.8.8 + '@clickhouse/client-common@1.12.1': {} + + '@clickhouse/client@1.12.1': + dependencies: + '@clickhouse/client-common': 1.12.1 + '@connectrpc/connect-web@2.0.0-rc.3(@bufbuild/protobuf@2.10.0)(@connectrpc/connect@2.0.0-rc.3(@bufbuild/protobuf@2.10.0))': dependencies: '@bufbuild/protobuf': 2.10.0