Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,10 @@ and adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [Unreleased]

### Fixes

- Fixed index corruption that could happen when the same project was opened through two different path spellings — a symlinked checkout, or upper/lowercase variants of one path on a case-insensitive drive (Windows NTFS, or a WSL `/mnt` drive). CodeGraph now recognizes these as the same project and shares a single database connection instead of opening a second one that could corrupt the index. (#1057)


## [1.1.6] - 2026-06-30

Expand Down
76 changes: 76 additions & 0 deletions __tests__/root-identity.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
import { describe, it, expect, beforeEach, afterEach } from 'vitest';
import * as fs from 'fs';
import * as os from 'os';
import * as path from 'path';
import { canonicalRootKey, findNearestCodeGraphRoot } from '../src/directory';

/**
 * Regression coverage for #1057: the MCP server keyed its open-DB connection
 * cache by the resolved-root PATH STRING, so two spellings of one physical repo
 * — a symlinked checkout, or a case-variant on a case-insensitive mount (NTFS,
 * WSL DrvFs `/mnt/c`) — each opened a SEPARATE SQLite connection to the same
 * `.codegraph/codegraph.db` and corrupted the index.
 *
 * `canonicalRootKey` keys on filesystem identity (dev:ino), which is identical
 * for every spelling, so the cache dedupes them onto one connection. The
 * symlink case below is the deterministic, filesystem-agnostic proxy for the
 * case-insensitive-mount scenario (both produce two path strings for one
 * inode). `findNearestCodeGraphRoot` still returns a distinct string per
 * spelling; the suite asserts that those strings converge once they are passed
 * through `canonicalRootKey`, which is what the connection cache now keys on.
 */
describe('index root identity (#1057)', () => {
  let tmp: string;

  beforeEach(() => {
    tmp = fs.mkdtempSync(path.join(os.tmpdir(), 'cg-rootid-'));
  });
  afterEach(() => {
    fs.rmSync(tmp, { recursive: true, force: true });
  });

  /** Create `<tmp>/<name>` holding a stub `.codegraph/codegraph.db`; returns the project path. */
  function makeProject(name: string): string {
    const proj = path.join(tmp, name);
    fs.mkdirSync(path.join(proj, '.codegraph'), { recursive: true });
    fs.writeFileSync(path.join(proj, '.codegraph', 'codegraph.db'), 'x');
    return proj;
  }

  /**
   * Create `<tmp>/<name>` as a second spelling of the directory `target` and
   * return the new path.
   *
   * The `'junction'` type only has an effect on Windows, where a junction can
   * be created without the elevated privilege a plain symlink needs; other
   * platforms ignore the argument and create an ordinary symlink. `target` is
   * always absolute here (it lives under `mkdtempSync`'s result), which is what
   * a junction requires.
   */
  function makeLink(target: string, name: string): string {
    const link = path.join(tmp, name);
    fs.symlinkSync(target, link, 'junction');
    return link;
  }

  it('gives one identity key to a directory and a symlink that points at it', () => {
    const real = makeProject('proj');
    const link = makeLink(real, 'projLink');

    // Two distinct path strings for one physical directory...
    expect(path.resolve(real)).not.toBe(path.resolve(link));
    // ...but ONE filesystem identity, so the connection cache dedupes them.
    expect(canonicalRootKey(link)).toBe(canonicalRootKey(real));
  });

  it('maps both spellings of a resolved root to one cache identity', () => {
    const real = makeProject('proj');
    const link = makeLink(real, 'projLink');

    // Root discovery is run once per spelling; both must find the project...
    const fromReal = findNearestCodeGraphRoot(real);
    const fromLink = findNearestCodeGraphRoot(link);
    expect(fromReal).not.toBeNull();
    expect(fromLink).not.toBeNull();

    // ...and the connection cache keys on identity, so both converge — which is
    // what stops the second SQLite connection that pre-fix corrupted the index.
    expect(canonicalRootKey(fromLink!)).toBe(canonicalRootKey(fromReal!));
  });

  it('keeps distinct projects on distinct identity keys', () => {
    const a = makeProject('a');
    const b = makeProject('b');
    expect(canonicalRootKey(a)).not.toBe(canonicalRootKey(b));
  });

  it('falls back to a stable string key when the root cannot be stat-ed', () => {
    const gone = path.join(tmp, 'does-not-exist');
    // No throw, and deterministic for a given input.
    expect(canonicalRootKey(gone)).toBe(canonicalRootKey(gone));
  });
});
34 changes: 34 additions & 0 deletions src/directory.ts
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,40 @@ export function unsafeIndexRootReason(projectRoot: string): string | null {
return null;
}

/**
 * Resolve `dir` to its `realpathSync` form (symlinks + `.`/`..` collapsed),
 * falling back to the input unchanged when resolution throws (e.g. a path that
 * vanished mid-call). Never throws.
 * Used as the fallback of {@link canonicalRootKey}.
 *
 * @param dir - Directory path to canonicalize.
 * @returns The real path of `dir`, or `dir` itself if it could not be resolved.
 */
function canonicalizeRoot(dir: string): string {
  try {
    return fs.realpathSync(dir);
  } catch {
    return dir;
  }
}

/**
 * A stable filesystem-IDENTITY key for an index root: `"<dev>:<ino>"`. Unlike a
 * path string — even a realpath'd one — this is identical for EVERY spelling of
 * the same physical directory, including a case-variant on a case-insensitive
 * mount (Windows NTFS, or WSL's DrvFs `/mnt/c`) where `realpathSync` preserves
 * the caller's casing and so cannot dedupe. The MCP server keys its open-DB
 * connection cache by this so two spellings of one repo share ONE SQLite
 * connection instead of opening a second that corrupts the shared
 * `.codegraph/codegraph.db` (#1057, same second-connection mechanism as #238).
 *
 * The key is opaque: compare it only against other keys produced by this
 * function. It falls back to the realpath'd path (see `canonicalizeRoot`) when
 * the directory can't be stat'd (e.g. it vanished mid-call) or reports no
 * inode, so the key stays usable and stable-enough. Never throws.
 *
 * @param root - Path of the index root, in any spelling.
 * @returns `"<dev>:<ino>"` in decimal, or the fallback path string.
 */
export function canonicalRootKey(root: string): string {
  try {
    // `bigint: true` keeps dev/ino exact. As plain JS numbers, any value above
    // Number.MAX_SAFE_INTEGER is rounded, so two different directories could
    // collapse onto one key — and therefore onto one DB connection.
    const { dev, ino } = fs.statSync(root, { bigint: true });
    const inode = ino.toString();
    // Defensive: an inode of 0 is treated as "no usable identity", because
    // every directory on such a volume would otherwise share the key "<dev>:0".
    // NOTE(review): which filesystems actually report 0 here is unconfirmed.
    if (inode !== '0') {
      return `${dev.toString()}:${inode}`;
    }
  } catch {
    // stat failed — fall through to the path-based key below.
  }
  return canonicalizeRoot(root);
}

export function findNearestCodeGraphRoot(startPath: string): string | null {
let current = path.resolve(startPath);
const root = path.parse(current).root;
Expand Down
25 changes: 18 additions & 7 deletions src/mcp/tools.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

import type CodeGraph from '../index';
import type { QueryPool } from './query-pool';
import { findNearestCodeGraphRoot } from '../directory';
import { findNearestCodeGraphRoot, canonicalRootKey } from '../directory';
// Lazy-load the heavy CodeGraph chain off the MCP startup path — see the same
// helper in engine.ts. ToolHandler must load to answer tools/list (static
// schemas), but it must NOT drag in sqlite/query layers before the daemon binds;
Expand Down Expand Up @@ -1052,25 +1052,36 @@ export class ToolHandler {
);
}

// Identity-key the open connection by FILESYSTEM identity (dev:ino), not the
// path string. Two spellings of one repo — a symlinked checkout, or a
// case-variant on a case-insensitive mount (NTFS, or WSL DrvFs `/mnt/c`) —
// must share ONE connection; a second connection to the same
// `.codegraph/codegraph.db` corrupts the index (#1057, same second-connection
// mechanism as #238 below). realpath alone can't dedupe case-variants (it
// preserves the caller's casing), so we key on the inode, which is identical
// for every spelling.
const rootKey = canonicalRootKey(resolvedRoot);

// If the path resolves to the default project, reuse the already-open
// default instance rather than opening a SECOND connection to the same DB.
// A duplicate connection serializes reads against the watcher's auto-sync
// writes; when WAL isn't in effect (e.g. a filesystem without shared-memory
// support) that surfaces as intermittent
// "database is locked" on concurrent tool calls. See issue #238. The
// default instance is owned/closed by the server, so it's never cached.
if (this.cg && this.cg.getProjectRoot() === resolvedRoot) {
if (this.cg && canonicalRootKey(this.cg.getProjectRoot()) === rootKey) {
return this.freshen(this.cg);
}

// Cache the open DB connection by RESOLVED ROOT only — never by the input
// path. One key per instance means closeAll() closes each exactly once, and
// a changed resolution maps to a different entry instead of a stale hit.
const cached = this.projectCache.get(resolvedRoot);
// Cache the open DB connection by ROOT IDENTITY only — never by the input
// path. One key per physical index means closeAll() closes each exactly
// once, and a changed resolution maps to a different entry instead of a
// stale hit.
const cached = this.projectCache.get(rootKey);
if (cached) return this.freshen(cached);

const cg = loadCodeGraph().openSync(resolvedRoot);
this.projectCache.set(resolvedRoot, cg);
this.projectCache.set(rootKey, cg);
return cg;
}

Expand Down