diff --git a/script/text-import-plugin.ts b/script/text-import-plugin.ts index b61b14b0c..9533075dd 100644 --- a/script/text-import-plugin.ts +++ b/script/text-import-plugin.ts @@ -1,23 +1,13 @@ /** * esbuild plugin that polyfills Bun's `with { type: "text" }` import - * attribute. + * attribute (esbuild only supports `json`). Intercepts matching + * imports, reads the file, and default-exports its contents as a + * string. Runtime behavior matches Bun's native handling. * - * esbuild doesn't natively support the `text` import attribute (only - * `json`), but Bun does. Our CLI code uses it to load the grep worker - * source as a string at bundle time (see - * `src/lib/scan/worker-pool.ts`). Without this plugin, esbuild errors - * with `Importing with a type attribute of "text" is not supported` - * on any file that imports a sibling `.js` as text. - * - * The plugin intercepts imports whose `with` attribute matches - * `{ type: "text" }`, reads the file from disk, and emits it as a JS - * module that default-exports the file's contents as a string. - * Runtime behavior matches Bun's native handling, so the same source - * works in dev (via `bun run`) and in compiled binaries (esbuild + - * `bun build --compile` two-step). - * - * Used by both `script/build.ts` (single-file executable) and - * `script/bundle.ts` (CJS library bundle for npm). + * Used by `script/build.ts` (single-file executable) and + * `script/bundle.ts` (CJS library bundle) so the grep-worker source + * in `src/lib/scan/worker-pool.ts` loads correctly in both dev and + * compiled builds. */ import { readFileSync } from "node:fs"; @@ -25,7 +15,6 @@ import { resolve as resolvePath } from "node:path"; import type { Plugin } from "esbuild"; const TEXT_IMPORT_NS = "text-import"; -/** Match-any filter for esbuild's plugin API. Hoisted for top-level-regex lint. 
*/ const ANY_FILTER = /.*/; export const textImportPlugin: Plugin = { diff --git a/src/lib/dsn/code-scanner.ts b/src/lib/dsn/code-scanner.ts index 606e05cab..74b6a59a0 100644 --- a/src/lib/dsn/code-scanner.ts +++ b/src/lib/dsn/code-scanner.ts @@ -1,51 +1,29 @@ /** - * Language-Agnostic DSN Code Scanner (policy layer). + * Language-agnostic DSN code scanner. * - * This module owns the DSN-specific policy (URL regex, comment-line - * filtering, host validation, package-path inference, stop-on-first - * semantics). All file walking, `.gitignore` handling, extension - * filtering, bounded concurrency, AND worker-pool dispatch are - * delegated to the shared `src/lib/scan/` module via `collectGrep`. + * Owns the DSN-specific policy (URL regex, comment-line filtering, + * host validation, package-path inference). File walking, gitignore + * handling, extension filtering, bounded concurrency, and worker-pool + * dispatch are delegated to `src/lib/scan/`. * - * Flow: - * 1. `scanDirectory(cwd, stopOnFirst)` calls `collectGrep` with the - * DSN pattern and preset (`dsnScanOptions()`), plus - * `recordMtimes: true` and an `onDirectoryVisit` hook so the - * cache-invalidation maps are populated in one traversal. - * 2. `collectGrep` dispatches per-file work to the worker pool (when - * available) or a concurrent-async fallback. Each yielded - * `GrepMatch` represents one line containing a DSN URL; the - * grep engine handles the file-level literal gate (`http`) for - * free, so we skip files that can't possibly match before any - * regex runs. - * 3. 
Main thread post-filters each match: - * - Skip commented lines (language-aware comment prefixes) - * - Re-run `DSN_PATTERN` on `match.line` to recover all DSNs - * (grep emits one match per line regardless of how many - * hits the line contains — rare for DSNs but the contract - * predates this refactor) - * - Validate host (`isValidDsnHost`) - * - Dedup on raw DSN string - * - Early-exit on first unique DSN when `stopOnFirst: true` - * - Build `DetectedDsn` with inferred package path - * 4. `sourceMtimes` records mtime per file that contributed a - * validated DSN; `dirMtimes` records mtime per visited dir via - * the hook. Both are used by `src/lib/db/dsn-cache.ts` for - * cache invalidation. + * ### Flow * - * Behavior change landed in PR 3: the walker's `nestedGitignore: true` - * default (via `dsnScanOptions()`) means nested `.gitignore` files are - * now honored. Pre-PR-3 code only read the project-root `.gitignore`. - * This is a correctness improvement matching git's cumulative semantics; - * DSNs in files covered by a subdir `.gitignore` are no longer detected. + * `scanCodeForDsns` routes through `collectGrep(DSN_PATTERN, ...)`. + * Each emitted `GrepMatch` is one line containing a DSN-like URL; + * the scanner post-filters matches on the main thread (comment-line + * check, host validation, dedup). `sourceMtimes` / `dirMtimes` are + * populated via `recordMtimes: true` + the `onDirectoryVisit` hook + * in a single traversal. * - * Behavior change landed in PR 6 (this one): the DSN scanner now shares - * the grep pipeline and gets worker-pool parallelism for free. - * End-to-end time on the 10k-file fixture drops from ~330ms → ~200ms. - * Correctness is unchanged — `extractDsnsFromContent` is still - * exported for `src/lib/dsn/detector.ts::isDsnStillPresent` (the - * cache-verify fast path for a single file) and internally we still - * go through the same comment/host-validation filter. 
+ * `scanCodeForFirstDsn` deliberately avoids the worker pool — the + * pool's ~20ms startup cost dwarfs the work for a stop-on-first scan + * that typically finds its target in the first few files. Uses a + * direct `walkFiles` loop instead. + * + * Both the `CodeScanResult` shape and the result-map semantics are + * cache-contract-stable — `src/lib/db/dsn-cache.ts` verifies entries + * against the filesystem, so changing keys/values requires bumping + * the cache schema. */ import path from "node:path"; @@ -58,152 +36,94 @@ import { createDetectedDsn, inferPackagePath, parseDsn } from "./parser.js"; import { DSN_MAX_DEPTH, dsnScanOptions } from "./scan-options.js"; import type { DetectedDsn } from "./types.js"; -/** Scoped logger for DSN code scanning. */ const log = logger.withTag("dsn-scan"); /** - * Result of scanning code for DSNs, including mtimes for caching. - * - * Shape is stable — `src/lib/db/dsn-cache.ts` stores this via - * `setCachedDetection` and verifies `sourceMtimes` / `dirMtimes` - * against the filesystem. Do NOT change keys/values without also - * bumping the cache schema. + * Result of scanning code for DSNs. Shape is cache-contract-stable + * — `src/lib/db/dsn-cache.ts` uses it directly. */ export type CodeScanResult = { - /** All detected DSNs */ dsns: DetectedDsn[]; /** - * Map of source file paths (POSIX, relative to cwd) to their mtimes. - * Only files that contained at least one DSN are present — the cache + * Map of source file paths (POSIX, relative to cwd) → mtime. + * Only files containing at least one validated DSN. The cache * verifier uses this to detect "source file touched since last scan". */ sourceMtimes: Record<string, number>; /** * Map of scanned directories (POSIX, relative to cwd; `.` for the - * root) to their floored `stat.mtimeMs`. The verifier uses this to - * detect "files added to a scanned dir since last scan". + * root) → floored `stat.mtimeMs`. The verifier uses this to detect + * "files added to a scanned dir since last scan". 
*/ dirMtimes: Record<string, number>; }; -/** - * Common comment prefixes to detect commented-out DSNs. - * Lines starting with these (after trimming whitespace) are ignored. - */ +/** Comment prefixes — lines starting with any of these (after trimming whitespace) are ignored. */ const COMMENT_PREFIXES = ["//", "#", "--", "