Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,7 @@ dist

coverage
playwright-report
__screenshots__
__screenshots__

# Copied from ../tracker/dist (see packages/server/package.json copytracker script)
packages/server/app/tracker/tracker.js
20 changes: 20 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -201,6 +201,26 @@ Counterscale.trackPageview();

The deployment URL can always be changed to go behind a custom domain you own. [More here](https://developers.cloudflare.com/workers/configuration/routing/custom-domains/).

### Allowed Origins: Restricting Which Sites Can Report

By default, a Counterscale deployment records hits from any site that loads your tracker (or posts to `/collect`). To limit recording to specific domains, set the `TRACKER_ALLOWED_ORIGINS` environment variable on the Cloudflare Worker to a comma-separated list of allowed domains:

```
TRACKER_ALLOWED_ORIGINS=example.com,myblog.io,acme.dev
```

- Each entry matches the specified domain **and any of its subdomains**, so you only need the parent domain. `example.com` already covers `blog.example.com`, `app.example.com`, and any other subdomain, so there's no need to list them separately. Lookalikes such as `notexample.com` are not matched. Entries may be bare hostnames or include a scheme (`https://example.com`); both are treated the same.
- When set, hits whose origin isn't on the list are silently ignored: the tracker still returns a normal response, but no data is recorded.
- Leave it empty (the default) or set it to `*` to disable the allowlist and record from any origin.

You can set this variable in one of two ways:

- **Cloudflare dashboard:** Workers & Pages → Counterscale worker → Settings → Variables and Secrets → add or set `TRACKER_ALLOWED_ORIGINS`, then redeploy.
- **From source:** edit the `vars` block in `packages/server/wrangler.json` and redeploy.

> [!NOTE]
> This is a best-effort filter. The signals it checks (`Origin`, `Referer`, and the reported hostname) are supplied by the client and can be spoofed by non-browser tools, so it deters casual or accidental cross-site reporting but will not stop a determined attacker. Recorded data is also partitioned by site ID, which further limits the impact of unwanted hits.

## CLI Commands

Counterscale provides a command-line interface (CLI) to help you install, configure, and manage your deployment.
Expand Down
5 changes: 5 additions & 0 deletions packages/cli/src/commands/install.ts
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,11 @@ export async function install(
// If --advanced is true, prompt the user for worker name and analytics dataset name.
// Otherwise, stick to the default values read from the server package.
if (opts.advanced) {
log.warn(
"If you previously installed with a custom worker name or analytics dataset, " +
"re-enter the same values below — accepting the defaults will repoint your " +
"deployment at a fresh dataset and your existing analytics will appear empty.",
);
({ workerName, analyticsDataset } = await promptProjectConfig(
workerName,
analyticsDataset,
Expand Down
11 changes: 7 additions & 4 deletions packages/cli/src/lib/__tests__/config.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -254,7 +254,7 @@ describe("CLI Functions", () => {
"/target/wrangler.json",
initialConfig,
"new-worker",
"new-dataset",
"newDataset",
);

// Verify writeFileSync was called with the correct arguments
Expand All @@ -271,8 +271,11 @@ describe("CLI Functions", () => {
// Verify worker name and dataset were updated
expect(writtenConfig.name).toBe("new-worker");
expect(writtenConfig.analytics_engine_datasets[0].dataset).toBe(
"new-dataset",
"newDataset",
);
// Verify CF_DATASET_NAME var was set so the dashboard's SQL
// read path uses the same dataset as the AE binding.
expect(writtenConfig.vars.CF_DATASET_NAME).toBe("newDataset");

// Verify paths were made absolute
expect(writtenConfig.build.cwd).toMatch(/^\//); // Should start with /
Expand All @@ -294,7 +297,7 @@ describe("CLI Functions", () => {
"/target/wrangler.json",
initialConfig,
"new-worker",
"new-dataset",
"newDataset",
accountId,
);

Expand All @@ -319,7 +322,7 @@ describe("CLI Functions", () => {
"/target/wrangler.json",
initialConfig,
"new-worker",
"new-dataset",
"newDataset",
);

const writtenConfig = JSON.parse(
Expand Down
15 changes: 15 additions & 0 deletions packages/cli/src/lib/config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -123,13 +123,24 @@ export function readInitialServerConfig() {
* converted to be absolute. This makes it so that the `wrangler deploy` command can be
* run from any directory.
*/
// NOTE(review): the JSDoc block that closes just above this line reads like the
// description of stageDeployConfig (absolute paths, `wrangler deploy`). With this
// constant placed in between, that JSDoc no longer sits directly on the
// function — consider moving this constant above that JSDoc. TODO confirm.
//
// Accepted shape of an Analytics Engine dataset name: one or more ASCII
// letters, digits, or underscores (no hyphens, spaces, or punctuation).
// Mirrors the validation in AnalyticsEngineAPI's constructor — the dataset
// name is interpolated into raw SQL on the read path, and only matching names
// will pass the server-side guard.
export const DATASET_NAME_PATTERN = /^[A-Za-z0-9_]+$/;

export async function stageDeployConfig(
targetPath: string,
initialDeployConfig: ReturnType<typeof JSON.parse>,
workerName: string,
analyticsDataset: string,
accountId?: string,
): Promise<void> {
if (!DATASET_NAME_PATTERN.test(analyticsDataset)) {
throw new Error(
`Invalid Analytics Engine dataset name: ${analyticsDataset}. Only letters, digits, and underscores are allowed.`,
);
}

const serverPkgDir = getServerPkgDir();

const outDeployConfig = makePathsAbsolute(
Expand All @@ -138,6 +149,10 @@ export async function stageDeployConfig(
);
outDeployConfig.name = workerName;
outDeployConfig.analytics_engine_datasets[0].dataset = analyticsDataset;
outDeployConfig.vars = {
...(outDeployConfig.vars ?? {}),
CF_DATASET_NAME: analyticsDataset,
};

if (accountId) {
outDeployConfig.account_id = accountId;
Expand Down
1 change: 1 addition & 0 deletions packages/server/.dev.vars.example
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,4 @@ CF_PASSWORD_HASH=''
CF_JWT_SECRET=''
CF_AUTH_ENABLED=''
CF_STORAGE_ENABLED=''
CF_DATASET_NAME=''
148 changes: 148 additions & 0 deletions packages/server/app/analytics/__tests__/collect.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -427,3 +427,151 @@ describe("collectRequestHandler", () => {
expect(blobs[14]).toBe(""); // utm_content (empty)
});
});

/**
 * Allowlist (`TRACKER_ALLOWED_ORIGINS`) enforcement tests for
 * `collectRequestHandler`.
 *
 * Each test builds a minimal fake request plus a fake `Env` whose Analytics
 * Engine binding is a spy, invokes the handler, and then checks whether a
 * data point was (or was not) written.
 */
describe("collectRequestHandler allowlist enforcement", () => {
    const UA =
        "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36";

    /**
     * Builds a minimal stand-in for a `/collect` GET request.
     *
     * @param opts.h       value sent as the `h` query parameter
     * @param opts.r       value sent as the `r` query parameter (defaults to "")
     * @param opts.origin  optional `Origin` request header
     * @param opts.referer optional `Referer` request header
     */
    function buildRequest(opts: {
        h: string;
        r?: string;
        origin?: string;
        referer?: string;
    }) {
        // Keys are stored lowercase; lookups are lowercased to match.
        const headers: Record<string, string> = { "user-agent": UA };
        if (opts.origin) headers["origin"] = opts.origin;
        if (opts.referer) headers["referer"] = opts.referer;
        return {
            method: "GET",
            url:
                "https://example.com/collect?" +
                new URLSearchParams({
                    sid: "example",
                    h: opts.h,
                    p: "/",
                    r: opts.r ?? "",
                    ht: "1",
                }).toString(),
            headers: {
                // Behave like the Fetch API's `Headers.get`: header names are
                // case-insensitive and an absent header yields `null` (not
                // `undefined`), so the handler sees what it would see in
                // production.
                get: (header: string): string | null =>
                    headers[header.toLowerCase()] ?? null,
            },
        };
    }

    /**
     * Builds a fake `Env` with a spied Analytics Engine binding and the given
     * `TRACKER_ALLOWED_ORIGINS` value (`undefined` leaves it unset).
     */
    function makeEnv(allowed?: string) {
        return {
            WEB_COUNTER_AE: { writeDataPoint: vi.fn() },
            TRACKER_ALLOWED_ORIGINS: allowed,
        } as unknown as Env;
    }

    test("writes when h matches a listed origin via subdomain", () => {
        const env = makeEnv("pmux.io");
        collectRequestHandler(
            buildRequest({ h: "https://docs.pmux.io" }) as any,
            env,
        );
        expect(env.WEB_COUNTER_AE.writeDataPoint).toHaveBeenCalled();
    });

    test("silently drops (200 gif, no write) when h is not allowed", () => {
        const env = makeEnv("pmux.io");
        const response = collectRequestHandler(
            buildRequest({ h: "https://evil.com" }) as any,
            env,
        );
        expect(env.WEB_COUNTER_AE.writeDataPoint).not.toHaveBeenCalled();
        expect(response.status).toBe(200);
        expect(response.headers.get("Content-Type")).toBe("image/gif");
        // Drop path must NOT set Last-Modified: the tracker uses it for
        // cookieless visit counting, and updating it on a dropped hit would
        // corrupt session state.
        expect(response.headers.get("Last-Modified")).toBeNull();
    });

    test("writes when h is a bare hostname (no scheme) that is allowed", () => {
        const env = makeEnv("pmux.io");
        collectRequestHandler(buildRequest({ h: "docs.pmux.io" }) as any, env);
        expect(env.WEB_COUNTER_AE.writeDataPoint).toHaveBeenCalled();
    });

    test("writes when only the Referer header is present and allowed", () => {
        const env = makeEnv("pmux.io");
        collectRequestHandler(
            buildRequest({
                h: "https://docs.pmux.io",
                referer: "https://docs.pmux.io/guide",
            }) as any,
            env,
        );
        expect(env.WEB_COUNTER_AE.writeDataPoint).toHaveBeenCalled();
    });

    test("does not drop legit traffic from an opaque 'null' Origin", () => {
        // Sandboxed iframes send Origin: null; this must not block an
        // otherwise-allowed hit.
        const env = makeEnv("pmux.io");
        collectRequestHandler(
            buildRequest({ h: "https://docs.pmux.io", origin: "null" }) as any,
            env,
        );
        expect(env.WEB_COUNTER_AE.writeDataPoint).toHaveBeenCalled();
    });

    test("'*' in the allowlist disables enforcement (allow all)", () => {
        const env = makeEnv("*");
        collectRequestHandler(
            buildRequest({ h: "https://anything.com" }) as any,
            env,
        );
        expect(env.WEB_COUNTER_AE.writeDataPoint).toHaveBeenCalled();
    });

    test("drops when h is allowed but the Origin header is not", () => {
        const env = makeEnv("pmux.io");
        collectRequestHandler(
            buildRequest({
                h: "https://docs.pmux.io",
                origin: "https://evil.com",
            }) as any,
            env,
        );
        expect(env.WEB_COUNTER_AE.writeDataPoint).not.toHaveBeenCalled();
    });

    test("writes when h, Origin, and Referer headers all match", () => {
        const env = makeEnv("pmux.io");
        collectRequestHandler(
            buildRequest({
                h: "https://docs.pmux.io",
                origin: "https://docs.pmux.io",
                referer: "https://docs.pmux.io/guide",
            }) as any,
            env,
        );
        expect(env.WEB_COUNTER_AE.writeDataPoint).toHaveBeenCalled();
    });

    test("ignores the analytics referrer (r) param for enforcement", () => {
        // r is the visitor's traffic source, not the embedding page; it must
        // not be validated against the allowlist.
        const env = makeEnv("pmux.io");
        collectRequestHandler(
            buildRequest({
                h: "https://docs.pmux.io",
                r: "https://google.com",
            }) as any,
            env,
        );
        expect(env.WEB_COUNTER_AE.writeDataPoint).toHaveBeenCalled();
    });

    test("writes for any host when the allowlist is unset (opt-in)", () => {
        const env = makeEnv(undefined);
        collectRequestHandler(
            buildRequest({ h: "https://anything.com" }) as any,
            env,
        );
        expect(env.WEB_COUNTER_AE.writeDataPoint).toHaveBeenCalled();
    });
});
47 changes: 47 additions & 0 deletions packages/server/app/analytics/__tests__/query.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -682,3 +682,50 @@ describe("intervalToSql", () => {
});
});
});

/**
 * Covers how `AnalyticsEngineAPI` picks, validates, and uses its dataset
 * name: the fallback default, a caller-supplied name, rejection of unsafe
 * names, and the name that ends up in the SQL sent by `getCounts`.
 */
describe("AnalyticsEngineAPI dataset name", () => {
    let fetchMock: Mock;

    // Small factory so every test constructs the API with the same account
    // and token and only varies the optional dataset argument.
    const makeApi = (...datasetArg: [] | [string]) =>
        new AnalyticsEngineAPI("acct", "tok", ...datasetArg);

    beforeEach(() => {
        // Replace the global fetch with a spy that resolves to an empty
        // result set.
        fetchMock = global.fetch = vi.fn();
        fetchMock.mockResolvedValue(createFetchResponse({ data: [] }));
        vi.useFakeTimers();
    });

    afterEach(() => {
        vi.useRealTimers();
        vi.restoreAllMocks();
    });

    test("defaults to metricsDataset when no dataset arg is provided", () => {
        expect(makeApi().dataset).toBe("metricsDataset");
    });

    test("defaults to metricsDataset when empty string is provided", () => {
        expect(makeApi("").dataset).toBe("metricsDataset");
    });

    test("uses the provided custom dataset name", () => {
        expect(makeApi("counterscaleMetrics").dataset).toBe(
            "counterscaleMetrics",
        );
    });

    test("rejects invalid dataset names", () => {
        const construct = () => makeApi("bad name; DROP");
        expect(construct).toThrow(/Invalid Analytics Engine dataset name/);
    });

    test("getCounts emits SQL referencing the custom dataset", async () => {
        const api = makeApi("counterscaleMetrics");

        await api.getCounts("site1", "7d");

        expect(fetchMock).toHaveBeenCalled();
        // The second argument of the first fetch call is the request init;
        // its body carries the SQL text.
        const [, init] = fetchMock.mock.calls[0];
        const sql = init.body as string;
        expect(sql).toContain("FROM counterscaleMetrics");
        expect(sql).not.toContain("FROM metricsDataset");
    });
});
Loading