Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
99 changes: 90 additions & 9 deletions internal/dank/dank.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
package dank

import (
"context"
"errors"
"fmt"
"math/big"
"regexp"
Expand All @@ -9,6 +11,12 @@ import (
"strings"
)

// ErrResultLimitReached is returned by GenerateAtFixedLengthWithLimit and
// GenerateAtFixedLengthWithContext when their respective max-results cap is
// reached during the DFS. The partial result slice is still returned
// alongside the error so callers can use it.
//
// The DFS records this error as soon as the number of collected results
// reaches the cap, without looking ahead for further matches. It is therefore
// also returned when the language holds exactly maxResults strings of the
// requested length; in that case the returned slice is in fact complete.
var ErrResultLimitReached = errors.New("dank: result limit reached")

// DankEncoder implementation matching Python's C++ backend exactly
// Uses Brzozowski's algorithm for DFA minimization

Expand Down Expand Up @@ -464,31 +472,101 @@ func (d *DankEncoder) NumWords(minLen, maxLen int) int64 {
return total.Int64()
}

// GenerateAtFixedLength returns all strings of exactly fixedLen
// GenerateAtFixedLength returns all strings of exactly fixedLen.
//
// Note: this method has no exit condition — it walks the entire DFA. For
// inputs that produce a very large language, prefer
// GenerateAtFixedLengthWithLimit or GenerateAtFixedLengthWithContext.
// See https://github.com/projectdiscovery/alterx/issues/285.
//
// A negative fixedLen returns an empty slice (rather than recursing
// forever); use GenerateAtFixedLengthWithLimit or
// GenerateAtFixedLengthWithContext if you need to surface
// ErrInvalidFixedLength explicitly.
func (d *DankEncoder) GenerateAtFixedLength(fixedLen int) []string {
var results []string
d.dfsGenerateFixed(0, "", fixedLen, &results)
sort.Strings(results)
results, _ := d.generateAtFixedLength(context.Background(), fixedLen, 0)
Comment thread
coderabbitai[bot] marked this conversation as resolved.
return results
}

// dfsGenerateFixed generates only strings of exact length
func (d *DankEncoder) dfsGenerateFixed(state int, curr string, remaining int, results *[]string) {
// GenerateAtFixedLengthWithLimit enumerates strings of exactly fixedLen,
// capped at maxResults entries. It delegates to the shared generator with a
// background context, so the cap is the only early-exit condition. When the
// cap is reached the sorted results gathered so far are returned together
// with ErrResultLimitReached. Passing maxResults <= 0 turns the cap off.
func (d *DankEncoder) GenerateAtFixedLengthWithLimit(fixedLen, maxResults int) ([]string, error) {
	// No caller-supplied context here: a background context never cancels.
	background := context.Background()
	results, err := d.generateAtFixedLength(background, fixedLen, maxResults)
	return results, err
}

// GenerateAtFixedLengthWithContext enumerates strings of exactly fixedLen,
// stopping early when ctx is cancelled / past its deadline or when maxResults
// entries have been collected. Pass maxResults <= 0 to disable the cap and
// rely on ctx alone. The sorted partial results are returned alongside the
// error that stopped the walk.
//
// A nil ctx is tolerated and treated as context.Background().
func (d *DankEncoder) GenerateAtFixedLengthWithContext(ctx context.Context, fixedLen, maxResults int) ([]string, error) {
	// Normalise a nil context up front so the recursive ctx.Err() calls
	// further down never dereference a nil interface.
	effective := ctx
	if effective == nil {
		effective = context.Background()
	}
	return d.generateAtFixedLength(effective, fixedLen, maxResults)
}

// ErrInvalidFixedLength is returned by GenerateAtFixedLengthWithLimit and
// GenerateAtFixedLengthWithContext when fixedLen is negative, which would
// otherwise cause the DFS to recurse indefinitely with an ever-decreasing
// remaining counter. GenerateAtFixedLength has no error return: it discards
// this error and yields a nil slice instead.
var ErrInvalidFixedLength = errors.New("dank: fixedLen must be >= 0")

// generateAtFixedLength backs all three public fixed-length generators.
//
// A negative fixedLen is rejected immediately with a nil slice and
// ErrInvalidFixedLength. Otherwise the DFS starts at state 0 with an empty
// prefix, and whatever it collected is sorted before being returned together
// with the error (if any) recorded by the walk. Supplying a background
// context and maxResults == 0 reproduces the historical unbounded behaviour.
func (d *DankEncoder) generateAtFixedLength(ctx context.Context, fixedLen, maxResults int) ([]string, error) {
	if fixedLen < 0 {
		return nil, ErrInvalidFixedLength
	}

	var collected []string
	var walkErr error
	d.dfsGenerateFixed(ctx, 0, "", fixedLen, maxResults, &collected, &walkErr)

	// Sort even on early exit so partial output is ordered too.
	sort.Strings(collected)
	return collected, walkErr
}
Comment thread
coderabbitai[bot] marked this conversation as resolved.

// dfsGenerateFixed generates strings of exact length, aborting on ctx
// cancellation or once len(*results) reaches maxResults (when > 0). The
// outErr slot lets the recursion bubble up the cancellation cause so the
// public caller can distinguish "completed naturally" from "stopped early".
func (d *DankEncoder) dfsGenerateFixed(ctx context.Context, state int, curr string, remaining, maxResults int, results *[]string, outErr *error) {
// Skip dead state (last state in DFA)
deadState := len(d.dfa) - 1
if state == deadState {
return
}

// Bail if a previous branch already triggered cancellation / limit-reached.
if *outErr != nil {
return
}

// Cooperatively respect context cancellation. ctx.Err() is cheap and
// short-circuits before the recursion fans out further.
if err := ctx.Err(); err != nil {
*outErr = err
return
Comment thread
coderabbitai[bot] marked this conversation as resolved.
}

if remaining == 0 {
if d.dfa[state].IsFinal {
*results = append(*results, curr)
if maxResults > 0 && len(*results) >= maxResults {
*outErr = ErrResultLimitReached
}
}
return
}

// Iterate over actual transitions (sorted for deterministic output)
// Can't just use alphabet because pattern may have characters outside alphabet (like *)
// Iterate over actual transitions (sorted for deterministic output).
// Can't just use alphabet because pattern may have characters outside alphabet (like *).
chars := []byte{}
for ch := range d.dfa[state].Trans {
chars = append(chars, ch)
Expand All @@ -499,7 +577,10 @@ func (d *DankEncoder) dfsGenerateFixed(state int, curr string, remaining int, re
next := d.dfa[state].Trans[ch]
// Don't transition to dead state during generation
if next != deadState {
d.dfsGenerateFixed(next, curr+string(ch), remaining-1, results)
d.dfsGenerateFixed(ctx, next, curr+string(ch), remaining-1, maxResults, results, outErr)
if *outErr != nil {
return
}
}
}
}
Expand Down
125 changes: 125 additions & 0 deletions internal/dank/dank_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
package dank

import (
"context"
"errors"
"testing"
"time"
)

// regex that matches strings of the form "a[0-2]" (3 strings: a0, a1, a2).
const smallRegex = "a[0-2]"

// regex that explodes: five lowercase letters followed by a single digit. The
// fixed-length generation walk is bounded above by the alphabet size (~39)
// raised to fixedLen; for this pattern fixedLen=6 yields exactly
// 26^5 * 10 = 118,813,760 strings — enough that any of the bounded variants
// should bail long before the unbounded one would finish.
const explodingRegex = "[a-z][a-z][a-z][a-z][a-z][0-9]"

// TestGenerateAtFixedLength_BackwardsCompat checks that the legacy,
// error-free entry point still returns every match for a small pattern.
func TestGenerateAtFixedLength_BackwardsCompat(t *testing.T) {
	expected := []string{"a0", "a1", "a2"}
	encoder := NewDankEncoder(smallRegex, 16)
	actual := encoder.GenerateAtFixedLength(2)
	if equalStringSlices(actual, expected) {
		return
	}
	t.Fatalf("GenerateAtFixedLength(2) = %v, want %v", actual, expected)
}

// TestGenerateAtFixedLengthWithLimit_HitsCap verifies that a cap smaller than
// the language size stops generation with ErrResultLimitReached and returns
// exactly the first maxResults strings.
func TestGenerateAtFixedLengthWithLimit_HitsCap(t *testing.T) {
	d := NewDankEncoder(smallRegex, 16)
	got, err := d.GenerateAtFixedLengthWithLimit(2, 2)
	if !errors.Is(err, ErrResultLimitReached) {
		t.Fatalf("expected ErrResultLimitReached, got %v", err)
	}
	// Compare contents rather than only the length: a length check alone
	// would still pass if the generator returned the wrong strings.
	want := []string{"a0", "a1"}
	if !equalStringSlices(got, want) {
		t.Fatalf("GenerateAtFixedLengthWithLimit(2, 2) = %v, want %v", got, want)
	}
}

// TestGenerateAtFixedLengthWithLimit_NoCap verifies that maxResults == 0
// disables the cap: the full language is returned with a nil error.
func TestGenerateAtFixedLengthWithLimit_NoCap(t *testing.T) {
	d := NewDankEncoder(smallRegex, 16)
	got, err := d.GenerateAtFixedLengthWithLimit(2, 0)
	if err != nil {
		t.Fatalf("unexpected error with maxResults=0: %v", err)
	}
	// Compare contents rather than only the length so a generator that
	// emits three wrong strings cannot slip through.
	want := []string{"a0", "a1", "a2"}
	if !equalStringSlices(got, want) {
		t.Fatalf("GenerateAtFixedLengthWithLimit(2, 0) = %v, want %v", got, want)
	}
}
Comment on lines +28 to +48

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor | ⚡ Quick win

Assert the actual result slices, not just the lengths.

These checks would still pass if the generator returned the wrong strings in sorted order. Verifying the exact capped and uncapped slices would better lock down the new limit behavior.

♻️ Suggested tightening for the tests
 func TestGenerateAtFixedLengthWithLimit_HitsCap(t *testing.T) {
 	d := NewDankEncoder(smallRegex, 16)
 	got, err := d.GenerateAtFixedLengthWithLimit(2, 2)
 	if !errors.Is(err, ErrResultLimitReached) {
 		t.Fatalf("expected ErrResultLimitReached, got %v", err)
 	}
-	if len(got) != 2 {
-		t.Fatalf("expected exactly 2 results at the cap, got %d (%v)", len(got), got)
+	want := []string{"a0", "a1"}
+	if !equalStringSlices(got, want) {
+		t.Fatalf("GenerateAtFixedLengthWithLimit(2, 2) = %v, want %v", got, want)
 	}
 }
 
 func TestGenerateAtFixedLengthWithLimit_NoCap(t *testing.T) {
 	d := NewDankEncoder(smallRegex, 16)
 	got, err := d.GenerateAtFixedLengthWithLimit(2, 0)
 	if err != nil {
 		t.Fatalf("unexpected error with maxResults=0: %v", err)
 	}
-	if len(got) != 3 {
-		t.Fatalf("expected 3 results without cap, got %d (%v)", len(got), got)
+	want := []string{"a0", "a1", "a2"}
+	if !equalStringSlices(got, want) {
+		t.Fatalf("GenerateAtFixedLengthWithLimit(2, 0) = %v, want %v", got, want)
 	}
 }
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@internal/dank/dank_test.go` around lines 28 - 48, Update the two tests
TestGenerateAtFixedLengthWithLimit_HitsCap and
TestGenerateAtFixedLengthWithLimit_NoCap to assert the actual returned slices
from NewDankEncoder(...).GenerateAtFixedLengthWithLimit rather than only
checking lengths; specifically, capture the returned slice (got) and compare it
to the expected ordered string slices for the capped case (expected two values)
and the uncapped case (expected three values), keeping the existing
ErrResultLimitReached assertion for the cap test and the nil-error assertion for
the no-cap test so failures show both error and content mismatches; use the
function/method names NewDankEncoder and GenerateAtFixedLengthWithLimit to
locate where to change assertions.


// TestGenerateAtFixedLengthWithLimit_NegativeFixedLen checks that a negative
// fixedLen is rejected with ErrInvalidFixedLength and no results.
func TestGenerateAtFixedLengthWithLimit_NegativeFixedLen(t *testing.T) {
	encoder := NewDankEncoder(smallRegex, 16)
	results, genErr := encoder.GenerateAtFixedLengthWithLimit(-1, 10)
	switch {
	case !errors.Is(genErr, ErrInvalidFixedLength):
		t.Fatalf("expected ErrInvalidFixedLength for negative fixedLen, got %v", genErr)
	case len(results) != 0:
		t.Fatalf("expected empty slice on validation failure, got %v", results)
	}
}

// TestGenerateAtFixedLengthWithContext_NilContext checks that a nil context
// is accepted and behaves like an uncancelled one.
func TestGenerateAtFixedLengthWithContext_NilContext(t *testing.T) {
	// The nil goes through a typed variable instead of a literal at the call
	// site so staticcheck SA1012 stays quiet; the nil guard in the public
	// entry point is exactly what this test exercises, and it must not panic.
	var nilCtx context.Context

	encoder := NewDankEncoder(smallRegex, 16)
	got, err := encoder.GenerateAtFixedLengthWithContext(nilCtx, 2, 0)
	if err != nil {
		t.Fatalf("expected nil error with nil ctx, got %v", err)
	}
	if want := []string{"a0", "a1", "a2"}; !equalStringSlices(got, want) {
		t.Fatalf("GenerateAtFixedLengthWithContext(nil, 2, 0) = %v, want %v", got, want)
	}
}

// TestGenerateAtFixedLengthWithContext_Cancellation verifies that a context
// deadline aborts the DFS promptly on a very large language, and that the
// work done before the deadline is returned as a non-empty, sorted slice.
func TestGenerateAtFixedLengthWithContext_Cancellation(t *testing.T) {
	d := NewDankEncoder(explodingRegex, 16)
	ctx, cancel := context.WithTimeout(context.Background(), 25*time.Millisecond)
	defer cancel()

	start := time.Now()
	got, err := d.GenerateAtFixedLengthWithContext(ctx, 6, 0)
	elapsed := time.Since(start)

	if !errors.Is(err, context.DeadlineExceeded) && !errors.Is(err, context.Canceled) {
		t.Fatalf("expected context cancellation error, got %v", err)
	}
	if elapsed > 500*time.Millisecond {
		t.Fatalf("DFS did not honour context deadline (took %s)", elapsed)
	}
	// An empty slice is trivially sorted, so require some output first;
	// otherwise the test would pass even if cancellation dropped all work.
	if len(got) == 0 {
		t.Fatal("expected partial results before cancellation")
	}
	// Partial result slice should still be returned and sorted.
	if !isSorted(got) {
		t.Fatalf("partial results should be sorted, got %v", got[:min(10, len(got))])
	}
}
Comment on lines +78 to +97

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor | ⚡ Quick win

Prove the cancellation path preserves partial output.

An empty slice is still sorted, so this test can pass even if the implementation drops all work on cancellation. Add an assertion that some partial results were produced, or otherwise check a deterministic prefix, so the test actually exercises the contract.

♻️ Suggested tightening for the cancellation test
 func TestGenerateAtFixedLengthWithContext_Cancellation(t *testing.T) {
 	d := NewDankEncoder(explodingRegex, 16)
 	ctx, cancel := context.WithTimeout(context.Background(), 25*time.Millisecond)
 	defer cancel()
@@
 	if elapsed > 500*time.Millisecond {
 		t.Fatalf("DFS did not honour context deadline (took %s)", elapsed)
 	}
 	// Partial result slice should still be returned and sorted.
+	if len(got) == 0 {
+		t.Fatal("expected partial results before cancellation")
+	}
 	if !isSorted(got) {
 		t.Fatalf("partial results should be sorted, got %v", got[:min(10, len(got))])
 	}
 }
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@internal/dank/dank_test.go` around lines 78 - 97, The cancellation test
currently allows an empty slice to pass as "sorted"; update
TestGenerateAtFixedLengthWithContext_Cancellation to assert that the partial
result is non-empty (e.g. require len(got) > 0) or validate a deterministic
prefix value rather than only isSorted, so that NewDankEncoder +
GenerateAtFixedLengthWithContext actually returns some work on cancellation;
keep the existing context deadline checks and the isSorted assertion, but add a
concrete check on got (length or known prefix) to prove partial output is
preserved.


// equalStringSlices reports whether a and b contain the same strings in the
// same order. Only lengths and elements are compared, so a nil slice and an
// empty slice are considered equal.
func equalStringSlices(a, b []string) bool {
	mismatch := len(a) != len(b)
	for i := 0; !mismatch && i < len(a); i++ {
		mismatch = a[i] != b[i]
	}
	return !mismatch
}

// isSorted reports whether s is in non-decreasing order. Equal neighbours are
// allowed; empty and single-element slices are trivially sorted.
func isSorted(s []string) bool {
	if len(s) < 2 {
		return true
	}
	prev := s[0]
	for _, cur := range s[1:] {
		if cur < prev {
			return false
		}
		prev = cur
	}
	return true
}

// min returns the smaller of a and b.
//
// NOTE(review): Go 1.21+ ships a built-in min; this package-level helper
// shadows it. Confirm the module's Go version before removing the helper.
func min(a, b int) int {
	if b <= a {
		return b
	}
	return a
}