diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index e07315c..b8bb784 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -26,14 +26,14 @@ jobs: build: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v6.0.2 + - uses: actions/checkout@v7.0.0 with: submodules: true # Sitemap lastmod comes from the latest content commit. fetch-depth: 0 - name: Checkout tago - uses: actions/checkout@v6.0.2 + uses: actions/checkout@v7.0.0 with: repository: tamnd/tago path: .tago-src @@ -107,7 +107,7 @@ jobs: group: cloudflare-pages-cacm-cli cancel-in-progress: true steps: - - uses: actions/checkout@v6.0.2 + - uses: actions/checkout@v7.0.0 with: fetch-depth: 1 sparse-checkout: scripts/ diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 2e424c7..fcc24af 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -68,7 +68,7 @@ jobs: # Tools GoReleaser shells out to for signing and SBOMs. - uses: sigstore/cosign-installer@v3 - - uses: anchore/sbom-action/download-syft@v0 + - uses: anchore/sbom-action/download-syft@v0.24.0 - uses: goreleaser/goreleaser-action@v6 with: diff --git a/cacm/cacm.go b/cacm/cacm.go index d0c2c50..1777380 100644 --- a/cacm/cacm.go +++ b/cacm/cacm.go @@ -72,8 +72,14 @@ func NewClient(cfg Config) *Client { // Feed fetches the RSS/Atom feed at path (e.g. "/feed/") relative to BaseURL // and returns up to limit Article records. limit=0 returns all items. func (c *Client) Feed(ctx context.Context, path string, limit int) ([]Article, error) { - u := c.baseURL + path - body, err := c.get(ctx, u) + return c.FeedURL(ctx, c.baseURL+path, limit) +} + +// FeedURL fetches the RSS/Atom feed at an absolute URL and returns up to limit +// Article records. limit=0 returns all items. Use this for feeds whose base +// URL differs from cfg.BaseURL (e.g. ACM TechNews at technews.acm.org). +func (c *Client) FeedURL(ctx context.Context, feedURL string, limit int) ([]Article, error) { + body, err := c.get(ctx, feedURL) if err != nil { return nil, err } diff --git a/cacm/cacm_test.go b/cacm/cacm_test.go index c3504fc..6cbadfa 100644 --- a/cacm/cacm_test.go +++ b/cacm/cacm_test.go @@ -210,20 +210,169 @@ func TestFeedEmpty(t *testing.T) { func TestKnownSections(t *testing.T) { secs := KnownSections() - if len(secs) != 3 { - t.Fatalf("got %d sections, want 3", len(secs)) + if len(secs) != 4 { + t.Fatalf("got %d sections, want 4", len(secs)) } - for _, s := range secs { + for i, s := range secs { if s.Name == "" { - t.Error("section has empty name") + t.Errorf("sections[%d] has empty Name", i) + } + if s.Slug == "" { + t.Errorf("sections[%d] %q has empty Slug", i, s.Name) } if s.URL == "" { - t.Errorf("section %q has empty URL", s.Name) + t.Errorf("sections[%d] %q has empty URL", i, s.Name) } - if s.Rank == 0 { - t.Errorf("section %q has zero rank", s.Name) + if s.Rank != i+1 { + t.Errorf("sections[%d] Rank=%d, want %d", i, s.Rank, i+1) } } + // technews uses a different base domain + techNews := secs[3] + if techNews.Slug != "technews" { + t.Errorf("secs[3].Slug=%q, want technews", techNews.Slug) + } + if !strings.Contains(techNews.URL, "technews.acm.org") { + t.Errorf("technews URL=%q, want technews.acm.org", techNews.URL) + } +} + +func TestFeedLimitZero(t *testing.T) { + // limit=0 must return all items in the feed. + body := fakeRSS(sampleRSSItem, sampleRSSItem, sampleRSSItem) + c := newTestClient(t, func(w http.ResponseWriter, r *http.Request) { + _, _ = w.Write([]byte(body)) + }) + arts, err := c.Feed(context.Background(), "/feed/", 0) + if err != nil { + t.Fatal(err) + } + if len(arts) != 3 { + t.Errorf("got %d articles with limit=0, want 3", len(arts)) + } +} + +func TestFeedURL(t *testing.T) { + // FeedURL uses an absolute URL, ignoring BaseURL. + body := fakeRSS(sampleRSSItem) + var gotPath string + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + gotPath = r.URL.Path + _, _ = w.Write([]byte(body)) + })) + t.Cleanup(srv.Close) + cfg := DefaultConfig() + cfg.BaseURL = "http://should-not-be-used.invalid" + cfg.Rate = 0 + cfg.Retries = 0 + c := NewClient(cfg) + arts, err := c.FeedURL(context.Background(), srv.URL+"/technews/feed/", 0) + if err != nil { + t.Fatal(err) + } + if len(arts) != 1 { + t.Fatalf("got %d articles, want 1", len(arts)) + } + if gotPath != "/technews/feed/" { + t.Errorf("server saw path %q, want /technews/feed/", gotPath) + } +} + +func TestFeedRSSAuthorFallback(t *testing.T) { + // When dc:creator is absent, should be used. + item := ` +Fallback Author Article +https://cacm.acm.org/test/ +Sat, 14 Jun 2026 10:00:00 +0000 +fallback@example.com (Fallback Author) +Test description. +` + body := fakeRSS(item) + c := newTestClient(t, func(w http.ResponseWriter, r *http.Request) { + _, _ = w.Write([]byte(body)) + }) + arts, err := c.Feed(context.Background(), "/feed/", 0) + if err != nil { + t.Fatal(err) + } + if len(arts) != 1 { + t.Fatalf("got %d articles, want 1", len(arts)) + } + if arts[0].Author == "" { + t.Error("expected non-empty author from fallback") + } +} + +func TestFeedHTMLEntities(t *testing.T) { + // HTML entities in title and description should be decoded. + item := ` +S&P 500 <Rises> +https://cacm.acm.org/test/ +Sat, 14 Jun 2026 10:00:00 +0000 +Jane Smith +Index rose "quickly" says 'source'. +` + body := fakeRSS(item) + c := newTestClient(t, func(w http.ResponseWriter, r *http.Request) { + _, _ = w.Write([]byte(body)) + }) + arts, err := c.Feed(context.Background(), "/feed/", 0) + if err != nil { + t.Fatal(err) + } + if arts[0].Title != "S&P 500 " { + t.Errorf("title = %q", arts[0].Title) + } +} + +func TestAtomFallbackURL(t *testing.T) { + // Atom entry with no should use as URL. + entry := ` +No Link Entry +https://cacm.acm.org/fallback-id/ +2026-06-14T10:00:00Z +Test Author +Summary text. +` + body := fakeAtom(entry) + c := newTestClient(t, func(w http.ResponseWriter, r *http.Request) { + _, _ = w.Write([]byte(body)) + }) + arts, err := c.Feed(context.Background(), "/feed/", 0) + if err != nil { + t.Fatal(err) + } + if len(arts) != 1 { + t.Fatalf("got %d articles, want 1", len(arts)) + } + if !strings.Contains(arts[0].URL, "fallback-id") { + t.Errorf("URL fallback to not used: %q", arts[0].URL) + } +} + +func TestAtomUpdatedFallback(t *testing.T) { + // When is absent, should be used for date. + entry := ` +Updated Only Entry + +2026-06-14T10:00:00Z +Test Author +Summary text. +` + body := fakeAtom(entry) + c := newTestClient(t, func(w http.ResponseWriter, r *http.Request) { + _, _ = w.Write([]byte(body)) + }) + arts, err := c.Feed(context.Background(), "/feed/", 0) + if err != nil { + t.Fatal(err) + } + if len(arts) != 1 { + t.Fatalf("got %d articles, want 1", len(arts)) + } + if arts[0].Published != "2026-06-14 10:00" { + t.Errorf("published = %q, want 2026-06-14 10:00", arts[0].Published) + } } func TestParseDate(t *testing.T) { diff --git a/cacm/types.go b/cacm/types.go index 39cfdfe..ca8565e 100644 --- a/cacm/types.go +++ b/cacm/types.go @@ -20,6 +20,7 @@ type Article struct { type Section struct { Rank int `json:"rank"` Name string `json:"name"` + Slug string `json:"slug"` URL string `json:"url"` } @@ -180,7 +181,8 @@ func stripTags(s string) string { // knownSections is the canonical list of CACM feed sections. var knownSections = []Section{ - {Rank: 1, Name: "top", URL: "https://cacm.acm.org/feed/"}, - {Rank: 2, Name: "blogs", URL: "https://cacm.acm.org/blogs/feed/"}, - {Rank: 3, Name: "magazine", URL: "https://cacm.acm.org/magazines/feed/"}, + {Rank: 1, Name: "Top Articles", Slug: "top", URL: "https://cacm.acm.org/feed/"}, + {Rank: 2, Name: "Blogs", Slug: "blogs", URL: "https://cacm.acm.org/blogs/feed/"}, + {Rank: 3, Name: "Magazine", Slug: "magazine", URL: "https://cacm.acm.org/magazines/feed/"}, + {Rank: 4, Name: "ACM TechNews", Slug: "technews", URL: "https://technews.acm.org/feed/"}, } diff --git a/cli/cmd_feed.go b/cli/cmd_feed.go index c0f6471..2d1b84d 100644 --- a/cli/cmd_feed.go +++ b/cli/cmd_feed.go @@ -4,7 +4,8 @@ import ( "github.com/spf13/cobra" ) -// feedCmd builds a command that fetches a CACM RSS/Atom feed at a fixed path. +// feedCmd builds a command that fetches a CACM RSS/Atom feed at a fixed path +// relative to the configured BaseURL. func (a *App) feedCmd(use, short, path string, defaultLimit int) *cobra.Command { return &cobra.Command{ Use: use, @@ -20,3 +21,23 @@ func (a *App) feedCmd(use, short, path string, defaultLimit int) *cobra.Command }, } } + +// techNewsCmd fetches the ACM TechNews feed from its own subdomain. +// Unlike the other feeds, TechNews lives at technews.acm.org, so FeedURL +// is used with an absolute URL rather than a path relative to BaseURL. +func (a *App) techNewsCmd() *cobra.Command { + const techNewsURL = "https://technews.acm.org/feed/" + return &cobra.Command{ + Use: "technews", + Short: "ACM TechNews newsletter digest (3x/week)", + RunE: func(cmd *cobra.Command, _ []string) error { + n := a.effectiveLimit(20) + a.progressf("fetching technews...") + arts, err := a.client.FeedURL(cmd.Context(), techNewsURL, n) + if err != nil { + return codeError(exitError, err) + } + return a.renderOrEmpty(arts, len(arts)) + }, + } +} diff --git a/cli/root.go b/cli/root.go index 345534a..5c6882d 100644 --- a/cli/root.go +++ b/cli/root.go @@ -91,6 +91,7 @@ Computing Machinery.`, app.feedCmd("top", "Latest CACM articles from main feed", "/feed/", 20), app.feedCmd("blogs", "CACM blog posts and opinion pieces", "/blogs/feed/", 20), app.feedCmd("magazine", "CACM magazine articles", "/magazines/feed/", 20), + app.techNewsCmd(), app.sectionsCmd(), newVersionCmd(), )