// internal/index/invert/build_test.go

package invert

import (
	"testing"
)

// TestBuilderAddAndBuild adds two documents to a builder created with
// DisableStemming, builds the index, and checks three things: the index holds
// two documents, the title word "hello" is present as a term, and the body
// word "language" (which occurs in both documents) has two postings.
func TestBuilderAddAndBuild(t *testing.T) {
	t.Parallel()

	builder := NewBuilder().DisableStemming()
	for _, doc := range []Doc{
		{ID: "1", Title: "hello world", Body: "go is a language", URL: "/1"},
		{ID: "2", Title: "goodbye", Body: "rust is another language", URL: "/2"},
	} {
		builder.Add(doc)
	}
	index := builder.Build()

	if got := len(index.Docs); got != 2 {
		t.Errorf("expected 2 docs, got %d", got)
	}
	if _, found := index.Terms["hello"]; !found {
		t.Error("missing term 'hello'")
	}
	if got := len(index.Terms["language"]); got != 2 {
		t.Errorf("term 'language' should be in both docs, got %d", got)
	}
}

// TestBuilderPositions indexes a single document whose body contains "apple"
// three times and checks that the term has exactly one posting, and that the
// posting's Frequency and its number of recorded Positions are both 3.
func TestBuilderPositions(t *testing.T) {
	t.Parallel()
	b := NewBuilder().DisableStemming()
	b.Add(Doc{ID: "x", Title: "", Body: "apple banana apple pear apple"})
	idx := b.Build()
	posts := idx.Terms["apple"]
	// Fatal rather than Error: the checks below index posts[0].
	if len(posts) != 1 {
		t.Fatalf("expected 1 posting, got %d", len(posts))
	}
	if posts[0].Frequency != 3 {
		t.Errorf("frequency = %d, want 3", posts[0].Frequency)
	}
	// Report the expected value too, matching the frequency check above.
	if got := len(posts[0].Positions); got != 3 {
		t.Errorf("positions len = %d, want 3", got)
	}
}

// TestSearchRanking indexes three documents whose bodies contain the term
// "go" one, two, and three times, then searches for "go" and expects the hits
// to come back ordered "high", "mid", "low".
func TestSearchRanking(t *testing.T) {
	t.Parallel()

	builder := NewBuilder().DisableStemming()
	builder.Add(Doc{ID: "low", Body: "go"})
	builder.Add(Doc{ID: "high", Body: "go go go"})
	builder.Add(Doc{ID: "mid", Body: "go go"})
	index := builder.Build()

	results := index.Search([]string{"go"})
	// Fatal rather than Error: the ordering check below indexes results[0..2].
	if n := len(results); n != 3 {
		t.Fatalf("expected 3 hits, got %d", n)
	}
	if !(results[0] == "high" && results[1] == "mid" && results[2] == "low") {
		t.Errorf("ranking wrong: %v", results)
	}
}

// TestStemmedSearch uses a builder on which DisableStemming is NOT called,
// indexes a document whose body is "running runner runs", and expects a
// search for "run" to return exactly that document.
func TestStemmedSearch(t *testing.T) {
	t.Parallel()

	builder := NewBuilder()
	builder.Add(Doc{ID: "a", Body: "running runner runs"})
	index := builder.Build()

	results := index.Search([]string{"run"})
	if matched := len(results) == 1 && results[0] == "a"; !matched {
		t.Errorf("expected stemmed match, got %v", results)
	}
}

// TestTagsAreIndexed indexes a document carrying the single tag "golang" and
// checks that the tag shows up as a key in idx.Terms and that the document's
// entry in idx.Docs still lists exactly that tag.
func TestTagsAreIndexed(t *testing.T) {
	t.Parallel()
	b := NewBuilder().DisableStemming()
	b.Add(Doc{ID: "p", Title: "untagged body", Tags: []string{"golang"}})
	idx := b.Build()
	if _, ok := idx.Terms["golang"]; !ok {
		t.Error("tag not indexed as term")
	}
	meta := idx.Docs["p"]
	if len(meta.Tags) != 1 || meta.Tags[0] != "golang" {
		t.Errorf("tags not preserved on meta: %+v", meta)
	}
}