Skip to content

Commit ce45b7c

Browse files
committed
fix
1 parent 91610a9 commit ce45b7c

File tree

31 files changed

+294
-160
lines changed

31 files changed

+294
-160
lines changed

modules/git/grep.go

+22-6
Original file line numberDiff line numberDiff line change
@@ -23,11 +23,19 @@ type GrepResult struct {
2323
LineCodes []string
2424
}
2525

26+
// GrepModeType selects how GrepSearch turns the search string into "git grep" patterns.
type GrepModeType string
27+
28+
const (
29+
// GrepModeExact passes the whole search string as a single --fixed-strings pattern.
GrepModeExact GrepModeType = "exact"
30+
// GrepModeWords splits the search string on whitespace and passes each word as its own
// --fixed-strings --ignore-case pattern, joined with --and. GrepSearch also falls back to
// this mode for any value that is neither "exact" nor "regexp".
GrepModeWords GrepModeType = "words"
31+
// GrepModeRegexp passes the whole search string as a single --perl-regexp pattern.
GrepModeRegexp GrepModeType = "regexp"
32+
)
33+
2634
type GrepOptions struct {
2735
RefName string
2836
MaxResultLimit int
2937
ContextLineNumber int
30-
IsFuzzy bool
38+
GrepMode GrepModeType
3139
MaxLineLength int // the maximum length of a line to parse, exceeding chars will be truncated
3240
PathspecList []string
3341
}
@@ -52,15 +60,23 @@ func GrepSearch(ctx context.Context, repo *Repository, search string, opts GrepO
5260
2^@repo: go-gitea/gitea
5361
*/
5462
var results []*GrepResult
55-
cmd := NewCommand("grep", "--null", "--break", "--heading", "--fixed-strings", "--line-number", "--ignore-case", "--full-name")
63+
cmd := NewCommand("grep", "--null", "--break", "--heading", "--line-number", "--full-name")
5664
cmd.AddOptionValues("--context", fmt.Sprint(opts.ContextLineNumber))
57-
if opts.IsFuzzy {
65+
if opts.GrepMode == GrepModeExact {
66+
cmd.AddArguments("--fixed-strings")
67+
cmd.AddOptionValues("-e", strings.TrimLeft(search, "-"))
68+
} else if opts.GrepMode == GrepModeRegexp {
69+
cmd.AddArguments("--perl-regexp")
70+
cmd.AddOptionValues("-e", strings.TrimLeft(search, "-"))
71+
} else /* words */ {
5872
words := strings.Fields(search)
59-
for _, word := range words {
73+
cmd.AddArguments("--fixed-strings", "--ignore-case")
74+
for i, word := range words {
6075
cmd.AddOptionValues("-e", strings.TrimLeft(word, "-"))
76+
if i < len(words)-1 {
77+
cmd.AddOptionValues("--and")
78+
}
6179
}
62-
} else {
63-
cmd.AddOptionValues("-e", strings.TrimLeft(search, "-"))
6480
}
6581
cmd.AddDynamicArguments(util.IfZero(opts.RefName, "HEAD"))
6682
cmd.AddDashesAndList(opts.PathspecList...)

modules/indexer/code/bleve/bleve.go

+9-10
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ import (
1717
"code.gitea.io/gitea/modules/charset"
1818
"code.gitea.io/gitea/modules/git"
1919
"code.gitea.io/gitea/modules/gitrepo"
20+
"code.gitea.io/gitea/modules/indexer"
2021
path_filter "code.gitea.io/gitea/modules/indexer/code/bleve/token/path"
2122
"code.gitea.io/gitea/modules/indexer/code/internal"
2223
indexer_internal "code.gitea.io/gitea/modules/indexer/internal"
@@ -136,6 +137,10 @@ type Indexer struct {
136137
indexer_internal.Indexer // do not composite inner_bleve.Indexer directly to avoid exposing too much
137138
}
138139

140+
// SupportedSearchModes reports the search modes this bleve indexer offers:
// the list built by indexer.SearchModesExactWords.
func (b *Indexer) SupportedSearchModes() []indexer.SearchMode {
141+
return indexer.SearchModesExactWords()
142+
}
143+
139144
// NewIndexer creates a new bleve local indexer
140145
func NewIndexer(indexDir string) *Indexer {
141146
inner := inner_bleve.NewIndexer(indexDir, repoIndexerLatestVersion, generateBleveIndexMapping)
@@ -267,20 +272,14 @@ func (b *Indexer) Search(ctx context.Context, opts *internal.SearchOptions) (int
267272
pathQuery.FieldVal = "Filename"
268273
pathQuery.SetBoost(10)
269274

270-
keywordAsPhrase, isPhrase := internal.ParseKeywordAsPhrase(opts.Keyword)
271-
if isPhrase {
272-
q := bleve.NewMatchPhraseQuery(keywordAsPhrase)
275+
if opts.SearchMode == indexer.SearchModeExact {
276+
q := bleve.NewMatchPhraseQuery(opts.Keyword)
273277
q.FieldVal = "Content"
274-
if opts.IsKeywordFuzzy {
275-
q.Fuzziness = inner_bleve.GuessFuzzinessByKeyword(keywordAsPhrase)
276-
}
277278
contentQuery = q
278-
} else {
279+
} else /* words */ {
279280
q := bleve.NewMatchQuery(opts.Keyword)
280281
q.FieldVal = "Content"
281-
if opts.IsKeywordFuzzy {
282-
q.Fuzziness = inner_bleve.GuessFuzzinessByKeyword(opts.Keyword)
283-
}
282+
q.Operator = query.MatchQueryOperatorAnd
284283
contentQuery = q
285284
}
286285

modules/indexer/code/elasticsearch/elasticsearch.go

+9-10
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ import (
1616
"code.gitea.io/gitea/modules/charset"
1717
"code.gitea.io/gitea/modules/git"
1818
"code.gitea.io/gitea/modules/gitrepo"
19+
"code.gitea.io/gitea/modules/indexer"
1920
"code.gitea.io/gitea/modules/indexer/code/internal"
2021
indexer_internal "code.gitea.io/gitea/modules/indexer/internal"
2122
inner_elasticsearch "code.gitea.io/gitea/modules/indexer/internal/elasticsearch"
@@ -24,7 +25,6 @@ import (
2425
"code.gitea.io/gitea/modules/setting"
2526
"code.gitea.io/gitea/modules/timeutil"
2627
"code.gitea.io/gitea/modules/typesniffer"
27-
"code.gitea.io/gitea/modules/util"
2828

2929
"github.com/go-enry/go-enry/v2"
3030
"github.com/olivere/elastic/v7"
@@ -46,6 +46,10 @@ type Indexer struct {
4646
indexer_internal.Indexer // do not composite inner_elasticsearch.Indexer directly to avoid exposing too much
4747
}
4848

49+
// SupportedSearchModes reports the search modes this elasticsearch indexer offers:
// the list built by indexer.SearchModesExactWords.
func (b *Indexer) SupportedSearchModes() []indexer.SearchMode {
50+
return indexer.SearchModesExactWords()
51+
}
52+
4953
// NewIndexer creates a new elasticsearch indexer
5054
func NewIndexer(url, indexerName string) *Indexer {
5155
inner := inner_elasticsearch.NewIndexer(url, indexerName, esRepoIndexerLatestVersion, defaultMapping)
@@ -361,15 +365,10 @@ func extractAggs(searchResult *elastic.SearchResult) []*internal.SearchResultLan
361365
// Search searches for codes and language stats by given conditions.
362366
func (b *Indexer) Search(ctx context.Context, opts *internal.SearchOptions) (int64, []*internal.SearchResult, []*internal.SearchResultLanguages, error) {
363367
var contentQuery elastic.Query
364-
keywordAsPhrase, isPhrase := internal.ParseKeywordAsPhrase(opts.Keyword)
365-
if isPhrase {
366-
contentQuery = elastic.NewMatchPhraseQuery("content", keywordAsPhrase)
367-
} else {
368-
// TODO: this is the old logic, but not really using "fuzziness"
369-
// * IsKeywordFuzzy=true: "best_fields"
370-
// * IsKeywordFuzzy=false: "phrase_prefix"
371-
contentQuery = elastic.NewMultiMatchQuery("content", opts.Keyword).
372-
Type(util.Iif(opts.IsKeywordFuzzy, esMultiMatchTypeBestFields, esMultiMatchTypePhrasePrefix))
368+
if opts.SearchMode == indexer.SearchModeExact {
369+
contentQuery = elastic.NewMatchPhraseQuery("content", opts.Keyword)
370+
} else /* words */ {
371+
contentQuery = elastic.NewMultiMatchQuery("content", opts.Keyword).Type(esMultiMatchTypeBestFields).Operator("and")
373372
}
374373
kwQuery := elastic.NewBoolQuery().Should(
375374
contentQuery,

modules/indexer/code/gitgrep/gitgrep.go

+9-3
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ import (
99
"strings"
1010

1111
"code.gitea.io/gitea/modules/git"
12+
"code.gitea.io/gitea/modules/indexer"
1213
code_indexer "code.gitea.io/gitea/modules/indexer/code"
1314
"code.gitea.io/gitea/modules/setting"
1415
)
@@ -23,11 +24,16 @@ func indexSettingToGitGrepPathspecList() (list []string) {
2324
return list
2425
}
2526

26-
func PerformSearch(ctx context.Context, page int, repoID int64, gitRepo *git.Repository, ref git.RefName, keyword string, isFuzzy bool) (searchResults []*code_indexer.Result, total int, err error) {
27-
// TODO: it should also respect ParseKeywordAsPhrase and clarify the "fuzzy" behavior
27+
func PerformSearch(ctx context.Context, page int, repoID int64, gitRepo *git.Repository, ref git.RefName, keyword string, searchMode indexer.SearchModeType) (searchResults []*code_indexer.Result, total int, err error) {
28+
grepMode := git.GrepModeWords
29+
if searchMode == indexer.SearchModeExact {
30+
grepMode = git.GrepModeExact
31+
} else if searchMode == indexer.SearchModeRegexp {
32+
grepMode = git.GrepModeRegexp
33+
}
2834
res, err := git.GrepSearch(ctx, gitRepo, keyword, git.GrepOptions{
2935
ContextLineNumber: 1,
30-
IsFuzzy: isFuzzy,
36+
GrepMode: grepMode,
3137
RefName: ref.String(),
3238
PathspecList: indexSettingToGitGrepPathspecList(),
3339
})

modules/indexer/code/indexer.go

+9
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ import (
1414
"code.gitea.io/gitea/models/db"
1515
repo_model "code.gitea.io/gitea/models/repo"
1616
"code.gitea.io/gitea/modules/graceful"
17+
"code.gitea.io/gitea/modules/indexer"
1718
"code.gitea.io/gitea/modules/indexer/code/bleve"
1819
"code.gitea.io/gitea/modules/indexer/code/elasticsearch"
1920
"code.gitea.io/gitea/modules/indexer/code/internal"
@@ -302,3 +303,11 @@ func populateRepoIndexer(ctx context.Context) {
302303
}
303304
log.Info("Done (re)populating the repo indexer with existing repositories")
304305
}
306+
307+
// SupportedSearchModes returns the search modes of the indexer currently held in
// globalIndexer. It returns nil when globalIndexer.Load() yields nil, i.e. no indexer
// has been stored yet.
func SupportedSearchModes() []indexer.SearchMode {
308+
gi := globalIndexer.Load()
309+
if gi == nil {
310+
return nil
311+
}
312+
// gi is a pointer to the indexer value, so dereference it before delegating.
return (*gi).SupportedSearchModes()
313+
}

modules/indexer/code/indexer_test.go

+6-7
Original file line numberDiff line numberDiff line change
@@ -193,7 +193,7 @@ func testIndexer(name string, t *testing.T, indexer internal.Indexer) {
193193
},
194194
},
195195
},
196-
// Search for an exact match on the filename within the repo '62' (case insenstive).
196+
// Search for an exact match on the filename within the repo '62' (case-insensitive).
197197
// This scenario yields a single result (the file avocado.md on the repo '62')
198198
{
199199
RepoIDs: []int64{62},
@@ -206,7 +206,7 @@ func testIndexer(name string, t *testing.T, indexer internal.Indexer) {
206206
},
207207
},
208208
},
209-
// Search for matches on the contents of files when the criteria is a expression.
209+
// Search for matches on the contents of files when the search criterion is an expression.
210210
{
211211
RepoIDs: []int64{62},
212212
Keyword: "console.log",
@@ -218,7 +218,7 @@ func testIndexer(name string, t *testing.T, indexer internal.Indexer) {
218218
},
219219
},
220220
},
221-
// Search for matches on the contents of files when the criteria is part of a expression.
221+
// Search for matches on the contents of files when the search criterion is only part of an expression.
222222
{
223223
RepoIDs: []int64{62},
224224
Keyword: "log",
@@ -241,10 +241,9 @@ func testIndexer(name string, t *testing.T, indexer internal.Indexer) {
241241
Page: 1,
242242
PageSize: 10,
243243
},
244-
IsKeywordFuzzy: true,
245244
})
246-
assert.NoError(t, err)
247-
assert.Len(t, langs, kw.Langs)
245+
require.NoError(t, err)
246+
require.Len(t, langs, kw.Langs)
248247

249248
hits := make([]codeSearchResult, 0, len(res))
250249

@@ -289,7 +288,7 @@ func TestBleveIndexAndSearch(t *testing.T) {
289288
_, err := idx.Init(t.Context())
290289
require.NoError(t, err)
291290

292-
testIndexer("beleve", t, idx)
291+
testIndexer("bleve", t, idx)
293292
}
294293

295294
func TestESIndexAndSearch(t *testing.T) {

modules/indexer/code/internal/indexer.go

+7-1
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ import (
99

1010
"code.gitea.io/gitea/models/db"
1111
repo_model "code.gitea.io/gitea/models/repo"
12+
"code.gitea.io/gitea/modules/indexer"
1213
"code.gitea.io/gitea/modules/indexer/internal"
1314
)
1415

@@ -18,14 +19,15 @@ type Indexer interface {
1819
Index(ctx context.Context, repo *repo_model.Repository, sha string, changes *RepoChanges) error
1920
Delete(ctx context.Context, repoID int64) error
2021
Search(ctx context.Context, opts *SearchOptions) (int64, []*SearchResult, []*SearchResultLanguages, error)
22+
SupportedSearchModes() []indexer.SearchMode
2123
}
2224

2325
type SearchOptions struct {
2426
RepoIDs []int64
2527
Keyword string
2628
Language string
2729

28-
IsKeywordFuzzy bool
30+
SearchMode indexer.SearchModeType
2931

3032
db.Paginator
3133
}
@@ -41,6 +43,10 @@ type dummyIndexer struct {
4143
internal.Indexer
4244
}
4345

46+
// SupportedSearchModes returns nil: the dummy indexer offers no search modes.
func (d *dummyIndexer) SupportedSearchModes() []indexer.SearchMode {
47+
return nil
48+
}
49+
4450
func (d *dummyIndexer) Index(ctx context.Context, repo *repo_model.Repository, sha string, changes *RepoChanges) error {
4551
return fmt.Errorf("indexer is not ready")
4652
}

modules/indexer/code/internal/util.go

+1-11
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,7 @@ import (
1010
"code.gitea.io/gitea/modules/log"
1111
)
1212

13-
const (
14-
filenameMatchNumberOfLines = 7 // Copied from github search
15-
)
13+
const filenameMatchNumberOfLines = 7 // Copied from GitHub search
1614

1715
func FilenameIndexerID(repoID int64, filename string) string {
1816
return internal.Base36(repoID) + "_" + filename
@@ -48,11 +46,3 @@ func FilenameMatchIndexPos(content string) (int, int) {
4846
}
4947
return 0, len(content)
5048
}
51-
52-
func ParseKeywordAsPhrase(keyword string) (string, bool) {
53-
if strings.HasPrefix(keyword, `"`) && strings.HasSuffix(keyword, `"`) && len(keyword) > 1 {
54-
// only remove the prefix and suffix quotes, no need to decode the content at the moment
55-
return keyword[1 : len(keyword)-1], true
56-
}
57-
return "", false
58-
}

modules/indexer/code/internal/util_test.go

-30
This file was deleted.

modules/indexer/code/search.go

-1
Original file line numberDiff line numberDiff line change
@@ -129,7 +129,6 @@ func searchResult(result *internal.SearchResult, startIndex, endIndex int) (*Res
129129
}
130130

131131
// PerformSearch perform a search on a repository
132-
// if isFuzzy is true set the Damerau-Levenshtein distance from 0 to 2
133132
func PerformSearch(ctx context.Context, opts *SearchOptions) (int, []*Result, []*SearchResultLanguages, error) {
134133
if opts == nil || len(opts.Keyword) == 0 {
135134
return 0, nil, nil, nil

modules/indexer/indexer.go

+54
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
// Copyright 2025 The Gitea Authors. All rights reserved.
2+
// SPDX-License-Identifier: MIT
3+
4+
package indexer
5+
6+
// SearchModeType is the string identifier of a search mode, shared by the indexer packages.
type SearchModeType string
7+
8+
const (
9+
// SearchModeExact matches the keyword as a whole (a phrase).
SearchModeExact SearchModeType = "exact"
10+
// SearchModeWords matches the keyword word by word; all words must match.
SearchModeWords SearchModeType = "words"
11+
// SearchModeFuzzy is the fuzzy mode, listed by SearchModesExactWordsFuzzy.
SearchModeFuzzy SearchModeType = "fuzzy"
12+
// SearchModeRegexp treats the keyword as a regular expression, listed by GitGrepSupportedSearchModes.
SearchModeRegexp SearchModeType = "regexp"
13+
)
14+
15+
// SearchMode describes one selectable search mode: its identifier plus the keys used
// to label it.
// NOTE(review): the "TrKey" fields hold values such as "search.exact" and are presumably
// translation keys resolved by the UI — confirm against the templates.
type SearchMode struct {
16+
// ModeValue is the identifier of the mode.
ModeValue SearchModeType
17+
// TooltipTrKey is the key of the mode's tooltip text.
TooltipTrKey string
18+
// TitleTrKey is the key of the mode's title text.
TitleTrKey string
19+
}
20+
21+
// SearchModesExactWords returns a newly built list holding the "exact" and "words"
// search modes, in that order.
func SearchModesExactWords() []SearchMode {
22+
return []SearchMode{
23+
{
24+
ModeValue: SearchModeExact,
25+
TooltipTrKey: "search.exact_tooltip",
26+
TitleTrKey: "search.exact",
27+
},
28+
{
29+
ModeValue: SearchModeWords,
30+
TooltipTrKey: "search.words_tooltip",
31+
TitleTrKey: "search.words",
32+
},
33+
}
34+
}
35+
36+
// SearchModesExactWordsFuzzy returns the "exact" and "words" modes followed by the
// "fuzzy" mode.
func SearchModesExactWordsFuzzy() []SearchMode {
37+
// SearchModesExactWords builds a fresh slice on every call, so appending to its
// result does not modify any shared list.
return append(SearchModesExactWords(), []SearchMode{
38+
{
39+
ModeValue: SearchModeFuzzy,
40+
TooltipTrKey: "search.fuzzy_tooltip",
41+
TitleTrKey: "search.fuzzy",
42+
},
43+
}...)
44+
}
45+
46+
// GitGrepSupportedSearchModes returns the "exact" and "words" modes followed by the
// "regexp" mode.
func GitGrepSupportedSearchModes() []SearchMode {
47+
// SearchModesExactWords builds a fresh slice on every call, so appending to its
// result does not modify any shared list.
return append(SearchModesExactWords(), []SearchMode{
48+
{
49+
ModeValue: SearchModeRegexp,
50+
TooltipTrKey: "search.regexp_tooltip",
51+
TitleTrKey: "search.regexp",
52+
},
53+
}...)
54+
}

0 commit comments

Comments
 (0)