Skip to content

Commit 2d77e13

Browse files
committed
fix
1 parent 91610a9 commit 2d77e13

File tree

32 files changed

+284
-155
lines changed

32 files changed

+284
-155
lines changed

modules/git/grep.go

+21-5
Original file line numberDiff line numberDiff line change
@@ -23,11 +23,19 @@ type GrepResult struct {
2323
LineCodes []string
2424
}
2525

26+
type GrepModeType string
27+
28+
const (
29+
GrepModeExact GrepModeType = "exact"
30+
GrepModeWords GrepModeType = "words"
31+
GrepModeRegexp GrepModeType = "regexp"
32+
)
33+
2634
type GrepOptions struct {
2735
RefName string
2836
MaxResultLimit int
2937
ContextLineNumber int
30-
IsFuzzy bool
38+
GrepMode GrepModeType
3139
MaxLineLength int // the maximum length of a line to parse, exceeding chars will be truncated
3240
PathspecList []string
3341
}
@@ -52,14 +60,22 @@ func GrepSearch(ctx context.Context, repo *Repository, search string, opts GrepO
5260
2^@repo: go-gitea/gitea
5361
*/
5462
var results []*GrepResult
55-
cmd := NewCommand("grep", "--null", "--break", "--heading", "--fixed-strings", "--line-number", "--ignore-case", "--full-name")
63+
cmd := NewCommand("grep", "--null", "--break", "--heading", "--line-number", "--full-name")
5664
cmd.AddOptionValues("--context", fmt.Sprint(opts.ContextLineNumber))
57-
if opts.IsFuzzy {
65+
if opts.GrepMode == GrepModeWords {
5866
words := strings.Fields(search)
59-
for _, word := range words {
67+
cmd.AddArguments("--fixed-strings", "--ignore-case")
68+
for i, word := range words {
6069
cmd.AddOptionValues("-e", strings.TrimLeft(word, "-"))
70+
if i < len(words)-1 {
71+
cmd.AddOptionValues("--and")
72+
}
6173
}
62-
} else {
74+
} else if opts.GrepMode == GrepModeExact {
75+
cmd.AddArguments("--fixed-strings")
76+
cmd.AddOptionValues("-e", strings.TrimLeft(search, "-"))
77+
} else if opts.GrepMode == GrepModeRegexp {
78+
cmd.AddArguments("--perl-regexp")
6379
cmd.AddOptionValues("-e", strings.TrimLeft(search, "-"))
6480
}
6581
cmd.AddDynamicArguments(util.IfZero(opts.RefName, "HEAD"))

modules/indexer/code/bleve/bleve.go

+9-10
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ import (
1717
"code.gitea.io/gitea/modules/charset"
1818
"code.gitea.io/gitea/modules/git"
1919
"code.gitea.io/gitea/modules/gitrepo"
20+
"code.gitea.io/gitea/modules/indexer"
2021
path_filter "code.gitea.io/gitea/modules/indexer/code/bleve/token/path"
2122
"code.gitea.io/gitea/modules/indexer/code/internal"
2223
indexer_internal "code.gitea.io/gitea/modules/indexer/internal"
@@ -136,6 +137,10 @@ type Indexer struct {
136137
indexer_internal.Indexer // do not composite inner_bleve.Indexer directly to avoid exposing too much
137138
}
138139

140+
func (b *Indexer) SupportedSearchModes() []indexer.SearchMode {
141+
return indexer.SearchModesExactWords()
142+
}
143+
139144
// NewIndexer creates a new bleve local indexer
140145
func NewIndexer(indexDir string) *Indexer {
141146
inner := inner_bleve.NewIndexer(indexDir, repoIndexerLatestVersion, generateBleveIndexMapping)
@@ -267,20 +272,14 @@ func (b *Indexer) Search(ctx context.Context, opts *internal.SearchOptions) (int
267272
pathQuery.FieldVal = "Filename"
268273
pathQuery.SetBoost(10)
269274

270-
keywordAsPhrase, isPhrase := internal.ParseKeywordAsPhrase(opts.Keyword)
271-
if isPhrase {
272-
q := bleve.NewMatchPhraseQuery(keywordAsPhrase)
275+
if opts.SearchMode == indexer.SearchModeExact {
276+
q := bleve.NewMatchPhraseQuery(opts.Keyword)
273277
q.FieldVal = "Content"
274-
if opts.IsKeywordFuzzy {
275-
q.Fuzziness = inner_bleve.GuessFuzzinessByKeyword(keywordAsPhrase)
276-
}
277278
contentQuery = q
278-
} else {
279+
} else /* words */ {
279280
q := bleve.NewMatchQuery(opts.Keyword)
280281
q.FieldVal = "Content"
281-
if opts.IsKeywordFuzzy {
282-
q.Fuzziness = inner_bleve.GuessFuzzinessByKeyword(opts.Keyword)
283-
}
282+
q.Operator = query.MatchQueryOperatorAnd
284283
contentQuery = q
285284
}
286285

modules/indexer/code/elasticsearch/elasticsearch.go

+9-10
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ import (
1616
"code.gitea.io/gitea/modules/charset"
1717
"code.gitea.io/gitea/modules/git"
1818
"code.gitea.io/gitea/modules/gitrepo"
19+
"code.gitea.io/gitea/modules/indexer"
1920
"code.gitea.io/gitea/modules/indexer/code/internal"
2021
indexer_internal "code.gitea.io/gitea/modules/indexer/internal"
2122
inner_elasticsearch "code.gitea.io/gitea/modules/indexer/internal/elasticsearch"
@@ -24,7 +25,6 @@ import (
2425
"code.gitea.io/gitea/modules/setting"
2526
"code.gitea.io/gitea/modules/timeutil"
2627
"code.gitea.io/gitea/modules/typesniffer"
27-
"code.gitea.io/gitea/modules/util"
2828

2929
"github.com/go-enry/go-enry/v2"
3030
"github.com/olivere/elastic/v7"
@@ -46,6 +46,10 @@ type Indexer struct {
4646
indexer_internal.Indexer // do not composite inner_elasticsearch.Indexer directly to avoid exposing too much
4747
}
4848

49+
func (b *Indexer) SupportedSearchModes() []indexer.SearchMode {
50+
return indexer.SearchModesExactWords()
51+
}
52+
4953
// NewIndexer creates a new elasticsearch indexer
5054
func NewIndexer(url, indexerName string) *Indexer {
5155
inner := inner_elasticsearch.NewIndexer(url, indexerName, esRepoIndexerLatestVersion, defaultMapping)
@@ -361,15 +365,10 @@ func extractAggs(searchResult *elastic.SearchResult) []*internal.SearchResultLan
361365
// Search searches for codes and language stats by given conditions.
362366
func (b *Indexer) Search(ctx context.Context, opts *internal.SearchOptions) (int64, []*internal.SearchResult, []*internal.SearchResultLanguages, error) {
363367
var contentQuery elastic.Query
364-
keywordAsPhrase, isPhrase := internal.ParseKeywordAsPhrase(opts.Keyword)
365-
if isPhrase {
366-
contentQuery = elastic.NewMatchPhraseQuery("content", keywordAsPhrase)
367-
} else {
368-
// TODO: this is the old logic, but not really using "fuzziness"
369-
// * IsKeywordFuzzy=true: "best_fields"
370-
// * IsKeywordFuzzy=false: "phrase_prefix"
371-
contentQuery = elastic.NewMultiMatchQuery("content", opts.Keyword).
372-
Type(util.Iif(opts.IsKeywordFuzzy, esMultiMatchTypeBestFields, esMultiMatchTypePhrasePrefix))
368+
if opts.SearchMode == indexer.SearchModeExact {
369+
contentQuery = elastic.NewMatchPhraseQuery("content", opts.Keyword)
370+
} else /* words */ {
371+
contentQuery = elastic.NewMultiMatchQuery("content", opts.Keyword).Type(esMultiMatchTypeBestFields).Operator("and")
373372
}
374373
kwQuery := elastic.NewBoolQuery().Should(
375374
contentQuery,

modules/indexer/code/gitgrep/gitgrep.go

+9-3
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ import (
99
"strings"
1010

1111
"code.gitea.io/gitea/modules/git"
12+
"code.gitea.io/gitea/modules/indexer"
1213
code_indexer "code.gitea.io/gitea/modules/indexer/code"
1314
"code.gitea.io/gitea/modules/setting"
1415
)
@@ -23,11 +24,16 @@ func indexSettingToGitGrepPathspecList() (list []string) {
2324
return list
2425
}
2526

26-
func PerformSearch(ctx context.Context, page int, repoID int64, gitRepo *git.Repository, ref git.RefName, keyword string, isFuzzy bool) (searchResults []*code_indexer.Result, total int, err error) {
27-
// TODO: it should also respect ParseKeywordAsPhrase and clarify the "fuzzy" behavior
27+
func PerformSearch(ctx context.Context, page int, repoID int64, gitRepo *git.Repository, ref git.RefName, keyword string, searchMode indexer.SearchModeType) (searchResults []*code_indexer.Result, total int, err error) {
28+
grepMode := git.GrepModeWords
29+
if searchMode == indexer.SearchModeExact {
30+
grepMode = git.GrepModeExact
31+
} else if searchMode == indexer.SearchModeRegexp {
32+
grepMode = git.GrepModeRegexp
33+
}
2834
res, err := git.GrepSearch(ctx, gitRepo, keyword, git.GrepOptions{
2935
ContextLineNumber: 1,
30-
IsFuzzy: isFuzzy,
36+
GrepMode: grepMode,
3137
RefName: ref.String(),
3238
PathspecList: indexSettingToGitGrepPathspecList(),
3339
})

modules/indexer/code/indexer.go

+9
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ import (
1414
"code.gitea.io/gitea/models/db"
1515
repo_model "code.gitea.io/gitea/models/repo"
1616
"code.gitea.io/gitea/modules/graceful"
17+
"code.gitea.io/gitea/modules/indexer"
1718
"code.gitea.io/gitea/modules/indexer/code/bleve"
1819
"code.gitea.io/gitea/modules/indexer/code/elasticsearch"
1920
"code.gitea.io/gitea/modules/indexer/code/internal"
@@ -302,3 +303,11 @@ func populateRepoIndexer(ctx context.Context) {
302303
}
303304
log.Info("Done (re)populating the repo indexer with existing repositories")
304305
}
306+
307+
func SupportedSearchModes() []indexer.SearchMode {
308+
gi := globalIndexer.Load()
309+
if gi == nil {
310+
return nil
311+
}
312+
return (*gi).SupportedSearchModes()
313+
}

modules/indexer/code/indexer_test.go

-1
Original file line numberDiff line numberDiff line change
@@ -241,7 +241,6 @@ func testIndexer(name string, t *testing.T, indexer internal.Indexer) {
241241
Page: 1,
242242
PageSize: 10,
243243
},
244-
IsKeywordFuzzy: true,
245244
})
246245
assert.NoError(t, err)
247246
assert.Len(t, langs, kw.Langs)

modules/indexer/code/internal/indexer.go

+7-1
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ import (
99

1010
"code.gitea.io/gitea/models/db"
1111
repo_model "code.gitea.io/gitea/models/repo"
12+
"code.gitea.io/gitea/modules/indexer"
1213
"code.gitea.io/gitea/modules/indexer/internal"
1314
)
1415

@@ -18,14 +19,15 @@ type Indexer interface {
1819
Index(ctx context.Context, repo *repo_model.Repository, sha string, changes *RepoChanges) error
1920
Delete(ctx context.Context, repoID int64) error
2021
Search(ctx context.Context, opts *SearchOptions) (int64, []*SearchResult, []*SearchResultLanguages, error)
22+
SupportedSearchModes() []indexer.SearchMode
2123
}
2224

2325
type SearchOptions struct {
2426
RepoIDs []int64
2527
Keyword string
2628
Language string
2729

28-
IsKeywordFuzzy bool
30+
SearchMode indexer.SearchModeType
2931

3032
db.Paginator
3133
}
@@ -41,6 +43,10 @@ type dummyIndexer struct {
4143
internal.Indexer
4244
}
4345

46+
func (d *dummyIndexer) SupportedSearchModes() []indexer.SearchMode {
47+
return nil
48+
}
49+
4450
func (d *dummyIndexer) Index(ctx context.Context, repo *repo_model.Repository, sha string, changes *RepoChanges) error {
4551
return fmt.Errorf("indexer is not ready")
4652
}

modules/indexer/code/internal/util.go

+1-11
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,7 @@ import (
1010
"code.gitea.io/gitea/modules/log"
1111
)
1212

13-
const (
14-
filenameMatchNumberOfLines = 7 // Copied from github search
15-
)
13+
const filenameMatchNumberOfLines = 7 // Copied from GitHub search
1614

1715
func FilenameIndexerID(repoID int64, filename string) string {
1816
return internal.Base36(repoID) + "_" + filename
@@ -48,11 +46,3 @@ func FilenameMatchIndexPos(content string) (int, int) {
4846
}
4947
return 0, len(content)
5048
}
51-
52-
func ParseKeywordAsPhrase(keyword string) (string, bool) {
53-
if strings.HasPrefix(keyword, `"`) && strings.HasSuffix(keyword, `"`) && len(keyword) > 1 {
54-
// only remove the prefix and suffix quotes, no need to decode the content at the moment
55-
return keyword[1 : len(keyword)-1], true
56-
}
57-
return "", false
58-
}

modules/indexer/code/internal/util_test.go

-30
This file was deleted.

modules/indexer/code/search.go

-1
Original file line numberDiff line numberDiff line change
@@ -129,7 +129,6 @@ func searchResult(result *internal.SearchResult, startIndex, endIndex int) (*Res
129129
}
130130

131131
// PerformSearch performs a search on a repository
132-
// if isFuzzy is true set the Damerau-Levenshtein distance from 0 to 2
133132
func PerformSearch(ctx context.Context, opts *SearchOptions) (int, []*Result, []*SearchResultLanguages, error) {
134133
if opts == nil || len(opts.Keyword) == 0 {
135134
return 0, nil, nil, nil

modules/indexer/indexer.go

+51
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
package indexer
2+
3+
type SearchModeType string
4+
5+
const (
6+
SearchModeExact SearchModeType = "exact"
7+
SearchModeWords SearchModeType = "words"
8+
SearchModeFuzzy SearchModeType = "fuzzy"
9+
SearchModeRegexp SearchModeType = "regexp"
10+
)
11+
12+
type SearchMode struct {
13+
ModeValue SearchModeType
14+
TooltipTrKey string
15+
TitleTrKey string
16+
}
17+
18+
func SearchModesExactWords() []SearchMode {
19+
return []SearchMode{
20+
{
21+
ModeValue: SearchModeExact,
22+
TooltipTrKey: "search.exact_tooltip",
23+
TitleTrKey: "search.exact",
24+
},
25+
{
26+
ModeValue: SearchModeWords,
27+
TooltipTrKey: "search.words_tooltip",
28+
TitleTrKey: "search.words",
29+
},
30+
}
31+
}
32+
33+
func SearchModesExactWordsFuzzy() []SearchMode {
34+
return append(SearchModesExactWords(), []SearchMode{
35+
{
36+
ModeValue: SearchModeFuzzy,
37+
TooltipTrKey: "search.fuzzy_tooltip",
38+
TitleTrKey: "search.fuzzy",
39+
},
40+
}...)
41+
}
42+
43+
func GitGrepSupportedSearchModes() []SearchMode {
44+
return append(SearchModesExactWords(), []SearchMode{
45+
{
46+
ModeValue: SearchModeRegexp,
47+
TooltipTrKey: "search.regexp_tooltip",
48+
TitleTrKey: "search.regexp",
49+
},
50+
}...)
51+
}

modules/indexer/internal/bleve/query.go

+10
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,16 @@ func MatchPhraseQuery(matchPhrase, field, analyzer string, fuzziness int) *query
2828
return q
2929
}
3030

31+
// MatchAndQuery generates a match query for the given phrase, field, analyzer and fuzziness; all terms must match (AND operator)
32+
func MatchAndQuery(matchPhrase, field, analyzer string, fuzziness int) *query.MatchQuery {
33+
q := bleve.NewMatchQuery(matchPhrase)
34+
q.FieldVal = field
35+
q.Analyzer = analyzer
36+
q.Fuzziness = fuzziness
37+
q.Operator = query.MatchQueryOperatorAnd
38+
return q
39+
}
40+
3141
// BoolFieldQuery generates a bool field query for the given value and field
3242
func BoolFieldQuery(value bool, field string) *query.BoolFieldQuery {
3343
q := bleve.NewBoolFieldQuery(value)

0 commit comments

Comments
 (0)