Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optimize imports for extremely large history sizes for #168 #171

Merged
merged 171 commits into from
Aug 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
171 commits
Select commit Hold shift + click to select a range
632ecc5
Swap to using iterators for uploading to avoid storing all chunks in …
ddworken Feb 5, 2024
ed583c3
Chunk uploads for reuploading
ddworken Feb 5, 2024
7c07236
Revert "Swap to using iterators for uploading to avoid storing all ch…
ddworken Feb 5, 2024
2ad499b
Make hishtory install work even if there is zero shell history on the…
ddworken Feb 6, 2024
a6333ea
Skip DD integration for m1 mac since it seems to fail for mysterious …
ddworken Feb 5, 2024
d27c3cd
Log OpenAI error to debug log for #167
ddworken Feb 6, 2024
efc4e15
Release v0.269
ddworken Feb 6, 2024
f11433f
Add explicit handling for 429 error code from OpenAI
ddworken Feb 7, 2024
0ba7725
Release v0.270
ddworken Feb 7, 2024
82f819d
Fix handling of new lines in commands for #163 (#170)
ddworken Feb 7, 2024
43630e4
Release v0.271
ddworken Feb 7, 2024
307637b
Properly silence which output to fix #166
ddworken Feb 7, 2024
d56718d
Release v0.272
ddworken Feb 7, 2024
0126844
Add || true to fully fix #166
ddworken Feb 9, 2024
cd6b46a
Release v0.273
ddworken Feb 9, 2024
ae5edb7
Improve install.py script to attempt to detect when /tmp/ is noexec (…
ddworken Feb 10, 2024
b9813dd
Add basic smoke test to provide test coverage for other distros (#174)
ddworken Feb 10, 2024
a5249ce
Fix quotes on container names
ddworken Feb 10, 2024
0649d0c
More tweaks for smoke testing
ddworken Feb 10, 2024
e177b82
Skip setting the hostname for smoke tests since we don't need it
ddworken Feb 10, 2024
b4fb00a
Dependencies for smoke testing
ddworken Feb 10, 2024
2b86d9d
Add cgo deps
ddworken Feb 10, 2024
f7e6c96
Install killall command
ddworken Feb 10, 2024
f8e425b
Add two more distros for smoke testing
ddworken Feb 10, 2024
eea467a
Add smoke tests for arch
ddworken Feb 10, 2024
c517bf0
Update distro-smoke-test.yml
ddworken Feb 10, 2024
902fb45
Remove sudo since the arch container runs as root
ddworken Feb 10, 2024
fee074b
Drop sudo for OpenSUSE
ddworken Feb 10, 2024
0785144
Update install commands for OpenSUSE and Arch
ddworken Feb 10, 2024
64860f5
More tweaks to install commands
ddworken Feb 10, 2024
46ad008
Update arch install command
ddworken Feb 10, 2024
c8892fd
Remove OpenSUSE since their package repos are currently returning 500…
ddworken Feb 10, 2024
4744bb7
Add another dep for arch
ddworken Feb 10, 2024
b3cb269
Move up os.remove so that the file is removed even if it fails to exe…
ddworken Feb 10, 2024
0cfa598
Move function to start of python file to make it more idiomatic
ddworken Feb 10, 2024
1c963be
Update go action to enable caching of dependencies
ddworken Feb 11, 2024
33bc582
Run integration tests in parallel to speed up testing (#175)
ddworken Feb 11, 2024
af58ed6
Swap away from brew cask since it appears to be slower
ddworken Feb 11, 2024
cad4bd9
Add sync server to status -v #176 so that self-hosted users can easil…
ddworken Feb 11, 2024
ce64c25
Release v0.274
ddworken Feb 11, 2024
3751e45
Make bash support lenient with empty history lines, which seems to ha…
ddworken Feb 19, 2024
7e2ed6d
Remove unnecessary sub-shell, since we just need a truthy value here
ddworken Feb 19, 2024
6247f17
Release v0.275
ddworken Feb 19, 2024
3b62e9d
Add web UI for querying history from the browser (#180)
ddworken Feb 19, 2024
c119167
Release v0.276
ddworken Feb 19, 2024
9721705
Add ability to disable auth and force specific creds for the web UI
ddworken Feb 19, 2024
22d4309
Add cleaning for integration test devices to remove DB entries
ddworken Feb 19, 2024
87c2cde
Wire through the shell name into AI suggestions so that we can get mo…
ddworken Feb 19, 2024
0a76c87
Add support for control-A and control-E shortcuts similar to GNU read…
ddworken Feb 19, 2024
a4d229d
Allow register new device when exceed user limit when user already ex…
Feb 19, 2024
e955e88
Add basic readline-like support for using control-left and control-ri…
ddworken Feb 19, 2024
a91f1ca
Release v0.277
ddworken Feb 20, 2024
f138f4c
Improve word boundary algorithm to ignore previous spaces so that con…
ddworken Feb 22, 2024
1d5adb0
Update colored golden
ddworken Feb 23, 2024
4e49ff1
Update test golden
ddworken Feb 23, 2024
2032580
Update golden
ddworken Feb 24, 2024
5344b44
Disable colored output tests
ddworken Feb 24, 2024
35916fc
Add updated goldens
ddworken Feb 24, 2024
13f0e8d
Delete temporarily unused goldens
ddworken Feb 24, 2024
a95ba3c
Delete an unused file
ddworken Feb 25, 2024
da6e1f2
Bump github.com/jackc/pgx/v4 from 4.14.1 to 4.18.2 (#189)
dependabot[bot] Mar 24, 2024
0e4ae14
Bump google.golang.org/protobuf from 1.28.1 to 1.33.0 (#191)
dependabot[bot] Mar 24, 2024
4c66c42
Upgrade SLSA releaser due to github.com/slsa-framework/slsa-github-ge…
ddworken Mar 24, 2024
b25ab15
Release v0.278
ddworken Mar 24, 2024
41f7cc8
Update slsa-verifier to attempt to fix SLSA breakage
ddworken Mar 24, 2024
7f9807f
Release v0.279
ddworken Mar 24, 2024
eadcfc2
Release v0.280
ddworken Mar 24, 2024
b4f1816
Add better error message for SLSA failures
ddworken Mar 24, 2024
0328587
Disable validation so we can push out a working binary even though SL…
ddworken Mar 24, 2024
591722e
Release v0.281
ddworken Mar 24, 2024
78aa513
Fully disable validation to allow an emergency release due to SLSA br…
ddworken Mar 24, 2024
ca70f3d
Release v0.282
ddworken Mar 24, 2024
a4fcc9b
Update cosign too to fix slsa breakage from https://blog.sigstore.dev…
ddworken Mar 25, 2024
ea63690
Release v0.283
ddworken Mar 25, 2024
e6887bc
Release v0.284
ddworken Mar 25, 2024
4ba366a
Fix go.mod version after cosign upgrade
ddworken Mar 25, 2024
1b12f3a
Update go.sum after cosign update
ddworken Mar 25, 2024
2b83b4c
Release v0.285
ddworken Mar 25, 2024
2c9dc48
Re-enable SLSA verification now that we've updated the SLSA version t…
ddworken Mar 25, 2024
b52b651
Release v0.286
ddworken Mar 25, 2024
0c3e515
Disable validation with local build since it seems to fail for some r…
ddworken Mar 25, 2024
59e8a6d
Add SLSA validation with current binary built by SLSA
ddworken Mar 25, 2024
26ffaf8
Set up tmate session to debug slsa releaser
ddworken Mar 26, 2024
597a2a5
Add SLSA failure warning for versions broken by SLSA
ddworken Mar 26, 2024
e3b159a
Remove tmate session for debugging
ddworken Mar 26, 2024
752c120
Release v0.287
ddworken Mar 26, 2024
8f51433
Bump gopkg.in/go-jose/go-jose.v2 from 2.6.1 to 2.6.3 (#197)
dependabot[bot] Mar 26, 2024
8f4aff3
Add support for horizontal scrolling of all columns for #188 (#195)
ddworken Mar 26, 2024
9dc0531
Bump github.com/docker/docker (#193)
dependabot[bot] Mar 26, 2024
a0e7f30
Bump github.com/sigstore/rekor from 1.0.0 to 1.2.0 (#91)
dependabot[bot] Mar 27, 2024
c1729f1
Add ability to configure custom OpenAI API endpoint for #186 (#194)
ddworken Mar 27, 2024
8315952
Release v0.288
ddworken Mar 27, 2024
2b4be24
Enable colored golden tests for linux (#184)
ddworken Mar 28, 2024
8e7096c
Remove minor version numbers from os versions for golden files for tests
ddworken Apr 4, 2024
8ca3835
Continue-on-error for the DD setup since it will also fail if colima …
ddworken Apr 5, 2024
68b3311
Add test for horizontal scrolling other columns for #188
ddworken Apr 5, 2024
3589e8c
Add support for forcing init without prompting via --force flag for #198
ddworken Apr 13, 2024
a936644
Clean up: Remove duplicated code by calling existing utility function
ddworken Apr 13, 2024
6c10543
Add mouse scrolling support for #200
ddworken Apr 13, 2024
38ce0ab
Revert "Add mouse scrolling support for #200" since it breaks the abi…
ddworken Apr 14, 2024
a43019c
Release v0.289
ddworken Apr 14, 2024
5290f6d
Add benchmarking for searching for #202
ddworken Apr 14, 2024
ca90ab6
Add index of start time so that queries with a LIMIT clause can avoid…
ddworken Apr 14, 2024
a1b2686
Release v0.290
ddworken Apr 15, 2024
e7b4395
Add --port flag for the web UI for #203
ddworken Apr 15, 2024
55e9e4d
Add additional test for smoke tests to cover syncing
ddworken Apr 15, 2024
96616d5
Move extra delay to a separate job to avoid wasting GH action quota b…
ddworken Apr 15, 2024
47749b9
Release v0.291
ddworken Apr 15, 2024
21c7f5e
Revert "Add additional test for smoke tests to cover syncing"
ddworken Apr 15, 2024
60cbb19
Fix double-syncing error where devices receive entries from themselve…
ddworken Apr 15, 2024
dca595d
Slow down gif per feedback in #199
ddworken Apr 16, 2024
00baabd
Update bubbletea to include 2b46020ca0725219da1a7d7969fa85c486181258 …
ddworken Apr 16, 2024
1def962
Fix test broken by 7ae9f15b by making sure input is sent and processe…
ddworken Apr 21, 2024
dc627f0
Fix test broken by 7ae9f15b by making sure input is sent and processe…
ddworken Apr 21, 2024
b4f4da5
Fix test broken by 7ae9f15b by making sure input is sent and processe…
ddworken Apr 21, 2024
e584b0b
Release v0.292
ddworken Apr 21, 2024
2635b67
Bump golang.org/x/net from 0.22.0 to 0.23.0 (#206)
dependabot[bot] Apr 22, 2024
5c6006d
Remove darwin-21 goldens since they're no longer used now that GH upg…
ddworken Apr 28, 2024
063295b
Update backend to avoid persisting entries to be read by devices that…
ddworken Apr 28, 2024
a4c3afe
Add support for custom key bindings for #190 (#209)
ddworken Apr 28, 2024
8cb1216
Fix condition added in d6a60214a2 to also apply to rows with the go '…
ddworken Apr 28, 2024
ef12e99
Add support for enabling/disabling syncing post-install
ddworken Apr 28, 2024
711b75e
Release v0.293
ddworken Apr 29, 2024
2b024c8
fix: close file (#213)
testwill May 12, 2024
d328a50
Release v0.294
ddworken May 12, 2024
b102311
Move docs on custom key bindings to a more logical location
ddworken Jun 2, 2024
f0dbf02
Fix duplicate pre-saving issue reported in #215
ddworken Jun 3, 2024
f636649
Revert "Fix duplicate pre-saving issue reported in #215"
ddworken Jun 3, 2024
6173de2
Fix duplicate pre-saving issue reported in #215 (#217)
ddworken Jun 3, 2024
a55480b
Release v0.295
ddworken Jun 3, 2024
ea25e4f
Add full fix for #215 along with a test to reproduce the issue (#218)
ddworken Jun 9, 2024
0df63aa
Release v0.296
ddworken Jun 9, 2024
c10afa4
Add ability to skip config modifications for #212 (#216)
ddworken Jun 14, 2024
68556a0
Add README documentation for default-filter
ddworken Jun 14, 2024
d06cad5
Update title for section
ddworken Jun 14, 2024
49d553a
Release v0.297
ddworken Jun 14, 2024
03363ef
Add basic fix for #225 by escaping tab characters before rendering
ddworken Jul 7, 2024
0a9d40b
Release v0.298
ddworken Jul 9, 2024
9f64468
Bump github.com/hashicorp/go-retryablehttp from 0.7.2 to 0.7.7 (#223)
dependabot[bot] Jul 9, 2024
c2f5bd1
Add additional fallback method for retrieving the CWD to further impr…
ddworken Jul 10, 2024
e4b87f8
Explicitly install openssl to see if it fixes smoke test errors on arch
ddworken Jul 9, 2024
418f4ff
Add integration test for #226
ddworken Jul 10, 2024
422bf5b
Release v0.299
ddworken Jul 10, 2024
36f3fac
Update macos version for signer since GH dropped support for macos 11
ddworken Jul 10, 2024
949f34f
Release v0.300
ddworken Jul 10, 2024
bee2627
Swap to macos-latest to see if GH has more quota for that tag
ddworken Jul 10, 2024
0187070
Release v0.301
ddworken Jul 10, 2024
f2a41cc
Release v0.302
ddworken Jul 10, 2024
607cbee
Upgrade to setup-go@v4 for automatic caching support
ddworken Jul 10, 2024
99e3d3d
Revert "Remove OpenSUSE since their package repos are currently retur…
ddworken Jul 10, 2024
353e574
Install git and tar for opensuse smoke tests
ddworken Jul 11, 2024
75d176e
Link /bin/sh for opensuse smoke tests
ddworken Jul 11, 2024
4da5a6f
Remove opensuse smoke tests
ddworken Jul 11, 2024
39044e2
use http.DefaultClient (#232)
GRbit Aug 11, 2024
637b2e1
Add new short name for "ExitCode" - "$?" (#228)
GRbit Aug 11, 2024
9b8baa8
Add more short column name alternatives similar to #228
ddworken Aug 11, 2024
ad8775c
add forceCompactMode config entry (#237)
GRbit Aug 11, 2024
e792915
Add docs in readme to call out shorter column names as added in #228
ddworken Aug 11, 2024
1948541
Change compact-mode setting that was added in #237 to respect the con…
ddworken Aug 11, 2024
eb7af4b
Add config-get compact-mode command (as needed by #237)
ddworken Aug 11, 2024
02f6934
Move checking of forced compact mode into helper functions to ensure …
ddworken Aug 11, 2024
cc65735
Add test for forced compact mode (from #237)
ddworken Aug 11, 2024
5056fb5
ai: add some new env variables to control OpenAI requests (#231)
GRbit Aug 11, 2024
0231417
Update incorrect docs on ClientConfig struct
ddworken Aug 11, 2024
9c37c27
Add ability for the client to configure the model via an environment …
ddworken Aug 11, 2024
1beeb49
Bump github.com/docker/docker (#236)
dependabot[bot] Aug 11, 2024
decba84
run "make fmt" (#233)
GRbit Aug 11, 2024
e031af2
Add make fmt to pre-commit
ddworken Aug 11, 2024
df6c853
Merge branch 'master' into import
ddworken Aug 25, 2024
6604c63
Merge branch 'master' into import
ddworken Aug 26, 2024
6aaf1cb
Fix import
ddworken Aug 26, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,4 @@ package-lock.json
.prettierrc

# VS Code settings
.vscode/
.vscode/
11 changes: 2 additions & 9 deletions client/cmd/install.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ var installCmd = &cobra.Command{
if os.Getenv("HISHTORY_SKIP_INIT_IMPORT") == "" {
db, err := hctx.OpenLocalSqliteDb()
lib.CheckFatalError(err)
count, err := countStoredEntries(db)
count, err := lib.CountStoredEntries(db)
lib.CheckFatalError(err)
if count < 10 {
fmt.Println("Importing existing shell history...")
Expand All @@ -70,7 +70,7 @@ var initCmd = &cobra.Command{
Run: func(cmd *cobra.Command, args []string) {
db, err := hctx.OpenLocalSqliteDb()
lib.CheckFatalError(err)
count, err := countStoredEntries(db)
count, err := lib.CountStoredEntries(db)
lib.CheckFatalError(err)
if count > 0 && !(*forceInit) {
fmt.Printf("Your current hishtory profile has saved history entries, are you sure you want to run `init` and reset?\nNote: This won't clear any imported history entries from your existing shell\n[y/N]")
Expand Down Expand Up @@ -133,13 +133,6 @@ var uninstallCmd = &cobra.Command{
},
}

// countStoredEntries returns the number of data.HistoryEntry rows stored in db.
//
// The count query is executed through lib.RetryingDbFunctionWithResult
// (presumably so that transient DB errors are retried; confirm against that helper).
func countStoredEntries(db *gorm.DB) (int64, error) {
	return lib.RetryingDbFunctionWithResult(func() (int64, error) {
		var count int64
		// Run the query as its own statement before reading count. Go does not
		// specify whether a plain variable read happens before or after a function
		// call within the same return statement, so returning count alongside the
		// Count(&count) call is allowed to yield the stale zero value.
		err := db.Model(&data.HistoryEntry{}).Count(&count).Error
		return count, err
	})
}

func warnIfUnsupportedBashVersion() error {
_, err := exec.LookPath("bash")
if err != nil {
Expand Down
79 changes: 60 additions & 19 deletions client/lib/lib.go
Original file line number Diff line number Diff line change
Expand Up @@ -612,32 +612,59 @@ func Reupload(ctx context.Context) error {
if config.IsOffline {
return nil
}
entries, err := Search(ctx, hctx.GetDb(ctx), "", 0)
numEntries, err := CountStoredEntries(hctx.GetDb(ctx))
if err != nil {
return fmt.Errorf("failed to reupload due to failed search: %w", err)
return fmt.Errorf("failed to upload history entries due to error in counting entries: %v", err)
}
var bar *progressbar.ProgressBar
if len(entries) > NUM_IMPORTED_ENTRIES_SLOW {
if numEntries > int64(NUM_IMPORTED_ENTRIES_SLOW) {
fmt.Println("Persisting history entries")
bar = progressbar.Default(int64(len(entries)))
bar = progressbar.Default(int64(numEntries))
defer bar.Finish()
}
chunkSize := 500
chunks := shared.Chunks(entries, chunkSize)
return shared.ForEach(chunks, 10, func(chunk []*data.HistoryEntry) error {
jsonValue, err := EncryptAndMarshal(config, chunk)

// This number is a balance between speed and memory usage. If we make it too high, then
// it will mean we use a ton of memory (since we retrieve all of those entries). But if
// we make it too low, then it will have to do repeated SQL queries with OFFSETs, which
// are inherently slow.
searchChunkSize := 300_000
currentOffset := 0
for {
entries, err := SearchWithOffset(ctx, hctx.GetDb(ctx), "", searchChunkSize, currentOffset)
if err != nil {
return fmt.Errorf("failed to reupload due to failed encryption: %w", err)
return fmt.Errorf("failed to reupload due to failed search: %w", err)
}
_, err = ApiPost(ctx, "/api/v1/submit?source_device_id="+config.DeviceId, "application/json", jsonValue)
if err != nil {
return fmt.Errorf("failed to reupload due to failed POST: %w", err)
if len(entries) == 0 {
if currentOffset == 0 && numEntries != 0 {
return fmt.Errorf("found no entries for reuploading, something went wrong")
} else {
return nil
}
}
if bar != nil {
_ = bar.Add(chunkSize)
currentOffset += searchChunkSize
// This number is a balance between speed, and ensuring that we don't send too much data
// in a single request (since large individual requests are extremely slow). From benchmarking,
// it is apparent that this value seems to work quite well.
uploadChunkSize := 500
chunks := shared.Chunks(entries, uploadChunkSize)
err = shared.ForEach(chunks, 10, func(chunk []*data.HistoryEntry) error {
jsonValue, err := EncryptAndMarshal(config, chunk)
if err != nil {
return fmt.Errorf("failed to reupload due to failed encryption: %w", err)
}
_, err = ApiPost(ctx, "/api/v1/submit?source_device_id="+config.DeviceId, "application/json", jsonValue)
if err != nil {
return fmt.Errorf("failed to reupload due to failed POST: %w", err)
}
if bar != nil {
_ = bar.Add(uploadChunkSize)
}
return nil
})
if err != nil {
return err
}
return nil
})
}
}

func RetrieveAdditionalEntriesFromRemote(ctx context.Context, queryReason string) error {
Expand Down Expand Up @@ -775,12 +802,16 @@ func MakeWhereQueryFromSearch(ctx context.Context, db *gorm.DB, query string) (*
}

func Search(ctx context.Context, db *gorm.DB, query string, limit int) ([]*data.HistoryEntry, error) {
return retryingSearch(ctx, db, query, limit, 0)
return SearchWithOffset(ctx, db, query, limit, 0)
}

// SearchWithOffset runs a search for query against db by delegating to
// retryingSearch, passing limit and offset through unchanged and starting the
// retry counter at zero.
func SearchWithOffset(ctx context.Context, db *gorm.DB, query string, limit, offset int) ([]*data.HistoryEntry, error) {
	// A fresh search always begins on attempt zero.
	const initialRetryNum = 0
	return retryingSearch(ctx, db, query, limit, offset, initialRetryNum)
}

const SEARCH_RETRY_COUNT = 3

func retryingSearch(ctx context.Context, db *gorm.DB, query string, limit, currentRetryNum int) ([]*data.HistoryEntry, error) {
func retryingSearch(ctx context.Context, db *gorm.DB, query string, limit, offset, currentRetryNum int) ([]*data.HistoryEntry, error) {
if ctx == nil && query != "" {
return nil, fmt.Errorf("lib.Search called with a nil context and a non-empty query (this should never happen)")
}
Expand All @@ -798,13 +829,16 @@ func retryingSearch(ctx context.Context, db *gorm.DB, query string, limit, curre
if limit > 0 {
tx = tx.Limit(limit)
}
if offset > 0 {
tx = tx.Offset(offset)
}
var historyEntries []*data.HistoryEntry
result := tx.Find(&historyEntries)
if result.Error != nil {
if strings.Contains(result.Error.Error(), SQLITE_LOCKED_ERR_MSG) && currentRetryNum < SEARCH_RETRY_COUNT {
hctx.GetLogger().Infof("Ignoring err=%v and retrying search query, cnt=%d", result.Error, currentRetryNum)
time.Sleep(time.Duration(currentRetryNum*rand.Intn(50)) * time.Millisecond)
return retryingSearch(ctx, db, query, limit, currentRetryNum+1)
return retryingSearch(ctx, db, query, limit, offset, currentRetryNum+1)
}
return nil, fmt.Errorf("DB query error: %w", result.Error)
}
Expand Down Expand Up @@ -1012,3 +1046,10 @@ func SendDeletionRequest(ctx context.Context, deletionRequest shared.DeletionReq
}
return nil
}

// CountStoredEntries returns the number of data.HistoryEntry rows stored in db.
//
// The count query is executed through RetryingDbFunctionWithResult
// (presumably so that transient DB errors are retried; confirm against that helper).
func CountStoredEntries(db *gorm.DB) (int64, error) {
	return RetryingDbFunctionWithResult(func() (int64, error) {
		var count int64
		// Run the query as its own statement before reading count. Go does not
		// specify whether a plain variable read happens before or after a function
		// call within the same return statement, so returning count alongside the
		// Count(&count) call is allowed to yield the stale zero value.
		err := db.Model(&data.HistoryEntry{}).Count(&count).Error
		return count, err
	})
}
Loading