Skip to content

Commit

Permalink
docs: Add VertexAI Matching Engine sample setup script (#121)
Browse files Browse the repository at this point in the history
  • Loading branch information
davidmigloz authored Aug 16, 2023
1 parent f338120 commit ed2e154
Show file tree
Hide file tree
Showing 7 changed files with 196 additions and 1 deletion.
3 changes: 3 additions & 0 deletions examples/vertex_ai_matching_engine_setup/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# https://dart.dev/guides/libraries/private-files
# Created by `dart pub`
.dart_tool/
31 changes: 31 additions & 0 deletions examples/vertex_ai_matching_engine_setup/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# Vertex AI Matching Engine Setup

Script that creates a [Vertex AI Matching Engine](https://cloud.google.com/vertex-ai/docs/matching-engine/overview)
index and index endpoint ready to be used with LangChain.dart
[`VertexAIMatchingEngine`](https://pub.dev/documentation/langchain_google/latest/langchain_google/VertexAIMatchingEngine-class.html)
vector store.

## Usage

1. Create a Cloud Storage bucket.
2. Create a test document and upload it to the bucket: `/documents/0.json`
JSON structure: `{"id": "0", "pageContent": "...", "metadata": {}}`
3. Create its embedding and place it in the bucket: `/indexes/index.json`
JSON structure: `{"id": "0", "embedding": [0.1, 0.2, 0.3, ...]}`
4. Change the config in the script (`projectId`, `projectLocation`, etc.)
5. Run this script: `dart run bin/vertex_ai_matching_engine_setup.dart`
6. The script will output the configuration for `VertexAIMatchingEngine`.

Example output:

```dart
final vectorStore = VertexAIMatchingEngine(
authHttpClient: authClient,
project: 'my-project-id',
location: 'europe-west1',
queryRootUrl: 'https://xxxxxxxxxx.europe-west1-xxxxxxxxxxxx.vdb.vertexai.goog/',
indexId: 'xxxxxxxxxx',
gcsBucketName: 'my_index_bucket',
embeddings: embeddings,
);
```
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
include: ../../analysis_options.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
// ignore_for_file: avoid_print
import 'dart:convert';
import 'dart:io';

import 'package:gcloud/storage.dart';
import 'package:googleapis_auth/auth_io.dart';
import 'package:vertex_ai/vertex_ai.dart';

/// Creates a Vertex AI Matching Engine index and index endpoint ready to be
/// used with LangChain.dart `VertexAIMatchingEngine` vector store.
///
/// Steps:
/// 1. Create a Cloud Storage bucket.
/// 2. Create a test document and upload it to the bucket: `/documents/0.json`
///    JSON structure: `{"id": "0", "pageContent": "...", "metadata": {}}`
/// 3. Create its embedding and place it in the bucket: `/indexes/index.json`
///    JSON structure: `{"id": "0", "embedding": [0.1, 0.2, 0.3, ...]}`
/// 4. Change the config below (`projectId`, `projectLocation`, etc.)
/// 5. Run this script.
/// 6. The script will output the configuration for `VertexAIMatchingEngine`.
///
/// The service account credentials are read (as a JSON string) from the
/// `VERTEX_AI_SERVICE_ACCOUNT` environment variable. If the variable is not
/// set, an error is written to stderr and the process exit code is set to 1.
void main(final List<String> arguments) async {
  // Config
  const projectId = 'my-project-id';
  const projectLocation = 'europe-west1';
  const indexName = 'my_index';
  const indexDescription = 'My index description';
  const bucketName = 'my_index_bucket';
  const embeddingsDimensions = 768;
  const shardSize = VertexAIShardSize.small;
  const serviceAccountEnvVar = 'VERTEX_AI_SERVICE_ACCOUNT';

  // Get authenticated HTTP client
  print('\n> Authenticating...');
  final serviceAccountJson = Platform.environment[serviceAccountEnvVar];
  if (serviceAccountJson == null || serviceAccountJson.isEmpty) {
    // Fail with an actionable message instead of an opaque null-check error.
    stderr.writeln(
      'Missing environment variable $serviceAccountEnvVar. '
      'Set it to the JSON key of a service account with access to '
      'Vertex AI and Cloud Storage.',
    );
    exitCode = 1;
    return;
  }
  final serviceAccountCredentials = ServiceAccountCredentials.fromJson(
    json.decode(serviceAccountJson),
  );
  final authClient = await clientViaServiceAccount(
    serviceAccountCredentials,
    [VertexAIGenAIClient.cloudPlatformScope, ...Storage.SCOPES],
  );

  try {
    // Get Vertex AI client
    print('\n> Creating client...');
    final matchingEngine = VertexAIMatchingEngineClient(
      authHttpClient: authClient,
      project: projectId,
      location: projectLocation,
    );

    // Create index
    print('\n> Creating index (takes around 30min)...');
    final indexOperation = await matchingEngine.indexes.create(
      displayName: indexName,
      description: indexDescription,
      metadata: const VertexAIIndexRequestMetadata(
        contentsDeltaUri: 'gs://$bucketName/indexes',
        config: VertexAINearestNeighborSearchConfig(
          dimensions: embeddingsDimensions,
          algorithmConfig: VertexAIBruteForceAlgorithmConfig(),
          shardSize: shardSize,
        ),
      ),
    );
    await _waitUntilDone(
      indexOperation,
      (final op) => matchingEngine.indexes.operations.get(name: op.name),
    );

    // Create index endpoint
    print('\n> Creating index endpoint (takes around 1min)...');
    const indexEndpointName = '${indexName}_endpoint';
    final indexEndpointOperation = await matchingEngine.indexEndpoints.create(
      displayName: indexEndpointName,
      description: 'Index endpoint of $indexName',
      publicEndpointEnabled: true,
    );
    await _waitUntilDone(
      indexEndpointOperation,
      (final op) =>
          matchingEngine.indexEndpoints.operations.get(name: op.name),
    );

    // Get created index and endpoint. The create operations are looked up by
    // display name, so `firstWhere` throws a StateError if none matches.
    print('\n> Getting index and endpoint...');
    final indexes = await matchingEngine.indexes.list();
    final index =
        indexes.firstWhere((final index) => index.displayName == indexName);
    final indexId = index.id;
    final indexEndpoints = await matchingEngine.indexEndpoints.list();
    final indexEndpoint = indexEndpoints.firstWhere(
      (final indexEndpoint) => indexEndpoint.displayName == indexEndpointName,
    );
    final indexEndpointId = indexEndpoint.id;
    print('Index ID: $indexId');
    print('Index endpoint ID: $indexEndpointId');

    // Deploy index to endpoint
    print('\n> Deploying index to endpoint (takes around 30min)...');
    final deployOperation = await matchingEngine.indexEndpoints.deployIndex(
      indexId: indexId,
      indexEndpointId: indexEndpointId,
      deployedIndexId: '${indexName}_deployed',
      deployedIndexDisplayName: '${indexName}_deployed',
    );
    await _waitUntilDone(
      deployOperation,
      (final op) =>
          matchingEngine.indexEndpoints.operations.get(name: op.name),
    );

    // Get deployed index
    print('\n> Getting deployed index...');
    print('Index ready to be used!');
    print('You can now use it in LangChain.dart:');
    // The public endpoint is printed with the https scheme, matching the
    // example output in the README.
    print('''
final vectorStore = VertexAIMatchingEngine(
authHttpClient: authClient,
project: '$projectId',
location: '$projectLocation',
queryRootUrl: 'https://${indexEndpoint.publicEndpointDomainName}/',
indexId: '$indexId',
gcsBucketName: '$bucketName',
embeddings: embeddings,
);''');
  } finally {
    // Release the underlying HTTP connections so the process can exit
    // promptly, even when one of the steps above throws.
    authClient.close();
  }
}

/// Polls a long-running [operation] every 10 seconds until it reports done.
///
/// [refresh] is called with the latest known state of the operation and must
/// return its updated state. Returns the final (done) operation.
Future<VertexAIOperation> _waitUntilDone(
  final VertexAIOperation operation,
  final Future<VertexAIOperation> Function(VertexAIOperation current) refresh,
) async {
  var current = operation;
  while (!current.done) {
    print('In progress...');
    await Future<void>.delayed(const Duration(seconds: 10));
    current = await refresh(current);
  }
  return current;
}
13 changes: 13 additions & 0 deletions examples/vertex_ai_matching_engine_setup/pubspec.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
name: vertex_ai_matching_engine_setup
description: A script to set up Vertex AI Matching Engine to be used in LangChain.dart.
version: 1.0.0
publish_to: none

environment:
sdk: ^3.0.5

dependencies:
gcloud: ^0.8.11
googleapis_auth: ^1.4.1
http: ^1.1.0
vertex_ai: ^0.0.2
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# melos_managed_dependency_overrides: vertex_ai
dependency_overrides:
vertex_ai:
path: ../../packages/vertex_ai
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,11 @@ import 'package:vertex_ai/vertex_ai.dart';
/// Engine index and expose it in a Vertex AI index endpoint.
///
/// You can use [vertex_ai](https://pub.dev/packages/vertex_ai) Dart package
/// to do that. Check out its documentation for more details.
/// to do that.
///
/// Check out this sample script that creates the index and index endpoint
/// ready to be used with LangChain.dart:
/// https://github.com/davidmigloz/langchain_dart/tree/main/examples/vertex_ai_matching_engine_setup
///
/// ### Authentication
///
Expand Down

0 comments on commit ed2e154

Please sign in to comment.