From 62f7121acaca30416826d0ba1ff47fef67399aa0 Mon Sep 17 00:00:00 2001
From: Ryan Fox-Tyler <60440289+ryanfoxtyler@users.noreply.github.com>
Date: Sun, 9 Feb 2025 13:29:31 -0500
Subject: [PATCH 01/53] initial port

all files unlisted at this point
---
 [diffstat: 303 new documentation files created under dgraph/reference/
 (cloud, deploy, design-concepts, dql, enterprise-features, graphql-dql,
 graphql, howto, learn, migration, query-language, ratel, releases), plus one
 edit to snippets/sdk-header.mdx]
 304 files changed, 44103 insertions(+), 1 deletion(-)
diff --git a/dgraph/reference/cloud/admin/authentication.mdx b/dgraph/reference/cloud/admin/authentication.mdx
new file mode 100644
index 00000000..86aba6f7
--- /dev/null
+++ b/dgraph/reference/cloud/admin/authentication.mdx
+---
+title: API keys
+description:
+---
+
+Client applications accessing the Dgraph Cloud cluster endpoints
+
+- `/query`
+- `/mutate`
+- `/commit`
+
+must present a valid **client** or **admin API key** in the `Dg-Auth` or
+`X-Auth-Token` header of every HTTP request.
+
+Client applications accessing the Dgraph Cloud cluster endpoints
+
+- `/admin`
+- `/admin/slash`
+- `/alter`
+
+must present a valid **admin API key** in the `Dg-Auth` or `X-Auth-Token`
+header of every HTTP request.
+
+Client applications accessing the Dgraph Cloud cluster endpoint
+
+- `/graphql`
+
+with [anonymous access](dgraph/reference/graphql/security/anonymous-access) not
+set on the requested operation, must present a valid **client** or **admin API
+key** in the `Dg-Auth` or `X-Auth-Token` header of every HTTP request.
+
+**Client API keys** can only be used to perform query, mutation, and commit
+operations. **Admin API keys** can be used to perform both client operations
+and admin operations such as dropping data, destroying the backend, and
+updating the schema.
+
+## Generate a new API key
+
+To generate a new API key:
+
+1. Go to the [Settings](https://cloud.dgraph.io/_/settings) section of the
+   Dgraph Cloud console.
+2. Open the [API Keys](https://cloud.dgraph.io/_/settings?tab=api-keys) tab.
+3. Click the **Create New** button.
+4. Give the key a name, select the **Client** or **Admin** type, and click
+   **Create**.
+5. Copy the key to a safe place; it will not be accessible once you leave the
+   page.
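+
+For illustration, a minimal request passing a client API key in the `Dg-Auth`
+header might look like the following sketch; the backend URL and key value are
+placeholders, not real values:
+
+```bash
+# Hypothetical endpoint and key; substitute your own backend URL and API key.
+curl "https://your-backend.cloud.dgraph.io/query" \
+  -H "Content-Type: application/dql" \
+  -H "Dg-Auth: <client-api-key>" \
+  --data '{ q(func: has(dgraph.type), first: 5) { uid dgraph.type } }'
+```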
diff --git a/dgraph/reference/cloud/admin/clone.mdx b/dgraph/reference/cloud/admin/clone.mdx
new file mode 100644
index 00000000..bbef737c
--- /dev/null
+++ b/dgraph/reference/cloud/admin/clone.mdx
+---
+title: Cloning Backend
+---
+
+Cloning a backend creates a copy of an existing backend. The clone is created
+with all the data and schema of the original backend present at the time of
+cloning. The clone has its own endpoint and is independent of the original
+backend once it is created. Any further changes in either backend will not be
+reflected in the other. Currently, a clone can only be created in the same
+zone as the original backend.
+
+To clone your backend, click the `Clone Backend` button under the
+[Settings](https://cloud.dgraph.io/_/settings) tab in the dashboard's sidebar.
+
+You can also perform the restore operation on an existing backend if you have
+an unused backend or want to reuse an existing endpoint. Note, however, that
+the restore operation drops all the existing data and schema on the current
+backend and replaces them with the original backend's data and schema.
diff --git a/dgraph/reference/cloud/admin/drop-data.mdx b/dgraph/reference/cloud/admin/drop-data.mdx
new file mode 100644
index 00000000..5edc5e4f
--- /dev/null
+++ b/dgraph/reference/cloud/admin/drop-data.mdx
+---
+title: Dropping Data from your Backend
+description:
+---
+
+It is possible to drop all data from your Dgraph Cloud backend and start
+afresh while retaining the same endpoint. Be careful: this operation is not
+reversible, and all data will be lost. It is highly recommended that you
+[export](./cloud/admin/import-export) your data before you drop it.
+
+To drop all data while retaining the schema, click the `Drop Data` button
+under the [Schema](https://cloud.dgraph.io/_/schema) tab in the sidebar.
+
+![Drop Data](/images/drop-data.png)
+
+### Dropping Data Programmatically
+
+To do this, call the `dropData` mutation on `/admin/slash`. As an example, if
+your GraphQL endpoint is
+`https://frozen-mango.us-west-2.aws.cloud.dgraph.io/graphql`, then the admin
+endpoint is at
+`https://frozen-mango.us-west-2.aws.cloud.dgraph.io/admin/slash`.
+
+Please note that this endpoint requires
+[Authentication](./cloud/admin/authentication).
+
+Please see the following curl command as an example:
+
+```bash
+curl 'https://<your-backend>/admin/slash' \
+  -H 'X-Auth-Token: <your-api-key>' \
+  -H 'Content-Type: application/graphql' \
+  --data-binary 'mutation { dropData(allData: true) { response { code message } } }'
+```
+
+If you would like to drop the schema along with the data, you can set the
+`allDataAndSchema` flag:
+
+```bash
+curl 'https://<your-backend>/admin/slash' \
+  -H 'X-Auth-Token: <your-api-key>' \
+  -H 'Content-Type: application/graphql' \
+  --data-binary 'mutation { dropData(allDataAndSchema: true) { response { code message } } }'
+```
diff --git a/dgraph/reference/cloud/admin/import-export.mdx b/dgraph/reference/cloud/admin/import-export.mdx
new file mode 100644
index 00000000..1dfe7e6f
--- /dev/null
+++ b/dgraph/reference/cloud/admin/import-export.mdx
+---
+title: Importing and Exporting data from Dgraph Cloud
+---
+
+## Exporting and Importing Data in Dgraph Cloud
+
+You can export your data as an administrator from one Dgraph Cloud backend and
+then import this data back into another Dgraph instance or Dgraph Cloud
+backend, as illustrated in the sketch below.
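+
+For a quick illustration, an export can be triggered from the `/admin/slash`
+endpoint with an admin API key. The following is a sketch; the backend URL and
+key are placeholders, and the mutation shape matches the examples later on
+this page:
+
+```bash
+# Hypothetical endpoint and key; substitute your own values.
+curl 'https://your-backend.cloud.dgraph.io/admin/slash' \
+  -H 'X-Auth-Token: <admin-api-key>' \
+  -H 'Content-Type: application/graphql' \
+  --data-binary 'mutation { export { response { code message } exportId taskId } }'
+```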
+For more information about how to export data in Dgraph Cloud, see
+[Export data](./howto/exportdata/export-data-cloud). You can also export data
+from Dgraph Cloud programmatically using the Dgraph Cloud API. For more
+information, see the [Cloud API documentation](./cloud/cloud-api/backup).
+
+To import data to Dgraph Cloud, see
+[live loader](./howto/importdata/live-loader).
+
+## Exporting Data with the Multi-Tenancy feature enabled in Dgraph Cloud
+
+<Note>
+  With the Multi-Tenancy feature enabled, for any GraphQL request you need to
+  provide the `accessJWT` for the specific user in the `X-Dgraph-AccessToken`
+  header.
+</Note>
+
+You can trigger two types of exports:
+
+- Cluster-wide export: an export of the entire backend (including all
+  namespaces). This request can only be triggered by the
+  [_Guardian of the Galaxy_](https://dgraph.io/docs/enterprise-features/multitenancy/#guardians-of-the-galaxy)
+  users.
+- Namespace-specific export: an export of a specific namespace. This request
+  can be triggered by the _Guardian of the Galaxy_ users and by the
+  _Guardian of Namespace_ users.
+
+### Cluster-wide Exports
+
+This can only be done by the _Guardian of the Galaxy_ users (also known as
+Super Admins); the steps are:
+
+1. Get the `accessJWT` token for the _Guardian of the Galaxy_ user. Send the
+   following GraphQL mutation to the `/admin` endpoint:
+
+```graphql
+mutation login($userId: String, $password: String, $namespace: Int) {
+  login(userId: $userId, password: $password, namespace: $namespace) {
+    response {
+      accessJWT
+      refreshJWT
+    }
+  }
+}
+```
+
+Your variables should refer to the _Guardian of the Galaxy_ user:
+
+```json
+{
+  "userId": "groot",
+  "password": "password",
+  "namespace": 0
+}
+```
+
+2. Once you have obtained the `accessJWT` token, pass it in the
+   `X-Dgraph-AccessToken` header; only then can you send the following GraphQL
+   mutation to the `/admin/slash` endpoint:
+
+```graphql
+mutation {
+  export(namespace: -1) {
+    response {
+      code
+      message
+    }
+    exportId
+    taskId
+  }
+}
+```
+
+3. Once done, you can send the following GraphQL query to get the
+   `signedUrls` from which you can download your export files:
+
+```graphql
+query {
+  exportStatus(
+    exportId: "<exportId>"
+    taskId: "<taskId>"
+  ) {
+    kind
+    lastUpdated
+    signedUrls
+    status
+  }
+}
+```
+
+### Namespace-specific Exports
+
+Namespace-specific exports can be triggered by the _Guardian of the Galaxy_
+users. In this case you can follow the same steps as for cluster-wide exports,
+replacing the namespace value `-1` with the namespace you want to export. It's
+important that you get the `accessJWT` token for the _Guardian of the Galaxy_
+user and pass it in the `X-Dgraph-AccessToken` header.
+
+For example, if you want to export the namespace `123`, your GraphQL request
+sent to the `/admin/slash` endpoint would look like:
+
+```graphql
+mutation {
+  export(namespace: 123) {
+    response {
+      code
+      message
+    }
+    exportId
+    taskId
+  }
+}
+```
+
+You can also trigger a namespace-specific export using the _Guardian of
+Namespace_ users; in this case there is no need to specify any namespace in
+the GraphQL request, as these users can only export their own namespace. It's
+important that you get the `accessJWT` token for the _Guardian of Namespace_
+user and pass it in the `X-Dgraph-AccessToken` header.
+
+The GraphQL request sent to the `/admin/slash` endpoint would be:
+
+```graphql
+mutation {
+  export {
+    response {
+      code
+      message
+    }
+    exportId
+    taskId
+  }
+}
+```
diff --git a/dgraph/reference/cloud/admin/index.mdx b/dgraph/reference/cloud/admin/index.mdx
new file mode 100644
index 00000000..02eb5517
--- /dev/null
+++ b/dgraph/reference/cloud/admin/index.mdx
+---
+title: Administering Your Backend
+---
diff --git a/dgraph/reference/cloud/admin/monitoring.mdx b/dgraph/reference/cloud/admin/monitoring.mdx
new file mode 100644
index 00000000..6d7cb91a
--- /dev/null
+++ b/dgraph/reference/cloud/admin/monitoring.mdx
+---
+title: Monitoring with Prometheus
+---
+
+Dgraph Cloud provides enterprises with real-time observability and
+high-fidelity telemetry of their instances with
+[Prometheus](https://prometheus.io/). Once enabled, Dgraph exposes real-time
+values for **Dedicated** backends at any given instant via the `/prometheus`
+endpoint. You can also configure Grafana for real-time visualization and
+analysis, allowing in-depth visibility into the behavior, performance, and
+health of your instances.
+
+<Note>
+  Prometheus integration is only available to users of **Dedicated Instance**
+  types, not **Free** or **Shared Instances**.
+</Note>
+
+### Enable Prometheus for your instance
+
+To enable Prometheus with your Dgraph Cloud instance:
+
+1. Log in to the Dgraph Cloud dashboard, select **Settings** under the
+   **Admin** subsection, and then select **Modify Backend**. Alternatively,
+   you can enable Prometheus while launching a new backend.
+
+2. For your existing **Dgraph Cloud Backend**, enable the **Prometheus**
+   option under **Additional Settings**. Review and select one of the
+   available configurations: 1C (1 vCPU, 4 GB RAM), 2C, 4C, or 8C.
+
+3. Review the estimated hourly cost, which includes additional charges for
+   enabling Prometheus. Click **Launch** to submit your changes.
+
+![Enable Prometheus](/images/monitoring/enable_prometheus.png)
+
+### Configure your instance endpoint with Prometheus
+
+1. For all dedicated backends with Prometheus enabled, a new endpoint called
+   `/prometheus` is available. For example, a backend at
+   `https://subdomain.region.cloud-provider.cloud.dgraph.io/graphql` exposes
+   metrics at
+   `https://subdomain.region.cloud-provider.cloud.dgraph.io/prometheus`.
+
+2. The `/prometheus` endpoint is protected with the **Admin API key**. Upon
+   accessing the URL for this endpoint, you are prompted to enter the key.
+   More information on creating an Admin API key can be found
+   [here](https://dgraph.io/docs/cloud/admin/authentication/).
+
+   ![Enter Admin API key](/images/monitoring/api_token.png)
+
+3. Once you have entered the Admin API key, click **Submit** to launch the
+   **Prometheus Dashboard**.
+
+   ![Prometheus Dashboard](/images/monitoring/prometheus_dashboard.png)
+
+### Integrating with Grafana
+
+To visualize Prometheus metrics within the **Grafana Dashboard** for Dgraph
+Cloud, perform the following actions:
+
+1. Launch the Grafana Dashboard and follow the same steps to add a
+   **Prometheus Datasource** to Grafana as described
+   **[here](https://prometheus.io/docs/visualization/grafana/#creating-a-prometheus-data-source)**,
+   with the following changes:
+
+2. Under the **HTTP** section, in the **URL** field, enter the URL of your
+   Prometheus endpoint (for example,
+   `https://subdomain.region.cloud-provider.cloud.dgraph.io/prometheus`).
+   For the **Access** option, select **Server (default)** from the dropdown
+   menu.
+
+3. Lastly, under **Auth**, within the **Custom HTTP Headers** subsection,
+   click **Add Header** and add a new **Header** called `X-Auth-Token`. Enter
+   your Admin API key as its **Value**. The following image shows an example
+   data source configuration.
+
+   ![Grafana Config](/images/monitoring/grafana_config.png)
+
+4. Click **Save & Test** to save and test the new Prometheus data source.
+
+5. Create and populate your **Grafana Dashboard**. Select the **Prometheus
+   Data Source** configured earlier and select the metrics to visualize
+   (e.g., `dgraph_memory_inuse_bytes`, `dgraph_alpha_health_status`). If
+   correctly configured, the metrics are visualized as shown below:
+
+   ![Grafana Dashboard](/images/monitoring/grafana_dashboard.png)
+
+Your new monitoring and observability stack for Dgraph Cloud, leveraging
+Prometheus and Grafana, should now be ready for use. It lets you monitor your
+Dgraph backend efficiently, without the overhead of installing, maintaining,
+and scaling your own observability stack.
diff --git a/dgraph/reference/cloud/admin/overview.mdx b/dgraph/reference/cloud/admin/overview.mdx
new file mode 100644
index 00000000..7964d4cd
--- /dev/null
+++ b/dgraph/reference/cloud/admin/overview.mdx
+---
+title: Overview
+---
+
+This is a guide to programmatically administering your Dgraph Cloud backend.
+
+Wherever possible, we have maintained compatibility with the corresponding
+Dgraph API, with the additional step of requiring
+[authentication](./authentication) via the `X-Auth-Token` header.
+
+<Note>
+  Keep in mind that free Dgraph Cloud backends are frozen automatically after
+  4 hours of inactivity.
+</Note>
+
+Please see the following topics:
+
+- [Authentication](./authentication) guides you through creating an API token.
+  Since all admin APIs require an auth token, this is a good place to start.
+- [Schema](./schema) describes how to programmatically query and update your
+  GraphQL schema.
+- [Importing and Exporting Data](./import-export) is a guide to exporting your
+  data from a Dgraph Cloud backend and importing it into another cluster.
+- [Dropping Data](./drop-data) guides you through dropping all data from your
+  Dgraph Cloud backend.
diff --git a/dgraph/reference/cloud/admin/schema-modes.mdx b/dgraph/reference/cloud/admin/schema-modes.mdx
new file mode 100644
index 00000000..36c9211b
--- /dev/null
+++ b/dgraph/reference/cloud/admin/schema-modes.mdx
+---
+title: Switch Dgraph Cloud Schema Modes
+description:
+  Dgraph Cloud provides a variety of schema modes that let you configure how
+  the underlying Dgraph Cloud instance responds to schema changes or mutation
+  requests that seek to change data stored in your backend.
+---
+
+Dgraph Cloud uses the following three schema modes, which control how the
+underlying Dgraph database instance is configured:
+
+- [Read-Only mode](#read-only-mode) (_dedicated instances only_): in this
+  mode, no schema changes or mutations are allowed.
+- [Strict mode](#strict-mode): in this mode, only mutations on predicates
+  that are already present in the schema are allowed.
+- [Flexible mode](#flexible-mode) (_dedicated instances only_): in this mode,
+  there are no global restrictions on schemas and mutations; this mode also
+  provides access to advanced Dgraph features.
+
+Each mode is pre-configured to provide simplicity and ease of use. By default,
+your Dgraph Cloud schema runs in [Strict mode](#strict-mode). If you want your
+dedicated instance to have the same behavior as a local Dgraph instance,
+change your schema to [Flexible mode](#flexible-mode).
+
+### Read-Only mode
+
+In read-only mode, all mutations and attempts to alter the Cloud schema are
+blocked. You can still access your data through read-only queries.
+
+### Strict mode
+
+Strict mode is the default mode on Dgraph Cloud, and the only mode available
+for free and shared instances. In this mode, Dgraph Cloud enforces a
+[strict schema](https://dgraph.io/docs/deploy/dgraph-administration/#restricting-mutation-operations),
+only allowing mutations on predicates already present in the schema.
+
+You can use GraphQL and DQL (formerly _GraphQL+-_) queries and mutations in
+this mode, as described in the [advanced queries](/advanced-queries/) section.
+However, all queries and mutations must be valid for the applied schema.
+
+<Note>
+  In **Strict** mode, before executing a mutation on a predicate that doesn't
+  exist in the schema, you need to add that predicate to the schema. To add a
+  predicate, perform an [`alter`
+  operation](https://dgraph.io/docs/clients/raw-http/#alter-the-database) with
+  that predicate and its schema type (_dedicated instances only_), or [update
+  your schema](./schema) to include that predicate and its schema type.
+</Note>
+
+### Flexible mode
+
+Flexible mode is suitable for users who are already familiar with Dgraph. It
+removes global restrictions on schemas and mutations, and also provides access
+to advanced Dgraph features like the following:
+
+- Directly altering the schema with the
+  [`alter`](https://dgraph.io/docs/clients/raw-http/#alter-the-database) HTTP
+  and gRPC endpoints
+- Support for access control lists
+  ([ACLs](https://dgraph.io/docs/enterprise-features/access-control-lists/))
+
+## Switch schema modes with the Dgraph Cloud console
+
+To change your schema mode on a dedicated instance, go to the
+[settings page](https://cloud.dgraph.io/_/settings), click the **General**
+tab, and then select a mode from the **Schema Mode** list box.
+
+## Switch schema modes with the `/admin` endpoint
+
+<Note>
+  Dgraph Labs recommends using the Dgraph Cloud [settings
+  page](https://cloud.dgraph.io/_/settings) to change your dedicated
+  instance's schema mode for most scenarios, instead of directly modifying
+  your schema.
+</Note>
+
+You can change the schema mode for your dedicated instance directly in the
+schema using the `updateGQLSchema` mutation on the `/admin` HTTP and gRPC
+endpoints. To learn more, see [Fetch and Update Your Schema](./schema).
+
+To set your schema mode, configure `UpdateOption` to use one of the following
+values in your schema:
+
+- To use Read-Only mode, set `UpdateOption` to `readonly`
+- To use Strict mode, set `UpdateOption` to `graphql`
+- To use Flexible mode, set `UpdateOption` to `flexible`
diff --git a/dgraph/reference/cloud/admin/schema.mdx b/dgraph/reference/cloud/admin/schema.mdx
new file mode 100644
index 00000000..f92cedb0
--- /dev/null
+++ b/dgraph/reference/cloud/admin/schema.mdx
+---
+title: Fetching and Updating Your Schema
+---
+
+Your GraphQL schema can be fetched and updated using the `/admin` endpoint of
+your cluster. As an example, if your GraphQL endpoint is
+`https://frozen-mango.us-west-2.aws.cloud.dgraph.io/graphql`, then the admin
+endpoint for your schema is
+`https://frozen-mango.us-west-2.aws.cloud.dgraph.io/admin`.
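+
+For illustration, fetching the current schema with curl might look like the
+following sketch (the API key value is a placeholder, and the request shape
+mirrors the `getGQLSchema` query shown below):
+
+```bash
+# Hypothetical admin API key; substitute your own value.
+curl 'https://frozen-mango.us-west-2.aws.cloud.dgraph.io/admin' \
+  -H 'X-Auth-Token: <admin-api-key>' \
+  -H 'Content-Type: application/graphql' \
+  --data-binary '{ getGQLSchema { schema } }'
+```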
+
+This endpoint works in a similar way to the
+[/admin](https://dgraph.io/docs/graphql/admin) endpoint of Dgraph, with the
+additional constraint of [requiring authentication](/admin/authentication).
+
+### Fetching the Current Schema
+
+It is possible to fetch your current schema using the `getGQLSchema` query on
+`/admin`. Below is a sample GraphQL query that fetches this schema:
+
+```graphql
+{
+  getGQLSchema {
+    schema
+  }
+}
+```
+
+### Setting a New Schema
+
+You can save a new schema using the `updateGQLSchema` mutation on `/admin`.
+Below is an example GraphQL body, with a variable called `sch` that must be
+passed in as a [variable](https://graphql.org/graphql-js/passing-arguments/):
+
+```graphql
+mutation ($sch: String!) {
+  updateGQLSchema(input: { set: { schema: $sch } }) {
+    gqlSchema {
+      schema
+      generatedSchema
+    }
+  }
+}
+```
diff --git a/dgraph/reference/cloud/advanced-queries.mdx b/dgraph/reference/cloud/advanced-queries.mdx
new file mode 100644
index 00000000..db9f753b
--- /dev/null
+++ b/dgraph/reference/cloud/advanced-queries.mdx
+---
+title: Advanced Queries with DQL
+---
+
+_You can now
+[embed DQL queries inside your GraphQL schema](https://dgraph.io/docs/graphql/custom/graphqlpm),
+which is recommended for most use cases. The rest of this document covers how
+to connect to your Dgraph Cloud backend with existing Dgraph clients._
+
+In addition to GraphQL support, Dgraph Cloud also supports running advanced
+queries using Dgraph Query Language (DQL), previously named GraphQL+-. DQL is
+based on GraphQL, but adds and removes features to better support graph
+database operations. Advanced users can use DQL to send queries and mutations
+to Dgraph Cloud's HTTP or gRPC endpoints using the Dgraph client libraries. To
+learn more about the Dgraph client libraries, see the
+[client library documentation](https://dgraph.io/docs/clients/). To learn more
+about DQL, see
+[DQL Fundamentals](https://dgraph.io/docs/query-language/graphql-fundamentals/).
+
+If you are getting started with Dgraph Cloud, you should probably start out by
+using Dgraph's [GraphQL API](https://dgraph.io/docs/graphql/overview) instead.
+Dgraph's GraphQL API lets you quickly use Dgraph Cloud before moving on to the
+advanced features available with DQL.
+
+<Note>
+  Dgraph Cloud's [schema modes](/admin/schema-modes/) let you configure
+  whether and how schema changes are allowed. To alter your schema using the
+  `/alter` HTTP and gRPC endpoints, you'll need to use **Flexible mode**.
+</Note>
+
+## Authentication
+
+The APIs documented here all require an API token for access. To learn how to
+create an API token, please see [Authentication](/admin/authentication).
+
+### HTTP
+
+You can query your backend with DQL using your cluster's `/query` endpoint. As
+an example, if your GraphQL endpoint is
+`https://frozen-mango.us-west-2.aws.cloud.dgraph.io/graphql`, then the query
+endpoint is
+`https://frozen-mango.us-west-2.aws.cloud.dgraph.io/query`.
+
+You can also access the [`/mutate`](https://dgraph.io/docs/mutations/) and
+`/commit` endpoints.
+
+For example, let's say you have the following GraphQL schema:
+
+```graphql
+type Person {
+  name: String! @search(by: [fulltext])
+  age: Int
+  country: String
+}
+```
+
+Here is an example of a cURL command with the `/mutate` endpoint:
+
+```bash
+curl -H "Content-Type: application/rdf" -H "x-auth-token: <api-token>" -X POST "https://<your-backend>/mutate?commitNow=true" -d $'
+{
+  set {
+    _:x <Person.name> "John" .
+    _:x <Person.age> "30" .
+    _:x <Person.country> "US" .
+  }
+}'
+```
+
+Here is an example of a cURL command with the `/query` endpoint:
+
+```
+curl -H "Content-Type: application/dql" -H "x-auth-token: <api-token>" -XPOST "https://<your-backend>/query" -d '{
+  queryPerson(func: type(Person)) {
+    Person.name
+    Person.age
+    Person.country
+  }
+}'
+```
+
+### gRPC
+
+Dgraph Cloud is compatible with most existing Dgraph clients. You can use the
+helper methods from each library to connect to your backend, passing in a
+Dgraph Cloud endpoint and an API token.
+
+Here is an example that uses the
+[pydgraph client](https://github.com/dgraph-io/pydgraph) to make gRPC
+requests.
+
+```python
+import pydgraph
+
+client_stub = pydgraph.DgraphClientStub.from_slash_endpoint("https://frozen-mango.eu-central-1.aws.cloud.dgraph.io/graphql", "<api-key>")
+client = pydgraph.DgraphClient(client_stub)
+```
+
+Here is an example of a mutation using the `pydgraph` client:
+
+```python
+mut = {
+    "Person.name": "John Doe",
+    "Person.age": "32",
+    "Person.country": "US"
+}
+
+txn = client.txn()
+try:
+    res = txn.mutate(set_obj=mut)
+finally:
+    txn.discard()
+```
+
+Here is an example of a query using the `pydgraph` client (note the `json`
+import, which is needed to parse the response):
+
+```python
+import json
+
+query = """
+{
+  queryPerson(func: type(Person)) {
+    Person.name
+    Person.age
+    Person.country
+  }
+}"""
+txn = client.txn()
+try:
+    res = txn.query(query)
+    ppl = json.loads(res.json)
+    print(ppl)
+finally:
+    txn.discard()
+```
+
+#### Connecting from Dgraph Clients
+
+Below are snippets to connect to your Dgraph Cloud backend from various Dgraph
+clients.
+
+**Python**
+
+```python
+import pydgraph
+
+client_stub = pydgraph.DgraphClientStub.from_slash_endpoint("https://frozen-mango.eu-central-1.aws.cloud.dgraph.io/graphql", "<api-key>")
+client = pydgraph.DgraphClient(client_stub)
+```
+
+**JavaScript**
+
+```javascript
+const dgraph = require("dgraph-js")
+
+const clientStub = dgraph.clientStubFromSlashGraphQLEndpoint(
+  "https://frozen-mango.eu-central-1.aws.cloud.dgraph.io/graphql",
+  "<api-key>",
+)
+const dgraphClient = new dgraph.DgraphClient(clientStub)
+```
+
+**Go**
+
+```golang
+// This example uses dgo
+conn, err := dgo.DialSlashEndpoint("https://frozen-mango.eu-central-1.aws.cloud.dgraph.io/graphql", "<api-key>")
+if err != nil {
+  log.Fatal(err)
+}
+defer conn.Close()
+dgraphClient := dgo.NewDgraphClient(api.NewDgraphClient(conn))
+```
+
+**Java**
+
+```java
+// This example uses dgraph4j
+DgraphStub stub = DgraphClient.clientStubFromSlashEndpoint("https://frozen-mango.eu-central-1.aws.cloud.dgraph.io/graphql", "<api-key>");
+DgraphClient dgraphClient = new DgraphClient(stub);
+```
+
+**C# / .NET**
+
+```c#
+var client = new DgraphClient(SlashChannel.Create("frozen-mango.eu-central-1.aws.cloud.dgraph.io:443", "<api-key>"));
+```
+
+### Visualizing your Graph with Ratel
+
+You can use Ratel to visualize your Dgraph Cloud backend with DQL. You can
+host Ratel yourself, or you can use Ratel online at
+[Dgraph Play](https://play.dgraph.io/?latest#connection).
+
+To configure Ratel:
+
+1. Click the Dgraph logo in the top left to bring up the connection screen (by
+   default, it has the caption: play.dgraph.io).
+2. Enter your backend's host in the **Dgraph Server URL** field. This is
+   obtained by removing `/graphql` from the end of your `/graphql` endpoint
+   URL. For example, if your `/graphql` endpoint is
+   `https://frozen-mango.us-west-2.aws.cloud.dgraph.io/graphql`, then the host
+   for Ratel is `https://frozen-mango.us-west-2.aws.cloud.dgraph.io`.
+3. Click the **Connect** button. You should see a green check mark next to the
+   word **Connected**.
+4. 
Click on the **Extra Settings** tab, and then enter your API token into the + **API Key** field. To create a new API token, see + [Authentication](/admin/authentication). +5. Click on the **Continue** button. + +You can now run queries and mutations using Ratel, and see visualizations of +your data. + +Ratel has certain limitations; it doesn't support backups, modifying ACL or +attempting to remove nodes from the cluster. + +### Switching Schema Modes + +If you want to use DQL as your primary mode of interaction with the Dgraph Cloud +backend (instead of primarily using the GraphQL API), you can switch your +backend to flexible mode. To learn more, see +[Schema Modes](/admin/schema-modes). diff --git a/dgraph/reference/cloud/cloud-api/authentication.mdx b/dgraph/reference/cloud/cloud-api/authentication.mdx new file mode 100644 index 00000000..cc70cbf9 --- /dev/null +++ b/dgraph/reference/cloud/cloud-api/authentication.mdx @@ -0,0 +1,64 @@ +--- +title: Authentication +--- + +## Login + +Login will generate a JWT token that can be used to access other Dgraph Cloud +APIs. + +This API requires an email address and password. If you have signed up with a +social media provider, you may create a new password by selecting +`Forgot Password` on the login page. + +### Cloud Endpoint + +``` +https://cerebro.cloud.dgraph.io/graphql +``` + +### API Command + +```graphql +query Login($email: String!, $password: String!) { + login(email: $email, password: $password) { + token + } +} +``` + +**Arguments** + +- `email`: your email address +- `password`: your password + +### Example + +Below is an example request and response. The token below must be passed to all +future API calls as a bearer token in the `Authorization` header. + + + +```bash +curl 'https://cerebro.cloud.dgraph.io/graphql' \ + -H 'Content-Type: application/json' \ + --data-binary '{"query":"query Login($email: String!, $password: String!) {\n login(email: $email, password: $password) { \n token\n }\n}","variables":{"email":"","password":""}}' \ + --compressed +``` + +```json +{ + "data": { + "login": { + "token": "" + } + } +} +``` + + + +## Using the authentication token + +The token returned from the login API must be passed to all future API calls as +a bearer token in the `Authorization` header. diff --git a/dgraph/reference/cloud/cloud-api/backend.mdx b/dgraph/reference/cloud/cloud-api/backend.mdx new file mode 100644 index 00000000..b1c824e2 --- /dev/null +++ b/dgraph/reference/cloud/cloud-api/backend.mdx @@ -0,0 +1,498 @@ +--- +title: Backend +--- + +## List Backends + +List backends that you have access to. + + + This API requires authentication, please see + [Authentication](./authentication) for instructions on issuing and passing a + JWT token to the API. + + +### Cloud Endpoint + +``` +https://cerebro.cloud.dgraph.io/graphql +``` + +### API Command + +```graphql +{ + deployments { + uid + name + zone + url + owner + jwtToken + deploymentMode + deploymentType + lambdaScript + } +} +``` + +### Example + +- `` is the JWT returned from [Authentication](./authentication). 
+- `` is a base64 string that will be non-empty if you have saved + [Lambdas](./lambda) on your backend + + + +```bash +#!/usr/bin/env bash + +CEREBRO_JWT="" + +curl 'https://cerebro.cloud.dgraph.io/graphql' \ + -H 'Content-Type: application/json' \ + -H "Authorization: Bearer ${CEREBRO_JWT}" \ + --data-binary '{"query":"{\n deployments {\n uid\n name\n zone\n url\n owner\n jwtToken\n deploymentMode\n deploymentType\n lambdaScript\n }\n}","variables":{}}' \ + --compressed +``` + +```json +{ + "data": { + "deployments": [ + { + "uid": "0xf0ffe9", + "name": "testing", + "zone": "us-east-1", + "url": "polished-violet.us-east-1.aws.cloud.dgraph.io", + "owner": "486c69b4-e09b-48f9-a28a-86314fe232cd", + "jwtToken": "", + "deploymentMode": "graphql", + "deploymentType": "free", + "lambdaScript": "" + } + ] + } +} +``` + + + + For any `/admin` or `/admin/slash` requests to +`https://`, you **must use the `` returned above +in the `X-Auth-Token` header.** The Cerebro JWT is only used in the +`Authorization` header for requests to `https://cerebro.cloud.dgraph.io/graphql`. + +## Deploy Backend + +Launch a new backend. + + + This API requires authentication, please see + [Authentication](./authentication) for instructions on issuing and passing a + JWT to the API. + + +### Cloud Endpoint + +``` +https://cerebro.cloud.dgraph.io/graphql +``` + +### API Command + +```graphql +mutation CreateDeployment($newDeployment: NewDeployment!) { + createDeployment(input: $newDeployment) { + uid + name + url + jwtToken + } +} +``` + +**Arguments** + +- `newDeployment`: parameter object for new deployment +- `newDeployment.name`: name of the deployment +- `newDeployment.zone`: region to launch +- `newDeployment.deploymentType`: type of deployment `(free|shared|dedicated)` + +### Example + +- `` is the JWT returned from [Authentication](./authentication). + + + +```bash +#!/usr/bin/env bash + +CEREBRO_JWT="" + +curl 'https://cerebro.cloud.dgraph.io/graphql' \ + -H 'Content-Type: application/json' \ + -H "Authorization: Bearer ${CEREBRO_JWT}" \ + --data-binary '{"query":"mutation CreateDeployment($deployment: NewDeployment!) {\n createDeployment(input: $deployment) {\n uid\n name\n url\n jwtToken\n }\n}","variables":{"deployment":{"name":"My New Deployment","zone":"us-east-1","deploymentType":"dedicated"}}}' \ + --compressed +``` + +```json +{ + "data": { + "createDeployment": { + "uid": "0x42", + "name": "My New Deployment", + "url": "my-new-deployment.us-east-1.aws.cloud.dgraph.io", + "jwtToken": "" + } + } +} +``` + + + +## Update Backend + +Update backend. + + + This API requires authentication, please see + [Authentication](./authentication) for instructions on issuing and passing a + JWT token to the API. + + +### Cloud Endpoint + +``` +https://cerebro.cloud.dgraph.io/graphql +``` + +### API Command + +```graphql +mutation UpdateDeployment($updateDeploymentInput: UpdateDeploymentInput!) { + updateDeployment(input: $updateDeploymentInput) +} +``` + +**Arguments** + +- `updateDeploymentInput`: parameter object for update deployment +- `updateDeploymentInput.uid` (required): deployment `uid` + +### Example + +- `` is the JWT returned from [Authentication](./authentication). +- `` is the UID returned from [List Backends](#list-backends). + + + +```bash +#!/usr/bin/env bash + +CEREBRO_JWT="" + +curl 'https://cerebro.cloud.dgraph.io/graphql' \ + -H 'Content-Type: application/json' \ + -H "Authorization: Bearer ${CEREBRO_JWT}" \ + --data-binary '{"query":"mutation UpdateDeployment($dep: UpdateDeploymentInput!) 
{\n  updateDeployment(input: $dep)\n}","variables":{"dep":{"uid":"<deployment-uid>","name":"My Deployment!"}}}' \
+  --compressed
+```
+
+```json
+{
+  "data": {
+    "updateDeployment": "Successfully Updated the backend"
+  }
+}
+```
+
+
+
+## Destroy Backend
+
+Destroy (i.e., delete) a backend by ID.
+
+
+  This API requires authentication, please see
+  [Authentication](./authentication) for instructions on issuing and passing a
+  JWT token to the API.
+
+
+### Cloud Endpoint
+
+```
+https://cerebro.cloud.dgraph.io/graphql
+```
+
+### API Command
+
+```graphql
+mutation DeleteDeployment($deploymentID: String!) {
+  deleteDeployment(deploymentID: $deploymentID)
+}
+```
+
+**Arguments**
+
+- `deploymentID` (required): deployment `uid` returned from a
+  [List Backends](#list-backends) request
+
+### Example
+
+- `<cerebro-jwt>` is the JWT returned from [Authentication](./authentication).
+
+
+
+```bash
+#!/usr/bin/env bash
+
+CEREBRO_JWT="<cerebro-jwt>"
+
+curl 'https://cerebro.cloud.dgraph.io/graphql' \
+  -H 'Content-Type: application/json' \
+  -H "Authorization: Bearer ${CEREBRO_JWT}" \
+  --data-binary '{"query":"mutation DeleteDeployment($deploymentUid: String!) {\n  deleteDeployment(deploymentID: $deploymentUid)\n}","variables":{"deploymentUid":"<deployment-uid>"}}' \
+  --compressed
+```

+```json
+{
+  "data": {
+    "deleteDeployment": "Successfully deleted the Deployment"
+  }
+}
+```
+
+
+
+## Restore Backends
+
+Restore into a backend by source backend ID.
+
+### Cloud Endpoint
+
+```bash
+https://${DEPLOYMENT_URL}/admin/slash
+```
+
+### API Command
+
+```graphql
+mutation ($uid: String!, $backupFolder: String, $backupNum: Int) {
+  restore(uid: $uid, backupFolder: $backupFolder, backupNum: $backupNum) {
+    response {
+      code
+      message
+      restoreId
+    }
+    errors {
+      message
+    }
+  }
+}
+```
+
+**Arguments**
+
+- `uid` (required): the deployment `uid` from List Backends
+- `backupFolder`: the folder of the backup to restore from, as returned by a
+  [List Backups](./backup#list-backups) request
+- `backupNum`: the number of the backup within that folder to restore up to,
+  as returned by a [List Backups](./backup#list-backups) request
+
+### Example
+
+
+
+```bash
+#!/usr/bin/env bash
+
+DEPLOYMENT_URL="polished-violet.us-east-1.aws.cloud.dgraph.io"
+DEPLOYMENT_JWT="<deployment-jwt>"
+
+curl "https://${DEPLOYMENT_URL}/admin/slash" \
+  -H 'Content-Type: application/json' \
+  -H "X-Auth-Token: ${DEPLOYMENT_JWT}" \
+  --data-binary '{"query":"mutation($uid: String!, $backupFolder: String, $backupNum: Int) {\n restore(uid: $uid, backupFolder: $backupFolder, backupNum: $backupNum) {\n response {\n code\n message\n restoreId\n }, errors {\n message\n }\n}\n}","variables":{"uid":"<deployment-uid>","backupFolder":"<backup-folder>","backupNum":<backup-num>}}' \
+  --compressed
+```
+
+```json
+{
+  "data": {
+    "restore": {
+      "errors": null,
+      "response": {
+        "code": "Success",
+        "message": "Restore operation started.",
+        "restoreId": 1
+      }
+    }
+  }
+}
+```
+
+
+## Restore Backend Status
+
+Retrieve the status of a restore operation.
+
+### Cloud Endpoint
+
+```bash
+https://${DEPLOYMENT_URL}/admin/slash
+```
+
+### API Command
+
+```graphql
+query ($restoreId: Int!) {
+  restoreStatus(restoreId: $restoreId) {
+    response {
+      status
+      errors
+    }
+  }
+}
+```
+
+**Arguments**
+
+- `restoreId` (required): the ID of the restore operation returned from a
+  [Restore Backends](#restore-backends) request
+
+### Example
+
+
+
+```bash
+#!/usr/bin/env bash
+
+DEPLOYMENT_URL="polished-violet.us-east-1.aws.cloud.dgraph.io"
+DEPLOYMENT_JWT="<deployment-jwt>"
+
+curl "https://${DEPLOYMENT_URL}/admin/slash" \
+  -H 'Content-Type: application/json' \
+  -H "X-Auth-Token: ${DEPLOYMENT_JWT}" \
+  --data-binary '{"query":"query($restoreId: Int!)
{\n restoreStatus(restoreId: $restoreId) {\n response {status errors}\n}\n}","variables":{"restoreId":1}}' \ + --compressed +``` + +```json +{ + "data": { + "restoreStatus": { + "response": { + "errors": [], + "status": "OK" + } + } + } +} +``` + + + +## Drop + +Drop (i.e., delete) all data in your backend. + +### Cloud Endpoint + +```bash +https://${DEPLOYMENT_URL}/admin/slash +``` + +### API Command + +#### Drop Data + +```graphql +mutation { + dropData(allData: true) { + response { + code + message + } + } +} +``` + +#### Drop Schema + +```graphql +mutation { + dropData(allDataAndSchema: true) { + response { + code + message + } + } +} +``` + +#### Drop Types + +```graphql +mutation ($types: [String!]) { + dropData(types: $types) { + response { + code + message + } + } +} +``` + +**Arguments** + +- `types`: string array containing type Names + +#### Drop Fields + +```graphql +mutation ($fields: [String!]) { + dropData(fields: $fields) { + response { + code + message + } + } +} +``` + +**Arguments** + +- `fields`: string array containing field Names + +### Example + + + +```bash +#!/usr/bin/env bash + +DEPLOYMENT_URL="polished-violet.us-east-1.aws.cloud.dgraph.io" +DEPLOYMENT_JWT="" + +curl "https://${DEPLOYMENT_URL}/admin/slash" \ + -H 'Content-Type: application/json' \ + -H "X-Auth-Token: ${DEPLOYMENT_JWT}" \ + --data-binary '{"query":"mutation {\n dropData(allDataAndSchema: true) {\n response { code message }\n}\n}","variables":{}}' \ + --compressed +``` + +```json +{ + "data": { + "dropData": { + "response": { + "code": "Success", + "message": "Done" + } + } + } +} +``` + + diff --git a/dgraph/reference/cloud/cloud-api/backup.mdx b/dgraph/reference/cloud/cloud-api/backup.mdx new file mode 100644 index 00000000..dbc4c363 --- /dev/null +++ b/dgraph/reference/cloud/cloud-api/backup.mdx @@ -0,0 +1,179 @@ +--- +title: Backup +--- + + + Backup feature is only available for Dedicated Instances. This feature is not + available for the Free and Shared Instances. + + +## Periodic Backups + +Periodic Backups are created at a given schedule that by default is: + +- Full Backup every week +- Incremental Backups every 4 hours + +You can trigger the Backup on-demand directly from your Dgraph Cloud Dashboard, +simply go to Admin>Setting>Backups and click on "Create Backup" button on the +top left. + +In case you would like to change your default Backup schedule please contact us +and we will be happy to set you up. + +## List Backups + +List all backups of the current backend. 
+ +### Cloud Endpoint + +```bash +https://${DEPLOYMENT_URL}/admin/slash +``` + +### API Command + +```graphql +query { + listBackups { + response { + type + backupNum + folder + timestamp + } + errors { + message + } + } +} +``` + +### Example + + + +```bash +#!/usr/bin/env bash + +DEPLOYMENT_URL="polished-violet.us-east-1.aws.cloud.dgraph.io" +DEPLOYMENT_JWT="" + +curl "https://${DEPLOYMENT_URL}/admin/slash" \ + -H 'Content-Type: application/json' \ + -H "X-Auth-Token: ${DEPLOYMENT_JWT}" \ + --data-binary '{"query":"{\n listBackups {\n response {\n type\n backupNum\n folder\n timestamp\n }, errors {\n message\n }\n} \n}","variables":{}}' \ + --compressed +``` + +```json +{ + "data": { + "listBackups": { + "errors": [], + "response": [ + [ + { + "backupNum": 1, + "folder": "2021-15", + "timestamp": "2021-04-15T18:00:58+0000", + "type": "full" + }, + { + "backupNum": 2, + "folder": "2021-15", + "timestamp": "2021-04-15T18:04:29+0000", + "type": "incremental" + } + ] + ] + } + } +} +``` + + + +## Export Data + +Export data from your backend. + +### Cloud Endpoint + +```bash +https://${DEPLOYMENT_URL}/admin/slash +``` + +### API Command + +```graphql +mutation { + export { + signedUrls + } +} +``` + +### Example + + + +```bash +#!/usr/bin/env bash + +DEPLOYMENT_URL="polished-violet.us-east-1.aws.cloud.dgraph.io" +DEPLOYMENT_JWT="" + +curl "https://${DEPLOYMENT_URL}/admin/slash" \ + -H 'Content-Type: application/json' \ + -H "X-Auth-Token: ${DEPLOYMENT_JWT}" \ + --data-binary '{"query":"mutation {\n export {\n signedUrls\n }\n }","variables":{}}' \ + --compressed +``` + +```json +{ + "data": { + "export": { + "signedUrls": ["", "", ""] + } + } +} +``` + + + +## Import Data + +Import your data back using Dgraph +[Live Loader](./cloud/admin/import-export.md#importing-data-with-live-loader) +(requires Docker). + +### Shell Command + +Live loader command (via Docker): + +```sh +docker run -it --rm -v /tmp/file:/tmp/g01.json.gz dgraph/dgraph:v21.03-slash \ + dgraph live --slash_grpc_endpoint=${DEPLOYMENT_URL} -f /tmp/g01.json.gz -t ${DEPLOYMENT_JWT} +``` + +### Example + + + +```bash +#!/usr/bin/env bash + +DEPLOYMENT_URL="lively-dream.grpc.us-east-1.aws.cloud.dgraph.io" +DEPLOYMENT_JWT="" + +docker run -it --rm -v /users/dgraph/downloads:/tmp dgraph/dgraph:v21.03-slash \ + dgraph live --slash_grpc_endpoint=${DEPLOYMENT_URL}:443 -f /tmp/1million.rdf.gz -t ${DEPLOYMENT_JWT} +``` + +```json + +``` + + diff --git a/dgraph/reference/cloud/cloud-api/index.mdx b/dgraph/reference/cloud/cloud-api/index.mdx new file mode 100644 index 00000000..d9e99a22 --- /dev/null +++ b/dgraph/reference/cloud/cloud-api/index.mdx @@ -0,0 +1,3 @@ +--- +title: Dgraph Cloud API +--- diff --git a/dgraph/reference/cloud/cloud-api/lambda.mdx b/dgraph/reference/cloud/cloud-api/lambda.mdx new file mode 100644 index 00000000..7a9ca3a8 --- /dev/null +++ b/dgraph/reference/cloud/cloud-api/lambda.mdx @@ -0,0 +1,247 @@ +--- +title: Lambda +--- + +# Introduction + +For Shared and Free backends, a Dgraph lambda script is uniquely identified by a +deployment id associated with the backend. You can identify your backend and the +associated deployment id by using the +[List Backends API](./backend#list-backends). In case of a Dedicated, and +multi-tenant backend, an additional key, the tenant id is also required to +uniquely identify a lambda script. This tenant id key is ignored when used in +the context of Shared and Free backends. + +As a first step, you will need to identify your backend. 
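+
+For example, the sketch below (which assumes the `jq` CLI is installed, and
+uses the same `deployments` query described in the next section) pulls out the
+deployment ID and tenant ID for each backend:
+
+```bash
+#!/usr/bin/env bash
+
+# <cerebro-jwt> is the JWT returned from Authentication.
+CEREBRO_JWT="<cerebro-jwt>"
+
+# List deployments and keep only the fields needed to identify a lambda.
+curl -s 'https://cerebro.cloud.dgraph.io/graphql' \
+  -H 'Content-Type: application/json' \
+  -H "Authorization: Bearer ${CEREBRO_JWT}" \
+  --data-binary '{"query":"{ deployments { uid name tenantID } }"}' |
+  jq '.data.deployments[] | {uid, name, tenantID}'
+```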
+ +## List Deployment and Get Lambda Script + +Use the [List Backends API](./backend#list-backends) to identify your backend, +as well as get the lambda script deployed. In order to list the backends, you +will need to pass a Bearer token as an `Authorization` header. This token is +generated by logging in via the [Login query](./authentication#authentication) +first. + +### Cloud Endpoint + +``` +https://cerebro.cloud.dgraph.io/graphql +``` + +### API Command + +```graphql +query { + deployments { + uid + name + zone + subdomain + url + tenantID + lambdaScript + } +} +``` + +**Arguments** + +None + +### Example + + + +```bash +#!/usr/bin/env bash + +CEREBRO_JWT="" + +curl "https://cerebro.cloud.dgraph.io/graphql" \ + -H 'Content-Type: application/json' \ + -H "Authorization: Bearer ${CEREBRO_JWT}" \ + --data-binary '{"query":"query{\n deployments {\n uid\n name\n zone\n subdomain\n url\n tenantID\n lambdaScript\n }\n}","variables":{}}' \ + --compressed +``` + +```json +{ + "data": { + "deployments": [ + { + "uid": "0x6238", + "name": "OrderJourney", + "zone": "us-west-2", + "subdomain": "vacuous-wash", + "url": "vacuous-wash.us-west-2.aws.cloud.dgraph.io", + "tenantID": 107, + "lambdaScript": "Ly8gWW91IGNhbiB0eXBlL3Bhc3RlIHlvdXIgc2NyaXB0IGhlcmUKY29uc3QgTmFtZVJlc29sdmVyID0gKHtwYXJlbnQ6IHtuYW1lfX0pID0+IGBNeSBuYW1lIGlzICR7bmFtZX0uYAoKc2VsZi5hZGRHcmFwaFFMUmVzb2x2ZXJzKHsKICAgICJQZXJzb24ubmFtZSI6IE5hbWVSZXNvbHZlcgp9KQ==" + } + ] + } +} +``` + + + +The `uid` field in the response of the query is to be used as the deployment id. +In the above example, the deployment id for the backend with name "OrderJourney" +is "0x6238". The field `lambdaScript` contains the lambda script in the form of +a Base64 encoded string. + +#### Decode the Base64 encoded `lambdaScript` + +In order to decode the Base64 encoded string into the actual lambda code, please +use the command as shown below. + +```bash +$ echo "Ly8gWW91IGNhbiB0eXBlL3Bhc3RlIHlvdXIgc2NyaXB0IGhlcmUKY29uc3QgTmFtZVJlc29sdmVyID0gKHtwYXJlbnQ6IHtuYW1lfX0pID0+IGBNeSBuYW1lIGlzICR7bmFtZX0uYAoKc2VsZi5hZGRHcmFwaFFMUmVzb2x2ZXJzKHsKICAgICJQZXJzb24ubmFtZSI6IE5hbWVSZXNvbHZlcgp9KQ==" | base64 -d +``` + +**Output** + +```js +// You can type/paste your script here +const NameResolver = ({ parent: { name } }) => `My name is ${name}.` + +self.addGraphQLResolvers({ + "Person.name": NameResolver, +}) +``` + +## Lambda Logs + +You can fetch the logs for your lambda by using the `getLambdaLogs` query. + +### Cloud Endpoint + +``` +https://cerebro.cloud.dgraph.io/graphql +``` + +### API Command + +```graphql +query GetLambdaLogs($lambdaLogsInput: LambdaLogsInput!) { + getLambdaLogs(input: $lambdaLogsInput) +} +``` + +**Arguments** + +- `lambdaLogsInput`: a LambdaLogsInput object +- `lambdaLogsInput.deploymentID`: the deployment UID returned from [List + Backends]((./backend#list-backends) +- `lambdaLogsInput.tenantID`: In case of a multi-tenant, and dedicated backend, + you will need to pass the tenant Id as well +- `lambdaLogsInput.start`: start time +- `lambdaLogsInput.end`: end time + +### Example + + + +```bash +#!/usr/bin/env bash + +CEREBRO_JWT="" + +curl "https://cerebro.cloud.dgraph.io/graphql" \ + -H 'Content-Type: application/json' \ + -H "Authorization: Bearer ${CEREBRO_JWT}" \ + --data-binary '{"query":"query GetLambdaLogs($input: LambdaLogsInput!) 
{\n  getLambdaLogs(input: $input)\n}","variables":{"input":{"deploymentID":"0xf0ffe9"}}}' \
+  --compressed
+```
+
+```json
+{
+  "data": {
+    "getLambdaLogs": [
+      "2021-04-16 19:03:54.009209524 +0000 UTC Server Listening on port 8686!",
+      "2021-04-16 19:03:54.202216548 +0000 UTC Server Listening on port 8686!",
+      "2021-04-16 19:03:54.51171317 +0000 UTC Server Listening on port 8686!",
+      "2021-04-16 19:03:54.707496343 +0000 UTC Server Listening on port 8686!"
+    ]
+  }
+}
+```
+
+
+
+## Update or Delete Lambda
+
+You can update or delete your lambda by using the `updateLambda` mutation. You
+will need to pass a deployment ID to uniquely identify your lambda. If your
+backend is multi-tenant, you will need to pass the tenant ID as well.
+
+To update your lambda, convert your lambda script into a Base64-encoded string
+and send it as the `lambdaScript` argument. To delete your lambda, send an
+empty string in the `lambdaScript` argument.
+
+### Cloud Endpoint
+
+```
+https://cerebro.cloud.dgraph.io/graphql
+```
+
+### API Command
+
+```graphql
+mutation updateLambda($input: UpdateLambdaInput!) {
+  updateLambda(input: $input)
+}
+```
+
+**Arguments**
+
+- `updateLambdaInput`: an UpdateLambdaInput object
+- `updateLambdaInput.deploymentID`: the deployment UID returned from
+  [List Backends](./backend#list-backends)
+- `updateLambdaInput.tenantID`: the tenant ID if your backend is a dedicated,
+  multi-tenant backend; for any other type of backend, you can leave it at the
+  default value of `0`
+- `updateLambdaInput.lambdaScript`: the Base64-encoded JavaScript string
+  containing your [Lambda Resolver](./lambda-overview)
+
+### Example
+
+1. Create your [Lambda Resolver](./lambda-overview) script
+
+```js
+//your lambda resolver
+```
+
+2. Base64-encode your script
+
+```bash
+$ echo "//your lambda resolver" | base64
+
+Ly95b3VyIGxhbWJkYSByZXNvbHZlcgo=
+```
+
+3. Send the cURL request
+
+
+
+```bash
+#!/usr/bin/env bash
+
+CEREBRO_JWT="<cerebro-jwt>"
+
+curl "https://cerebro.cloud.dgraph.io/graphql" \
+  -H 'Content-Type: application/json' \
+  -H "Authorization: Bearer ${CEREBRO_JWT}" \
+  --data-binary '{"query":"mutation updateLambda($input: UpdateLambdaInput!){\n updateLambda(input: $input)\n}","variables":{"input":{"deploymentID":"0x6238","tenantID":0,"lambdaScript":"Ly95b3VyIGxhbWJkYSByZXNvbHZlcgo="}}}' \
+  --compressed
+```
+
+```json
+{
+  "data": {
+    "updateLambda": "Successfully added the lambda function"
+  }
+}
+```
+
+
diff --git a/dgraph/reference/cloud/cloud-api/overview.mdx b/dgraph/reference/cloud/cloud-api/overview.mdx
new file mode 100644
index 00000000..e82381f6
--- /dev/null
+++ b/dgraph/reference/cloud/cloud-api/overview.mdx
@@ -0,0 +1,69 @@
+---
+title: Overview
+---
+
+Dgraph Cloud now includes an API so you can programmatically launch and manage
+your Cloud backends.
+
+The majority of these APIs use `https://cerebro.cloud.dgraph.io/graphql` as
+the primary endpoint, and will require you to log in with a username and
+password. Please see [Authentication](./authentication) for instructions on
+generating a JWT token.
+
+### Commands
+
+Please see the following topics:
+
+- [Authentication](./authentication) describes how to authenticate with the
+  Dgraph Cloud API.
+- [Backend](./backend) lists commands related to backends.
+- [Backup](./backup) lists commands related to backups.
+- [Lambda](./lambda) lists commands related to lambdas.
+- [Schema](./schema) lists commands related to schemas.
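+
+As a quick orientation, the individual commands above can also be chained
+together from a shell. The sketch below (which assumes the `jq` CLI and
+placeholder credentials) logs in, captures the JWT, and then lists your
+backends with it:
+
+```bash
+#!/usr/bin/env bash
+
+# Log in and extract the JWT from the response.
+TOKEN=$(curl -s 'https://cerebro.cloud.dgraph.io/graphql' \
+  -H 'Content-Type: application/json' \
+  --data-binary '{"query":"query Login($email: String!, $password: String!) { login(email: $email, password: $password) { token } }","variables":{"email":"<your-email>","password":"<your-password>"}}' |
+  jq -r '.data.login.token')
+
+# Use the JWT as a bearer token on a follow-up API call.
+curl -s 'https://cerebro.cloud.dgraph.io/graphql' \
+  -H 'Content-Type: application/json' \
+  -H "Authorization: Bearer ${TOKEN}" \
+  --data-binary '{"query":"{ deployments { uid name url } }"}'
+```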
+ +## Understanding Headers used across the APIs + +Dgraph Cloud has two layers of security: Dgraph Cloud Authentication and Dgraph +Access Control Lists Authentication. The following section introduces the usage +of the headers involved in the authentication process. These include the +`Dg-Auth`, `X-Auth-Token`, `Authorization`, and `X-Dgraph-AccessToken` headers. + +### Dgraph Cloud Authentication + +The `Dg-Auth` or `X-Auth-Token` headers are for Dgraph Cloud’s API key +authentication (where you pass in any API key you would generate from the +["API Keys" tab](https://cloud.dgraph.io/_/settings?tab=api-keys) on the +Settings page). The API key passed can be one of two kinds: Admin API key or +Client API key. The tokens generated from the admin API grant access to the +`/admin` or `/admin/slash` endpoints to perform schema alterations and similar +operations. The tokens generated via the Client API key provides access to the +`/graphql` endpoint to run GraphQL queries and mutations. + +Dgraph Cloud also offers the Dgraph Cloud API, hosted at +[this endpoint](https://cerebro.cloud.dgraph.io/graphql), that helps to automate +tasks such as deployment of a lambda. In order to use this API, users need to +pass an `Authorization` header. In order to generate this header, the user must +first [Authenticate](./authentication) and generate a token. The token is then +set in `Authorization` header as a Bearer token (e.g. `Bearer {token}`). + + + The `Dg-Auth`, `X-Auth-Token` and the `Authorization` headers are relevant to + all types of backends, including Free, Shared, and Dedicated Backends. + + +### Dgraph Access Control Lists Authentication + +The `X-Dgraph-AccessToken` header is used for accessing backends using Dgraph’s +Access Control Lists or the Multitenancy feature. This lets you pass in an +access JWT generated via a login mutation for a Dgraph user from the access +control list permissions and/or log into a specific namespace with +multi-tenancy. The Login mutation relevant for ACL is documented +[here](https://dgraph.io/docs/enterprise-features/access-control-lists/#logging-in). + +If you’re using ACLs or multitenancy, then you’ll need to set the +`X-Dgraph-AccessToken` with a JWT token to access your backend. + + + The `X-Dgraph-AccessToken` header is relevant only for the Dedicated backends. + Users with Free or Shared backends can ignore this header. + diff --git a/dgraph/reference/cloud/cloud-api/schema.mdx b/dgraph/reference/cloud/cloud-api/schema.mdx new file mode 100644 index 00000000..f0df69ea --- /dev/null +++ b/dgraph/reference/cloud/cloud-api/schema.mdx @@ -0,0 +1,146 @@ +--- +title: Schema +--- + +## Get Schema + +Fetch the schema from your backend. + +### Cloud Endpoint + +```bash +https://${DEPLOYMENT_URL}/admin +``` + +### API Command + +```graphql +{ + getGQLSchema { + schema + generatedSchema + } +} +``` + +### Example + + + +```bash +#!/usr/bin/env bash + +DEPLOYMENT_URL="polished-violet.us-east-1.aws.cloud.dgraph.io" +DEPLOYMENT_JWT="" + +curl "https://${DEPLOYMENT_URL}/admin" \ + -H "Content-Type: application/json" \ + -H "X-Auth-Token: ${DEPLOYMENT_JWT}" \ + --data-binary '{"query":"{\n getGQLSchema {\n schema\n generatedSchema\n }\n}","variables":{}}' \ + --compressed +``` + +```json +{ + "data": { + "getGQLSchema": { + "schema": "type Person { name: String! 
}", + "generatedSchema": "" + } + }, + "extensions": { + "tracing": { + "version": 1, + "startTime": "2021-04-15T19:58:33.412544782Z", + "endTime": "2021-04-15T19:58:33.412851891Z", + "duration": 307129, + "execution": { + "resolvers": [ + { + "path": ["getGQLSchema"], + "parentType": "Query", + "fieldName": "getGQLSchema", + "returnType": "GQLSchema", + "startOffset": 115909, + "duration": 159961, + "dgraph": [ + { + "label": "query", + "startOffset": 118110, + "duration": 53165 + } + ] + } + ] + } + } + } +} +``` + + + +## Update Schema + +Update the schema in your backend. + +### Cloud Endpoint + +```bash +https://${DEPLOYMENT_URL}/admin +``` + +### API Command + +```graphql +mutation ($schema: String!) { + updateGQLSchema(input: { set: { schema: $schema } }) { + gqlSchema { + schema + } + } +} +``` + +**Arguments** + +- `schema`: your desired schema string in GraphQL format + +### Example + + + +```bash +#!/usr/bin/env bash + +DEPLOYMENT_URL="polished-violet.us-east-1.aws.cloud.dgraph.io" +DEPLOYMENT_JWT="" + +curl "https://${DEPLOYMENT_URL}/admin" \ + -H "Content-Type: application/json" \ + -H "X-Auth-Token: ${DEPLOYMENT_JWT}" \ + --data-binary '{"query":"mutation($sch: String!) {\n updateGQLSchema(input: { set: { schema: $sch } })\n {\n gqlSchema {\n schema\n }\n }\n}","variables":{"sch": "type Person { name: String! }"}}' \ + --compressed +``` + +```json +{ + "data": { + "updateGQLSchema": { + "gqlSchema": { + "schema": "type Person { name: String! }" + } + } + }, + "extensions": { + "tracing": { + "version": 1, + "startTime": "2021-04-15T19:53:16.283198298Z", + "endTime": "2021-04-15T19:53:16.286478152Z", + "duration": 3279886 + } + } +} +``` + + diff --git a/dgraph/reference/cloud/cloud-multitenancy.mdx b/dgraph/reference/cloud/cloud-multitenancy.mdx new file mode 100644 index 00000000..96f8e087 --- /dev/null +++ b/dgraph/reference/cloud/cloud-multitenancy.mdx @@ -0,0 +1,83 @@ +--- +title: Multi-tenancy in Dgraph Cloud +--- + +Multi-Tenancy in Dgraph cloud is a dedicated cluster feature. It is not +supported in free/shared clusters. Multi-tenancy is built upon Access Control +Lists (ACL), and enables multiple tenants to share a Dgraph cluster using unique +namespaces. The tenants are logically separated, and their data lies in the same +p directory. Each namespace has a group +[guardian(admin)](https://dgraph.io/docs/enterprise-features/multitenancy/#guardians-of-the-galaxy), +which has root access to that namespace. +[Read more about Multi-Tenancy in Dgraph](https://dgraph.io/docs/enterprise-features/multitenancy/) + +### Enabling Multi-Tenancy in Dgraph Cloud + +In order to enable multi-tenancy you need to do the following - + +1. **Enable ACL** + + - If you are launching a dedicated cluster for the first time, you need to + click on the checkbox for ACLs under `Additional Settings`. + - For existing dedicated cluster, you need to go to Settings>Modify Backend + and click on the checkbox for ACLs under `Additional Settings`. Note - This + restarts your backend hence causing some downtime. + ![ACL](/images/cloud/multitenancy/acl.png) + +2. **Create namespaces** + + - Go to `Namespaces` page under `Admin` section on the sidebar to the left. + - Click on `Create New` button, enter the description of the namespace and + click `Create`. + - You can view the created namespaces on the `Namespaces` page. + - Please note that Tenant 0 cannot be deleted. + ![namespacepage](/images/cloud/multitenancy/namespacepage.png) + +3. 
**Toggle namespaces**
+   - You should now see a namespace select box next to the backend select box
+     on the navbar. Use it to switch between namespaces.
+   - Each namespace has its own schema, ACLs, and lambdas, as well as its own
+     view of Dgraph Studio, the API Explorer, and the DQL page.
+   - After switching namespaces, you can view or update that namespace's
+     schema, and run queries and mutations against it.
+     ![namespaces](/images/cloud/multitenancy/namespaces.png)
+
+### Accessing namespaces via client
+
+Namespaces are protected via ACLs. You need to create a user with a username &
+password in the namespace. This can be done using the `ACLs` page under the
+`Admin` section on the sidebar to the left.
+
+**Example: giving a user read access to a namespace**
+
+1. First, create an ACL group and select all predicates that you want to
+   grant read access to. ![group](/images/cloud/multitenancy/group.png)
+2. Next, create a user with access to the created group. While creating the
+   user, you will be asked to provide a username & password. Keep these handy.
+   ![user](/images/cloud/multitenancy/user.png)
+3. Go to the API Explorer and use the login mutation to fetch the API access
+   token. Click on the admin radio button at the top to query the admin
+   endpoint, then use the mutation shown below to get the access token.
+   (Note: the namespace ID can be found on the namespace page.)
+
+   ```
+   mutation MyMutation {
+     login(namespace: 1, password: "password", userId: "userID") {
+       response {
+         accessJWT
+         refreshJWT
+       }
+     }
+   }
+   ```
+
+4. Pass the resulting access token in the `X-Dgraph-AccessToken` header.
+5. On the client side, you will need to run the above mutation
+   programmatically to generate the access token for your namespace.
+6. If you are using a [Dgraph client](./dql/clients), you need to set the
+   username & password, and the client handles fetching the token & refresh
+   logic for you. Note: most Dgraph clients have a dedicated method to log in
+   to a specific namespace.
diff --git a/dgraph/reference/cloud/index.mdx b/dgraph/reference/cloud/index.mdx
new file mode 100644
index 00000000..720e26e8
--- /dev/null
+++ b/dgraph/reference/cloud/index.mdx
@@ -0,0 +1,3 @@
+---
+title: Dgraph Cloud
+---
diff --git a/dgraph/reference/cloud/introduction.mdx b/dgraph/reference/cloud/introduction.mdx
new file mode 100644
index 00000000..3d349943
--- /dev/null
+++ b/dgraph/reference/cloud/introduction.mdx
@@ -0,0 +1,99 @@
+---
+title: Dgraph Cloud Overview
+---
+
+## Dgraph
+
+Designed from day one to be distributed for scale and speed, **Dgraph** is the
+native Graph database with native GraphQL support. It is open-source,
+scalable, distributed, highly-available, and lightning fast.
+
+Dgraph is different from other graph databases in a number of ways, including:
+
+- **Distributed Scale**:   _Built from day 1 to be distributed, to handle
+  larger data sets._
+
+- **GraphQL Support**:   _GraphQL is built in to make data access simple
+  and standards-compliant.
Unlike most GraphQL solutions, no resolvers are
+  needed - Dgraph resolves queries automatically through graph navigation._
+
+- **Fully Transactional and ACID Compliant**:   _Dgraph satisfies demanding
+  OLTP workloads that require frequent inserts and updates._
+
+- **Language support & Text Search**:   _Full-text searching is included,
+  and strings can be expressed in multiple languages._
+
+- **Geolocation data and geo queries**:   _Dgraph can store points and
+  shapes, and queries can use the near, within, contains, and intersects geo
+  functions._
+
+More details at [Dgraph Database Overview](./dgraph-overview).
+
+## Dgraph Cloud
+
+Dgraph Cloud gives you the power of Dgraph database, including performance,
+high availability, horizontal scalability, and support for GraphQL for rapid
+application development, in a hosted and fully-managed environment. Dgraph
+Cloud lets you focus on building apps, not managing infrastructure.
+
+### Dgraph Cloud Cluster Types
+
+- **Shared Instance**: Dgraph Cloud with
+  [shared instances](https://cloud.dgraph.io/pricing?type=shared) is a fast and
+  easy way to get started with GraphQL, and does not require any graph database
+  knowledge to start and run. Shared instances run in a common database using
+  Dgraph multi-tenancy. Your data is protected but you share resources and will
+  have limited scale.
+
+- **Dedicated instances** run on their own dedicated hardware to ensure
+  consistent performance. This option extends the capabilities of the
+  lower-cost shared instances to support enterprise, production workloads, and
+  includes a high availability option.
+
+## Key features
+
+| Feature | Notes |
+| :------ | :---- |
+| Production-ready | Dgraph Cloud is built to meet the needs of your business as it grows with built-in authorization, encryption at rest, TLS, incremental backups, and more. |
+| Scale and expand your app without rebuilding your backend | Dgraph Cloud stores and distributes data to optimize the execution of GraphQL traversals, joins, and retrievals. Dgraph natively parses and executes GraphQL, achieving great performance and the ability to scale horizontally up to terabytes of data. |
+| A high-performance, graph-first database | Dgraph Cloud runs Dgraph database, which is built to support the needs of modern apps with lightning-fast queries at any depth. |
+| Custom logic | Use JavaScript to add programmatic custom logic on the backend, adding power to your apps without sacrificing client-side performance. |
+| Power your app with native GraphQL support | Dgraph is built for graph data, so you don’t need to configure and maintain a cumbersome GraphQL layer over a traditional relational database. |
+| Evolve your schema without downtime | When it comes time to deploy a new schema, you can do that in seconds, not hours. |
+| GraphQL-based authorization and management | GraphQL is used throughout Dgraph Cloud, so you don’t need to use another tool or learn another syntax to handle user authorization or database administration tasks such as schema changes and data exports. |
+| Work with the Open Source ecosystem | Because Dgraph is open-source, your app relies on a codebase that you can contribute to, not an opaque “black box”. |
+
+## Next steps
+
+To learn more about how Dgraph Cloud makes it easier to develop apps, create a
+trial account at [Dgraph Cloud](https://cloud.dgraph.io) and try the
+[Introduction to GraphQL](https://dgraph.io/tour/graphqlintro/2/) tutorial to
+define a GraphQL schema, and insert and query data in just a few minutes.
+
+## Recommended Reading
+
+Please see the following topics to learn more about how to use Dgraph Cloud:
+
+- [Administering your Backend](./admin/_index) covers topics such as how to
+  programmatically set your schema, and import or export your data.
+  - [Authentication](./admin/authentication) will guide you in creating an API
+    token. Since all admin APIs require an auth token, this is a good place to
+    start.
+  - [Schema](./admin/schema) describes how to programmatically query and
+    update your GraphQL schema.
+  - [Importing and Exporting Data](./admin/import-export) is a guide to
+    exporting your data from a Dgraph Cloud backend and importing it into
+    another cluster.
+  - [Dropping Data](./admin/drop-data) will guide you through dropping all
+    data from your Dgraph Cloud backend.
+  - [Switching Schema Modes](./admin/schema-modes) will guide you through
+    changing your Dgraph Cloud schema mode.
+- [Dgraph Cloud API](./cloud-api/overview): Dgraph Cloud now includes an API
+  so you can programmatically manage your backends.
+- [Schema](./cloud-api/schema) lists commands related to schema.
+
+You might also be interested in:
+
+- [Dgraph GraphQL Schema Reference](./graphql/schema), which lists all the
+  types and directives supported by Dgraph
+- [Dgraph GraphQL API Reference](./graphql-clients), which serves as a guide
+  to using your new `/graphql` endpoint
diff --git a/dgraph/reference/cloud/migrating-from-hosted-dgraph.mdx b/dgraph/reference/cloud/migrating-from-hosted-dgraph.mdx
new file mode 100644
index 00000000..fde93a65
--- /dev/null
+++ b/dgraph/reference/cloud/migrating-from-hosted-dgraph.mdx
@@ -0,0 +1,14 @@
+---
+title: Migrating from Self-Managed Dgraph
+---
+
+Dgraph Cloud is compatible with the majority of Dgraph features, so you can
+easily migrate your existing Dgraph-powered app over to Dgraph Cloud.
+
+### To migrate data from self-managed Dgraph to Dgraph Cloud
+
+1. Create a new backend. You can do this using the Dgraph Cloud interface.
+2. (optional) Switch your backend to
+   [flexible mode](/admin/schema-modes#flexible-mode).
+3. Connect to your backend with your favorite client. To learn more, see
+   [Connecting from Dgraph Clients](/advanced-queries#connecting-from-dgraph-clients).
diff --git a/dgraph/reference/cloud/provision-backend.mdx b/dgraph/reference/cloud/provision-backend.mdx
new file mode 100644
index 00000000..c5fcc9af
--- /dev/null
+++ b/dgraph/reference/cloud/provision-backend.mdx
@@ -0,0 +1,37 @@
+---
+title: Provision a backend
+---
+
+### Before you begin
+
+Log in to [Dgraph Cloud](https://cloud.dgraph.io) using **Sign in with
+Google**, **Sign in with GitHub**, or any other email account that you prefer
+to use.
+
+### Provision a backend
+
+1. After you have signed up and verified your email, log in to
+   [Dgraph Cloud](https://cloud.dgraph.io/) and you'll arrive at the dashboard
+   screen.
+
+   ![first login empty
+dashboard](https://graphql.dgraph.io/images/cloud/dgraph-cloud-empty-dashboard.png)
+
+2. Click the **Launch New Backend** button and you'll be taken to a screen to
+   enter the details of the backend.
+ +Name the backend, optionally set a subdomain (if left blank, Dgraph Cloud picks +a random domain for you), and then pick a region to deploy you GraphQL backend +to. + +![launch a backend](https://graphql.dgraph.io/images/cloud/dgraph-cloud-launch-backend.png) + +3. click **Launch** and your backend will be deployed in a few seconds. + +![Dgraph Cloud console](https://graphql.dgraph.io/images/cloud/dgraph-cloud-backend-live.png) + +That's it! You now have a running Dgraph backend. Time to build an app. + +The URL listed in "GraphQL Endpoint" is the URL at which Dgraph Cloud will serve +data to your app. + +You can copy it at any time to use in a GraphQL client application. diff --git a/dgraph/reference/deploy/admin/data-compression.mdx b/dgraph/reference/deploy/admin/data-compression.mdx new file mode 100644 index 00000000..dece16b2 --- /dev/null +++ b/dgraph/reference/deploy/admin/data-compression.mdx @@ -0,0 +1,40 @@ +--- +title: Data compression on Disk +--- + +Dgraph Alpha lets you configure the compression of data on disk using the +`--badger` superflag's `compression` option. You can choose between the +[Snappy](https://github.com/golang/snappy) and +[Zstandard](https://github.com/facebook/zstd) compression algorithms, or choose +not to compress data on disk. + + + This option replaces the `--badger.compression_level` and + `--badger.compression` options used in earlier Dgraph versions. + + +The following disk compression settings are available: + +| Setting | Notes | +| ------------ | -------------------------------------------------------------------- | +| `none` | Data on disk will not be compressed. | +| `zstd:level` | Use Zstandard compression, with a compression level specified (1-3). | +| `snappy` | Use Snappy compression (this is the default value). | + +For example, you could choose to use Zstandard compression with the highest +compression level using the following command: + +```sh +dgraph alpha --badger compression=zstd:3 +``` + +This compression setting (Zstandard, level 3) is more CPU-intensive than other +options, but offers the highest compression ratio. To change back to the default +compression setting, use the following command: + +```sh +dgraph alpha --badger compression=snappy +``` + +Using this compression setting (Snappy) provides a good compromise between the +need for a high compression ratio and efficient CPU usage. diff --git a/dgraph/reference/deploy/admin/dgraph-administration.mdx b/dgraph/reference/deploy/admin/dgraph-administration.mdx new file mode 100644 index 00000000..ad4b6867 --- /dev/null +++ b/dgraph/reference/deploy/admin/dgraph-administration.mdx @@ -0,0 +1,319 @@ +--- +title: Dgraph Administration +--- + +Each Dgraph Alpha exposes various administrative (admin) endpoints both over +HTTP and GraphQL, for example endpoints to export data and to perform a clean +shutdown. All such admin endpoints are protected by three layers of +authentication: + +1. IP White-listing (use the `--security` superflag's `whitelist` option on + Dgraph Alpha to whitelist IP addresses other than localhost). +2. Poor-man's auth, if Dgraph Alpha is started with the `--security` superflag's + `token` option, then you should pass the token as an `X-Dgraph-AuthToken` + header while making the HTTP request. +3. Guardian-only access, if ACL is enabled. In this case you should pass the + ACL-JWT of a Guardian user using the `X-Dgraph-AccessToken` header while + making the HTTP request. + +An admin endpoint is any HTTP endpoint which provides admin functionality. 
Admin +endpoints usually start with the `/admin` path. The current list of admin +endpoints includes the following: + +- `/admin` +- `/admin/config/cache_mb` +- `/admin/draining` +- `/admin/shutdown` +- `/admin/schema` +- `/admin/schema/validate` +- `/alter` +- `/login` + +There are a few exceptions to the general rule described above: + +- `/login`: This endpoint logs-in an ACL user, and provides them with a JWT. + Only IP Whitelisting and Poor-man's auth checks are performed for this + endpoint. +- `/admin`: This endpoint provides GraphQL queries/mutations corresponding to + the HTTP admin endpoints. All of the queries/mutations on `/admin` have all + three layers of authentication, except for `login (mutation)`, which has the + same behavior as the above HTTP `/login` endpoint. + +## Whitelisting Admin Operations + +By default, admin operations can only be initiated from the machine on which the +Dgraph Alpha runs. + +You can use the `--security` superflag's `whitelist` option to specify a +comma-separated whitelist of IP addresses, IP ranges, CIDR ranges, or hostnames +for hosts from which admin operations can be initiated. + +**IP Address** + +```sh +dgraph alpha --security whitelist=127.0.0.1 ... +``` + +This would allow admin operations from hosts with IP 127.0.0.1 (i.e., localhost +only). + +**IP Range** + +```sh +dgraph alpha --security whitelist=172.17.0.0:172.20.0.0,192.168.1.1 ... +``` + +This would allow admin operations from hosts with IP between `172.17.0.0` and +`172.20.0.0` along with the server which has IP address as `192.168.1.1`. + +**CIDR Range** + +```sh +dgraph alpha --security whitelist=172.17.0.0/16,172.18.0.0/15,172.20.0.0/32,192.168.1.1/32 ... +``` + +This would allow admin operations from hosts that matches the CIDR range +`172.17.0.0/16`, `172.18.0.0/15`, `172.20.0.0/32`, or `192.168.1.1/32` (the same +range as the IP Range example). + +You can set whitelist IP to `0.0.0.0/0` to whitelist all IP addresses. + +**Hostname** + +```sh +dgraph alpha --security whitelist=admin-bastion,host.docker.internal ... +``` + +This would allow admin operations from hosts with hostnames `admin-bastion` and +`host.docker.internal`. + +## Restrict Mutation Operations + +By default, you can perform mutation operations for any predicate. If the +predicate in mutation doesn't exist in the schema, the predicate gets added to +the schema with an appropriate [Dgraph Type](./dql-schema). + +You can use `--limit "mutations=disallow"` to disable all mutations, which is +set to `allow` by default. + +```sh +dgraph alpha --limit "mutations=disallow;" +``` + +Enforce a strict schema by setting `--limit "mutations=strict`. This mode allows +mutations only on predicates already in the schema. Before performing a mutation +on a predicate that doesn't exist in the schema, you need to perform an alter +operation with that predicate and its schema type. + +```sh +dgraph alpha --limit "mutations=strict; mutations-nquad=1000000" +``` + +## Secure Alter Operations + +Clients can use alter operations to apply schema updates and drop particular or +all predicates from the database. By default, all clients are allowed to perform +alter operations. You can configure Dgraph to only allow alter operations when +the client provides a specific token. You can use this "Simple ACL" token to +prevent clients from making unintended or accidental schema updates or predicate +drops. + +You can specify the auth token with the `--security` superflag's `token` option +for each Dgraph Alpha in the cluster. 
Clients must include the same auth token +to make alter requests. + +```sh +$ dgraph alpha --security token= +``` + +```sh +$ curl -s localhost:8080/alter -d '{ "drop_all": true }' +# Permission denied. No token provided. +``` + +```sh +$ curl -s -H 'X-Dgraph-AuthToken: ' localhost:8080/alter -d '{ "drop_all": true }' +# Permission denied. Incorrect token. +``` + +```sh +$ curl -H 'X-Dgraph-AuthToken: ' localhost:8080/alter -d '{ "drop_all": true }' +# Success. Token matches. +``` + + + To fully secure alter operations in the cluster, the authentication token must + be set for every Alpha node. + + +## Export database + +As an `Administrator` you might want to export data from Dgraph to: + +- backup your data +- move the data from Dgraph Cloud instance to another Dgraph instance, or Dgraph + Cloud instance +- share your data + +For more information about exporting your database, see +[Export data](./howto/exportdata/about-export) + +## Shut down database + +A clean exit of a single Dgraph node is initiated by running the following +GraphQL mutation on /admin endpoint. + + + This won't work if called from outside the server where Dgraph is running. You + can specify a list or range of whitelisted IP addresses from which shutdown or + other admin operations can be initiated using the `--security` superflag's + `whitelist` option on `dgraph alpha`. + + +```graphql +mutation { + shutdown { + response { + message + code + } + } +} +``` + +This stops the Alpha on which the command is executed and not the entire +cluster. + +## Delete database + +To drop all data, you could send a `DropAll` request via `/alter` endpoint. + +Alternatively, you could: + +- [Shutdown Dgraph](./#shut-down-database) and wait for all writes to complete, +- Delete (maybe do an export first) the `p` and `w` directories, then +- Restart Dgraph. + +## Upgrade database + +Doing periodic exports is always a good idea. This is particularly useful if you +wish to upgrade Dgraph or reconfigure the sharding of a cluster. The following +are the right steps to safely export and restart. + +1. Start an [export](./#export-database) +2. Ensure it is successful +3. [Shutdown Dgraph](./#shut-down-database) and wait for all writes to complete +4. Start a new Dgraph cluster using new data directories (this can be done by + passing empty directories to the options `-p` and `-w` for Alphas and `-w` + for Zeros) +5. Reload the data via [bulk loader](./bulk-loader) +6. Verify the correctness of the new Dgraph cluster. If all looks good, you can + delete the old directories (export serves as an insurance) + +These steps are necessary because Dgraph's underlying data format could have +changed, and reloading the export avoids encoding incompatibilities. + +Blue-green deployment is a common approach to minimize downtime during the +upgrade process. This approach involves switching your application to read-only +mode. To make sure that no mutations are executed during the maintenance window +you can do a rolling restart of all your Alpha using the option +`--mutations disallow` when you restart the Alpha nodes. This will ensure the +cluster is in read-only mode. + +At this point your application can still read from the old cluster and you can +perform the steps 4. and 5. described above. When the new cluster (that uses the +upgraded version of Dgraph) is up and running, you can point your application to +it, and shutdown the old cluster. + +### Upgrade from v1.2.2 to v20.03.0 for Enterprise customers + +{/* TODO: Redirect(s) */} + +1. 
Use [binary backup](./enterprise-features/binary-backups.md) to export data + from old cluster +2. Ensure it is successful +3. [Shutdown Dgraph](./#shut-down-database) and wait for all writes to complete +4. Upgrade `dgraph` binary to `v20.03.0` +5. [Restore](/enterprise-features/binary-backups.md#restore-from-backup) from + the backups using upgraded `dgraph` binary +6. Start a new Dgraph cluster using the restored data directories +7. Upgrade ACL data using the following command: + +```sh +dgraph upgrade --acl -a localhost:9080 -u groot -p password +``` + +### Upgrade from v20.03.0 to v20.07.0 for Enterprise customers + +1. Use [binary backup](./enterprise-features/binary-backups.md) to export data + from old cluster +2. Ensure it is successful +3. [Shutdown Dgraph](./#shut-down-database) and wait for all writes to complete +4. Upgrade `dgraph` binary to `v20.07.0` +5. [Restore](/enterprise-features/binary-backups.md#restore-from-backup) from + the backups using upgraded `dgraph` binary +6. Start a new Dgraph cluster using the restored data directories +7. Upgrade ACL data using the following command: + + ```sh + dgraph upgrade --acl -a localhost:9080 -u groot -p password -f v20.03.0 -t v20.07.0 + ``` + + This is required because previously the type-names `User`, `Group` and `Rule` + were used by ACL. They have now been renamed as `dgraph.type.User`, + `dgraph.type.Group` and `dgraph.type.Rule`, to keep them in Dgraph's internal + namespace. This upgrade just changes the type-names for the ACL nodes to the + new type-names. + + You can use `--dry-run` option in `dgraph upgrade` command to see a dry run + of what the upgrade command will do. + +8. If you have types or predicates in your schema whose names start with + `dgraph.`, then you would need to manually alter schema to change their names + to something else which isn't prefixed with `dgraph.`, and also do mutations + to change the value of `dgraph.type` edge to the new type name and copy data + from old predicate name to new predicate name for all the nodes which are + affected. Then, you can drop the old types and predicates from DB. + +### Upgrade from v20.11.0 to v21.03.0 for Enterprise customers + +1. Use [binary backup](./enterprise-features/binary-backups.md) to export data + from the old cluster +2. Ensure it is successful +3. [Shutdown Dgraph](./#shut-down-database) and wait for all writes to complete +4. Upgrade `dgraph` binary to `v21.03.0` +5. [Restore](/enterprise-features/binary-backups.md#restore-from-backup) from + the backups using the upgraded `dgraph` binary +6. Start a new Dgraph cluster using the restored data directories +7. Upgrade the CORS and persisted queries. To upgrade an ACL cluster use: + + ```sh + dgraph upgrade --from v20.11.0 --to v21.03.0 --user groot --password password --alpha http://localhost:9080 --alpha-http http://localhost:8080 --deleteOld + ``` + + To upgrade a non-ACL cluster use: + + ```sh + dgraph upgrade --from v20.11.0 --to v21.03.0 --alpha http://localhost:9080 --alpha-http http://localhost:8080 --deleteOld + ``` + + This is required because previously CORS information was stored in + `dgraph.cors` predicate which has now been moved to be a part of the GraphQL + schema. Also, the format of persisted queries has changed. Some of the + internal deprecated predicates will be removed by this change. + + You can use `--dry-run` option in `dgraph upgrade` command to see a dry run + of what the upgrade command will do. 
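+
+   For example, a dry run of the non-ACL upgrade above might look like the
+   following sketch (adjust the endpoints to match your cluster):
+
+   ```sh
+   # Preview the CORS and persisted-query migration without applying it.
+   dgraph upgrade --from v20.11.0 --to v21.03.0 \
+     --alpha http://localhost:9080 --alpha-http http://localhost:8080 \
+     --dry-run
+   ```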
+
+
+  The above steps are valid for migration from a cluster in `v20.11` to a
+  single-tenant cluster in `v21.03`, as backup and restore are cluster-wide
+  operations and a single namespace cannot be restored in a multi-tenant
+  cluster.
+
+
+## Post Installation
+
+Now that Dgraph is up and running, to understand how to add and query data in
+Dgraph, follow the [Query Language Spec](/query-language). Also, have a look at
+the [Frequently asked questions](/faq).
diff --git a/dgraph/reference/deploy/admin/index.mdx b/dgraph/reference/deploy/admin/index.mdx
new file mode 100644
index 00000000..9f7ab9b1
--- /dev/null
+++ b/dgraph/reference/deploy/admin/index.mdx
@@ -0,0 +1,3 @@
+---
+title: Administration
+---
diff --git a/dgraph/reference/deploy/admin/log-format.mdx b/dgraph/reference/deploy/admin/log-format.mdx
new file mode 100644
index 00000000..d23b279b
--- /dev/null
+++ b/dgraph/reference/deploy/admin/log-format.mdx
@@ -0,0 +1,130 @@
+---
+title: Logging
+description:
+  Dgraph logs requests for queries and mutations, and also provides audit
+  logging capabilities with a Dgraph Enterprise license
+---
+
+Dgraph logs requests for queries and mutations, and also provides audit logging
+capabilities with a Dgraph [enterprise license](/enterprise-features/license).
+
+Dgraph's log format comes from the glog library and is
+[formatted](https://github.com/golang/glog/blob/23def4e6c14b4da8ac2ed8007337bc5eb5007998/glog.go#L523-L533)
+as follows:
+
+```
+Lmmdd hh:mm:ss.uuuuuu threadid file:line] msg...
+```
+
+The fields shown above are defined as follows:
+
+| Field             | Definition                                                           |
+| ----------------- | -------------------------------------------------------------------- |
+| `L`               | A single character, representing the log level (e.g., 'I' for INFO)  |
+| `mm`              | Month (zero-padded; e.g., May is '05')                               |
+| `dd`              | Day (zero-padded)                                                    |
+| `hh:mm:ss.uuuuuu` | Time in hours, minutes and fractional seconds                        |
+| `threadid`        | Space-padded thread ID as returned by GetTID()                       |
+| `file`            | Filename                                                             |
+| `line`            | Line number                                                          |
+| `msg`             | User-supplied message                                                |
+
+## Log verbosity
+
+To increase log verbosity, set the flag `-v=3` (or `-v=2`), which enables
+verbose logging for everything. You can set this flag on both Zero and Alpha
+nodes.
+
+Changing log verbosity requires a restart of the node.
+
+## Request logging
+
+Request logging, sometimes called _query logging_, lets you log queries and
+mutations. You can dynamically turn request logging on or off. To toggle request
+logging on, send the following GraphQL mutation to the `/admin` endpoint of an
+Alpha node (e.g. `localhost:8080/admin`):
+
+```graphql
+mutation {
+  config(input: { logDQLRequest: true }) {
+    response {
+      code
+      message
+    }
+  }
+}
+```
+
+Note that this input flag was named `logRequest` until Dgraph v23.
+
+The response should look like the following:
+
+```json
+{
+  "data": {
+    "config": {
+      "response": {
+        "code": "Success",
+        "message": "Config updated successfully"
+      }
+    }
+  },
+  "extensions": {
+    "tracing": {
+      "version": 1,
+      "startTime": "2020-12-07T14:53:28.240420495Z",
+      "endTime": "2020-12-07T14:53:28.240569604Z",
+      "duration": 149114
+    }
+  }
+}
+```
+
+Also, the Alpha node will print the following INFO message to confirm that the
+mutation has been applied:
+
+```
+I1207 14:53:28.240516   20143 config.go:39] Got config update through GraphQL admin API
+```
+
+When request logging is enabled, Dgraph Alpha logs the requests it receives
+from Ratel or other clients. For example, the Alpha log will print
+something similar to:
+
+```
+I1201 13:06:26.686466 10905 server.go:908] Got a query: query:"{\n query(func: allofterms(name@en, \"Marc Caro\")) {\n uid\n name@en\n director.film\n }\n}"
+```
+
+This is the query that Alpha received. To read it in the original DQL format,
+just replace every `\n` with a new line, every `\t` with a tab character, and
+every `\"` with `"`:
+
+```
+{
+  query(func: allofterms(name@en, "Marc Caro")) {
+    uid
+    name@en
+    director.film
+  }
+}
+```
+
+Similarly, you can turn off request logging by setting `logDQLRequest` to
+`false` in the `/admin` mutation.
+
+```graphql
+mutation {
+  config(input: { logDQLRequest: false }) {
+    response {
+      code
+      message
+    }
+  }
+}
+```
+
+## Audit logging (enterprise feature)
+
+With a Dgraph enterprise license, you can enable audit logging so that all
+requests are tracked and available for use in security audits. To learn more,
+see [Audit Logging](./enterprise-features/audit-logs).
diff --git a/dgraph/reference/deploy/admin/metrics.mdx b/dgraph/reference/deploy/admin/metrics.mdx
new file mode 100644
index 00000000..977afe19
--- /dev/null
+++ b/dgraph/reference/deploy/admin/metrics.mdx
@@ -0,0 +1,121 @@
+---
+title: Metrics
+description:
+  Dgraph database helps administrators by providing metrics on Dgraph instance
+  activity, disk activity, server node health, memory, and Raft leadership
+---
+
+Dgraph database provides metrics on Dgraph instance activity, disk activity,
+server node health, memory, and Raft leadership. It also provides built-in
+metrics provided by Go. Dgraph metrics follow the
+[metric and label conventions for the Prometheus](https://prometheus.io/docs/practices/naming/)
+monitoring and alerting toolkit.
+
+## Activity Metrics
+
+Activity metrics let you track the mutations, queries, and proposals of a Dgraph
+instance.
+
+| Metric                                             | Description                                             |
+| -------------------------------------------------- | ------------------------------------------------------- |
+| `go_goroutines`                                    | Total number of Goroutines currently running in Dgraph. |
+| `dgraph_active_mutations_total`                    | Total number of mutations currently running.            |
+| `dgraph_pending_proposals_total`                   | Total pending Raft proposals.                           |
+| `dgraph_pending_queries_total`                     | Total number of queries in progress.                    |
+| `dgraph_num_queries_total{method="Server.Mutate"}` | Total number of mutations run in Dgraph.                |
+| `dgraph_num_queries_total{method="Server.Query"}`  | Total number of queries run in Dgraph.                  |
+
+## Disk metrics
+
+Disk metrics let you track the disk activity of the Dgraph process. Dgraph does
+not interact directly with the filesystem. Instead it relies on
+[Badger](https://github.com/dgraph-io/badger) to read from and write to disk.
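+
+As a quick sanity check, you can read these counters directly from the
+Prometheus metrics endpoint (this sketch assumes the default Alpha HTTP port,
+8080):
+
+```sh
+curl -s localhost:8080/debug/prometheus_metrics | grep badger_
+```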
+
+| Metric                              | Description                                                   |
+| ----------------------------------- | ------------------------------------------------------------- |
+| `badger_read_num_vlog`              | Total count of reads from the value log by Badger             |
+| `badger_write_num_vlog`             | Total count of writes to the value log by Badger              |
+| `badger_read_bytes_vlog`            | Total bytes read from the value log by Badger                 |
+| `badger_write_bytes_vlog`           | Total bytes written to the value log by Badger                |
+| `badger_read_bytes_lsm`             | Total bytes read from the LSM tree by Badger                  |
+| `badger_write_bytes_l0`             | Total bytes written to level zero by Badger                   |
+| `badger_write_bytes_compaction`     | Total bytes written by Badger during compactions              |
+| `badger_get_num_lsm`                | Total count of LSM gets                                       |
+| `badger_get_num_memtable`           | Total count of LSM gets from memtable                         |
+| `badger_hit_num_lsm_bloom_filter`   | Total count of LSM bloom hits                                 |
+| `badger_get_num_user`               | Total count of calls to Badger's `get`                        |
+| `badger_put_num_user`               | Total count of calls to Badger's `put`                        |
+| `badger_write_bytes_user`           | Total bytes written by the user                               |
+| `badger_get_with_result_num_user`   | Total count of calls to Badger's `get` that returned a value  |
+| `badger_iterator_num_user`          | Total count of iterators made in Badger                       |
+| `badger_size_bytes_lsm`             | Size of the LSM tree in bytes                                 |
+| `badger_size_bytes_vlog`            | Size of the value log in bytes                                |
+| `badger_write_pending_num_memtable` | Total count of pending writes                                 |
+| `badger_compaction_current_num_lsm` | Number of tables being actively compacted                     |
+
+Old Metrics (Pre 23.1.0)
+
+| Metric                        | Description                                          |
+| ----------------------------- | ---------------------------------------------------- |
+| `badger_disk_reads_total`     | Total count of disk reads in Badger.                 |
+| `badger_disk_writes_total`    | Total count of disk writes in Badger.                |
+| `badger_gets_total`           | Total count of calls to Badger's `get`.              |
+| `badger_memtable_gets_total`  | Total count of memtable accesses to Badger's `get`.  |
+| `badger_puts_total`           | Total count of calls to Badger's `put`.              |
+| `badger_read_bytes`           | Total bytes read from Badger.                        |
+| `badger_lsm_bloom_hits_total` | Total number of LSM tree bloom hits.                 |
+| `badger_written_bytes`        | Total bytes written to Badger.                       |
+| `badger_lsm_size_bytes`       | Total size in bytes of the LSM tree.                 |
+| `badger_vlog_size_bytes`      | Total size in bytes of the value log.                |
+
+## Go Metrics
+
+Go's built-in metrics may also be useful to measure memory usage and garbage
+collection time.
+
+| Metric                         | Description                                                                                 |
+| ------------------------------ | ------------------------------------------------------------------------------------------- |
+| `go_memstats_gc_cpu_fraction`  | The fraction of this program's available CPU time used by the GC since the program started. |
+| `go_memstats_heap_idle_bytes`  | Number of heap bytes waiting to be used.                                                    |
+| `go_memstats_heap_inuse_bytes` | Number of heap bytes that are in use.                                                       |
+
+## Health Metrics
+
+Health metrics let you check the health of a server node.
+
+ Health metrics are only available for Dgraph Alpha server nodes.
+
+| Metric                       | Description |
+| ---------------------------- | ----------- |
+| `dgraph_alpha_health_status` | Value is 1 when the Alpha node is ready to accept requests; otherwise 0. |
+| `dgraph_max_assigned_ts`     | This shows the latest max assigned timestamp. All Alpha nodes within the same Alpha group should show the same timestamp if they are in sync. |
+| `dgraph_txn_aborts_total`    | Shows the total number of server-initiated transaction aborts that have occurred on the Alpha node. 
| +| `dgraph_txn_commits_total` | Shows the total number of successful commits that have occurred on the Alpha node. | +| `dgraph_txn_discards_total` | Shows the total number of client-initiated transaction discards that have occurred on the Alpha node. This is incremented when the client calls for a transaction discard, such as using the Dgraph Go client's `txn.Discard` function. | + +## Memory metrics + +Memory metrics let you track the memory usage of the Dgraph process. The `idle` +and `inuse` metrics give you a better sense of the active memory usage of the +Dgraph process. The process memory metric shows the memory usage as measured by +the operating system. + +By looking at all three metrics you can see how much memory a Dgraph process is +holding from the operating system and how much is actively in use. + +| Metric | Description | +| --------------------------- | ----------------------------------------------------------------------------------------------------------- | +| `dgraph_memory_idle_bytes` | Estimated amount of memory that is being held idle that could be reclaimed by the OS. | +| `dgraph_memory_inuse_bytes` | Total memory usage in bytes (sum of heap usage and stack usage). | +| `dgraph_memory_proc_bytes` | Total memory usage in bytes of the Dgraph process. This metric is equivalent to resident set size on Linux. | + +## Raft leadership metrics + +Raft leadership metrics let you track changes in Raft leadership for Dgraph +Alpha and Dgraph Zero nodes in your Cluster. These metrics include a group label +along with the node name, so that you can determine which metrics apply to which +Raft groups. + +| Metric | Description | +| ---------------------------------- | ----------------------------------------------------------------- | +| `dgraph_raft_has_leader` | Value is 1 when the node has a leader; otherwise 0. | +| `dgraph_raft_is_leader` | Value is 1 when the node is the leader of its group; otherwise 0. | +| `dgraph_raft_leader_changes_total` | The total number of leader changes seen by this node. | diff --git a/dgraph/reference/deploy/admin/tracing.mdx b/dgraph/reference/deploy/admin/tracing.mdx new file mode 100644 index 00000000..ec30288b --- /dev/null +++ b/dgraph/reference/deploy/admin/tracing.mdx @@ -0,0 +1,63 @@ +--- +title: Tracing +--- + +Dgraph is integrated with [OpenCensus](https://opencensus.io/zpages/) to collect +distributed traces from the Dgraph cluster. + +Trace data is always collected within Dgraph. You can adjust the trace sampling +rate for Dgraph queries using the `--trace` +[superflag's](./deploy/cli-command-reference) `ratio` option when running Dgraph +Alpha nodes. By default, `--trace ratio` is set to 0.01 to trace 1% of queries. + +## Examining Traces with zPages + +The most basic way to view traces is with the integrated trace pages. + +OpenCensus's [zPages](https://opencensus.io/zpages/) are accessible via the Zero +or Alpha HTTP port at `/z/tracez`. + +## Examining Traces with Jaeger + +Jaeger collects distributed traces and provides a UI to view and query traces +across different services. This provides the necessary observability to figure +out what is happening in the system. + +Dgraph can be configured to send traces directly to a Jaeger collector with the +`trace` superflag's `jaeger` option. For example, if the Jaeger collector is +running on `http://localhost:14268`, then pass this option to the Dgraph Zero +and Dgraph Alpha instances as `--trace jaeger=http://localhost:14268`. 
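+
+For example, a minimal sketch of a local setup (assuming a Jaeger collector
+listening on its default collector port, 14268):
+
+```sh
+# Send traces from both node types to the local Jaeger collector;
+# ratio=1.0 traces every query (the default is 0.01).
+dgraph zero --trace "jaeger=http://localhost:14268"
+dgraph alpha --trace "jaeger=http://localhost:14268; ratio=1.0"
+```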
+
+See
+[Jaeger's Getting Started docs](https://www.jaegertracing.io/docs/getting-started/)
+to get up and running with Jaeger.
+
+### Setting up multiple Dgraph clusters with Jaeger
+
+Jaeger allows you to examine traces from multiple Dgraph clusters. To do this,
+use the `--collector.tags` flag on a Jaeger collector to set custom trace tags.
+For example, run one collector with `--collector.tags env=qa` and then another
+collector with `--collector.tags env=dev`. In Dgraph, set the `--trace jaeger`
+option in the Dgraph QA cluster to the first collector and set this option in
+the Dgraph Dev cluster to the second collector. You can run multiple Jaeger
+collector components for the same single Jaeger backend (e.g., many Jaeger
+collectors to a single Cassandra backend). This is still a single Jaeger
+installation, but with different collectors customizing the tags per
+environment.
+
+Once you have this configured, you can filter by tags in the Jaeger UI. Filter
+traces by tags matching `env=dev`:
+
+![Jaeger UI](/images/jaeger-ui.png)
+
+Every trace has your custom tags set under the “Process” section of each span:
+
+![Jaeger Query](/images/jaeger-server-query.png)
+
+Filter traces by tags matching `env=qa`:
+
+![Jaeger JSON](/images/jaeger-json.png)
+
+![Jaeger Query Result](/images/jaeger-server-query-2.png)
+
+To learn more about Jaeger, see
+[Jaeger's Deployment Guide](https://www.jaegertracing.io/docs/deployment/).
diff --git a/dgraph/reference/deploy/cli-command-reference.mdx b/dgraph/reference/deploy/cli-command-reference.mdx
new file mode 100644
index 00000000..b474e350
--- /dev/null
+++ b/dgraph/reference/deploy/cli-command-reference.mdx
@@ -0,0 +1,1114 @@
+---
+title: Dgraph CLI Reference
+---
+
+You can use the Dgraph command-line interface (CLI) to deploy and manage Dgraph.
+You use it in self-managed deployment scenarios, such as running Dgraph on
+on-premises servers hosted on your physical infrastructure, or running Dgraph in
+the cloud on your AWS, GCP, or Azure infrastructure.
+
+Dgraph has a root command used throughout its CLI: `dgraph`. The `dgraph`
+command is supported by multiple subcommands (such as `alpha` or `upgrade`), some
+of which are also supported by their own subcommands. For example, the
+`dgraph acl` command requires you to specify one of its subcommands: `add`,
+`del`, `info` or `mod`. As with other CLIs, you provide command options using
+flags like `--help` or `--telemetry`.
+
+
+  The term _command_ is used instead of _subcommand_ throughout this document,
+  except when clarifying relationships in the CLI command hierarchy. The term
+  _command_ is also used for combinations of commands and their subcommands,
+  such as `dgraph alpha debug`.
+
+
+## Dgraph CLI superflags in release v21.03
+
+Some flags are deprecated and replaced in release v21.03. In previous Dgraph
+releases, multiple related flags were often used in a command, causing some
+commands to be very long. Starting in release v21.03, Dgraph uses _superflags_
+for some flags used by the most complex commands: `alpha`, `backup`, `bulk`,
+`debug`, `live` and `zero`. Superflags are compound flags: they contain one or
+more options that let you define multiple settings in a semicolon-delimited
+list. Semicolons are required between superflag options, but a semicolon after
+the last superflag option is optional.
+
+The general syntax for superflags is as follows:
+`--<superflag> option-a=value; option-b=value`
+
+ You should encapsulate the options for a superflag in
+double-quotes (`"`) if any of those option values include spaces. You can also
+encapsulate options in double-quotes to improve readability, using the
+following syntax:
+`--<superflag> "option-a=value; option-b=value"`.
+
+Release v21.03 includes the following superflags:
+
+- `--acl`
+- `--badger`
+- `--cache`
+- `--encryption`
+- `--graphql`
+- `--limit`
+- `--raft`
+- `--security`
+- `--telemetry`
+- `--tls`
+- `--trace`
+- `--vault`
+
+The following tables map Dgraph CLI flags from release v20.11 and earlier to
+the superflags (and their options) that replace them in release v21.03. Any
+flags not shown here are unchanged in release v21.03.
+
+### ACL superflag
+
+| Old flag | Old type | New superflag and options | New type | Applies to | Notes |
+| ------------------: | :------------ | ------------------------: | :------- | :--------: | :---: |
+| | | **`--acl`** | | | [Access Control List](./enterprise-features/access-control-lists) superflag |
+| `--acl_secret_file` | string | `secret-file` | string | `alpha` | File that stores the HMAC secret that is used for signing the JWT |
+| `--acl_access_ttl` | time.Duration | `access-ttl` | [string](https://github.com/dgraph-io/ristretto/blob/master/z/flags.go#L80-L98) | `alpha` | The TTL for the access JWT |
+| `--acl_refresh_ttl` | time.Duration | `refresh-ttl` | [string](https://github.com/dgraph-io/ristretto/blob/master/z/flags.go#L80-L98) | `alpha` | The TTL for the refresh JWT |
+
+### Badger superflag
+
+| Old flag | Old type | New superflag and options | New type | Applies to | Notes |
+| ---------------------: | :------- | ------------------------: | :------- | :-----------------------: | :---: |
+| | | **`--badger`** | | | [Badger](https://dgraph.io/docs/badger) superflag |
+| `--badger.compression` | string | `compression` | string | `alpha`, `bulk`, `backup` | Specifies the compression level and algorithm |
+| | | (new) [`numgoroutines`](./troubleshooting) | int | `alpha`, `bulk`, `backup` | Number of goroutines used by Dgraph |
+
+
+  The `--badger` superflag allows you to set many advanced
+  [Badger options](https://pkg.go.dev/github.com/dgraph-io/badger/v3#Options),
+  including: `dir`, `valuedir`, `syncwrites`, `numversionstokeep`, `readonly`,
+  `inmemory`, `metricsenabled`, `memtablesize`, `basetablesize`,
+  `baselevelsize`, `levelsizemultiplier`, `tablesizemultiplier`, `maxlevels`,
+  `vlogpercentile`, `valuethreshold`, `nummemtables`, `blocksize`,
+  `bloomfalsepositive`, `blockcachesize`, `indexcachesize`,
+  `numlevelzerotables`, `numlevelzerotablesstall`, `valuelogfilesize`,
+  `valuelogmaxentries`, `numcompactors`, `compactl0onclose`, `lmaxcompaction`,
+  `zstdcompressionlevel`, `verifyvaluechecksum`,
+  `encryptionkeyrotationduration`, `bypasslockguard`,
+  `checksumverificationmode`, `detectconflicts`, `namespaceoffset`.
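+
+For example, a sketch of the superflag syntax in practice (the option values
+here are illustrative, not recommendations):
+
+```sh
+dgraph alpha --badger "compression=zstd:1; numgoroutines=8"
+```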
+ + +### Cache superflag + +| Old flag | Old type | New superflag and options | New type | Applies to | Notes | +| -----------------: | :------- | ------------------------: | :------- | :--------: | :--------------------------------------------------: | +| | | **`--cache`** | | | Cache superflag | +| `cache_mb` | string | `size-mb` | string | `alpha` | Total size of cache (in MB) per shard in the reducer | +| `cache_percentage` | string | `percentage` | string | `alpha` | Cache percentages for block cache and index cache | + +### Encryption superflag + +| Old flag | Old type | New superflag and options | New type | Applies to | Notes | +| ----------------------: | :------- | ------------------------: | :------- | :---------------------------------------------------------------------: | :------------------------------------: | +| | | **`--encryption`** | | | Encryption superflag | +| `--encryption_key_file` | string | `key-file` | string | `alpha`, `bulk`, `live`, `restore`, `debug`, `decrypt`, `export_backup` | The file that stores the symmetric key | + +### GraphQL superflag + +| Old flag | Old type | New superflag and options | New type | Applies to | Notes | +| ------------------------: | :------------ | ------------------------: | :------------------------------------------------------------------------------ | :--------: | :----------------------------------------------------------------------------: | +| | | **`--graphql`** | | | GraphQL superflag | +| `--graphql_introspection` | bool | `introspection` | bool | `alpha` | Enables GraphQL schema introspection | +| `--graphql_debug` | bool | `debug` | bool | `alpha` | Enables debug mode in GraphQL | +| `--graphql_extensions` | bool | `extensions` | bool | `alpha` | Enables extensions in GraphQL response body | +| `--graphql_poll_interval` | time.Duration | `poll-interval` | [string](https://github.com/dgraph-io/ristretto/blob/master/z/flags.go#L80-L98) | `alpha` | The polling interval for GraphQL subscriptions | +| `--graphql_lambda_url` | string | `lambda-url` | string | `alpha` | The URL of a lambda server that implements custom GraphQL JavaScript resolvers | + +### Limit superflag + +| Old flag | Old type | New superflag and options | New type | Applies to | Notes | +| ------------------------: | :------- | ------------------------: | :------- | :--------: | :----------------------------------------------------------------------------------------------------------------: | +| | | **`--limit`** | | | Limit-setting superflag for Dgraph Alpha | +| `--abort_older_than` | string | `txn-abort-after` | string | `alpha` | Abort any pending transactions older than this duration | +| `--disable_admin_http` | string | `disable-admin-http` | string | `zero` | Turn on/off the administrative endpoints | +| `--max_retries` | int | `max-retries` | int | `alpha` | Maximum number of retries | +| `--mutations` | string | `mutations` | string | `alpha` | Mutation mode: `allow`, `disallow`, or `strict` | +| `--query_edge_limit` | uint64 | `query-edge` | uint64 | `alpha` | Maximum number of edges that can be returned in a query | +| `--normalize_node_limit` | int | `normalize-node` | int | `alpha` | Maximum number of nodes that can be returned in a query that uses the normalize directive | +| `--mutations_nquad_limit` | int | `mutations-nquad` | int | `alpha` | Maximum number of nquads that can be inserted in a mutation request | +| `--max-pending-queries` | int | `max-pending-queries` | int | `alpha` | Maximum number of concurrently processing requests 
allowed before requests are rejected with 429 Too Many Requests | + +### Raft superflag + +| Old flag | Old type | New superflag and options | New type | Applies to | Notes | +| --------------------: | :------- | -----------------------------: | :------- | :-------------: | :--------------------------------------------------------------------------------------------------------: | +| | | **`--raft`** | | | [Raft](./design-concepts/raft) superflag | +| `--pending_proposals` | int | `pending-proposals` | int | `alpha` | Maximum number of pending mutation proposals; useful for rate limiting | +| `--idx` | int | `idx` | int | `alpha`, `zero` | Provides an optional Raft ID that an Alpha node can use to join Raft groups | +| `--group` | int | `group` | int | `alpha` | Provides an optional Raft group ID that an Alpha node can use to request group membership from a Zero node | +| | | (new)`learner` | bool | `alpha`, `zero` | Make this Alpha a learner node (used for read-only replicas) | +| | | (new)`snapshot-after-duration` | int | `alpha` | Frequency at which Raft snapshots are created | +| `--snapshot-after` | int | `snapshot-after-entries` | int | `alpha` | Create a new Raft snapshot after the specified number of Raft entries | + +### Security superflag + +| Old flag | Old type | New superflag and options | New type | Applies to | Notes | +| ----------------: | :------- | ------------------------: | :------- | :----------------: | :---------------------------------------------------------------------------------------------: | +| | | **`--security`** | | | Security superflag | +| `--auth_token` | string | `token` | string | `alpha` | Authentication token | +| `--whitelist` | string | `whitelist` | string | `alpha` | A comma separated list of IP addresses, IP ranges, CIDR blocks, or hostnames for administration | +| | | **`--telemetry`** | | | Telemetry superflag | +| `--telemetry` | bool | `reports` | bool | `alpha` and `zero` | Sends anonymous telemetry data to Dgraph | +| `--enable_sentry` | bool | `sentry` | bool | `alpha` and `zero` | Enable sending crash events to Sentry | + +### TLS superflag + +| Old flag | Old type | New superflag and options | New type | Applies to | Notes | +| ----------------------------: | :------- | ------------------------: | :------- | :---------------------------------------: | :--------------------------------------------------------------------------------: | +| | | **`--tls`** | | | [TLS](./tls-configuration) superflag | +| `--tls_cacert` | string | `ca-cert` | string | `alpha`, `zero`, `bulk`, `backup`, `live` | The CA cert file used to verify server certificates | +| `--tls_use_system_ca` | bool | `use-system-ca` | bool | `alpha`, `zero`, `bulk`, `backup`, `live` | Include System CA with Dgraph Root CA | +| `--tls_server_name` | string | `server-name` | string | `alpha`, `zero`, `bulk`, `backup`, `live` | Server name, used for validating the server’s TLS host name | +| `--tls_client_auth` | string | `client-auth-type` | string | `alpha`, `zero` | TLS client authentication used to validate client connections from external ports | +| `--tls_node_cert` | string | `server-cert` | string | `alpha` and `zero` | Path and filename of the node certificate (for example, `node.crt`) | +| `--tls_node_key` | string | `server-key` | string | `alpha` and `zero` | Path and filename of the node certificate private key (for example, `node.key`) | +| `--tls_internal_port_enabled` | bool | `internal-port` | bool | `alpha`, `zero`, `bulk`, `backup`, `live` | Makes internal 
ports (by default, 5080 and 7080) use the REQUIREANDVERIFY setting. |
+| `--tls_cert` | string | `client-cert` | string | `alpha`, `zero`, `bulk`, `backup`, `live` | User cert file provided by the client to the Alpha node |
+| `--tls_key` | string | `client-key` | string | `alpha`, `zero`, `bulk`, `backup`, `live` | User private key file provided by the client to the Alpha node |
+
+### Trace superflag
+
+| Old flag | Old type | New superflag and options | New type | Applies to | Notes |
+| --------------------: | :------- | ------------------------: | :------- | :-------------: | :---: |
+| | | **`--trace`** | | | [Tracing](./tracing) superflag |
+| `--trace` | float64 | `ratio` | float64 | `alpha`, `zero` | The ratio of queries to trace |
+| `--jaeger.collector` | string | `jaeger` | string | `alpha`, `zero` | URL of Jaeger to send OpenCensus traces |
+| `--datadog.collector` | string | `datadog` | string | `alpha`, `zero` | URL of Datadog to send OpenCensus traces |
+
+### Vault superflag
+
+| Old flag | Old type | New superflag and options | New type | Applies to | Notes |
+| ----------------------: | :------- | ------------------------: | :------- | :----------------------------------------: | :---: |
+| | | **`--vault`** | | | Vault superflag |
+| `--vault_addr` | string | `addr` | string | `alpha`, `bulk`, `backup`, `live`, `debug` | Vault server address, formatted as `http://ip-address:port` |
+| `--vault_roleid_file` | string | `role-id-file` | string | `alpha`, `bulk`, `backup`, `live`, `debug` | File containing Vault `role-id` used for AppRole authentication |
+| `--vault_secretid_file` | string | `secret-id-file` | string | `alpha`, `bulk`, `backup`, `live`, `debug` | File containing Vault `secret-id` used for AppRole authentication |
+| `--vault_path` | string | `path` | string | `alpha`, `bulk`, `backup`, `live`, `debug` | Vault key=value store path (example: `secret/data/dgraph` for kv-v2, `kv/dgraph` for kv-v1) |
+| `--vault_field` | string | `field` | string | `alpha`, `bulk`, `backup`, `live`, `debug` | Vault key=value store field whose value is the base64-encoded encryption key |
+| `--vault_format` | string | `format` | string | `alpha`, `bulk`, `backup`, `live`, `debug` | Vault field format (`raw` or `base64`) |
+
+To learn more about each superflag and its options, see the `--help` output of
+the Dgraph CLI commands listed in the following section.
+
+## Dgraph CLI command help listing
+
+The Dgraph CLI includes the root `dgraph` command and its subcommands. The CLI
+help for these commands is replicated inline below for your reference, or you
+can find help by calling these commands (or their subcommands) using the
+`--help` flag.
+
+
+  Although many of the commands listed below have subcommands, only `dgraph`
+  and subcommands of `dgraph` are included in this listing.
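+
+For instance, you can print the help for any command or subcommand directly
+from your shell:
+
+```sh
+dgraph --help
+dgraph alpha --help
+dgraph acl mod --help
+```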
+ + +The Dgraph CLI has several commands, which are organized into the following +groups: + +- [Dgraph core](#dgraph-core-commands) +- [Data loading](#data-loading-commands) +- [Dgraph security](#dgraph-security-commands) +- [Dgraph debug](#dgraph-debug-commands) +- [Dgraph tools](#dgraph-tools-commands) + +The commands in these groups are shown in the following table: + +| Group | Command | Note | +| --------------- | ---------------------------------------- | --------------------------------------------------------------------------------------------------------------- | +| (root) | [`dgraph`](#dgraph-root-command) | Root command for Dgraph CLI | +| Dgraph core | [`alpha`](#dgraph-alpha) | Dgraph Alpha database node commands | +| Dgraph core | [`zero`](#dgraph-zero) | Dgraph Zero management node commands | +| Data loading | [`bulk`](#dgraph-bulk) | Dgraph [Bulk Loader](./bulk-loader) commands | +| Data loading | [`live`](#dgraph-live) | Dgraph [Live Loader](./live-loader) commands | +| Data loading | [`restore`](#dgraph-restore) | Command used to restore backups created using Dgraph Enterprise Edition | +| Dgraph security | [`acl`](#dgraph-acl) | Dgraph [Access Control List (ACL)](./enterprise-features/access-control-lists) commands | +| Dgraph security | [`audit`](#dgraph-audit) | Decrypt audit files | +| Dgraph security | [`cert`](#dgraph-cert) | Configure TLS and manage TLS certificates | +| Dgraph debug | [`debug`](#dgraph-debug) | Used to debug issues with Dgraph | +| Dgraph debug | [`debuginfo`](#dgraph-debuginfo) | Generates information about the current node for use in debugging issues with Dgraph clusters | +| Dgraph tools | [`completion`](#dgraph-completion) | Generates shell completion scripts for `bash` and `zsh` | +| Dgraph tools | [`conv`](#dgraph-conv) | Converts geographic files into RDF so that they can be consumed by Dgraph | +| Dgraph tools | [`decrypt`](#dgraph-decrypt) | Decrypts an export file created by an encrypted Dgraph Cluster | +| Dgraph tools | [`export_backup`](#dgraph-export_backup) | Converts a binary backup created using Dgraph Enterprise Edition into an exported folder. | +| Dgraph tools | [`increment`](#dgraph-increment) | Increments a counter transactionally to confirm that a Dgraph Alpha node can handle query and mutation requests | +| Dgraph tools | [`lsbackup`](#dgraph-lsbackup) | Lists information on backups in a given location | +| Dgraph tools | [`migrate`](#dgraph-migrate) | Migrates data from a MySQL database to Dgraph | +| Dgraph tools | [`raftmigrate`](#dgraph-raftmigrate) | Dgraph Raft migration tool | +| Dgraph tools | [`upgrade`](#dgraph-upgrade) | Upgrades Dgraph to a newer version | + +### `dgraph` root command + +This command is the root for all commands in the Dgraph CLI. 
Key information +from the help listing for `dgraph --help` is shown below: + +```shell +Usage: + dgraph [command] + +Generic: + help Help about any command + version Prints the dgraph version details + +Available Commands: + +Dgraph Core: + alpha Run Dgraph Alpha database server + zero Run Dgraph Zero management server + +Data Loading: + bulk Run Dgraph Bulk Loader + live Run Dgraph Live Loader + restore Restore backup from Dgraph Enterprise Edition + +Dgraph Security: + acl Run the Dgraph Enterprise Edition ACL tool + audit Dgraph audit tool + cert Dgraph TLS certificate management + +Dgraph Debug: + debug Debug Dgraph instance + debuginfo Generate debug information on the current node + +Dgraph Tools: + completion Generates shell completion scripts for bash or zsh + conv Dgraph Geo file converter + decrypt Run the Dgraph decryption tool + export_backup Export data inside single full or incremental backup + increment Increment a counter transactionally + lsbackup List info on backups in a given location + migrate Run the Dgraph migration tool from a MySQL database to Dgraph + raftmigrate Run the Raft migration tool + upgrade Run the Dgraph upgrade tool + +Flags: + --alsologtostderr log to standard error as well as files + --bindall Use 0.0.0.0 instead of localhost to bind to all addresses on local machine. (default true) + --block_rate int Block profiling rate. Must be used along with block profile_mode + --config string Configuration file. Takes precedence over default values, but is overridden to values set with environment variables and flags. + --cwd string Change working directory to the path specified. The parent must exist. + --expose_trace Allow trace endpoint to be accessible from remote + -h, --help help for dgraph + --log_backtrace_at traceLocation when logging hits line file:N, emit a stack trace (default :0) + --log_dir string If non-empty, write log files in this directory + --logtostderr log to standard error instead of files + --profile_mode string Enable profiling mode, one of [cpu, mem, mutex, block] + -v, --v Level log level for V logs + --vmodule moduleSpec comma-separated list of pattern=N settings for file-filtered logging + +``` + +### Dgraph core commands + +Dgraph core commands provide core deployment and management functionality for +the Dgraph Alpha database nodes and Dgraph Zero management nodes in your +deployment. + +#### `dgraph alpha` + +This command is used to configure and run the Dgraph Alpha database nodes in +your deployment. The following replicates the help listing for +`dgraph alpha --help`: + +```shell +A Dgraph Alpha instance stores the data. Each Dgraph Alpha is responsible for +storing and serving one data group. If multiple Alphas serve the same group, +they form a Raft group and provide synchronous replication. + +Usage: + dgraph alpha [flags] + +Flags: + --acl string [Enterprise Feature] ACL options + access-ttl=6h; The TTL for the access JWT. + refresh-ttl=30d; The TTL for the refresh JWT. + secret-file=; The file that stores the HMAC secret, which is used for signing the JWT and should have at least 32 ASCII characters. Required to enable ACLs. + (default "access-ttl=6h; refresh-ttl=30d; secret-file=;") + --audit string Audit options + compress=false; Enables the compression of old audit logs. + days=10; The number of days audit logs will be preserved. + encrypt-file=; The path to the key file to be used for audit log encryption. + output=; [stdout, /path/to/dir] This specifies where audit logs should be output to. 
+ "stdout" is for standard output. You can also specify the directory where audit logs + will be saved. When stdout is specified as output other fields will be ignored. + size=100; The audit log max size in MB after which it will be rolled over. + (default "compress=false; days=10; size=100; dir=; output=; encrypt-file=;") + --badger string Badger options + compression=snappy; [none, zstd:level, snappy] Specifies the compression algorithm and + compression level (if applicable) for the postings directory."none" would disable + compression, while "zstd:1" would set zstd compression at level 1. + numgoroutines=8; The number of goroutines to use in badger.Stream. + max-retries=-1; Commits to disk will give up after these number of retries to prevent locking the worker in a failed state. Use -1 to retry infinitely. + (default "compression=snappy; numgoroutines=8; max-retries=-1;") + --cache string Cache options + percentage=0,65,35; Cache percentages summing up to 100 for various caches (FORMAT: PostingListCache,PstoreBlockCache,PstoreIndexCache) + size-mb=1024; Total size of cache (in MB) to be used in Dgraph. + (default "size-mb=1024; percentage=0,65,35;") + --cdc string Change Data Capture options + ca-cert=; The path to CA cert file for TLS encryption. + client-cert=; The path to client cert file for TLS encryption. + client-key=; The path to client key file for TLS encryption. + file=; The path where audit logs will be stored. + kafka=; A comma separated list of Kafka hosts. + sasl-password=; The SASL password for Kafka. + sasl-user=; The SASL username for Kafka. + (default "file=; kafka=; sasl_user=; sasl_password=; ca_cert=; client_cert=; client_key=;") + --custom_tokenizers string Comma separated list of tokenizer plugins for custom indices. + --encryption string [Enterprise Feature] Encryption At Rest options + key-file=; The file that stores the symmetric key of length 16, 24, or 32 bytes. The key size determines the chosen AES cipher (AES-128, AES-192, and AES-256 respectively). + (default "key-file=;") + --export string Folder in which to store exports. (default "export") + --graphql string GraphQL options + debug=false; Enables debug mode in GraphQL. This returns auth errors to clients, and we do not recommend turning it on for production. + extensions=true; Enables extensions in GraphQL response body. + introspection=true; Enables GraphQL schema introspection. + lambda-url=; The URL of a lambda server that implements custom GraphQL Javascript resolvers. + poll-interval=1s; The polling interval for GraphQL subscription. + (default "introspection=true; debug=false; extensions=true; poll-interval=1s; lambda-url=;") + -h, --help help for alpha + --limit string Limit options + disallow-drop=false; Set disallow-drop to true to block drop-all and drop-data operation. It still allows dropping attributes and types. + mutations-nquad=1000000; The maximum number of nquads that can be inserted in a mutation request. + mutations=allow; [allow, disallow, strict] The mutations mode to use. + normalize-node=10000; The maximum number of nodes that can be returned in a query that uses the normalize directive. + query-edge=1000000; The maximum number of edges that can be returned in a query. This applies to shortest path and recursive queries. + query-timeout=0ms; Maximum time after which a query execution will fail. If set to 0, the timeout is infinite. + txn-abort-after=5m; Abort any pending transactions older than this duration. The liveness of a transaction is determined by its last mutation. 
+ max-pending-queries=10000; Number of maximum pending queries before we reject them as too many requests. + (default "mutations=allow; query-edge=1000000; normalize-node=10000; mutations-nquad=1000000; disallow-drop=false; query-timeout=0ms; txn-abort-after=5m; max-pending-queries=10000") + --my string addr:port of this server, so other Dgraph servers can talk to this. + -o, --port_offset int Value added to all listening port numbers. [Internal=7080, HTTP=8080, Grpc=9080] + -p, --postings string Directory to store posting lists. (default "p") + --raft string Raft options + group=; Provides an optional Raft Group ID that this Alpha would indicate to Zero to join. + idx=; Provides an optional Raft ID that this Alpha would use to join Raft groups. + learner=false; Make this Alpha a "learner" node. In learner mode, this Alpha will not participate in Raft elections. This can be used to achieve a read-only replica. + pending-proposals=256; Number of pending mutation proposals. Useful for rate limiting. + snapshot-after-duration=30m; Frequency at which we should create a new raft snapshots. Set to 0 to disable duration based snapshot. + snapshot-after-entries=10000; Create a new Raft snapshot after N number of Raft entries. The lower this number, the more frequent snapshot creation will be. Snapshots are created only if both snapshot-after-duration and snapshot-after-entries threshold are crossed. + (default "learner=false; snapshot-after-entries=10000; snapshot-after-duration=30m; pending-proposals=256; idx=; group=;") + --security string Security options + token=; If set, all Admin requests to Dgraph will need to have this token. The token can be passed as follows: for HTTP requests, in the X-Dgraph-AuthToken header. For Grpc, in auth-token key in the context. + whitelist=; A comma separated list of IP addresses, IP ranges, CIDR blocks, or hostnames you wish to whitelist for performing admin actions (i.e., --security "whitelist=144.142.126.254,127.0.0.1:127.0.0.3,192.168.0.0/16,host.docker.internal"). + (default "token=; whitelist=;") + --survive string Choose between "process" or "filesystem". + If set to "process", there would be no data loss in case of process crash, but the behavior would be nondeterministic in case of filesystem crash. + If set to "filesystem", blocking sync would be called after every write, hence guaranteeing no data loss in case of hard reboot. + Most users should be OK with choosing "process". (default "process") + --telemetry string Telemetry (diagnostic) options + reports=true; Send anonymous telemetry data to Dgraph devs. + sentry=true; Send crash events to Sentry. + (default "reports=true; sentry=true;") + --tls string TLS Server options + ca-cert=; The CA cert file used to verify server certificates. Required for enabling TLS. + client-auth-type=VERIFYIFGIVEN; The TLS client authentication method. + client-cert=; (Optional) The client Cert file which is needed to connect as a client with the other nodes in the cluster. + client-key=; (Optional) The private client Key file which is needed to connect as a client with the other nodes in the cluster. + internal-port=false; (Optional) Enable inter-node TLS encryption between cluster nodes. + server-cert=; The server Cert file which is needed to initiate the server in the cluster. + server-key=; The server Key file which is needed to initiate the server in the cluster. + use-system-ca=true; Includes System CA into CA Certs. 
+ (default "use-system-ca=true; client-auth-type=VERIFYIFGIVEN; internal-port=false;") + --tmp string Directory to store temporary buffers. (default "t") + --trace string Trace options + datadog=; URL of Datadog to send OpenCensus traces. As of now, the trace exporter does not support annotation logs and discards them. + jaeger=; URL of Jaeger to send OpenCensus traces. + ratio=0.01; The ratio of queries to trace. + (default "ratio=0.01; jaeger=; datadog=;") + --vault string Vault options + acl-field=; Vault field containing ACL key. + acl-format=base64; ACL key format, can be 'raw' or 'base64'. + addr=http://localhost:8200; Vault server address (format: http://ip:port). + enc-field=; Vault field containing encryption key. + enc-format=base64; Encryption key format, can be 'raw' or 'base64'. + path=secret/data/dgraph; Vault KV store path (e.g. 'secret/data/dgraph' for KV V2, 'kv/dgraph' for KV V1). + role-id-file=; Vault RoleID file, used for AppRole authentication. + secret-id-file=; Vault SecretID file, used for AppRole authentication. + (default "addr=http://localhost:8200; role-id-file=; secret-id-file=; path=secret/data/dgraph; acl-field=; acl-format=base64; enc-field=; enc-format=base64") + -w, --wal string Directory to store raft write-ahead logs. (default "w") + -z, --zero string Comma separated list of Dgraph Zero addresses of the form IP_ADDRESS:PORT. (default "localhost:5080") + +Use "dgraph alpha [command] --help" for more information about a command. +``` + +#### `dgraph zero` + +This command is used to configure and run the Dgraph Zero management nodes in +your deployment. The following replicates the help listing shown when you run +`dgraph zero --help`: + +```shell +A Dgraph Zero instance manages the Dgraph cluster. Typically, a single Zero +instance is sufficient for the cluster; however, one can run multiple Zero +instances to achieve high-availability. + +Usage: + dgraph zero [flags] + +Flags: + --audit string Audit options + compress=false; Enables the compression of old audit logs. + days=10; The number of days audit logs will be preserved. + encrypt-file=; The path to the key file to be used for audit log encryption. + output=; [stdout, /path/to/dir] This specifies where audit logs should be output to. + "stdout" is for standard output. You can also specify the directory where audit logs + will be saved. When stdout is specified as output other fields will be ignored. + size=100; The audit log max size in MB after which it will be rolled over. + (default "compress=false; days=10; size=100; dir=; output=; encrypt-file=;") + --enterprise_license string Path to the enterprise license file. + -h, --help help for zero + --limit string Limit options + disable-admin-http=false; Turn on/off the administrative endpoints exposed over Zero's HTTP port. + refill-interval=30s; The interval after which the tokens for UID lease are replenished. + uid-lease=0; The maximum number of UIDs that can be leased by namespace (except default namespace) + in an interval specified by refill-interval. Set it to 0 to remove limiting. + (default "uid-lease=0; refill-interval=30s; disable-admin-http=false;") + --my string addr:port of this server, so other Dgraph servers can talk to this. + --peer string Address of another dgraphzero server. + -o, --port_offset int Value added to all listening port numbers. [Grpc=5080, HTTP=6080] + --raft string Raft options + idx=1; Provides an optional Raft ID that this Alpha would use to join Raft groups. + learner=false; Make this Zero a "learner" node. 
In learner mode, this Zero will not participate in Raft elections. This can be used to achieve a read-only replica. + (default "idx=1; learner=false;") + --rebalance_interval duration Interval for trying a predicate move. (default 8m0s) + --replicas int How many Dgraph Alpha replicas to run per data shard group. The count includes the original shard. (default 1) + --survive string Choose between "process" or "filesystem". + If set to "process", there would be no data loss in case of process crash, but the behavior would be nondeterministic in case of filesystem crash. + If set to "filesystem", blocking sync would be called after every write, hence guaranteeing no data loss in case of hard reboot. + Most users should be OK with choosing "process". (default "process") + --telemetry string Telemetry (diagnostic) options + reports=true; Send anonymous telemetry data to Dgraph devs. + sentry=true; Send crash events to Sentry. + (default "reports=true; sentry=true;") + --tls string TLS Server options + ca-cert=; The CA cert file used to verify server certificates. Required for enabling TLS. + client-auth-type=VERIFYIFGIVEN; The TLS client authentication method. + client-cert=; (Optional) The client Cert file which is needed to connect as a client with the other nodes in the cluster. + client-key=; (Optional) The private client Key file which is needed to connect as a client with the other nodes in the cluster. + internal-port=false; (Optional) Enable inter-node TLS encryption between cluster nodes. + server-cert=; The server Cert file which is needed to initiate the server in the cluster. + server-key=; The server Key file which is needed to initiate the server in the cluster. + use-system-ca=true; Includes System CA into CA Certs. + (default "use-system-ca=true; client-auth-type=VERIFYIFGIVEN; internal-port=false;") + --trace string Trace options + datadog=; URL of Datadog to send OpenCensus traces. As of now, the trace exporter does not support annotation logs and discards them. + jaeger=; URL of Jaeger to send OpenCensus traces. + ratio=0.01; The ratio of queries to trace. + (default "ratio=0.01; jaeger=; datadog=;") + -w, --wal string Directory storing WAL. (default "zw") + +Use "dgraph zero [command] --help" for more information about a command. +``` + +### Data loading commands + +#### `dgraph bulk` + +This command is used to bulk load data with the Dgraph +[Bulk Loader](./bulk-loader.md) tool. The following replicates the help listing +shown when you run `dgraph bulk --help`: + +```shell + Run Dgraph Bulk Loader +Usage: + dgraph bulk [flags] + +Flags: + --badger string Badger options (Refer to badger documentation for all possible options) + compression=snappy; Specifies the compression algorithm and compression level (if applicable) for the postings directory. "none" would disable compression, while "zstd:1" would set zstd compression at level 1. + numgoroutines=8; The number of goroutines to use in badger.Stream. + (default "compression=snappy; numgoroutines=8;") + --cleanup_tmp Clean up the tmp directory after the loader finishes. Setting this to false allows the bulk loader can be re-run while skipping the map phase. (default true) + --custom_tokenizers string Comma separated list of tokenizer plugins + --encrypted Flag to indicate whether schema and data files are encrypted. Must be specified with --encryption or vault option(s). + --encrypted_out Flag to indicate whether to encrypt the output. Must be specified with --encryption or vault option(s). 
+ --encryption string [Enterprise Feature] Encryption At Rest options + key-file=; The file that stores the symmetric key of length 16, 24, or 32 bytes. The key size determines the chosen AES cipher (AES-128, AES-192, and AES-256 respectively). + (default "key-file=;") + -f, --files string Location of *.rdf(.gz) or *.json(.gz) file(s) to load. + --force-namespace uint Namespace onto which to load the data. If not set, will preserve the namespace. (default 18446744073709551615) + --format string Specify file format (rdf or json) instead of getting it from filename. + -g, --graphql_schema string Location of the GraphQL schema file. + -h, --help help for bulk + --http string Address to serve http (pprof). (default "localhost:8080") + --ignore_errors ignore line parsing errors in rdf files + --map_shards int Number of map output shards. Must be greater than or equal to the number of reduce shards. Increasing allows more evenly sized reduce shards, at the expense of increased memory usage. (default 1) + --mapoutput_mb int The estimated size of each map file output. Increasing this increases memory usage. (default 2048) + --new_uids Ignore UIDs in load files and assign new ones. + -j, --num_go_routines int Number of worker threads to use. MORE THREADS LEAD TO HIGHER RAM USAGE. (default 1) + --out string Location to write the final dgraph data directories. (default "./out") + --partition_mb int Pick a partition key every N megabytes of data. (default 4) + --reduce_shards int Number of reduce shards. This determines the number of dgraph instances in the final cluster. Increasing this potentially decreases the reduce stage runtime by using more parallelism, but increases memory usage. (default 1) + --reducers int Number of reducers to run concurrently. Increasing this can improve performance, and must be less than or equal to the number of reduce shards. (default 1) + --replace_out Replace out directory and its contents if it exists. + -s, --schema string Location of schema file. + --skip_map_phase Skip the map phase (assumes that map output files already exist). + --store_xids Generate an xid edge for each node. + --tls string TLS Client options + ca-cert=; The CA cert file used to verify server certificates. Required for enabling TLS. + client-cert=; (Optional) The Cert file provided by the client to the server. + client-key=; (Optional) The private Key file provided by the clients to the server. + internal-port=false; (Optional) Enable inter-node TLS encryption between cluster nodes. + server-name=; Used to verify the server hostname. + use-system-ca=true; Includes System CA into CA Certs. + (default "use-system-ca=true; internal-port=false;") + --tmp string Temp directory used to use for on-disk scratch space. Requires free space proportional to the size of the RDF file and the amount of indexing used. (default "tmp") + --vault string Vault options + acl-field=; Vault field containing ACL key. + acl-format=base64; ACL key format, can be 'raw' or 'base64'. + addr=http://localhost:8200; Vault server address (format: http://ip:port). + enc-field=; Vault field containing encryption key. + enc-format=base64; Encryption key format, can be 'raw' or 'base64'. + path=secret/data/dgraph; Vault KV store path (e.g. 'secret/data/dgraph' for KV V2, 'kv/dgraph' for KV V1). + role-id-file=; Vault RoleID file, used for AppRole authentication. + secret-id-file=; Vault SecretID file, used for AppRole authentication. 
+ (default "addr=http://localhost:8200; role-id-file=; secret-id-file=; path=secret/data/dgraph; acl-field=; acl-format=base64; enc-field=; enc-format=base64") + --version Prints the version of Dgraph Bulk Loader. + --xidmap string Directory to store xid to uid mapping + -z, --zero string gRPC address for Dgraph zero (default "localhost:5080") + +Use "dgraph bulk [command] --help" for more information about a command. +``` + +#### `dgraph live` + +This command is used to load live data with the Dgraph +[Live Loader](./live-loader) tool. The following replicates the help listing +shown when you run `dgraph live --help`: + +```shell + Run Dgraph Live Loader +Usage: + dgraph live [flags] + +Flags: + -a, --alpha string Comma-separated list of Dgraph alpha gRPC server addresses (default "127.0.0.1:9080") + -t, --auth_token string The auth token passed to the server for Alter operation of the schema file. If used with --slash_grpc_endpoint, then this should be set to the API token issuedby Slash GraphQL + -b, --batch int Number of N-Quads to send as part of a mutation. (default 1000) + -m, --bufferSize string Buffer for each thread (default "100") + -c, --conc int Number of concurrent requests to make to Dgraph (default 10) + --creds string Various login credentials if login is required. + user defines the username to login. + password defines the password of the user. + namespace defines the namespace to log into. + Sample flag could look like --creds user=username;password=mypass;namespace=2 + --encryption string [Enterprise Feature] Encryption At Rest options + key-file=; The file that stores the symmetric key of length 16, 24, or 32 bytes. The key size determines the chosen AES cipher (AES-128, AES-192, and AES-256 respectively). + (default "key-file=;") + -f, --files string Location of *.rdf(.gz) or *.json(.gz) file(s) to load + --force-namespace int Namespace onto which to load the data.Only guardian of galaxy should use this for loading data into multiple namespaces or somespecific namespace. Setting it to negative value will preserve the namespace. + --format string Specify file format (rdf or json) instead of getting it from filename + -h, --help help for live + --http string Address to serve http (pprof). (default "localhost:6060") + --new_uids Ignore UIDs in load files and assign new ones. + -s, --schema string Location of schema file + --slash_grpc_endpoint string Path to Slash GraphQL GRPC endpoint. If --slash_grpc_endpoint is set, all other TLS options and connection options will beignored + --tls string TLS Client options + ca-cert=; The CA cert file used to verify server certificates. Required for enabling TLS. + client-cert=; (Optional) The Cert file provided by the client to the server. + client-key=; (Optional) The private Key file provided by the clients to the server. + internal-port=false; (Optional) Enable inter-node TLS encryption between cluster nodes. + server-name=; Used to verify the server hostname. + use-system-ca=true; Includes System CA into CA Certs. + (default "use-system-ca=true; internal-port=false;") + --tmp string Directory to store temporary buffers. (default "t") + -U, --upsertPredicate string run in upsertPredicate mode. the value would be used to store blank nodes as an xid + -C, --use_compression Enable compression on connection to alpha server + --vault string Vault options + acl-field=; Vault field containing ACL key. + acl-format=base64; ACL key format, can be 'raw' or 'base64'. 
+ addr=http://localhost:8200; Vault server address (format: http://ip:port). + enc-field=; Vault field containing encryption key. + enc-format=base64; Encryption key format, can be 'raw' or 'base64'. + path=secret/data/dgraph; Vault KV store path (e.g. 'secret/data/dgraph' for KV V2, 'kv/dgraph' for KV V1). + role-id-file=; Vault RoleID file, used for AppRole authentication. + secret-id-file=; Vault SecretID file, used for AppRole authentication. + (default "addr=http://localhost:8200; role-id-file=; secret-id-file=; path=secret/data/dgraph; acl-field=; acl-format=base64; enc-field=; enc-format=base64") + --verbose Run the live loader in verbose mode + -x, --xidmap string Directory to store xid to uid mapping + -z, --zero string Dgraph zero gRPC server address (default "127.0.0.1:5080") + +Use "dgraph live [command] --help" for more information about a command. +``` + +#### `dgraph restore` + +This command loads objects from available backups. The following replicates the +help listing shown when you run `dgraph restore --help`: + +```shell +Restore loads objects created with the backup feature in Dgraph Enterprise Edition (EE). + +Backups taken using the GraphQL API can be restored using CLI restore +command. Restore is intended to be used with new Dgraph clusters in offline state. + +The --location flag indicates a source URI with Dgraph backup objects. This URI supports all +the schemes used for backup. + +Source URI formats: + [scheme]://[host]/[path]?[args] + [scheme]:///[path]?[args] + /[path]?[args] (only for local or NFS) + +Source URI parts: + scheme - service handler, one of: "s3", "minio", "file" + host - remote address. ex: "dgraph.s3.amazonaws.com" + path - directory, bucket or container at target. ex: "/dgraph/backups/" + args - specific arguments that are ok to appear in logs. + +The --posting flag sets the posting list parent dir to store the loaded backup files. + +Using the --zero flag will use a Dgraph Zero address to update the start timestamp using +the restored version. Otherwise, the timestamp must be manually updated through Zero's HTTP +'assign' command. + +Dgraph backup creates a unique backup object for each node group, and restore will create +a posting directory 'p' matching the backup group ID. Such that a backup file +named '.../r32-g2.backup' will be loaded to posting dir 'p2'. + +Usage examples: + +# Restore from local dir or NFS mount: +$ dgraph restore -p . -l /var/backups/dgraph + +# Restore from S3: +$ dgraph restore -p /var/db/dgraph -l s3://s3.us-west-2.amazonaws.com/srfrog/dgraph + +# Restore from dir and update Ts: +$ dgraph restore -p . -l /var/backups/dgraph -z localhost:5080 + + +Usage: + dgraph restore [flags] + +Flags: + --backup_id string The ID of the backup series to restore. If empty, it will restore the latest series. + -b, --badger string Badger options + compression=snappy; Specifies the compression algorithm and compression level (if applicable) for the postings directory. "none" would disable compression, while "zstd:1" would set zstd compression at level 1. + goroutines=; The number of goroutines to use in badger.Stream. + (default "compression=snappy; numgoroutines=8;") + --encryption string [Enterprise Feature] Encryption At Rest options + key-file=; The file that stores the symmetric key of length 16, 24, or 32 bytes. The key size determines the chosen AES cipher (AES-128, AES-192, and AES-256 respectively). + (default "key-file=;") + --force_zero If false, no connection to a zero in the cluster will be required. 
Keep in mind this requires you to manually update the timestamp and max uid when you start the cluster. The correct values are printed near the end of this command's output. (default true) + -h, --help help for restore + -l, --location string Sets the source location URI (required). + -p, --postings string Directory where posting lists are stored (required). + --tls string TLS Client options + ca-cert=; The CA cert file used to verify server certificates. Required for enabling TLS. + client-cert=; (Optional) The Cert file provided by the client to the server. + client-key=; (Optional) The private Key file provided by the clients to the server. + internal-port=false; (Optional) Enable inter-node TLS encryption between cluster nodes. + server-name=; Used to verify the server hostname. + use-system-ca=true; Includes System CA into CA Certs. + (default "use-system-ca=true; internal-port=false;") + --vault string Vault options + acl-field=; Vault field containing ACL key. + acl-format=base64; ACL key format, can be 'raw' or 'base64'. + addr=http://localhost:8200; Vault server address (format: http://ip:port). + enc-field=; Vault field containing encryption key. + enc-format=base64; Encryption key format, can be 'raw' or 'base64'. + path=secret/data/dgraph; Vault KV store path (e.g. 'secret/data/dgraph' for KV V2, 'kv/dgraph' for KV V1). + role-id-file=; Vault RoleID file, used for AppRole authentication. + secret-id-file=; Vault SecretID file, used for AppRole authentication. + (default "addr=http://localhost:8200; role-id-file=; secret-id-file=; path=secret/data/dgraph; acl-field=; acl-format=base64; enc-field=; enc-format=base64") + -z, --zero string gRPC address for Dgraph zero. ex: localhost:5080 + +Use "dgraph restore [command] --help" for more information about a command. +``` + +### Dgraph security commands + +Dgraph security commands let you manage access control lists (ACLs), manage +certificates, and audit database usage. + +#### `dgraph acl` + +This command runs the Dgraph Enterprise Edition ACL tool. The following +replicates the help listing shown when you run `dgraph acl --help`: + +```shell +Run the Dgraph Enterprise Edition ACL tool +Usage: + dgraph acl [command] + +Available Commands: + add Run Dgraph acl tool to add a user or group + del Run Dgraph acl tool to delete a user or group + info Show info about a user or group + mod Run Dgraph acl tool to modify a user's password, a user's group list, or agroup's predicate permissions + +Flags: + -a, --alpha string Dgraph Alpha gRPC server address (default "127.0.0.1:9080") + --guardian-creds string Login credentials for the guardian + user defines the username to login. + password defines the password of the user. + namespace defines the namespace to log into. + Sample flag could look like --guardian-creds user=username;password=mypass;namespace=2 + -h, --help help for acl + --tls string TLS Client options + ca-cert=; The CA cert file used to verify server certificates. Required for enabling TLS. + client-cert=; (Optional) The Cert file provided by the client to the server. + client-key=; (Optional) The private Key file provided by the clients to the server. + internal-port=false; (Optional) Enable inter-node TLS encryption between cluster nodes. + server-name=; Used to verify the server hostname. + use-system-ca=true; Includes System CA into CA Certs. + (default "use-system-ca=true; internal-port=false;") + +Use "dgraph acl [command] --help" for more information about a command. 
+```
+
+#### `dgraph audit`
+
+This command decrypts audit files. These files are created using the `--audit`
+flag when you run the `dgraph alpha` command. The following replicates the help
+listing shown when you run `dgraph audit --help`:
+
+```shell
+Dgraph audit tool
+Usage:
+  dgraph audit [command]
+
+Available Commands:
+  decrypt     Run Dgraph Audit tool to decrypt audit files
+
+Flags:
+  -h, --help   help for audit
+
+Use "dgraph audit [command] --help" for more information about a command.
+```
+
+#### `dgraph cert`
+
+This command lets you manage [TLS certificates](./tls-configuration). The
+following replicates the help listing shown when you run `dgraph cert --help`:
+
+```shell
+Dgraph TLS certificate management
+Usage:
+  dgraph cert [flags]
+  dgraph cert [command]
+
+Available Commands:
+  ls          lists certificates and keys
+
+Flags:
+  -k, --ca-key string           path to the CA private key (default "ca.key")
+  -c, --client string           create cert/key pair for a client name
+  -d, --dir string              directory containing TLS certs and keys (default "tls")
+      --duration int            duration of cert validity in days (default 365)
+  -e, --elliptic-curve string   ECDSA curve for private key. Values are: "P224", "P256", "P384", "P521".
+      --force                   overwrite any existing key and cert
+  -h, --help                    help for cert
+  -r, --keysize int             RSA key bit size for creating new keys (default 2048)
+  -n, --nodes strings           creates cert/key pair for nodes
+      --verify                  verify certs against root CA when creating (default true)
+
+Use "dgraph cert [command] --help" for more information about a command.
+```
+
+### Dgraph debug commands
+
+Dgraph debug commands provide support for debugging issues with Dgraph
+deployments. To learn more, see
+[Using the Debug Tool](./howto/using-debug-tool).
+
+#### `dgraph debug`
+
+This command is used to debug issues with a Dgraph database instance. The
+following replicates the help listing shown when you run `dgraph debug --help`:
+
+```shell
+ Debug Dgraph instance
+Usage:
+  dgraph debug [flags]
+
+Flags:
+      --at uint             Set read timestamp for all txns. (default 18446744073709551615)
+      --encryption string   [Enterprise Feature] Encryption At Rest options
+                                key-file=; The file that stores the symmetric key of length 16, 24, or 32 bytes. The key size determines the chosen AES cipher (AES-128, AES-192, and AES-256 respectively).
+                            (default "key-file=;")
+  -h, --help                help for debug
+      --histogram           Show a histogram of the key and value sizes.
+  -y, --history             Show all versions of a key.
+      --item                Output item meta as well. Set to false for diffs. (default true)
+      --jepsen string       Disect Jepsen output. Can be linear/binary.
+  -l, --lookup string       Hex of key to lookup.
+      --nokeys              Ignore key_. Only consider amount when calculating total.
+      --only-summary        If true, only show the summary of the p directory.
+  -p, --postings string     Directory where posting lists are stored.
+  -r, --pred string         Only output specified predicate.
+      --prefix string       Uses a hex prefix.
+  -o, --readonly            Open in read only mode. (default true)
+      --rollup string       Hex of key to rollup.
+  -s, --snap string         Set snapshot term,index,readts to this. Value must be comma-separated list containing the value for these vars in that order.
+  -t, --truncate uint       Remove data from Raft entries until but not including this index.
+      --vals                Output values along with keys.
+      --vault string        Vault options
+                                acl-field=; Vault field containing ACL key.
+                                acl-format=base64; ACL key format, can be 'raw' or 'base64'.
+                                addr=http://localhost:8200; Vault server address (format: http://ip:port).
+                                enc-field=; Vault field containing encryption key.
+                                enc-format=base64; Encryption key format, can be 'raw' or 'base64'.
+                                path=secret/data/dgraph; Vault KV store path (e.g. 'secret/data/dgraph' for KV V2, 'kv/dgraph' for KV V1).
+                                role-id-file=; Vault RoleID file, used for AppRole authentication.
+                                secret-id-file=; Vault SecretID file, used for AppRole authentication.
+                            (default "addr=http://localhost:8200; role-id-file=; secret-id-file=; path=secret/data/dgraph; acl-field=; acl-format=base64; enc-field=; enc-format=base64")
+  -w, --wal string          Directory where Raft write-ahead logs are stored.
+
+Use "dgraph debug [command] --help" for more information about a command.
+```
+
+#### `dgraph debuginfo`
+
+This command generates information about the current node that is useful for
+debugging. The following replicates the help listing shown when you run
+`dgraph debuginfo --help`:
+
+```shell
+Generate debug information on the current node
+Usage:
+  dgraph debuginfo [flags]
+
+Flags:
+  -a, --alpha string       Address of running dgraph alpha. (default "localhost:8080")
+  -x, --archive            Whether to archive the generated report (default true)
+  -d, --directory string   Directory to write the debug info into.
+  -h, --help               help for debuginfo
+  -p, --profiles strings   List of pprof profiles to dump in the report. (default [goroutine,heap,threadcreate,block,mutex,profile,trace])
+  -s, --seconds uint32     Duration for time-based profile collection. (default 15)
+  -z, --zero string        Address of running dgraph zero.
+
+Use "dgraph debuginfo [command] --help" for more information about a command.
+```
+
+### Dgraph tools commands
+
+Dgraph includes a variety of tools that make it easier for you to deploy and
+manage Dgraph.
+
+#### `dgraph completion`
+
+This command generates shell completion scripts for the `bash` and `zsh` shells.
+The following replicates the help listing shown when you run
+`dgraph completion --help`:
+
+```shell
+Generates shell completion scripts for bash or zsh
+Usage:
+  dgraph completion [command]
+
+Available Commands:
+  bash        bash shell completion
+  zsh         zsh shell completion
+
+Flags:
+  -h, --help   help for completion
+
+Use "dgraph completion [command] --help" for more information about a command.
+```
+
+#### `dgraph conv`
+
+This command runs the Dgraph geographic file converter, which converts
+geographic files into RDF so that they can be consumed by Dgraph. The following
+replicates the help listing shown when you run `dgraph conv --help`:
+
+```shell
+Dgraph Geo file converter
+Usage:
+  dgraph conv [flags]
+
+Flags:
+      --geo string       Location of geo file to convert
+      --geopred string   Predicate to use to store geometries (default "loc")
+  -h, --help             help for conv
+      --out string       Location of output rdf.gz file (default "output.rdf.gz")
+
+Use "dgraph conv [command] --help" for more information about a command.
+```
+
+#### `dgraph decrypt`
+
+This command lets you decrypt an export file created by an encrypted Dgraph
+cluster. The following replicates the help listing shown when you run
+`dgraph decrypt --help`:
+
+```shell
+ A tool to decrypt an export file created by an encrypted Dgraph cluster
+Usage:
+  dgraph decrypt [flags]
+
+Flags:
+      --encryption string   [Enterprise Feature] Encryption At Rest options
+                                key-file=; The file that stores the symmetric key of length 16, 24, or 32 bytes. The key size determines the chosen AES cipher (AES-128, AES-192, and AES-256 respectively).
+                            (default "key-file=;")
+  -f, --file string         Path to file to decrypt.
+ -h, --help help for decrypt + -o, --out string Path to the decrypted file. + --vault string Vault options + acl-field=; Vault field containing ACL key. + acl-format=base64; ACL key format, can be 'raw' or 'base64'. + addr=http://localhost:8200; Vault server address (format: http://ip:port). + enc-field=; Vault field containing encryption key. + enc-format=base64; Encryption key format, can be 'raw' or 'base64'. + path=secret/data/dgraph; Vault KV store path (e.g. 'secret/data/dgraph' for KV V2, 'kv/dgraph' for KV V1). + role-id-file=; Vault RoleID file, used for AppRole authentication. + secret-id-file=; Vault SecretID file, used for AppRole authentication. + (default "addr=http://localhost:8200; role-id-file=; secret-id-file=; path=secret/data/dgraph; acl-field=; acl-format=base64; enc-field=; enc-format=base64") + +Use "dgraph decrypt [command] --help" for more information about a command. +``` + +#### `dgraph export_backup` + +This command is used to convert a +[binary backup](./enterprise-features/binary-backups) created using Dgraph +Enterprise Edition into an exported folder. The following replicates key +information from the help listing shown when you run +`dgraph export_backup --help`: + +```shell +Export data inside single full or incremental backup +Usage: + dgraph export_backup [flags] + +Flags: + -d, --destination string The folder to which export the backups. + --encryption string [Enterprise Feature] Encryption At Rest options + key-file=; The file that stores the symmetric key of length 16, 24, or 32 bytes. The key size determines the chosen AES cipher (AES-128, AES-192, and AES-256 respectively). + (default "key-file=;") + -f, --format string The format of the export output. Accepts a value of either rdf or json (default "rdf") + -h, --help help for export_backup + -l, --location string Sets the location of the backup. Both file URIs and s3 are supported. + This command will take care of all the full + incremental backups present in the location. + --upgrade If true, retrieve the CORS from DB and append at the end of GraphQL schema. + It also deletes the deprecated types and predicates. + Use this option when exporting a backup of 20.11 for loading onto 21.03. + --vault string Vault options + acl-field=; Vault field containing ACL key. + acl-format=base64; ACL key format, can be 'raw' or 'base64'. + addr=http://localhost:8200; Vault server address (format: http://ip:port). + enc-field=; Vault field containing encryption key. + enc-format=base64; Encryption key format, can be 'raw' or 'base64'. + path=secret/data/dgraph; Vault KV store path (e.g. 'secret/data/dgraph' for KV V2, 'kv/dgraph' for KV V1). + role-id-file=; Vault RoleID file, used for AppRole authentication. + secret-id-file=; Vault SecretID file, used for AppRole authentication. + (default "addr=http://localhost:8200; role-id-file=; secret-id-file=; path=secret/data/dgraph; acl-field=; acl-format=base64; enc-field=; enc-format=base64") + +Use "dgraph export_backup [command] --help" for more information about a command. +``` + +#### `dgraph increment` + +This command increments a counter transactionally, so that you can confirm that +an Alpha node is able to handle both query and mutation requests. To learn more, +see [Using the Increment Tool](./howto/using-increment-tool). The following +replicates the help listing shown when you run `dgraph increment --help`: + +```shell +Increment a counter transactionally +Usage: + dgraph increment [flags] + +Flags: + --alpha string Address of Dgraph Alpha. 
(default "localhost:9080") + --be Best-effort. Read counter value without retrieving timestamp from Zero. + --creds string Various login credentials if login is required. + user defines the username to login. + password defines the password of the user. + namespace defines the namespace to log into. + Sample flag could look like --creds user=username;password=mypass;namespace=2 + -h, --help help for increment + --jaeger string Send opencensus traces to Jaeger. + --num int How many times to run. (default 1) + --pred string Predicate to use for storing the counter. (default "counter.val") + --retries int How many times to retry setting up the connection. (default 10) + --ro Read-only. Read the counter value without updating it. + --tls string TLS Client options + ca-cert=; The CA cert file used to verify server certificates. Required for enabling TLS. + client-cert=; (Optional) The Cert file provided by the client to the server. + client-key=; (Optional) The private Key file provided by the clients to the server. + internal-port=false; (Optional) Enable inter-node TLS encryption between cluster nodes. + server-name=; Used to verify the server hostname. + use-system-ca=true; Includes System CA into CA Certs. + (default "use-system-ca=true; internal-port=false;") + --wait duration How long to wait. + +Use "dgraph increment [command] --help" for more information about a command. +``` + +#### `dgraph lsbackup` + +This command lists information on backups in a given location for Dgraph +Enterprise Edition. To learn more, see +[Backup List Tool](./enterprise-features/lsbackup). The following replicates the +help listing shown when you run `dgraph lsbackup --help`: + +```shell +List info on backups in a given location +Usage: + dgraph lsbackup [flags] + +Flags: + -h, --help help for lsbackup + -l, --location string Sets the source location URI (required). + --verbose Outputs additional info in backup list. + +Use "dgraph lsbackup [command] --help" for more information about a command. +``` + +#### `dgraph migrate` + +This command runs the Dgraph [migration tool](./migration/migrate-tool) to move +data from a MySQL database to Dgraph. The following replicates the help listing +shown when you run `dgraph migrate --help`: + +```shell +Run the Dgraph migration tool from a MySQL database to Dgraph +Usage: + dgraph migrate [flags] + +Flags: + --db string The database to import + -h, --help help for migrate + --host string The hostname or IP address of the database server. (default "localhost") + -o, --output_data string The data output file (default "sql.rdf") + -s, --output_schema string The schema output file (default "schema.txt") + --password string The password used for logging in + --port string The port of the database server. (default "3306") + -q, --quiet Enable quiet mode to suppress the warning logs + -p, --separator string The separator for constructing predicate names (default ".") + --tables string The comma separated list of tables to import, an empty string means importing all tables in the database + --user string The user for logging in + +Use "dgraph migrate [command] --help" for more information about a command. +``` + +#### `dgraph upgrade` + +This command helps you to upgrade from an earlier Dgraph release to a newer +release. The following replicates the help listing shown when you run +`dgraph upgrade --help`: + +```shell +This tool is supported only for the mainstream release versions of Dgraph, not for the beta releases. 
+Usage:
+  dgraph upgrade [flags]
+
+Flags:
+      --acl                upgrade ACL from v1.2.2 to >=v20.03.0
+  -a, --alpha string       Dgraph Alpha gRPC server address (default "127.0.0.1:9080")
+  -d, --deleteOld          Delete the older ACL types/predicates (default true)
+      --dry-run            dry-run the upgrade
+  -f, --from string        The version string from which to upgrade, e.g.: v1.2.2
+  -h, --help               help for upgrade
+  -p, --password string    Password of ACL user
+  -t, --to string          The version string till which to upgrade, e.g.: v20.03.0
+  -u, --user string        Username of ACL user
+
+Use "dgraph upgrade [command] --help" for more information about a command.
+```
diff --git a/dgraph/reference/deploy/cluster-checklist.mdx b/dgraph/reference/deploy/cluster-checklist.mdx
new file mode 100644
index 00000000..b293d27a
--- /dev/null
+++ b/dgraph/reference/deploy/cluster-checklist.mdx
@@ -0,0 +1,15 @@
+---
+title: Cluster Checklist
+---
+
+In setting up a cluster, be sure to check the following:
+
+- Is at least one Dgraph Zero node running?
+- Is each Dgraph Alpha instance in the cluster set up correctly?
+- Will each Dgraph Alpha instance be accessible to all peers on 7080 (+ any port
+  offset)?
+- Does each instance have a unique ID on startup?
+- Has `--bindall=true` been set for networked communication?
+
+See the [Production Checklist](./installation/production-checklist) docs for
+more info.
diff --git a/dgraph/reference/deploy/cluster-setup.mdx b/dgraph/reference/deploy/cluster-setup.mdx
new file mode 100644
index 00000000..91c80d01
--- /dev/null
+++ b/dgraph/reference/deploy/cluster-setup.mdx
@@ -0,0 +1,48 @@
+---
+title: Cluster Setup
+---
+
+## Understanding a Dgraph cluster
+
+Dgraph is a truly distributed graph database. It shards by predicate and
+replicates predicates across the cluster. Queries can be run on any node, and
+joins are handled over the distributed data: a query is resolved locally for
+predicates the node stores, and by using distributed joins for predicates
+stored on other nodes.
+
+To run a Dgraph cluster effectively, it's important to understand how sharding,
+replication, and rebalancing work.
+
+### Sharding
+
+Dgraph colocates data per predicate (_P_, in RDF terminology), so the smallest
+unit of data is one predicate. To shard the graph, one or many predicates are
+assigned to a group. Each Alpha node in the cluster serves a single group.
+Dgraph Zero assigns a group to each Alpha node.
+
+### Shard rebalancing
+
+Dgraph Zero tries to rebalance the cluster based on the disk usage in each
+group. If Zero detects an imbalance, it will try to move a predicate along with
+its indices to a group that has lower disk usage. This can make the predicate
+temporarily read-only. Queries for the predicate will still be serviced, but any
+mutations for the predicate will be rejected and should be retried after the
+move is finished.
+
+Zero continuously tries to keep the amount of data on each server even,
+typically running this check every 10 minutes. Thus, each additional Dgraph
+Alpha instance allows Zero to further split the predicates from existing groups
+and move them to the new node.
+
+### Consistent Replication
+
+When starting Zero nodes, you can pass the `--replicas` flag to each one to
+assign the same group to multiple nodes. The number passed to the `--replicas`
+flag causes that Zero node to assign the same group to the specified number of
+nodes. These nodes will then form a Raft group (or quorum), and every write will
+be consistently replicated to the quorum.
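+
+For example, a minimal sketch of this (the hostnames and default ports are
+assumptions, matching the examples used elsewhere in these docs) is starting a
+Zero with a replication factor of 3 and then pointing Alphas at it:
+
+```bash
+# Start the first Zero; each group will be replicated across 3 Alpha nodes.
+dgraph zero --replicas=3 --raft idx=1 --my=zero1:5080
+
+# Each Alpha that connects is assigned to a group until that group has 3 members.
+dgraph alpha --my=alpha1:7080 --zero=zero1:5080
+```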
+
+To achieve consensus, it's important that the size of the quorum be an odd
+number. Therefore, we recommend setting `--replicas` to 1, 3, or 5 (not 2 or 4).
+This allows 0, 1, or 2 nodes serving the same group to be down, respectively,
+without affecting the overall health of that group.
diff --git a/dgraph/reference/deploy/config.mdx b/dgraph/reference/deploy/config.mdx
new file mode 100644
index 00000000..cbf3b9a3
--- /dev/null
+++ b/dgraph/reference/deploy/config.mdx
@@ -0,0 +1,199 @@
+---
+title: Config
+---
+
+You can see the list of available subcommands with `dgraph --help`. You can view
+the full set of configuration options for a given subcommand with
+`dgraph <subcommand> --help` (for example, `dgraph zero --help`).
+
+You can configure options in multiple ways, which are listed below from highest
+precedence to lowest precedence:
+
+- Using command line flags (as described in the help output).
+- Using environment variables.
+- Using a configuration file.
+
+If no configuration for an option is used, then the default value as described
+in the `--help` output applies.
+
+You can use multiple configuration methods at the same time, so a core set of
+options could be set in a config file, and instance-specific options could be
+set using environment variables or flags.
+
+## Command line flags
+
+Dgraph has _global flags_ that apply to all subcommands, as well as flags
+specific to each subcommand.
+
+Some flags have been deprecated and replaced in release `v21.03`, and flags for
+several commands (`alpha`, `backup`, `bulk`, `debug`, `live`, and `zero`) now
+have superflags. Superflags are compound flags that contain one or more options
+that let you define multiple settings in a semicolon-delimited list. The general
+syntax for superflags is as follows:
+`--<superflag> option-a=value-a; option-b=value-b`.
+
+The following example shows how to use superflags when running the
+`dgraph alpha` command.
+
+```bash
+dgraph alpha --my=alpha.example.com:7080 --zero=zero.example.com:5080 \
+  --badger "compression=zstd:1" \
+  --block_rate "10" \
+  --trace "jaeger=http://jaeger:14268" \
+  --tls "ca-cert=/dgraph/tls/ca.crt;client-auth-type=REQUIREANDVERIFY;server-cert=/dgraph/tls/node.crt;server-key=/dgraph/tls/node.key;use-system-ca=true;internal-port=true;client-cert=/dgraph/tls/client.dgraphuser.crt;client-key=/dgraph/tls/client.dgraphuser.key" \
+  --security "whitelist=10.0.0.0/8,172.0.0.0/8,192.168.0.0/16"
+```
+
+## Environment variables
+
+The environment variable names for Dgraph mirror the flag names shown in the
+Dgraph CLI `--help` output. These environment variable names are formed by
+concatenating `DGRAPH`, the subcommand invoked (`ALPHA`, `ZERO`, `LIVE`, or
+`BULK`), and then the name of the flag (in uppercase). For example, instead of
+running a command like `dgraph alpha --block_rate 10`, you could set the
+following environment variable: `DGRAPH_ALPHA_BLOCK_RATE=10 dgraph alpha`.
+
+So, the environment variable syntax for a superflag
+(`--<superflag> option-a=value; option-b=value`) is
+`DGRAPH_<SUBCOMMAND>_<SUPERFLAG>="option-a=value;option-b=value"`.
+
+The following is an example of environment variables for `dgraph alpha`:
+
+```bash
+DGRAPH_ALPHA_BADGER="compression=zstd:1"
+DGRAPH_ALPHA_BLOCK_RATE="10"
+DGRAPH_ALPHA_TRACE="jaeger=http://jaeger:14268"
+DGRAPH_ALPHA_TLS="ca-cert=/dgraph/tls/ca.crt;client-auth-type=REQUIREANDVERIFY;server-cert=/dgraph/tls/node.crt;server-key=/dgraph/tls/node.key;use-system-ca=true;internal-port=true;client-cert=/dgraph/tls/client.dgraphuser.crt;client-key=/dgraph/tls/client.dgraphuser.key"
+DGRAPH_ALPHA_SECURITY="whitelist=10.0.0.0/8,172.0.0.0/8,192.168.0.0/16"
+```
+
+## Configuration file
+
+You can specify a configuration file using the Dgraph CLI with the `--config`
+flag (for example, `dgraph alpha --config my_config.json`), or using an
+environment variable (for example,
+`DGRAPH_ALPHA_CONFIG=my_config.json dgraph alpha`).
+
+Dgraph supports configuration file formats that it detects based on file
+extensions ([`.json`](https://www.json.org/json-en.html),
+[`.yml`](https://yaml.org/) or [`.yaml`](https://yaml.org/)). In these files,
+the name of the superflag is used as a key that points to a hash. The hash
+consists of `key: value` pairs that correspond to the superflag's list of
+`option=value` pairs.
+
+
+  The formats [`.toml`](https://toml.io/en/),
+  [`.hcl`](https://github.com/hashicorp/hcl), and
+  [`.properties`](https://en.wikipedia.org/wiki/.properties) are not supported
+  in release `v21.03.0`.
+
+
+
+  When representing the superflag options in the hash, you can use either
+  _kebab-case_ or _snake_case_ for names of the keys.
+
+
+### JSON config file
+
+In JSON, you can represent a superflag and its options
+(`--<superflag> option-a=value;option-b=value`) as follows:
+
+```json
+{
+  "<superflag>": {
+    "option-a": "value",
+    "option-b": "value"
+  }
+}
+```
+
+The following example JSON config file (`config.json`) uses _kebab-case_:
+
+```json
+{
+  "badger": { "compression": "zstd:1" },
+  "trace": { "jaeger": "http://jaeger:14268" },
+  "security": { "whitelist": "10.0.0.0/8,172.0.0.0/8,192.168.0.0/16" },
+  "tls": {
+    "ca-cert": "/dgraph/tls/ca.crt",
+    "client-auth-type": "REQUIREANDVERIFY",
+    "server-cert": "/dgraph/tls/node.crt",
+    "server-key": "/dgraph/tls/node.key",
+    "use-system-ca": true,
+    "internal-port": true,
+    "client-cert": "/dgraph/tls/client.dgraphuser.crt",
+    "client-key": "/dgraph/tls/client.dgraphuser.key"
+  }
+}
+```
+
+The following example JSON config file (`config.json`) uses _snake_case_:
+
+```json
+{
+  "badger": { "compression": "zstd:1" },
+  "trace": { "jaeger": "http://jaeger:14268" },
+  "security": { "whitelist": "10.0.0.0/8,172.0.0.0/8,192.168.0.0/16" },
+  "tls": {
+    "ca_cert": "/dgraph/tls/ca.crt",
+    "client_auth_type": "REQUIREANDVERIFY",
+    "server_cert": "/dgraph/tls/node.crt",
+    "server_key": "/dgraph/tls/node.key",
+    "use_system_ca": true,
+    "internal_port": true,
+    "client_cert": "/dgraph/tls/client.dgraphuser.crt",
+    "client_key": "/dgraph/tls/client.dgraphuser.key"
+  }
+}
+```
+
+### YAML config file
+
+In YAML, you can represent a superflag and its options
+(`--<superflag> option-a=value;option-b=value`) as follows:
+
+```yaml
+<superflag>:
+  option-a: value
+  option-b: value
+```
+
+The following example YAML config file (`config.yml`) uses _kebab-case_:
+
+```yaml
+badger:
+  compression: zstd:1
+trace:
+  jaeger: http://jaeger:14268
+security:
+  whitelist: 10.0.0.0/8,172.0.0.0/8,192.168.0.0/16
+tls:
+  ca-cert: /dgraph/tls/ca.crt
+  client-auth-type: REQUIREANDVERIFY
+  server-cert: /dgraph/tls/node.crt
+  server-key: /dgraph/tls/node.key
+  use-system-ca: true
+  internal-port: true
+  client-cert: /dgraph/tls/client.dgraphuser.crt
+  client-key: /dgraph/tls/client.dgraphuser.key
+```
+
+The following example YAML config file (`config.yml`) uses _snake_case_:
+
+```yaml
+badger:
+  compression: zstd:1
+trace:
+  jaeger: http://jaeger:14268
+security:
+  whitelist: 10.0.0.0/8,172.0.0.0/8,192.168.0.0/16
+tls:
+  ca_cert: /dgraph/tls/ca.crt
+  client_auth_type: REQUIREANDVERIFY
+  server_cert: /dgraph/tls/node.crt
+  server_key: /dgraph/tls/node.key
+  use_system_ca: true
+  internal_port: true
+  client_cert: /dgraph/tls/client.dgraphuser.crt
+  client_key: /dgraph/tls/client.dgraphuser.key
+```
diff --git a/dgraph/reference/deploy/decrypt.mdx b/dgraph/reference/deploy/decrypt.mdx
new file mode 100644
index 00000000..85676992
--- /dev/null
+++ b/dgraph/reference/deploy/decrypt.mdx
@@ -0,0 +1,76 @@
+---
+title: Data Decryption
+---
+
+You might need to decrypt data from an encrypted Dgraph cluster for a variety of
+reasons, including:
+
+- Migration of data from an encrypted cluster to a non-encrypted cluster
+- Changing your data or schema by directly editing an RDF file or schema file
+
+To support these scenarios, Dgraph includes a `decrypt` command that decrypts
+encrypted RDF and schema files. To learn how to export RDF and schema files from
+Dgraph, see
+[Dgraph Administration: Export database](./dgraph-administration.md#export-database).
+
+The `decrypt` command supports a variety of symmetric key lengths, which
+determine the AES cipher used for encryption and decryption, as follows:
+
+| Symmetric key length | AES encryption cipher |
+| -------------------- | --------------------- |
+| 128 bits (16 bytes)  | AES-128               |
+| 192 bits (24 bytes)  | AES-192               |
+| 256 bits (32 bytes)  | AES-256               |
+
+The `decrypt` command also supports the use of
+[HashiCorp Vault](https://www.vaultproject.io/) to store secrets, including
+support for Vault's
+[AppRole authentication](https://www.vaultproject.io/docs/auth/approle.html).
+
+## Decryption options
+
+The following decryption options (or _flags_) are available for the `decrypt`
+command:
+
+| Flag or Superflag | Superflag Option | Notes |
+| ----------------- | ---------------- | ----- |
+| `--encryption`    | `key-file`       | Encryption key filename |
+| `-f`, `--file`    |                  | Path to file for the encrypted RDF or schema **.gz** file |
+| `-h`, `--help`    |                  | Help for the decrypt command |
+| `-o`, `--out`     |                  | Path to file for the decrypted **.gz** file that decrypt creates |
+| `--vault`         | `addr`           | Vault server address, in **http://<_ip-address_>:<_port_>** format (default: `http://localhost:8200`) |
+|                   | `enc-field`      | Name of the Vault server's key/value store field that holds the Base64 encryption key |
+|                   | `enc-format`     | Vault server field format; can be `raw` or `base64` (default: `base64`) |
+|                   | `path`           | Vault server key/value store path (default: `secret/data/dgraph`) |
+|                   | `role-id-file`   | File containing the [Vault](https://www.vaultproject.io/) `role_id` used for AppRole authentication |
+|                   | `secret-id-file` | File containing the [Vault](https://www.vaultproject.io/) `secret_id` used for AppRole authentication |
+
+To learn more about the `--vault` superflag and its options, which replace the
+`--vault_*` options used in release v20.11 and earlier, see the
+[Dgraph CLI Command Reference](./deploy/cli-command-reference).
+
+## Data decryption examples
+
+For example, you could use the following command with an encrypted RDF file
+(**encrypted.rdf.gz**) and an encryption key file (**enc-key-file**) to create
+a decrypted RDF file:
+
+```bash
+# Encryption Key from the file path
+dgraph decrypt --file "encrypted.rdf.gz" --out "decrypted_rdf.gz" --encryption key-file="enc-key-file"
+
+# Encryption Key from HashiCorp Vault
+dgraph decrypt --file "encrypted.rdf.gz" --out "decrypted_rdf.gz" \
+  --vault "addr=http://localhost:8200;enc-field=enc_key;enc-format=raw;path=secret/data/dgraph/alpha;role-id-file=./role_id;secret-id-file=./secret_id"
+```
+
+You can use similar syntax to create a decrypted schema file:
+
+```bash
+# Encryption Key from the file path
+dgraph decrypt --file "encrypted.schema.gz" --out "decrypted_schema.gz" --encryption key-file="enc-key-file"
+
+# Encryption Key from HashiCorp Vault
+dgraph decrypt --file "encrypted.schema.gz" --out "decrypted_schema.gz" \
+  --vault "addr=http://localhost:8200;enc-field=enc_key;enc-format=raw;path=secret/data/dgraph/alpha;role-id-file=./role_id;secret-id-file=./secret_id"
+```
diff --git a/dgraph/reference/deploy/dgraph-alpha.mdx b/dgraph/reference/deploy/dgraph-alpha.mdx
new file mode 100644
index 00000000..5e306e91
--- /dev/null
+++ b/dgraph/reference/deploy/dgraph-alpha.mdx
@@ -0,0 +1,89 @@
+---
+title: More about Dgraph Alpha
+---
+
+Dgraph Alpha provides several HTTP endpoints for administrators, as follows:
+
+- `/health?all` returns information about the health of all the servers in the
+  cluster.
+- `/admin/shutdown` initiates a proper
+  [shutdown](./dgraph-administration.md#shutting-down-database) of the Alpha.
+
+By default, the Alpha listens on `localhost` for admin actions (the loopback
+address, which is only accessible from the same machine). The `--bindall=true`
+option binds to `0.0.0.0` and thus allows external connections.
+
+
+  Set max file descriptors to a high value like 10000 if you are going to load a
+  lot of data.
+
+
+## Querying Health
+
+You can query the `/admin` GraphQL endpoint with a query like the one below to
+get a JSON consisting of basic information about the health of all the servers
+in the cluster.
+
+```graphql
+query {
+  health {
+    instance
+    address
+    version
+    status
+    lastEcho
+    group
+    uptime
+    ongoing
+    indexing
+  }
+}
+```
+
+Here’s an example of JSON returned from the above query:
+
+```json
+{
+  "data": {
+    "health": [
+      {
+        "instance": "zero",
+        "address": "localhost:5080",
+        "version": "v2.0.0-rc1",
+        "status": "healthy",
+        "lastEcho": 1582827418,
+        "group": "0",
+        "uptime": 1504
+      },
+      {
+        "instance": "alpha",
+        "address": "localhost:7080",
+        "version": "v2.0.0-rc1",
+        "status": "healthy",
+        "lastEcho": 1582827418,
+        "group": "1",
+        "uptime": 1505,
+        "ongoing": ["opIndexing"],
+        "indexing": ["name", "age"]
+      }
+    ]
+  }
+}
+```
+
+- `instance`: Name of the instance. Either `alpha` or `zero`.
+- `status`: Health status of the instance. Either `healthy` or `unhealthy`.
+- `version`: Version of Dgraph running the Alpha or Zero server.
+- `uptime`: Time in nanoseconds that the Alpha or Zero server has been up and
+  running.
+- `address`: IP_ADDRESS:PORT of the instance.
+- `group`: Group assigned based on the replication factor. Read more
+  [here](/deploy/cluster-setup).
+- `lastEcho`: Last time, in Unix epoch, when the instance was contacted by
+  another Alpha or Zero server.
+- `ongoing`: List of ongoing operations in the background.
+- `indexing`: List of predicates for which indexes are built in the background.
+  Read more [here](./dql-schema.md#indexes-in-background).
+
+The same information (except `ongoing` and `indexing`) is available from the
+`/health` and `/health?all` endpoints of the Alpha server.
diff --git a/dgraph/reference/deploy/dgraph-zero.mdx b/dgraph/reference/deploy/dgraph-zero.mdx
new file mode 100644
index 00000000..a7a6b0ef
--- /dev/null
+++ b/dgraph/reference/deploy/dgraph-zero.mdx
@@ -0,0 +1,279 @@
+---
+title: More about Dgraph Zero
+---
+
+Dgraph Zero controls the Dgraph cluster and stores information about it. It
+automatically moves data between different Dgraph Alpha instances based on the
+size of the data served by each Alpha instance.
+
+Before you can run `dgraph alpha`, you must run at least one `dgraph zero` node.
+You can see the options available for `dgraph zero` by using the following
+command:
+
+```bash
+dgraph zero --help
+```
+
+The `--replicas` option controls the replication factor: the number of replicas
+per data shard, including the original shard. For consensus, the replication
+factor must be set to an odd number, and the following error will occur if it is
+set to an even number (for example, `2`):
+
+```nix
+ERROR: Number of replicas must be odd for consensus. Found: 2
+```
+
+When a new Alpha joins the cluster, it is assigned to a group based on the
+replication factor. If the replication factor is set to `1`, then each Alpha
+node will serve a different group. If the replication factor is set to `3` and
+you then launch six Alpha nodes, the first three Alpha nodes will serve group 1
+and the next three nodes will serve group 2. Zero monitors the space occupied by
+predicates in each group and moves predicates between groups as needed to
+rebalance the cluster.
+
+## Endpoints
+
+Like Alpha, Zero also exposes HTTP on port 6080 (plus any ports specified by
+`--port_offset`). You can query this port using a **GET** request to access the
+following endpoints:
+
+- `/state` returns information about the nodes that are part of the cluster.
+  This includes information about the size of predicates and which groups they
+  belong to.
+- `/assign?what=uids&num=100` allocates a range of UIDs specified by the `num`
+  argument, and returns a JSON map containing the `startId` and `endId` that
+  defines the range of UIDs (inclusive). This UID range can be safely assigned
+  externally to new nodes during data ingestion.
+- `/assign?what=timestamps&num=100` requests timestamps from Zero. This is
+  useful to "fast forward" the state of the Zero node when starting from a
+  postings directory that already has commits higher than Zero's leased
+  timestamp.
+- `/removeNode?id=3&group=2` removes a dead Zero or Alpha node. When a replica
+  node goes offline and can't be recovered, you can remove it and add a new node
+  to the quorum. To remove dead Zero nodes, pass `group=0` and the id of the
+  Zero node to this endpoint.
+
+
+  Before using this API, ensure that the node is down and that it doesn't ever
+  come back up. Do not use the same `idx` as a node that was removed earlier.
+
+
+- `/moveTablet?tablet=name&group=2` moves a tablet to a group. Zero already
+  rebalances shards every 8 minutes, but this endpoint can be used to force-move
+  a tablet.
+
+You can also use the following **POST** endpoint on HTTP port 6080:
+
+- `/enterpriseLicense` applies an enterprise license to the cluster by supplying
+  it as part of the body.
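+
+For instance, here is a quick sketch of calling these endpoints with `curl`,
+assuming a Zero running locally on the default HTTP port 6080 (the license file
+name is a hypothetical placeholder):
+
+```bash
+# Cluster membership, predicate sizes, and group assignments.
+curl "http://localhost:6080/state"
+
+# Lease 100 UIDs for external assignment; the response contains startId and endId.
+curl "http://localhost:6080/assign?what=uids&num=100"
+
+# Force-move the tablet for predicate "name" to group 2.
+curl "http://localhost:6080/moveTablet?tablet=name&group=2"
+
+# Apply an enterprise license, supplied as the request body.
+curl -X POST "http://localhost:6080/enterpriseLicense" --data-binary @license.txt
+```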
+ +### More About the /state Endpoint + +The `/state` endpoint of Dgraph Zero returns a JSON document of the current +group membership info, which includes the following: + +- Instances which are part of the cluster. +- Number of instances in Zero group and each Alpha groups. +- Current leader of each group. +- Predicates that belong to a group. +- Estimated size in bytes of each predicate. +- Enterprise license information. +- Max Leased transaction ID. +- Max Leased UID. +- CID (Cluster ID). + +Here’s an example of JSON for a cluster with three Alpha nodes and three Zero +nodes returned from the `/state` endpoint: + +```json +{ + "counter": "22", + "groups": { + "1": { + "members": { + "1": { + "id": "1", + "groupId": 1, + "addr": "alpha2:7082", + "leader": true, + "amDead": false, + "lastUpdate": "1603350485", + "clusterInfoOnly": false, + "forceGroupId": false + }, + "2": { + "id": "2", + "groupId": 1, + "addr": "alpha1:7080", + "leader": false, + "amDead": false, + "lastUpdate": "0", + "clusterInfoOnly": false, + "forceGroupId": false + }, + "3": { + "id": "3", + "groupId": 1, + "addr": "alpha3:7083", + "leader": false, + "amDead": false, + "lastUpdate": "0", + "clusterInfoOnly": false, + "forceGroupId": false + } + }, + "tablets": { + "dgraph.cors": { + "groupId": 1, + "predicate": "dgraph.cors", + "force": false, + "space": "0", + "remove": false, + "readOnly": false, + "moveTs": "0" + }, + "dgraph.graphql.schema": { + "groupId": 1, + "predicate": "dgraph.graphql.schema", + "force": false, + "space": "0", + "remove": false, + "readOnly": false, + "moveTs": "0" + }, + "dgraph.graphql.schema_created_at": { + "groupId": 1, + "predicate": "dgraph.graphql.schema_created_at", + "force": false, + "space": "0", + "remove": false, + "readOnly": false, + "moveTs": "0" + }, + "dgraph.graphql.schema_history": { + "groupId": 1, + "predicate": "dgraph.graphql.schema_history", + "force": false, + "space": "0", + "remove": false, + "readOnly": false, + "moveTs": "0" + }, + "dgraph.graphql.xid": { + "groupId": 1, + "predicate": "dgraph.graphql.xid", + "force": false, + "space": "0", + "remove": false, + "readOnly": false, + "moveTs": "0" + }, + "dgraph.type": { + "groupId": 1, + "predicate": "dgraph.type", + "force": false, + "space": "0", + "remove": false, + "readOnly": false, + "moveTs": "0" + } + }, + "snapshotTs": "22", + "checksum": "18099480229465877561" + } + }, + "zeros": { + "1": { + "id": "1", + "groupId": 0, + "addr": "zero1:5080", + "leader": true, + "amDead": false, + "lastUpdate": "0", + "clusterInfoOnly": false, + "forceGroupId": false + }, + "2": { + "id": "2", + "groupId": 0, + "addr": "zero2:5082", + "leader": false, + "amDead": false, + "lastUpdate": "0", + "clusterInfoOnly": false, + "forceGroupId": false + }, + "3": { + "id": "3", + "groupId": 0, + "addr": "zero3:5083", + "leader": false, + "amDead": false, + "lastUpdate": "0", + "clusterInfoOnly": false, + "forceGroupId": false + } + }, + "maxUID": "10000", + "maxTxnTs": "10000", + "maxRaftId": "3", + "removed": [], + "cid": "2571d268-b574-41fa-ae5e-a6f8da175d6d", + "license": { + "user": "", + "maxNodes": "18446744073709551615", + "expiryTs": "1605942487", + "enabled": true + } +} +``` + +This JSON provides information that includes the following, with node members +shown with their node name and HTTP port number: + +- Group 1 members: + - alpha2:7082, id: 1, leader + - alpha1:7080, id: 2 + - alpha3:7083, id: 3 +- Group 0 members (Dgraph Zero nodes) + - zero1:5080, id: 1, leader + - zero2:5082, id: 2 + - zero3:5083, id: 3 +- 
`maxUID` + - The current maximum lease of UIDs used for blank node UID assignment. + - This increments in batches of 10,000 IDs. Once the maximum lease is reached, + another 10,000 IDs are leased. In the event that the Zero leader is lost, + the new leader starts a new lease from `maxUID`+1. Any UIDs lost between + these leases will never be used for blank-node UID assignment. + - An admin can use the Zero endpoint HTTP GET `/assign?what=uids&num=1000` to + reserve a range of UIDs (in this case, 1000) to use externally. Zero will + **never** use these UIDs for blank node UID assignment, so the user can use + the range to assign UIDs manually to their own data sets. +- `maxTxnTs` + - The current maximum lease of transaction timestamps used to hand out start + timestamps and commit timestamps. This increments in batches of 10,000 IDs. + After the max lease is reached, another 10,000 IDs are leased. If the Zero + leader is lost, then the new leader starts a new lease from `maxTxnTs`+1 . + Any lost transaction IDs between these leases will never be used. + - An admin can use the Zero endpoint HTTP GET + `/assign?what=timestamps&num=1000` to increase the current transaction + timestamp (in this case, by 1000). This is mainly useful in special-case + scenarios; for example, using an existing `-p directory` to create a fresh + cluster to be able to query the latest data in the DB. +- `maxRaftId` + - The number of Zeros available to serve as a leader node. Used by the + [RAFT](/design-concepts/raft/) consensus algorithm. +- `CID` + - This is a unique UUID representing the _cluster-ID_ for this cluster. It is + generated during the initial DB startup and is retained across restarts. +- Enterprise license + - Enabled + - `maxNodes`: unlimited + - License expiration, shown in seconds since the Unix epoch. + + + The terms "tablet", "predicate", and "edge" are currently synonymous. In + future, Dgraph might improve data scalability to shard a predicate into + separate tablets that can be assigned to different groups. + diff --git a/dgraph/reference/deploy/index.mdx b/dgraph/reference/deploy/index.mdx new file mode 100644 index 00000000..a8ef2a27 --- /dev/null +++ b/dgraph/reference/deploy/index.mdx @@ -0,0 +1,32 @@ +--- +title: Self-Managed Cluster +--- + +You can deploy and manage Dgraph database in a variety of self-managed +deployment scenarios, including: + +- Running Dgraph on your on-premises infrastructure (bare-metal physical + servers) +- Running Dgraph on your cloud infrastructure (AWS, GCP and Azure) + +This section focuses exclusively on deployment and management for these +self-managed scenarios. To learn about fully-managed options that let you focus +on building apps and websites, rather than managing infrastructure, see the +[Dgraph cloud services docs](https://dgraph.io/docs/cloud/), or +[Try Dgraph Cloud](https://cloud.dgraph.io/). + +A Dgraph cluster consists of the following: + +- **Dgraph Alpha database server nodes**: The Dgraph Alpha server nodes in your + deployment host and serve data. These nodes also host an `/admin` HTTP and + GRPC endpoint that can be used for data and node administration tasks such as + backup, export, draining, and shutdown. +- **Dgraph Zero management server nodes**: The Dgraph Zero nodes in your + deployment control the nodes in your Dgraph cluster. Dgraph Zero automatically + moves data between different Dgraph Alpha instances based on the volume of + data served by each Alpha instance. + +You need at least one node of each type to run Dgraph. 
You need three nodes of each type to run Dgraph in a high-availability (HA)
+cluster configuration. To learn more about 2-node and 6-node deployment
+options, see the
+[Production Checklist](./installation/production-checklist).
diff --git a/dgraph/reference/deploy/installation/download.mdx b/dgraph/reference/deploy/installation/download.mdx
new file mode 100644
index 00000000..99b3b55a
--- /dev/null
+++ b/dgraph/reference/deploy/installation/download.mdx
@@ -0,0 +1,83 @@
+---
+title: Download
+description:
+  Download the images and source files to build and install for a
+  production-ready Dgraph cluster
+---
+
+You can obtain the Dgraph binary for the latest version, as well as previous
+releases, using the automatic install script, a manual download, Docker images,
+or by building the binary from the open source code.
+
+
+
+
+1. Install Docker.
+
+1. Pull the latest Dgraph image using Docker:
+
+   ```sh
+   docker pull dgraph/dgraph:latest
+   ```
+
+   To set up a [learning environment](./single-host-setup), you may pull the
+   [dgraph standalone](https://hub.docker.com/r/dgraph/standalone) image:
+
+   ```sh
+   docker pull dgraph/standalone:latest
+   ```
+
+1. Verify that the image is downloaded:
+
+   ```sh
+   docker images
+   ```
+
+
+
+
+On Linux systems, you can get the binary using the automatic install script:
+
+1. Download the Dgraph installation script to install Dgraph automatically:
+
+   ```sh
+   curl https://get.dgraph.io -sSf | bash
+   ```
+
+1. Verify the installation by running `dgraph version`. For more information
+   about the various installation scripts that you can use, see
+   [install scripts](https://github.com/dgraph-io/Install-Dgraph).
+
+
+
+
+On Linux systems, you can download a tar file and install the binary manually.
+Download the appropriate tar file for your platform from
+**[Dgraph releases](https://github.com/dgraph-io/dgraph/releases)**, then
+extract the binary to `/usr/local/bin`, as follows.
+
+1. Extract the binary to `/usr/local/bin`:
+
+   ```sh
+   sudo tar -C /usr/local/bin -xzf dgraph-linux-amd64-VERSION.tar.gz
+   ```
+
+1. Verify the installation by running `dgraph version`.
+
+
+
+
+You can also build **Dgraph** and **Ratel UI** from the source code by following
+the instructions from
+[Contributing to Dgraph](https://github.com/dgraph-io/dgraph/blob/master/CONTRIBUTING.md)
+or
+[Building and running ratel](https://github.com/dgraph-io/ratel/blob/master/INSTRUCTIONS.md).
+
+
+
diff --git a/dgraph/reference/deploy/installation/index.mdx b/dgraph/reference/deploy/installation/index.mdx
new file mode 100644
index 00000000..6607c2de
--- /dev/null
+++ b/dgraph/reference/deploy/installation/index.mdx
@@ -0,0 +1,7 @@
+---
+title: Installation
+---
+
+This section covers installing Dgraph in dev or hobbyist environments, as well
+as in production environments with HA and horizontal scalability using multiple
+Alpha nodes in a cluster.
diff --git a/dgraph/reference/deploy/installation/kubernetes/cluster-types.mdx b/dgraph/reference/deploy/installation/kubernetes/cluster-types.mdx
new file mode 100644
index 00000000..943ac6e8
--- /dev/null
+++ b/dgraph/reference/deploy/installation/kubernetes/cluster-types.mdx
@@ -0,0 +1,175 @@
+---
+title: Cluster Types
+---
+
+### Terminology
+
+An **N-node cluster** is a Dgraph cluster that contains N Dgraph instances. For
+example, a 6-node cluster means six Dgraph instances. The **replication
+setting** specifies the number of Dgraph Alpha replicas that are in each group.
If this is higher than 1, each Alpha in a group holds a full
+copy of that group's data. The replication setting is a configuration flag
+(`--replicas`) on Dgraph Zero. Sharding is done (typically for databases near
+1TB in size) by creating multiple **Dgraph Alpha groups**. Every Dgraph Alpha
+group is automatically assigned a set of distinct predicates to store and serve,
+thus dividing up the data.
+
+Examples of different cluster settings:
+
+- No sharding
+  - 2-node cluster: 1 Zero, 1 Alpha (one group).
+  - HA equivalent: x3 = 6-node cluster.
+- With 2-way sharding:
+  - 3-node cluster: 1 Zero, 2 Alphas (two groups).
+  - HA equivalent: x3 = 9-node cluster.
+
+In the following examples, we outline the two most common cluster
+configurations: a 2-node cluster and a 6-node cluster.
+
+### Basic setup: 2-node cluster
+
+We provide sample configs for both
+[Docker Compose](https://github.com/dgraph-io/dgraph/blob/main/contrib/config/docker/docker-compose.yml)
+and
+[Kubernetes](https://github.com/dgraph-io/dgraph/tree/main/contrib/config/kubernetes/dgraph-single)
+for a 2-node cluster. You can also run Dgraph directly on your host machines.
+
+![2-node cluster](/images/deploy-guide-1.png)
+
+Configuration can be set as command-line flags, environment variables, or in a
+config file (see [Config](./deploy/config)).
+
+Dgraph Zero:
+
+- The `--my` flag should be set to the address:port (the internal-gRPC port)
+  that will be accessible to the Dgraph Alpha (default: `localhost:5080`).
+- The `--raft` superflag's `idx` option should be set to a unique Raft ID within
+  the Dgraph Zero group (default: `1`).
+- The `--wal` flag should be set to the directory path to store write-ahead-log
+  entries on disk (default: `zw`).
+- The `--bindall` flag should be set to true for machine-to-machine
+  communication (default: `true`).
+- Recommended: For better issue diagnostics, set the log level verbosity to 2
+  with the option `--v=2`.
+
+Dgraph Alpha:
+
+- The `--my` flag should be set to the address:port (the internal-gRPC port)
+  that will be accessible to the Dgraph Zero (default: `localhost:7080`).
+- The `--zero` flag should be set to the corresponding Zero address set for
+  Dgraph Zero's `--my` flag.
+- The `--postings` flag should be set to the directory path for data storage
+  (default: `p`).
+- The `--wal` flag should be set to the directory path for write-ahead-log
+  entries (default: `w`).
+- The `--bindall` flag should be set to true for machine-to-machine
+  communication (default: `true`).
+- Recommended: For better issue diagnostics, set the log level verbosity to 2
+  with the option `--v=2`.
+
+### HA setup: 6-node cluster
+
+We provide sample configs for both
+[Docker Compose](https://github.com/dgraph-io/dgraph/blob/main/contrib/config/docker/docker-compose-ha.yml)
+and
+[Kubernetes](https://github.com/dgraph-io/dgraph/tree/main/contrib/config/kubernetes/dgraph-ha)
+for a 6-node cluster with 3 Alpha replicas per group. You can also run Dgraph
+directly on your host machines.
+
+A Dgraph cluster can be configured in a high-availability setup with Dgraph Zero
+and Dgraph Alpha each set up with peers. These peers are part of Raft consensus
+groups, which elect a single leader among themselves. The non-leader peers are
+called followers. In the event that the peers cannot communicate with the leader
+(e.g., a network partition or a machine shuts down), the group automatically
+elects a new leader to continue.
+
+Configuration can be set as command-line flags, environment variables, or in a
+config file (see [Config](./deploy/config)).
+
+In this setup, we assume the following hostnames are set:
+
+- `zero1`
+- `zero2`
+- `zero3`
+- `alpha1`
+- `alpha2`
+- `alpha3`
+
+We will configure the cluster with 3 Alpha replicas per group. The cluster
+group-membership topology will look like the following:
+
+![Dgraph cluster image](/images/deploy-guide-2.png)
+
+#### Set up Dgraph Zero group
+
+In the Dgraph Zero group, you must set unique Raft IDs (the `--raft` superflag's
+`idx` option) per Dgraph Zero. Dgraph will not auto-assign Raft IDs to Dgraph
+Zero instances.
+
+The first Dgraph Zero that starts will initiate the database cluster. Any
+following Dgraph Zero instances must connect to the cluster via the `--peer`
+flag to join. If the `--peer` flag is omitted, the Dgraph Zero instance will
+create its own independent Dgraph cluster.
+
+**First Dgraph Zero** example:
+`dgraph zero --replicas=3 --raft idx=1 --my=zero1:5080`
+
+The `--my` flag must be set to the address:port of this instance that peers will
+connect to. The `--raft` superflag's `idx` option sets its Raft ID to `1`.
+
+**Second Dgraph Zero** example:
+`dgraph zero --replicas=3 --raft idx=2 --my=zero2:5080 --peer=zero1:5080`
+
+The `--my` flag must be set to the address:port of this instance that peers will
+connect to. The `--raft` superflag's `idx` option sets its Raft ID to 2, and the
+`--peer` flag specifies a request to connect to the Dgraph cluster of zero1
+instead of initializing a new one.
+
+**Third Dgraph Zero** example:
+`dgraph zero --replicas=3 --raft idx=3 --my=zero3:5080 --peer=zero1:5080`
+
+The `--my` flag must be set to the address:port of this instance that peers will
+connect to. The `--raft` superflag's `idx` option sets its Raft ID to 3, and the
+`--peer` flag specifies a request to connect to the Dgraph cluster of zero1
+instead of initializing a new one.
+
+Dgraph Zero configuration options:
+
+- The `--my` flag should be set to the address:port (the internal-gRPC port)
+  that will be accessible to Dgraph Alpha (default: `localhost:5080`).
+- The `--raft` superflag's `idx` option should be set to a unique Raft ID within
+  the Dgraph Zero group (default: `1`).
+- The `--wal` flag should be set to the directory path to store write-ahead-log
+  entries on disk (default: `zw`).
+- The `--bindall` flag should be set to true for machine-to-machine
+  communication (default: `true`).
+- Recommended: For more informative log info, set the log level verbosity to 2
+  with the option `--v=2`.
+
+#### Set up Dgraph Alpha group
+
+The number of replica members per Alpha group depends on the setting of Dgraph
+Zero's `--replicas` flag. Above, it is set to 3. So when Dgraph Alphas join the
+cluster, Dgraph Zero assigns each one to an Alpha group, filling in members up
+to the limit per group set by the `--replicas` flag.
+
+First Alpha example: `dgraph alpha --my=alpha1:7080 --zero=zero1:5080`
+
+Second Alpha example: `dgraph alpha --my=alpha2:7080 --zero=zero1:5080`
+
+Third Alpha example: `dgraph alpha --my=alpha3:7080 --zero=zero1:5080`
+
+Dgraph Alpha configuration options:
+
+- The `--my` flag should be set to the address:port (the internal-gRPC port)
+  that will be accessible to the Dgraph Zero (default: `localhost:7080`).
+- The `--zero` flag should be set to the corresponding Zero address set for
+  Dgraph Zero's `--my` flag.
diff --git a/dgraph/reference/deploy/installation/kubernetes/ha-cluster.mdx b/dgraph/reference/deploy/installation/kubernetes/ha-cluster.mdx
new file mode 100644
index 00000000..5b667d14
--- /dev/null
+++ b/dgraph/reference/deploy/installation/kubernetes/ha-cluster.mdx
@@ -0,0 +1,418 @@
+---
+title: Highly Available Cluster Setup
+---
+
+You can run three Dgraph Alpha servers and three Dgraph Zero servers in a highly
+available cluster setup. For a highly available setup, start the Dgraph Zero
+server with the `--replicas 3` flag, so that all data is replicated on three
+Alpha servers and forms one Alpha group. You can install a highly available
+cluster using:
+
+- the [dgraph-ha.yaml](https://github.com/dgraph-io/dgraph/blob/main/contrib/config/kubernetes/dgraph-ha/dgraph-ha.yaml)
+  file
+- Helm charts
+
+### Install a highly available Dgraph cluster using YAML or Helm
+
+#### Before you begin:
+
+- Install [kubectl](https://kubernetes.io/docs/tasks/tools/install-kubectl/).
+- Ensure that you have a production-ready Kubernetes cluster with at least three
+  worker nodes running in a cloud provider of your choice.
+- (Optional) To run Dgraph Alpha with TLS, see
+  [TLS Configuration](/tls-configuration).
+
+#### Installing a highly available Dgraph cluster
+
+1. Verify that you are able to access the nodes in the Kubernetes cluster:
+
+   ```bash
+   kubectl get nodes
+   ```
+
+   An output similar to this appears:
+
+   ```bash
+   NAME                  STATUS   ROLES   AGE   VERSION
+   ..compute.internal    Ready            1m    v1.15.11-eks-af3caf
+   ..compute.internal    Ready            1m    v1.15.11-eks-af3caf
+   ..compute.internal    Ready            1m    v1.15.11-eks-af3caf
+   ```
+
+   After your Kubernetes cluster is up, you can use
+   [dgraph-ha.yaml](https://github.com/dgraph-io/dgraph/blob/main/contrib/config/kubernetes/dgraph-ha/dgraph-ha.yaml)
+   to start the cluster.
+
+1. Start a StatefulSet that creates Pods with `Zero`, `Alpha`, and `Ratel UI`:
+
+   ```bash
+   kubectl create --filename https://raw.githubusercontent.com/dgraph-io/dgraph/main/contrib/config/kubernetes/dgraph-ha/dgraph-ha.yaml
+   ```
+
+   An output similar to this appears:
+
+   ```bash
+   service/dgraph-zero-public created
+   service/dgraph-alpha-public created
+   service/dgraph-ratel-public created
+   service/dgraph-zero created
+   service/dgraph-alpha created
+   statefulset.apps/dgraph-zero created
+   statefulset.apps/dgraph-alpha created
+   deployment.apps/dgraph-ratel created
+   ```
+
+1. Confirm that the Pods were created successfully.
+
+   ```bash
+   kubectl get pods
+   ```
+
+   An output similar to this appears:
+
+   ```bash
+   NAME             READY   STATUS    RESTARTS   AGE
+   dgraph-alpha-0   1/1     Running   0          6m24s
+   dgraph-alpha-1   1/1     Running   0          5m42s
+   dgraph-alpha-2   1/1     Running   0          5m2s
+   dgraph-ratel-    1/1     Running   0          6m23s
+   dgraph-zero-0    1/1     Running   0          6m24s
+   dgraph-zero-1    1/1     Running   0          5m41s
+   dgraph-zero-2    1/1     Running   0          5m6s
+   ```
+
+   You can check the logs for the Pod using
+   `kubectl logs --follow `.
+
+1. Port forward from your local machine to the Pod:
+
+   ```bash
+   kubectl port-forward service/dgraph-alpha-public 8080:8080
+   kubectl port-forward service/dgraph-ratel-public 8000:8000
+   ```
+
+1. Go to `http://localhost:8000` to access Dgraph using the Ratel UI.
+
+   You can also access the service on its External IP address.
+
+#### Deleting highly available Dgraph resources
+
+Delete all the resources using:
+
+```sh
+kubectl delete --filename https://raw.githubusercontent.com/dgraph-io/dgraph/main/contrib/config/kubernetes/dgraph-ha/dgraph-ha.yaml
+kubectl delete persistentvolumeclaims --selector app=dgraph-zero
+kubectl delete persistentvolumeclaims --selector app=dgraph-alpha
+```
+
+#### Before you begin
+
+- Install [kubectl](https://kubernetes.io/docs/tasks/tools/install-kubectl/).
+- Ensure that you have a production-ready Kubernetes cluster with at least three
+  worker nodes running in a cloud provider of your choice.
+- Install [Helm](https://helm.sh/docs/intro/install/).
+- (Optional) To run Dgraph Alpha with TLS, see
+  [TLS Configuration](/tls-configuration).
+
+#### Installing a highly available Dgraph cluster using Helm
+
+1. Verify that you are able to access the nodes in the Kubernetes cluster:
+
+   ```bash
+   kubectl get nodes
+   ```
+
+   An output similar to this appears:
+
+   ```bash
+   NAME                  STATUS   ROLES   AGE   VERSION
+   ..compute.internal    Ready            1m    v1.15.11-eks-af3caf
+   ..compute.internal    Ready            1m    v1.15.11-eks-af3caf
+   ..compute.internal    Ready            1m    v1.15.11-eks-af3caf
+   ```
+
+   After your Kubernetes cluster is up and running, you can use the
+   [Dgraph Helm chart](https://github.com/dgraph-io/charts/) to install a
+   highly available Dgraph cluster.
+
+1. Add the Dgraph Helm repository:
+
+   ```bash
+   helm repo add dgraph https://charts.dgraph.io
+   ```
+
+1. Install the chart with ``:
+
+   ```bash
+   helm install dgraph/dgraph
+   ```
+
+   You can also specify the version using:
+
+   ```bash
+   helm install dgraph/dgraph --set image.tag="[version]"
+   ```
+
+   When configuring the Dgraph image tag, be careful not to use `latest` or
+   `main` in a production environment. The Dgraph version behind these tags can
+   change, which may cause a mixed-version Dgraph cluster, leading to an outage
+   and potential data loss.
+
+   An output similar to this appears:
+
+   ```bash
+   NAME:
+   LAST DEPLOYED: Wed Feb  1 21:26:32 2023
+   NAMESPACE: default
+   STATUS: deployed
+   REVISION: 1
+   TEST SUITE: None
+   NOTES:
+   1. You have just deployed Dgraph, version 'v21.12.0'.
+
+      For further information:
+      * Documentation: https://dgraph.io/docs/
+      * Community and Issues: https://discuss.dgraph.io/
+   2. Get the Dgraph Alpha HTTP/S endpoint by running these commands.
+      export ALPHA_POD_NAME=$(kubectl get pods --namespace default --selector "statefulset.kubernetes.io/pod-name=-dgraph-alpha-0,release=-dgraph" --output jsonpath="{.items[0].metadata.name}")
+      echo "Access Alpha HTTP/S using http://localhost:8080"
+      kubectl --namespace default port-forward $ALPHA_POD_NAME 8080:8080
+
+      NOTE: Change "http://" to "https://" if TLS was added to the Ingress, Load Balancer, or Dgraph Alpha service.
+   ```
+
+1. Get the name of the Pods in the cluster using `kubectl get pods`:
+
+   ```bash
+   NAME               READY   STATUS    RESTARTS   AGE
+   -dgraph-alpha-0    1/1     Running   0          4m48s
+   -dgraph-alpha-1    1/1     Running   0          4m2s
+   -dgraph-alpha-2    1/1     Running   0          3m31s
+   -dgraph-zero-0     1/1     Running   0          4m48s
+   -dgraph-zero-1     1/1     Running   0          4m10s
+   -dgraph-zero-2     1/1     Running   0          3m50s
+   ```
+
+1. Get the Dgraph Alpha HTTP/S endpoint by running these commands:
+
+   ```bash
+   export ALPHA_POD_NAME=$(kubectl get pods --namespace default --selector "statefulset.kubernetes.io/pod-name=-dgraph-alpha-0,release=-dgraph" --output jsonpath="{.items[0].metadata.name}")
+   echo "Access Alpha HTTP/S using http://localhost:8080"
+   kubectl --namespace default port-forward $ALPHA_POD_NAME 8080:8080
+   ```
+
+#### Deleting the resources from the cluster
+
+1. Delete the Helm deployment using:
+
+   ```sh
+   helm delete my-release
+   ```
+
+2. Delete associated Persistent Volume Claims:
+
+   ```sh
+   kubectl delete pvc --selector release=my-release
+   ```
+
+### Dgraph configuration files
+
+You can create Dgraph [Config](./deploy/config) files for the Alpha and Zero
+servers with Helm chart configuration values, ``. For more
+information about the values, see the latest
+[configuration settings](https://github.com/dgraph-io/charts/blob/master/charts/dgraph/README.md#configuration).
+
+1. Open an editor of your choice and create a config file named
+   `.yaml`:
+
+```yaml
+# .yaml
+alpha:
+  configFile:
+    config.yaml: |
+      alsologtostderr: true
+      badger:
+        compression_level: 3
+        tables: mmap
+        vlog: mmap
+      postings: /dgraph/data/p
+      wal: /dgraph/data/w
+zero:
+  configFile:
+    config.yaml: |
+      alsologtostderr: true
+      wal: /dgraph/data/zw
+```
+
+2. Change to the directory in which you created ``.yaml and
+   then install with Alpha and Zero configuration using:
+
+```sh
+helm install dgraph/dgraph --values .yaml
+```
+
+### Exposing Alpha and Ratel Services
+
+By default, the Zero and Alpha services are exposed only within the Kubernetes
+cluster as Kubernetes service type `ClusterIP`.
+
+To expose the Alpha and Ratel services publicly, you can use the Kubernetes
+service type `LoadBalancer` or an Ingress resource.
+
+##### Public Internet
+
+To use an external load balancer, set the service type to `LoadBalancer`.
+
+  For security purposes, we recommend limiting access to any public endpoints,
+  for example by using a whitelist.
+
+1. To expose the Alpha service to the Internet, use:
+
+```sh
+helm install dgraph/dgraph --set alpha.service.type="LoadBalancer"
+```
+
+2. To expose the Alpha and Ratel services to the Internet, use:
+
+```sh
+helm install dgraph/dgraph --set alpha.service.type="LoadBalancer" --set ratel.service.type="LoadBalancer"
+```
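+
+After the service type changes to `LoadBalancer`, the cloud provider assigns an
+external address; you can watch for it with a plain `kubectl` call:
+
+```sh
+# Wait until EXTERNAL-IP changes from <pending> to a real address
+kubectl get svc --watch
+```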
+
+##### Private Internal Network
+
+An external load balancer can be configured to face internally to a private
+subnet rather than the public Internet. This way it can be accessed securely by
+clients on the same network, through a VPN, or from a jump server. In
+Kubernetes, this is often configured through service annotations by the
+provider. Here's a small list of annotations from cloud providers:
+
+| Provider     | Documentation Reference                                                                                          | Annotation                                                         |
+| ------------ | ---------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------ |
+| AWS          | [Amazon EKS: Load Balancing](https://docs.aws.amazon.com/eks/latest/userguide/load-balancing.html)               | `service.beta.kubernetes.io/aws-load-balancer-internal: "true"`    |
+| Azure        | [AKS: Internal Load Balancer](https://docs.microsoft.com/azure/aks/internal-lb)                                  | `service.beta.kubernetes.io/azure-load-balancer-internal: "true"`  |
+| Google Cloud | [GKE: Internal Load Balancing](https://cloud.google.com/kubernetes-engine/docs/how-to/internal-load-balancing)   | `cloud.google.com/load-balancer-type: "Internal"`                  |
+
+Here's an example using Amazon [EKS](https://aws.amazon.com/eks/) as the
+provider.
+
+1. Create a Helm chart configuration values file, ``.yaml:
+
+```yaml
+# .yaml
+alpha:
+  service:
+    type: LoadBalancer
+    annotations:
+      service.beta.kubernetes.io/aws-load-balancer-internal: "true"
+ratel:
+  service:
+    type: LoadBalancer
+    annotations:
+      service.beta.kubernetes.io/aws-load-balancer-internal: "true"
+```
+
+1. To expose Alpha and Ratel services privately, use:
+
+```sh
+helm install dgraph/dgraph --values .yaml
+```
+
+You can expose Alpha and Ratel using an
+[ingress](https://kubernetes.io/docs/concepts/services-networking/ingress/)
+resource that can route traffic to service resources. Before using this option,
+you may need to install an
+[ingress controller](https://kubernetes.io/docs/concepts/services-networking/ingress-controllers/)
+first, as is the case with [AKS](https://docs.microsoft.com/azure/aks/) and
+[EKS](https://aws.amazon.com/eks/);
+[GKE](https://cloud.google.com/kubernetes-engine) comes bundled with a default
+ingress controller. When routing traffic based on the `hostname`, you may want
+to integrate an add-on like
+[ExternalDNS](https://github.com/kubernetes-sigs/external-dns) so that DNS
+records can be registered automatically when deploying Dgraph.
+
+As an example, you can configure a single ingress resource that uses
+[ingress-nginx](https://github.com/kubernetes/ingress-nginx) for the Alpha and
+Ratel services.
+
+1. Create a Helm chart configuration values file, ``.yaml:
+
+```yaml
+# .yaml
+global:
+  ingress:
+    enabled: false
+    annotations:
+      kubernetes.io/ingress.class: nginx
+    ratel_hostname: "ratel."
+    alpha_hostname: "alpha."
+```
+
+2. To expose Alpha and Ratel services through an ingress:
+
+```sh
+helm install dgraph/dgraph --values .yaml
+```
+
+You can run `kubectl get ingress` to see the status and access these through
+their hostname, such as `http://alpha.` and
+`http://ratel.`.
+
+  Ingress controllers will likely have an option to configure access for private
+  internal networks. Consult documentation from the ingress controller provider
+  for further information.
+
+### Upgrading the Helm chart
+
+You can update your cluster configuration by updating the configuration of the
+Helm chart. Dgraph is a stateful database, so take care when upgrading the
+configuration in order to move your cluster to the desired state.
+
+In general, you can use [`helm upgrade`][helm-upgrade] to update the
+configuration values of the cluster. Depending on your change, you may need to
+upgrade the configuration in multiple steps.
+
+[helm-upgrade]: https://helm.sh/docs/helm/helm_upgrade/
+
+To upgrade to an [HA cluster setup](./#ha-cluster-setup-using-kubernetes):
+
+1. Ensure that the shard replication setting, `zero.shardReplicaCount`, is more
+   than one. For example, set the shard replica count for the Zero node group
+   to 3: `zero.shardReplicaCount=3`.
+2. Run the Helm upgrade command to restart the Zero node group:
+   ```sh
+   helm upgrade dgraph/dgraph [options]
+   ```
+3. Set the Alpha replica count flag. For example: `alpha.replicaCount=3`.
+4. Run the Helm upgrade command again:
+   ```sh
+   helm upgrade dgraph/dgraph [options]
+   ```
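+
+As a concrete sketch of the two-step upgrade, with an illustrative release name
+(`my-release`) and the chart values named above:
+
+```sh
+# Steps 1-2: raise shard replication on the Zero group and restart it
+helm upgrade my-release dgraph/dgraph --set zero.shardReplicaCount=3
+
+# Steps 3-4: raise the Alpha replica count and upgrade again
+helm upgrade my-release dgraph/dgraph --set zero.shardReplicaCount=3 --set alpha.replicaCount=3
+```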
diff --git a/dgraph/reference/deploy/installation/kubernetes/monitoring-cluster.mdx b/dgraph/reference/deploy/installation/kubernetes/monitoring-cluster.mdx
new file mode 100644
index 00000000..fc231e92
--- /dev/null
+++ b/dgraph/reference/deploy/installation/kubernetes/monitoring-cluster.mdx
@@ -0,0 +1,343 @@
+---
+title: Monitoring the Cluster
+---
+
+## Monitoring the Kubernetes Cluster
+
+Dgraph exposes Prometheus metrics to monitor the state of various components
+involved in the cluster, including Dgraph Alpha and Zero nodes. You can set up
+Prometheus monitoring for your cluster.
+
+You can use Helm to install the
+[kube-prometheus-stack](https://github.com/prometheus-operator/kube-prometheus)
+chart. This Helm chart is a collection of Kubernetes manifests,
+[Grafana](http://grafana.com/) dashboards, and
+[Prometheus rules](https://prometheus.io/docs/prometheus/latest/configuration/recording_rules/),
+combined with scripts to provide monitoring with
+[Prometheus](https://prometheus.io/) using the
+[Prometheus Operator](https://github.com/prometheus-operator/prometheus-operator).
+This Helm chart also installs [Grafana](http://grafana.com/),
+[node_exporter](https://github.com/prometheus/node_exporter), and
+[kube-state-metrics](https://github.com/kubernetes/kube-state-metrics).
+
+### Before you begin:
+
+- Install [kubectl](https://kubernetes.io/docs/tasks/tools/install-kubectl/).
+- Ensure that you have a production-ready Kubernetes cluster with at least three
+  worker nodes running in a cloud provider of your choice.
+- Install [Helm](https://helm.sh/docs/intro/install/)
+
+### Install using Helm Chart
+
+1. Create a `YAML` file named `dgraph-prometheus-operator.yaml` and edit the
+   values as appropriate for adding endpoints, adding alert rules, adjusting
+   alert manager configuration, adding Grafana dashboards, and so on. For more
+   information, see
+   [Dgraph Helm chart values](https://github.com/dgraph-io/dgraph/tree/main/contrib/config/monitoring/prometheus/chart-values).
+
+   ```yaml
+   prometheusOperator:
+     createCustomResource: true
+
+   grafana:
+     enabled: true
+     persistence:
+       enabled: true
+       accessModes: ["ReadWriteOnce"]
+       size: 5Gi
+     defaultDashboardsEnabled: true
+     service:
+       type: ClusterIP
+
+   alertmanager:
+     service:
+       labels:
+         app: dgraph-io
+     alertmanagerSpec:
+       storage:
+         volumeClaimTemplate:
+           spec:
+             accessModes: ["ReadWriteOnce"]
+             resources:
+               requests:
+                 storage: 5Gi
+       replicas: 1
+       logLevel: debug
+     config:
+       global:
+         resolve_timeout: 2m
+       route:
+         group_by: ['job']
+         group_wait: 30s
+         group_interval: 5m
+         repeat_interval: 12h
+         receiver: 'null'
+         routes:
+           - match:
+               alertname: Watchdog
+             receiver: 'null'
+       receivers:
+         - name: 'null'
+
+   prometheus:
+     service:
+       type: ClusterIP
+     serviceAccount:
+       create: true
+       name: prometheus-dgraph-io
+
+     prometheusSpec:
+       storageSpec:
+         volumeClaimTemplate:
+           spec:
+             accessModes: ["ReadWriteOnce"]
+             resources:
+               requests:
+                 storage: 25Gi
+       resources:
+         requests:
+           memory: 400Mi
+       enableAdminAPI: false
+
+     additionalServiceMonitors:
+       - name: zero-dgraph-io
+         endpoints:
+           - port: http-zero
+             path: /debug/prometheus_metrics
+         namespaceSelector:
+           any: true
+         selector:
+           matchLabels:
+             monitor: zero-dgraph-io
+       - name: alpha-dgraph-io
+         endpoints:
+           - port: http-alpha
+             path: /debug/prometheus_metrics
+         namespaceSelector:
+           any: true
+         selector:
+           matchLabels:
+             monitor: alpha-dgraph-io
+   ```
+
+1. Create a `YAML` file named `secrets.yaml` that has the credentials for
+   Grafana.
+
+   ```yaml
+   grafana:
+     adminPassword:
+   ```
+
+1. Add the `prometheus-operator` Helm chart:
+
+   ```bash
+   helm repo add stable https://charts.helm.sh/stable
+   helm repo add prometheus-community https://prometheus-community.github.io/helm-charts
+   helm repo update
+   ```
+
+1. Install
+   [kube-prometheus-stack](https://github.com/prometheus-operator/kube-prometheus)
+   with the `` in the namespace named `monitoring`:
+
+   ```bash
+   helm install \
+     --values dgraph-prometheus-operator.yaml \
+     --values secrets.yaml \
+     prometheus-community/kube-prometheus-stack --namespace monitoring
+   ```
+
+   An output similar to the following appears:
+
+   ```bash
+   NAME: dgraph-prometheus-release
+   LAST DEPLOYED: Sun Feb  5 21:35:45 2023
+   NAMESPACE: monitoring
+   STATUS: deployed
+   REVISION: 1
+   NOTES:
+   kube-prometheus-stack has been installed. Check its status by running:
+     kubectl --namespace monitoring get pods -l "release=dgraph-prometheus-release"
+
+   Visit https://github.com/prometheus-operator/kube-prometheus instructions on how to create & configure Alertmanager and Prometheus instances using the Operator.
+   ```
+
+1. Check the list of services in the `monitoring` namespace using
+   `kubectl get svc -n monitoring`:
+
+   ```bash
+   NAME                                                 TYPE        CLUSTER-IP       EXTERNAL-IP   PORT(S)                      AGE
+   alertmanager-operated                                ClusterIP   None                           9093/TCP,9094/TCP,9094/UDP   29s
+   dgraph-prometheus-release-alertmanager               ClusterIP   10.128.239.240                 9093/TCP                     32s
+   dgraph-prometheus-release-grafana                    ClusterIP   10.128.213.70                  80/TCP                       32s
+   dgraph-prometheus-release-kube-state-metrics         ClusterIP   10.128.139.145                 8080/TCP                     32s
+   dgraph-prometheus-release-operator                   ClusterIP   10.128.6.5                     443/TCP                      32s
+   dgraph-prometheus-release-prometheus                 ClusterIP   10.128.255.88                  9090/TCP                     32s
+   dgraph-prometheus-release-prometheus-node-exporter   ClusterIP   10.128.103.131                 9100/TCP                     32s
+   prometheus-operated                                  ClusterIP   None                           9090/TCP                     29s
+   ```
+
+1. Use
+   `kubectl port-forward svc/dgraph-prometheus-release-prometheus -n monitoring 9090`
+   to access Prometheus at `localhost:9090`.
+1. Use `kubectl --namespace monitoring port-forward svc/grafana 3000:80` to
+   access Grafana at `localhost:3000`.
+1. Log in to Grafana using the password that you set in the `secrets.yaml`
+   file.
+1. In the **Dashboards** menu of Grafana, select **Import**.
+1. On the **Dashboards/Import dashboard** page, paste the contents of the
+   [dgraph-kubernetes-grafana-dashboard.json](https://github.com/dgraph-io/dgraph/blob/main/contrib/config/monitoring/grafana/dgraph-kubernetes-grafana-dashboard.json)
+   file into **Import via panel json** and click **Load**.
+
+   You can visualize all Dgraph Alpha and Zero Kubernetes Pods using the regex
+   pattern `"/dgraph-.*-[0-9]*$/"`. You can change this in the dashboard
+   configuration by selecting the Pod variable. For example, if you have
+   multiple releases and only want to visualize the current release named
+   `my-release-3`, change the regex pattern to
+   `"/my-release-3.*dgraph-.*-[0-9]*$/"` in the Pod variable of the dashboard
+   configuration. By default, the Prometheus that you installed is configured
+   as the `Datasource` in Grafana.
+
+## Kubernetes Storage
+
+The Kubernetes configurations in the previous sections were configured to run
+Dgraph with any storage type (`storage-class: anything`). On common cloud
+environments like AWS, GCP, and Azure, the default storage type is backed by
+slow disks, such as hard disks or low-IOPS SSDs. We highly recommend using
+faster disks for ideal performance when running Dgraph.
+
+### Local storage
+
+The AWS storage-optimized i-class instances provide locally attached NVMe-based
+SSD storage, which provides consistently high IOPS. The Dgraph team uses
+i3.large instances on AWS to test Dgraph.
+
+You can create a Kubernetes `StorageClass` object to provision a specific type
+of storage volume which you can then attach to your Dgraph Pods. You can set up
+your cluster with local SSDs by using
+[Local Persistent Volumes](https://kubernetes.io/blog/2018/04/13/local-persistent-volumes-beta/).
+This Kubernetes feature is in beta at the time of this writing (Kubernetes
+v1.13.1). You can first set up an EC2 instance with locally attached storage.
+Once it is formatted and mounted properly, you can create a StorageClass to
+access it:
+
+```yaml
+apiVersion: storage.k8s.io/v1
+kind: StorageClass
+metadata:
+  name:
+provisioner: kubernetes.io/no-provisioner
+volumeBindingMode: WaitForFirstConsumer
+```
+
+Kubernetes does not currently allow automatic provisioning of local storage, so
+a PersistentVolume with a specific mount path should be created:
+
+```yaml
+apiVersion: v1
+kind: PersistentVolume
+metadata:
+  name:
+spec:
+  capacity:
+    storage: 475Gi
+  volumeMode: Filesystem
+  accessModes:
+    - ReadWriteOnce
+  persistentVolumeReclaimPolicy: Delete
+  storageClassName:
+  local:
+    path: /data
+  nodeAffinity:
+    required:
+      nodeSelectorTerms:
+        - matchExpressions:
+            - key: kubernetes.io/hostname
+              operator: In
+              values:
+                -
+```
+
+Then, in the StatefulSet configuration you can claim this local storage in
+.spec.volumeClaimTemplates:
+
+```yaml
+kind: StatefulSet
+---
+volumeClaimTemplates:
+  - metadata:
+      name: datadir
+    spec:
+      accessModes:
+        - ReadWriteOnce
+      storageClassName:
+      resources:
+        requests:
+          storage: 500Gi
+```
+
+You can repeat these steps for each instance that's configured with local node
+storage.
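+
+For reference, preparing the locally attached NVMe disk mentioned above might
+look like the following on the EC2 instance; the device name is an assumption,
+so check `lsblk` on your machine first:
+
+```sh
+# Format the local NVMe device with ext4 and mount it where the PV expects it
+sudo mkfs.ext4 /dev/nvme1n1
+sudo mkdir -p /data
+sudo mount /dev/nvme1n1 /data
+```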
+
+### Non-local persistent disks
+
+EBS volumes on AWS and PDs on GCP are persistent disks that can be configured
+with Dgraph. The disk performance is much lower than locally attached storage,
+but can be sufficient for workloads such as testing environments.
+
+When using EBS volumes on AWS, we recommend using Provisioned IOPS SSD EBS
+volumes (the io1 disk type), which provide consistent IOPS. The available IOPS
+for AWS EBS volumes is based on the total disk size. With Kubernetes, you can
+request io1 disks provisioned with 50 IOPS/GB using the `iopsPerGB` parameter
+in the following config:
+
+```yaml
+kind: StorageClass
+apiVersion: storage.k8s.io/v1
+metadata:
+  name:
+provisioner: kubernetes.io/aws-ebs
+parameters:
+  type: io1
+  iopsPerGB: "50"
+  fsType: ext4
+```
+
+Example: Requesting a disk size of 250Gi with this storage class would provide
+12.5K IOPS.
+
+## Removing a Dgraph Pod
+
+In the event that you need to completely remove a Pod (e.g., its disk got
+corrupted and data cannot be recovered), you can use the `/removeNode` API to
+remove the node from the cluster. With a Kubernetes StatefulSet, you'll need to
+remove the node in this order:
+
+1. On the Zero leader, call `/removeNode` to remove the Dgraph instance from the
+   cluster (see [More about Dgraph Zero](./deploy/dgraph-zero)). The removed
+   instance will immediately stop running. Any further attempts to join the
+   cluster will fail for that instance since it has been removed.
+2. Remove the PersistentVolumeClaim associated with the Pod to delete its data.
+   This prepares the Pod to join with a clean state.
+3. Restart the Pod. This creates a new PersistentVolumeClaim, which in turn
+   creates new data directories.
+
+When an Alpha Pod restarts in a replicated cluster, it will join as a new member
+of the cluster, be assigned a group and an unused index from Zero, and receive
+the latest snapshot from the Alpha leader of the group.
+
+When a Zero Pod restarts, it must join the existing group with an unused index
+ID. You set the index ID with the `--raft` superflag's `idx` option. This might
+require you to update the StatefulSet configuration.
+
+## Kubernetes and Bulk Loader
+
+You may want to initialize a new cluster with an existing data set such as data
+from the [Dgraph Bulk Loader](./bulk-loader). You can use
+[Init Containers](https://kubernetes.io/docs/concepts/workloads/pods/init-containers/)
+to copy the data to the Pod volume before the Alpha process runs.
+
+See the `initContainers` configuration in
+[dgraph-ha.yaml](https://github.com/dgraph-io/dgraph/blob/main/contrib/config/kubernetes/dgraph-ha/dgraph-ha.yaml)
+to learn more.
diff --git a/dgraph/reference/deploy/installation/kubernetes/single-server-cluster.mdx b/dgraph/reference/deploy/installation/kubernetes/single-server-cluster.mdx
new file mode 100644
index 00000000..51c61eda
--- /dev/null
+++ b/dgraph/reference/deploy/installation/kubernetes/single-server-cluster.mdx
@@ -0,0 +1,95 @@
+---
+title: Single Server Cluster Setup
+---
+
+You can install a single server Dgraph cluster in Kubernetes.
+
+## Before you begin
+
+- Install [kubectl](https://kubernetes.io/docs/tasks/tools/install-kubectl/).
+- Ensure that you have a production-ready Kubernetes cluster running in a cloud
+  provider of your choice.
+- (Optional) To run Dgraph Alpha with TLS, see
+  [TLS Configuration](/tls-configuration).
+
+## Installing a single server Dgraph cluster
+
+1. Verify that you are able to access the nodes in the Kubernetes cluster:
+
+   ```bash
+   kubectl get nodes
+   ```
+
+   An output similar to this appears:
+
+   ```bash
+   NAME                  STATUS   ROLES   AGE   VERSION
+   ..compute.internal    Ready            1m    v1.15.11-eks-af3caf
+   ..compute.internal    Ready            1m    v1.15.11-eks-af3caf
+   ```
+
+   After your Kubernetes cluster is up, you can use
+   [dgraph-single.yaml](https://github.com/dgraph-io/dgraph/blob/main/contrib/config/kubernetes/dgraph-single/dgraph-single.yaml)
+   to start the Zero, Alpha, and Ratel UI services.
+
+1. Start a StatefulSet that creates a single Pod with `Zero`, `Alpha`, and
+   `Ratel UI`:
+
+   ```bash
+   kubectl create --filename https://raw.githubusercontent.com/dgraph-io/dgraph/main/contrib/config/kubernetes/dgraph-single/dgraph-single.yaml
+   ```
+
+   An output similar to this appears:
+
+   ```bash
+   service/dgraph-public created
+   statefulset.apps/dgraph created
+   ```
+
+1. Confirm that the Pod was created successfully.
+
+   ```bash
+   kubectl get pods
+   ```
+
+   An output similar to this appears:
+
+   ```bash
+   NAME       READY   STATUS    RESTARTS   AGE
+   dgraph-0   3/3     Running   0          1m
+   ```
+
+1. List the containers running in the Pod `dgraph-0`:
+
+   ```bash
+   kubectl get pods dgraph-0 -o jsonpath='{range .spec.containers[*]}{.name}{"\n"}{end}'
+   ```
+
+   An output similar to this appears:
+
+   ```bash
+   ratel
+   zero
+   alpha
+   ```
+
+   You can check the logs for the containers in the pod using
+   `kubectl logs --follow dgraph-0 `.
+
+1. Port forward from your local machine to the Pod:
+
+   ```bash
+   kubectl port-forward pod/dgraph-0 8080:8080
+   kubectl port-forward pod/dgraph-0 8000:8000
+   ```
+
+1. Go to `http://localhost:8000` to access Dgraph using the Ratel UI.
+
+## Deleting Dgraph single server resources
+
+Delete all the resources using:
+
+```sh
+kubectl delete --filename https://raw.githubusercontent.com/dgraph-io/dgraph/main/contrib/config/kubernetes/dgraph-single/dgraph-single.yaml
+kubectl delete persistentvolumeclaims --selector app=dgraph
+```
diff --git a/dgraph/reference/deploy/installation/lambda-server.mdx b/dgraph/reference/deploy/installation/lambda-server.mdx
new file mode 100644
index 00000000..3a9fa4a7
--- /dev/null
+++ b/dgraph/reference/deploy/installation/lambda-server.mdx
@@ -0,0 +1,110 @@
+---
+title: Lambda Server
+description:
+  Set up a Dgraph database with a lambda server. Dgraph Lambda is a serverless
+  platform for running JavaScript on Dgraph and Dgraph Cloud
+---
+
+In this article you'll learn how to set up a Dgraph database with a lambda
+server.
+
+## Dgraph Lambda
+
+[Dgraph Lambda](https://github.com/dgraph-io/dgraph-lambda) is a serverless
+platform for running JavaScript on Dgraph and
+[Dgraph Cloud](https://dgraph.io/cloud).
+
+You can
+[download the latest version](https://github.com/dgraph-io/dgraph-lambda/releases/latest)
+or review the implementation in our
+[open-source repository](https://github.com/dgraph-io/dgraph-lambda).
+
+### Running with Docker
+
+To run a Dgraph Lambda server with Docker:
+
+```bash
+docker run -it --rm -p 8686:8686 -v /path/to/script.js:/app/script/script.js -e DGRAPH_URL=http://host.docker.internal:8080 dgraph/dgraph-lambda
+```
+
+  `host.docker.internal` doesn't work on older versions of Docker on Linux. You
+  can use `DGRAPH_URL=http://172.17.0.1:8080` instead.
+
+### Adding libraries
+
+If you would like to add libraries to Dgraph Lambda, use
+`webpack --target=webworker` to compile your script.
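+
+As a hedged sketch, from a project directory that has `webpack` and
+`webpack-cli` installed, and assuming webpack's default entry point
+(`./src/index.js`), the build step might be as simple as:
+
+```sh
+# Bundle the script for the webworker target; adjust entry/output to your layout
+npx webpack --target=webworker
+```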
+
+### Working with TypeScript
+
+You can import `@slash-graphql/lambda-types` to get types for
+`addGraphQLResolver` and `addGraphQLMultiParentResolver`.
+
+## Dgraph Alpha
+
+To set up Dgraph Alpha, you need to define the `--graphql` superflag's
+`lambda-url` option, which is used to set the URL of the lambda server. All the
+`@lambda` fields will be resolved through the lambda functions implemented on
+the given lambda server.
+
+For example:
+
+```bash
+dgraph alpha --graphql lambda-url=http://localhost:8686/graphql-worker
+```
+
+Then test it out with the following `curl` command:
+
+```bash
+curl localhost:8686/graphql-worker -H "Content-Type: application/json" -d '{"resolver":"MyType.customField","parent":[{"customField":"Dgraph Labs"}]}'
+```
+
+### Docker settings
+
+If you're using Docker, you need to add the `--graphql` superflag's `lambda-url`
+option to your Alpha configuration. For example:
+
+```yml
+command:
+  /gobin/dgraph alpha --zero=zero1:5180 -o 100 --expose_trace --trace ratio=1.0
+  --profile_mode block --block_rate 10 --logtostderr -v=2 --security
+  whitelist=10.0.0.0/8,172.16.0.0/12,192.168.0.0/16 --my=alpha1:7180 --graphql
+  lambda-url=http://lambda:8686/graphql-worker
+```
+
+Next, you need to add the Dgraph Lambda server configuration, and map the
+JavaScript file that contains the code for lambda functions to the
+`/app/script/script.js` file. Remember to set the `DGRAPH_URL` environment
+variable to your Alpha server.
+
+Here's a complete Docker example that uses the base Dgraph image and adds Lambda
+server support:
+
+```yml
+services:
+  dgraph:
+    image: dgraph/standalone:latest
+    environment:
+      DGRAPH_ALPHA_GRAPHQL: "lambda-url=http://dgraph_lambda:8686/graphql-worker"
+    ports:
+      - "8080:8080"
+      - "9080:9080"
+      - "8000:8000"
+    volumes:
+      - dgraph:/dgraph

+  dgraph_lambda:
+    image: dgraph/dgraph-lambda:latest
+    ports:
+      - "8686:8686"
+    environment:
+      DGRAPH_URL: http://dgraph:8080
+    volumes:
+      - ./gql/script.js:/app/script/script.js:ro
+
+volumes:
+  dgraph: {}
+```
diff --git a/dgraph/reference/deploy/installation/production-checklist.mdx b/dgraph/reference/deploy/installation/production-checklist.mdx
new file mode 100644
index 00000000..e4154c33
--- /dev/null
+++ b/dgraph/reference/deploy/installation/production-checklist.mdx
@@ -0,0 +1,207 @@
+---
+title: Production Checklist
+description: Requirements to install Dgraph in a production environment
+---
+
+This guide describes important setup recommendations for a production-ready
+Dgraph cluster: high availability with external persistent storage, automatic
+recovery of failed services and failed systems such as virtual machines, and
+disaster recovery such as automated backup/restore or export/import.
+
+  In this guide, a node refers to a Dgraph instance unless specified otherwise.
+
+A **Dgraph cluster** consists of multiple **Dgraph instances**, or nodes,
+connected together to form a single distributed database. A Dgraph instance is
+either a **Dgraph Zero** or **Dgraph Alpha**, each of which serves a different
+role in the cluster.
+
+Once installed, you may also install or use a **Dgraph client** to communicate
+with the database and perform queries, mutations, schema alterations, and so
+on. Pure HTTP calls from curl, Postman, or another program are also possible
+without a specific client, but a range of clients provide higher-level language
+bindings and use optimized gRPC for communication with the database.
Any standards-compliant GraphQL client will work with Dgraph to
+run GraphQL operations. To run DQL and other Dgraph-specific operations, use a
+Dgraph client.
+
+Dgraph provides official clients for Go, Java, Python, JavaScript, and C#; the
+JavaScript client supports both gRPC and HTTP to run more easily in a browser.
+Community-developed Dgraph clients for other languages are also available. The
+full list of clients can be found on the [Clients](./clients) page. One
+particular client, Dgraph Ratel, is a more sophisticated UI tool used to
+visualize queries, run mutations, and manage schemas in both GraphQL and DQL.
+Note that clients are not part of a database cluster, and simply connect to one
+or more Dgraph Alpha instances.
+
+### Cluster Requirements
+
+A minimum of one Dgraph Zero and one Dgraph Alpha is needed for a working
+cluster.
+
+There can be multiple Dgraph Zeros and Dgraph Alphas running in a single
+cluster.
+
+### Machine Requirements
+
+To ensure predictable performance characteristics, Dgraph instances should
+**not** run on "burstable" or throttled machines that limit resources. That
+includes t2 class machines on AWS.
+
+To ensure that Dgraph is highly available, we recommend each Dgraph instance be
+deployed to a different underlying host machine, and ideally that machines are
+in different availability zones or racks. In the event of an underlying machine
+failure, it is critical that at most one Dgraph Alpha and one Dgraph Zero go
+offline, so that 2 of the 3 instances in each group maintain a quorum. Also,
+when using VMs or Docker/Kubernetes, ensure machines are not over-subscribed,
+and ideally not co-resident with other processes that can interrupt or delay
+Dgraph processing.
+
+If you'd like to run Dgraph with fewer machines, then the recommended
+configuration is to run a single Dgraph Zero and a single Dgraph Alpha per
+machine. In a high availability setup, that allows the cluster to lose a single
+machine (simultaneously losing a Dgraph Zero and a Dgraph Alpha) with continued
+availability of the database.
+
+Do not run multiple Dgraph Zero or Dgraph Alpha processes on a single machine.
+This can affect performance due to shared resource issues and reduce
+availability in the event of machine failures.
+
+### Operating System
+
+Dgraph is designed to run on Linux. To run Dgraph on Windows and macOS, use the
+[standalone Docker image](./dgraph-overview#to-run-dgraph-using-the-standalone-docker-image).
+
+### CPU and Memory
+
+We recommend 8 vCPUs or cores on each of three HA alpha instances for production
+loads, with 16 GiB+ memory per node.
+
+You'll want to ensure that your CPU and memory resources are sufficient for your
+production workload. A common configuration for Dgraph is 16 CPUs and 32 GiB of
+memory per machine. Dgraph is designed with concurrency in mind, so more cores
+means quicker processing and higher throughput of requests.
+
+You may find you'll need more CPU cores and memory for your specific use case.
+
+In addition, we highly recommend that your CPU clock rate is equal to or above
+3.4 GHz.
+
+### Disk
+
+Dgraph instances make heavy use of disks, so storage with high IOPS is highly
+recommended to ensure reliable performance. Specifically, SSDs, not HDDs.
+
+Regarding disk IOPS, the recommendation is:
+
+- 1000 IOPS minimum
+- 3000 IOPS for medium and large datasets
+
+Instances such as c5d.4xlarge have locally attached NVMe SSDs with high IOPS.
+You can also use EBS volumes with provisioned IOPS (io1).
If you are not running
+performance-critical workloads, you can also choose to use cheaper gp2 EBS
+volumes. Typically, AWS
+[gp3](https://aws.amazon.com/about-aws/whats-new/2020/12/introducing-new-amazon-ebs-general-purpose-volumes-gp3/?nc1=h_ls)
+disks are a good option, with a baseline of 3000 IOPS at any disk size.
+
+Recommended disk sizes for Dgraph Zero and Dgraph Alpha:
+
+- Dgraph Zero: 200 GB to 300 GB. Dgraph Zero stores cluster metadata information
+  and maintains a write-ahead log for cluster operations.
+- Dgraph Alpha: 250 GB to 750 GB. Dgraph Alpha stores database data, including
+  the schema, indices, and the data values. It maintains a write-ahead log of
+  changes to the database. Your cloud provider may provide better disk
+  performance based on the volume size.
+- If you plan to store over 1.1TB per Dgraph Alpha instance, you must increase
+  either the `MaxLevels` or `TableSizeMultiplier` setting.
+
+Additional recommendations:
+
+- The recommended Linux filesystem is ext4.
+- Avoid using shared storage such as NFS, CIFS, and CEPH storage.
+
+### Firewall Rules
+
+Dgraph instances communicate over several ports. Firewall rules should be
+configured appropriately for the ports documented in
+[Ports Usage](./ports-usage).
+
+Internal ports must be accessible by all Zero and Alpha peers for proper
+cluster-internal communication. Database clients must be able to connect to
+Dgraph Alpha external ports either directly or through a load balancer.
+
+Dgraph Zeros can be set up in a private network where communication is only with
+Dgraph Alphas, database administrators, internal services (such as Prometheus or
+Jaeger), and possibly developers (see note below). Dgraph Zero's 6080 external
+port is only necessary for database administrators. For example, it can be used
+to inspect the cluster metadata (/state), allocate UIDs or set txn timestamps
+(/assign), move data shards (/moveTablet), or remove cluster nodes
+(/removeNode). The full docs about Zero's administrative tasks are in
+[More About Dgraph Zero](./deploy/dgraph-zero).
+
+  Developers using Dgraph Live Loader or Dgraph Bulk Loader require access to
+  both Dgraph Zero port 5080 and Dgraph Alpha port 9080. When using those tools,
+  consider using them within your environment that has network access to both
+  ports of the cluster.
+
+### Operating System Tuning
+
+The OS should be configured with the recommended settings to ensure that Dgraph
+runs properly.
+
+#### File Descriptors Limit
+
+Dgraph can use a large number of open file descriptors. Most operating systems
+set a default limit that is lower than what is required.
+
+It is recommended to set the file descriptors limit to unlimited. If that is not
+possible, set it to at least a million (1,048,576) to account for cluster growth
+over time.
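+
+A quick way to inspect and raise the limit for the current shell session (init
+systems and container runtimes each have their own way to set this
+permanently):
+
+```sh
+# Show the current open-file limit
+ulimit -n
+# Raise it to the recommended minimum for this session
+ulimit -n 1048576
+```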
+
+### Deployment
+
+A Dgraph instance is run as a single process from a single static binary. It
+does not require any additional dependencies or separate services in order to
+run (see the [Supplementary Services](./#supplementary-services) section for
+third-party services that work alongside Dgraph). A Dgraph cluster is set up by
+running multiple Dgraph processes networked together.
+
+### Backup Policy
+
+A backup policy is a predefined schedule used to back up information from
+business applications. A backup policy helps to ensure data recoverability in
+the event of accidental data deletion, data corruption, or a system outage.
+
+For Dgraph, backups are created using the
+[backups enterprise feature](./enterprise-features/binary-backups). You can
+also create full exports of your data and schema using
+[data exports](./dgraph-administration#exporting-database), available as an
+open source feature.
+
+We **strongly** recommend that you have a backup policy in place before moving
+your application to the production phase, and we also suggest that you have a
+backup policy even for pre-production apps supported by Dgraph database
+instances running in development, staging, QA, or pre-production clusters.
+
+We suggest that your policy include frequent full and incremental backups.
+Accordingly, we suggest the following backup policy for your production apps:
+
+- [full backup](https://dgraph.io/docs/enterprise-features/binary-backups/#forcing-a-full-backup)
+  every 24 hours
+- incremental backup every 2 to 4 hours
+
+### Supplementary Services
+
+These services are not required for a Dgraph cluster to function but are
+recommended for better insight when operating a Dgraph cluster.
+
+- [Metrics][] and [monitoring][] with Prometheus and Grafana.
+- [Distributed tracing][] with Jaeger.
+
+[metrics]: ./metrics
+[monitoring]: ./deploy/monitoring
+[distributed tracing]: ./tracing
diff --git a/dgraph/reference/deploy/installation/single-host-setup.mdx b/dgraph/reference/deploy/installation/single-host-setup.mdx
new file mode 100644
index 00000000..5067e6ac
--- /dev/null
+++ b/dgraph/reference/deploy/installation/single-host-setup.mdx
@@ -0,0 +1,229 @@
+---
+title: Learning Environment
+---
+
+To learn about Dgraph and its components, you can install and run a Dgraph
+cluster on a single host using Docker, Docker Compose, or the Dgraph command
+line.
+
+A Dgraph cluster can be set up running as containers on a single host.
+
+  To evaluate Dgraph on Windows and macOS use the [standalone Docker
+  image](./dgraph-overview#to-run-dgraph-using-the-standalone-docker-image).
+
+#### Before you begin
+
+Ensure that you have installed:
+
+- Docker [Desktop](https://docs.docker.com/desktop/) (required for Windows or
+  macOS)
+- Docker [Engine](https://docs.docker.com/engine/install/)
+
+#### Launch a Dgraph standalone cluster using Docker
+
+1. Select a name `` for your Docker container and create a
+   directory `` that will hold the Dgraph data on your local
+   file system.
+1. Run a container with the dgraph/standalone image:
+   ```sh
+   docker run --name -d -p "8080:8080" -p "9080:9080" -v :/dgraph dgraph/standalone:latest
+   ```
+1. Optionally, launch [Ratel UI](./ratel/overview) using the dgraph/ratel
+   Docker image:
+   ```sh
+   docker run --name ratel -d -p "8000:8000" dgraph/ratel:latest
+   ```
+   You can now use Ratel UI in your browser at localhost:8000 and connect to
+   your Dgraph cluster at localhost:8080.
+
+#### Set up a Dgraph cluster on a single host using Docker
+
+1. Get the `` of the host using:
+   ```sh
+   ip addr    # On Arch Linux
+   ifconfig   # On Ubuntu/Mac
+   ```
+1. Pull the latest Dgraph image using Docker:
+   ```sh
+   docker pull dgraph/dgraph:latest
+   ```
+1. Verify that the image is downloaded:
+
+   ```sh
+   docker images
+   ```
+
+1. Create a `` using:
+   ```sh
+   docker network create
+   ```
+1. Create a directory `` to store data for Dgraph Zero and run the
+   container:
+
+   ```sh
+   mkdir ~/ # Or any other directory where data should be stored.
+
+   docker run -it -p 5080:5080 --network -p 6080:6080 -v ~/:/dgraph dgraph/dgraph:latest dgraph zero --my=:5080
+   ```
+1. Create a directory `` to store data for Dgraph Alpha and run the
+   container:
+
+   ```sh
+   mkdir ~/ # Or any other directory where data should be stored.
+
+   docker run -it -p 7080:7080 --network -p 8080:8080 -p 9080:9080 -v ~/:/dgraph dgraph/dgraph:latest dgraph alpha --zero=:5080 --my=:7080
+   ```
+
+1. Create a directory `` to store data for the second Dgraph Alpha
+   and run the container:
+
+   ```sh
+   mkdir ~/ # Or any other directory where data should be stored.
+
+   docker run -it -p 7081:7081 --network -p 8081:8081 -p 9081:9081 -v ~/:/dgraph dgraph/dgraph:latest dgraph alpha --zero=:5080 --my=:7081 -o=1
+   ```
+   To override the default ports for the second Alpha, use `-o`.
+
+1. Connect to the running Dgraph cluster using https://play.dgraph.io/.
+   For information about connecting, see [Ratel UI](/ratel/connection).
+
+You can run Dgraph directly on a single Linux host.
+
+#### Before you begin
+
+Ensure that you have:
+
+- Installed [Dgraph](./download) on the Linux host.
+- Made a note of the `` of the host.
+
+#### Using Dgraph Command Line
+
+You can start Dgraph on a single host using the dgraph command line.
+
+1. Run Dgraph Zero:
+
+   ```sh
+   dgraph zero --my=:5080
+   ```
+
+   The `--my` flag is the connection that Dgraph Alphas dial to talk to Zero,
+   so the port `5080` and the IP address must be visible to all the Dgraph
+   Alphas. For all other flags, run `dgraph zero --help`.
+
+1. Run two Dgraph Alpha nodes:
+
+   ```sh
+   dgraph alpha --my=:7080 --zero=localhost:5080
+   dgraph alpha --my=:7081 --zero=localhost:5080 -o=1
+   ```
+
+   Dgraph Alpha nodes use two directories to persist data and
+   [WAL logs](/consistency-model), and these directories must be different for
+   each Alpha if they are running on the same host. You can use `-p` and `-w` to
+   change the location of the data and WAL directories. To learn more about
+   other flags, run `dgraph alpha --help`.
+
+1. Connect to the running Dgraph cluster using https://play.dgraph.io/.
+   For information about connecting, see [Ratel UI](/ratel/connection).
+
+You can install Dgraph using Docker Compose on a system hosted with any cloud
+provider.
+
+#### Before you begin
+
+- Ensure that you have installed Docker
+  [Compose](https://docs.docker.com/compose/).
+- The IP address of the cloud system, ``.
+- The IP address of the local host, ``.
+
+#### Using Docker Compose
+
+1. Download the Dgraph `docker-compose.yml` file:
+
+   wget https://github.com/dgraph-io/dgraph/raw/main/contrib/config/docker/docker-compose.yml
+
+   By default, only the localhost IP 127.0.0.1 is allowed. When you run Dgraph
+   on Docker, the containers are assigned IPs and those IPs need to be added to
+   the allowed list.
+
+1. Add a list of IPs allowed for Dgraph so that you can create the schema. Use
+   an editor of your choice and add the `` of the local host in the
+   `docker-compose.yml` file:
+
+   ```txt
+   # This Docker Compose file can be used to quickly boot up Dgraph Zero
+   # and Alpha in different Docker containers.
+   # It mounts /tmp/data on the host machine to /dgraph within the
+   # container. You will need to change /tmp/data to a more appropriate location.
+   # Run `docker-compose up` to start Dgraph.
+   version: "3.2"
+   services:
+     zero:
+       image: dgraph/dgraph:latest
+       volumes:
+         - /tmp/data:/dgraph
+       ports:
+         - 5080:5080
+         - 6080:6080
+       restart: on-failure
+       command: dgraph zero --my=zero:5080
+     alpha:
+       image: dgraph/dgraph:latest
+       volumes:
+         - /tmp/data:/dgraph
+       ports:
+         - 8080:8080
+         - 9080:9080
+       restart: on-failure
+       command: dgraph alpha --my=alpha:7080 --zero=zero:5080 --security whitelist=
+     ratel:
+       image: dgraph/ratel:latest
+       ports:
+         - 8000:8000
+   ```
+
+1. Run the `docker-compose` command to start the Dgraph services in Docker
+   containers:
+
+   sudo docker-compose up
+
+   After Dgraph is running on Docker, you can view the Dgraph images and
+   containers.
+
+1. View the containers running for Dgraph using:
+
+   sudo docker ps -a
+
+   An output similar to the following appears:
+
+   ```bash
+   CONTAINER ID   IMAGE                  COMMAND                  CREATED
+   4b67157933b6   dgraph/dgraph:latest   "dgraph zero --my=ze…"   2 days ago
+   3faf9bba3a5b   dgraph/ratel:latest    "/usr/local/bin/dgra…"   2 days ago
+   a6b5823b668d   dgraph/dgraph:latest   "dgraph alpha --my=a…"   2 days ago
+   ```
+
+1. To access the Ratel UI for queries, mutations, and altering schema, open
+   your web browser and navigate to `http://:8000`.
+1. Click **Launch Latest** to access the latest stable release of Ratel UI.
+1. In the **Dgraph Server Connection** dialog, set the **Dgraph server
+   URL** to `http://:8080`.
+1. Click **Connect**. The connection health appears green.
+1. Click **Continue** to query or run mutations.
diff --git a/dgraph/reference/deploy/monitoring.mdx b/dgraph/reference/deploy/monitoring.mdx
new file mode 100644
index 00000000..f8b0a1c4
--- /dev/null
+++ b/dgraph/reference/deploy/monitoring.mdx
@@ -0,0 +1,159 @@
+---
+title: Monitoring
+---
+
+Dgraph exposes metrics via the `/debug/vars` endpoint in JSON format and the
+`/debug/prometheus_metrics` endpoint in Prometheus's text-based format. Dgraph
+doesn't store the metrics and only exposes the value of the metrics at that
+instant. You can either poll this endpoint to get the data in your monitoring
+systems or install
+**[Prometheus](https://prometheus.io/docs/introduction/install/)**. Replace the
+targets in the config file below with the IPs of your Dgraph instances and run
+Prometheus using the command `prometheus --config.file my_config.yaml`.
+
+```yaml
+scrape_configs:
+  - job_name: "dgraph"
+    metrics_path: "/debug/prometheus_metrics"
+    scrape_interval: "2s"
+    static_configs:
+      - targets:
+          - 172.31.9.133:6080 # For Dgraph zero, 6080 is the http endpoint exposing metrics.
+          - 172.31.15.230:8080 # For Dgraph alpha, 8080 is the http endpoint exposing metrics.
+          - 172.31.0.170:8080
+          - 172.31.8.118:8080
+```
+
+  Raw data in Prometheus format is available via the `/debug/prometheus_metrics`
+  endpoint on Dgraph Alphas.
+
+Install **[Grafana](http://docs.grafana.org/installation/)** to plot the
+metrics. Grafana runs on port 3000 by default. Create a Prometheus
+data source by following these
+**[steps](https://prometheus.io/docs/visualization/grafana/#creating-a-prometheus-data-source)**.
+Import
+**[grafana_dashboard.json](https://github.com/dgraph-io/benchmarks/blob/master/scripts/grafana_dashboard.json)**
+by following this
+**[link](http://docs.grafana.org/reference/export_import/#importing-a-dashboard)**.
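+
+Before wiring up Prometheus, you can spot-check the raw metrics endpoints
+directly; this is a sketch assuming the default local ports:
+
+```sh
+# JSON-formatted metrics
+curl -s localhost:8080/debug/vars
+# Prometheus text format (the endpoint Prometheus scrapes)
+curl -s localhost:8080/debug/prometheus_metrics
+```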
+
+## CloudWatch
+
+Route53's health checks can be leveraged to create standard CloudWatch alarms
+that notify you when the status of the `/health` endpoints of Alpha and Zero
+changes.
+
+Considering that the endpoints to monitor are publicly accessible and you have
+the AWS credentials and [awscli](https://aws.amazon.com/cli/) set up, we’ll go
+through an example of setting up a simple CloudWatch alarm configured to alert
+via email for the Alpha endpoint `alpha.acme.org:8080/health`. Dgraph Zero's
+`/health` endpoint can also be monitored in a similar way.
+
+### Create the Route53 Health Check
+
+```sh
+aws route53 create-health-check \
+    --caller-reference $(date "+%Y%m%d%H%M%S") \
+    --health-check-config file:///tmp/create-healthcheck.json \
+    --query 'HealthCheck.Id'
+```
+
+The file `/tmp/create-healthcheck.json` would need to have the values for the
+parameters required to create the health check, as follows:
+
+```json
+{
+  "Type": "HTTPS",
+  "ResourcePath": "/health",
+  "FullyQualifiedDomainName": "alpha.acme.org",
+  "Port": 8080,
+  "RequestInterval": 30,
+  "FailureThreshold": 3
+}
+```
+
+The reference for the values one can specify while creating or updating a health
+check can be found in the AWS
+[documentation](https://docs.aws.amazon.com/Route53/latest/DeveloperGuide/health-checks-creating-values.html).
+
+The response to the above command would be the ID of the created health check.
+
+```sh
+"29bdeaaa-f5b5-417e-a5ce-7dba1k5f131b"
+```
+
+Make a note of the health check ID. This will be used to integrate CloudWatch
+alarms with the health check.
+
+  Currently, Route53 metrics are only
+  [available](https://docs.aws.amazon.com/Route53/latest/DeveloperGuide/monitoring-health-checks.html)
+  in the **US East (N. Virginia)** region. The CloudWatch Alarm (and the SNS
+  Topic) should therefore be created in `us-east-1`.
+
+### [Optional] Creating an SNS Topic
+
+SNS topics are used to create message delivery channels. If you do not have any
+SNS topics configured, one can be created by running the following command:
+
+```sh
+aws sns create-topic --region=us-east-1 --name ops --query 'TopicArn'
+```
+
+The response to the above command would be as follows:
+
+```sh
+"arn:aws:sns:us-east-1:123456789012:ops"
+```
+
+Be sure to make a note of the topic ARN. This would be used to configure the
+CloudWatch alarm's action parameter.
+
+Run the following command to subscribe your email to the SNS topic:
+
+```sh
+aws sns subscribe \
+    --topic-arn arn:aws:sns:us-east-1:123456789012:ops \
+    --protocol email \
+    --notification-endpoint ops@acme.org
+```
+
+The subscription will need to be confirmed through an _AWS Notification -
+Subscription Confirmation_ email. Once the subscription is confirmed, CloudWatch
+can be configured to use the SNS topic to trigger the alarm notification.
+
+### Creating a CloudWatch Alarm
+
+The following command creates a CloudWatch alarm with `--alarm-actions` set to
+the ARN of the SNS topic and the `--dimensions` of the alarm set to the health
+check ID.
+
+```sh
+aws cloudwatch put-metric-alarm \
+    --region=us-east-1 \
+    --alarm-name dgraph-alpha \
+    --alarm-description "Alarm for when Alpha is down" \
+    --metric-name HealthCheckStatus \
+    --dimensions "Name=HealthCheckId,Value=29bdeaaa-f5b5-417e-a5ce-7dba1k5f131b" \
+    --namespace AWS/Route53 \
+    --statistic Minimum \
+    --period 60 \
+    --threshold 1 \
+    --comparison-operator LessThanThreshold \
+    --evaluation-periods 1 \
+    --treat-missing-data breaching \
+    --alarm-actions arn:aws:sns:us-east-1:123456789012:ops
+```
+
+One can verify the alarm status from the CloudWatch or Route53 consoles.
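+
+Alternatively, here's a sketch of checking the alarm state from the command
+line, using the alarm name created above:
+
+```sh
+aws cloudwatch describe-alarms \
+    --region us-east-1 \
+    --alarm-names dgraph-alpha \
+    --query 'MetricAlarms[0].StateValue'
+```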
+
+#### Internal Endpoints
+
+If the Alpha endpoint is internal to the VPC network, you need to create a
+Lambda function (triggered periodically using CloudWatch Event Rules) that
+requests the `/health` path and creates CloudWatch metrics, which can then be
+used to create the required CloudWatch alarms. The architecture and the
+CloudFormation template to achieve this can be found
+[here](https://aws.amazon.com/blogs/networking-and-content-delivery/performing-route-53-health-checks-on-private-resources-in-a-vpc-with-aws-lambda-and-amazon-cloudwatch/).
diff --git a/dgraph/reference/deploy/security/index.mdx b/dgraph/reference/deploy/security/index.mdx
new file mode 100644
index 00000000..77350687
--- /dev/null
+++ b/dgraph/reference/deploy/security/index.mdx
@@ -0,0 +1,3 @@
+---
+title: Security
+---
diff --git a/dgraph/reference/deploy/security/ports-usage.mdx b/dgraph/reference/deploy/security/ports-usage.mdx
new file mode 100644
index 00000000..8d2d9eac
--- /dev/null
+++ b/dgraph/reference/deploy/security/ports-usage.mdx
@@ -0,0 +1,117 @@
+---
+title: Ports Usage
+---
+
+Dgraph cluster nodes use a range of ports to communicate over gRPC and HTTP.
+Choose these ports carefully based on your topology and mode of deployment, as
+this will impact the access security rules or firewall configurations required
+for each port.
+
+## Types of ports
+
+Dgraph Alpha and Dgraph Zero nodes use a variety of gRPC and HTTP ports, as
+follows:
+
+- **gRPC-internal-private**: Used between the cluster nodes for internal
+  communication and message exchange. Communication using these ports is
+  TLS-encrypted.
+- **gRPC-external-private**: Used by Dgraph Live Loader and Dgraph Bulk Loader
+  to access APIs over gRPC.
+- **gRPC-external-public**: Used by Dgraph clients to access APIs in a session
+  that can persist after a query.
+- **HTTP-external-private**: Used for monitoring and administrative tasks.
+- **HTTP-external-public**: Used by clients to access APIs over HTTP.
+
+## Default ports used by different nodes
+
+| Dgraph Node Type | gRPC-internal-private | gRPC-external-private | gRPC-external-public | HTTP-external-private | HTTP-external-public |
+| ---------------- | --------------------- | --------------------- | -------------------- | --------------------- | -------------------- |
+| zero             | 5080¹                 | 5080¹                 |                      | 6080²                 |                      |
+| alpha            | 7080                  |                       | 9080                 |                       | 8080                 |
+| ratel            |                       |                       |                      |                       | 8000                 |
+
+¹: Dgraph Zero uses port 5080 for internal communication within the
+cluster, and to support the [data import](./about_import) tools: Dgraph Live
+Loader and Dgraph Bulk Loader.
+
+²: Dgraph Zero uses port 6080 for
+[administrative](./deploy/dgraph-zero) operations. Dgraph clients cannot access
+this port.
+
+Users must modify security rules or open firewall ports depending upon their
+underlying network to allow communication between cluster nodes, and between
+Dgraph clients and the Dgraph instances. In general, you should configure
+the gRPC and HTTP `external-public` ports for open access by Dgraph clients, and
+configure the gRPC-internal ports for open access by the cluster nodes.
+
+**Ratel UI** accesses Dgraph Alpha on the `HTTP-external-public` port (which
+defaults to localhost:8080) and can be configured to talk to a remote Dgraph
+cluster. This way you can run Ratel on your local machine and point to a remote
+cluster. But, if you are deploying Ratel along with the Dgraph cluster, you may
+have to expose port 8000 to the public.
+
+**Port Offset** To make it easier for users to set up a cluster, Dgraph has
+default values for the ports used by Dgraph nodes. To support multiple nodes
+running on a single machine or VM, you can set a node to use different ports
+using an offset (using the command option `--port_offset`). This option
+increments the actual ports used by the node by the offset value provided. You
+can also use port offsets when starting multiple Dgraph Zero nodes in a
+development environment.
+
+For example, when a user runs Dgraph Alpha with the `--port_offset 2` setting,
+the Alpha node binds to ports 7082 (`gRPC-internal-private`), 8082
+(`HTTP-external-public`), and 9082 (`gRPC-external-public`).
+
+**Ratel UI** by default listens on port 8000. You can use the `-port` flag to
+configure it to listen on any other port.
+
+## High Availability (HA) cluster configuration
+
+In an HA cluster configuration, you should run three or five replicas for the
+Zero node, and three or five replicas for the Alpha node. A Dgraph cluster is
+divided into Raft groups, where Dgraph Zero is group 0 and each shard of Dgraph
+Alpha is a subsequent numbered group (group 1, group 2, etc.). The number of
+replicas in each Raft group must be an odd number for the group to have
+consensus, which exists when the majority of nodes in a group are available.
+
+
+  If the number of replicas in a Raft group is **2N + 1**, up to **N** nodes can
+  go offline without any impact on reads or writes. So, if there are five
+  replicas, three must be online to avoid an impact to reads or writes.
+
+
+### Dgraph Zero
+
+Run three Dgraph Zero instances, assigning a unique integer ID to each using the
+`--raft` superflag's `idx` option, and passing the address of any healthy Dgraph
+Zero instance using the `--peer` flag.
+
+To run three replicas for the Alpha nodes, set `--replicas=3`. Each time a new
+Alpha node is added, the Zero node checks the existing groups and assigns the
+new Alpha to a group as appropriate.
+
+### Dgraph Alpha
+
+You can run as many Dgraph Alpha nodes as you want. You can manually set the
+`--raft` superflag's `idx` option, or you can leave that flag empty, and the
+Zero node will auto-assign an id to the Alpha node. This id persists in the
+write-ahead log, so be careful not to delete it.
+
+The new Alpha nodes will automatically detect each other by communicating with
+Dgraph Zero and establish connections to each other. If you don't have a proxy
+or load balancer for the Zero nodes, you can provide a list of Zero node
+addresses for Alpha nodes to use at startup with the `--zero` flag. The Alpha
+node will try to connect to one of the Zero nodes starting from the first Zero
+node address in the list. For example: `--zero=zero1,zero2,zero3` where `zero1`
+is the `host:port` of a Zero instance.
+
+Typically, a Zero node first attempts to replicate a group by assigning a new
+Alpha node to run the same group previously assigned to another. After the
+group has been replicated per the `--replicas` flag, Dgraph Zero creates a new
+group.
+
+Over time, the data will be evenly split across all of the groups. So, it's
+important to ensure that the number of Alpha nodes is a multiple of the
+replication setting. For example, if you set `--replicas=3` for a Zero node and
+then run three Alpha nodes, you get no sharding but 3x replication. If you
+instead run six Alpha nodes, the data is sharded into two groups, each with 3x
+replication (see the sketch below).
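+Putting this together, here is a minimal sketch of a three-Zero, three-Alpha HA
+setup on a single host, using port offsets to avoid collisions. The host names
+and the data/WAL directory names are placeholder assumptions; run each command
+in its own terminal:
+
+```sh
+# Three Zero nodes, each with a unique Raft index and its own WAL directory
+dgraph zero --raft "idx=1" --my=localhost:5080 --replicas=3 --wal zw1
+dgraph zero --raft "idx=2" --my=localhost:5081 --port_offset 1 --peer localhost:5080 --replicas=3 --wal zw2
+dgraph zero --raft "idx=3" --my=localhost:5082 --port_offset 2 --peer localhost:5080 --replicas=3 --wal zw3
+
+# Three Alpha nodes forming a single 3x-replicated group
+dgraph alpha --my=localhost:7080 --zero localhost:5080,localhost:5081,localhost:5082 -p p1 -w w1
+dgraph alpha --my=localhost:7081 --port_offset 1 --zero localhost:5080 -p p2 -w w2
+dgraph alpha --my=localhost:7082 --port_offset 2 --zero localhost:5080 -p p3 -w w3
+```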
diff --git a/dgraph/reference/deploy/security/tls-configuration.mdx b/dgraph/reference/deploy/security/tls-configuration.mdx
new file mode 100644
index 00000000..733d7a6d
--- /dev/null
+++ b/dgraph/reference/deploy/security/tls-configuration.mdx
@@ -0,0 +1,424 @@
+---
+title: TLS Configuration
+---
+
+Connections between Dgraph database and its clients can be secured using TLS. In
+addition, Dgraph can secure gRPC communications between Dgraph Alpha and
+Dgraph Zero server nodes using mutual TLS (mTLS). Dgraph can also secure
+communications over the Dgraph Zero `gRPC-external-private` port used by
+Dgraph's Live Loader and Bulk Loader clients. To learn more about the HTTP and
+gRPC ports used by Dgraph Alpha and Dgraph Zero, see
+[Ports Usage](./ports-usage). Password-protected private keys are **not
+supported**.
+
+To further improve TLS security, only TLS v1.2 cipher suites that use 128-bit or
+greater RSA or AES encryption are supported.
+
+
+  If you're generating encrypted private keys with `openssl`, be sure to specify
+  the encryption algorithm explicitly (like `-aes256`). This forces `openssl`
+  to include the `DEK-Info` header in the private key, which Dgraph requires to
+  decrypt the key. When default encryption is used, `openssl` doesn't write
+  that header, and the key can't be decrypted.
+
+
+## Dgraph Certificate Management Tool
+
+
+  This section refers to the `dgraph cert` command which was introduced in
+  v1.0.9. For previous releases, see the previous [TLS configuration
+  documentation](https://github.com/dgraph-io/dgraph/blob/release/v1.0.7/wiki/content/deploy/index.md#tls-configuration).
+
+
+The `dgraph cert` program creates and manages CA-signed certificates and private
+keys using a generated Dgraph Root CA. There are three types of certificate/key
+pairs:
+
+1. Root CA certificate/key pair: This is used to sign and verify node and client
+   certificates. If the root CA certificate is changed, you must regenerate all
+   certificates, and this certificate must be accessible to the Alpha nodes.
+2. Node certificate/key pair: This is shared by the Dgraph Alpha nodes and used
+   for accepting TLS connections.
+3. Client certificate/key pair: This is used by the clients (like Live Loader
+   and Ratel) to communicate with Dgraph Alpha server nodes where client
+   authentication with mTLS is required.
+
+```sh
+# To see the available flags.
+$ dgraph cert --help
+
+# Create Dgraph Root CA, used to sign all other certificates.
+$ dgraph cert
+
+# Create node certificate and private key
+$ dgraph cert -n localhost
+
+# Create client certificate and private key for mTLS (mutual TLS)
+$ dgraph cert -c dgraphuser
+
+# Combine all in one command
+$ dgraph cert -n localhost -c dgraphuser
+
+# List all your certificates and keys
+$ dgraph cert ls
+```
+
+The default location where the _cert_ command stores certificates (and keys) is
+`tls` under the Dgraph working directory. The default directory path can be
+overridden using the `--dir` option. For example:
+
+```sh
+$ dgraph cert --dir ~/mycerts
+```
+
+### File naming conventions
+
+The following file naming conventions are used by Dgraph for proper TLS setup.
+
+| File name         | Description                | Use                                               |
+| ----------------- | -------------------------- | ------------------------------------------------- |
+| ca.crt            | Dgraph Root CA certificate | Verify all certificates                           |
+| ca.key            | Dgraph CA private key      | Validate CA certificate                           |
+| node.crt          | Dgraph node certificate    | Shared by all nodes for accepting TLS connections |
+| node.key          | Dgraph node private key    | Validate node certificate                         |
+| client._name_.crt | Dgraph client certificate  | Authenticate a client _name_                      |
+| client._name_.key | Dgraph client private key  | Validate _name_ client certificate                |
+
+For client authentication, each client must have its own certificate and key.
+These are then used to connect to the Dgraph server nodes.
+
+The node certificate `node.crt` can support multiple node names using multiple
+host names and/or IP addresses. Just separate the names with commas when
+generating the certificate.
+
+```sh
+$ dgraph cert -n localhost,104.25.165.23,dgraph.io,2400:cb00:2048:1::6819:a417
+```
+
+
+  You must delete the old node cert and key before you can generate a new pair.
+
+
+
+  When using host names for node certificates, including _localhost_, your
+  clients must connect to the matching host name -- such as _localhost_ not
+  127.0.0.1. If you need to use IP addresses, then add them to the node
+  certificate.
+
+
+### Certificate inspection
+
+The command `dgraph cert ls` lists all certificates and keys in the `--dir`
+directory (default `dgraph-tls`), along with details to inspect and validate
+cert/key pairs.
+
+Example of command output:
+
+```sh
+-rw-r--r-- ca.crt - Dgraph Root CA certificate
+    Issuer: Dgraph Labs, Inc.
+       S/N: 043c4d8fdd347f06
+Expiration: 02 Apr 29 16:56 UTC
+SHA-256 Digest: 4A2B0F0F 716BF5B6 C603E01A 6229D681 0B2AFDC5 CADF5A0D 17D59299 116119E5
+
+-r-------- ca.key - Dgraph Root CA key
+SHA-256 Digest: 4A2B0F0F 716BF5B6 C603E01A 6229D681 0B2AFDC5 CADF5A0D 17D59299 116119E5
+
+-rw-r--r-- client.admin.crt - Dgraph client certificate: admin
+    Issuer: Dgraph Labs, Inc.
+ CA Verify: PASSED
+       S/N: 297e4cb4f97c71f9
+Expiration: 03 Apr 24 17:29 UTC
+SHA-256 Digest: D23EFB61 DE03C735 EB07B318 DB70D471 D3FE8556 B15D084C 62675857 788DF26C
+
+-rw------- client.admin.key - Dgraph Client key
+SHA-256 Digest: D23EFB61 DE03C735 EB07B318 DB70D471 D3FE8556 B15D084C 62675857 788DF26C
+
+-rw-r--r-- node.crt - Dgraph Node certificate
+    Issuer: Dgraph Labs, Inc.
+ CA Verify: PASSED
+       S/N: 795ff0e0146fdb2d
+Expiration: 03 Apr 24 17:00 UTC
+     Hosts: 104.25.165.23, 2400:cb00:2048:1::6819:a417, localhost, dgraph.io
+SHA-256 Digest: 7E243ED5 3286AE71 B9B4E26C 5B2293DA D3E7F336 1B1AFFA7 885E8767 B1A84D28
+
+-rw------- node.key - Dgraph Node key
+SHA-256 Digest: 7E243ED5 3286AE71 B9B4E26C 5B2293DA D3E7F336 1B1AFFA7 885E8767 B1A84D28
+```
+
+Important points:
+
+- The cert/key pairs should always have matching SHA-256 digests. Otherwise, the
+  cert(s) must be regenerated. If the Root CA pair differs, all certs and keys
+  must be regenerated; the `--force` flag can help.
+- All certificates must pass Dgraph CA verification.
+- All key files should have the least access permissions possible, especially
+  `ca.key`, but they must remain readable.
+- Key files won't be overwritten if they have limited access, even with
+  `--force`.
+- Node certificates are only valid for the hosts listed.
+- Client certificates are only valid for the named client/user.
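+Beyond `dgraph cert ls`, you can also inspect these files with standard
+`openssl` tooling, for example:
+
+```sh
+# Print the full node certificate, including its SANs and expiration date
+openssl x509 -in tls/node.crt -noout -text
+
+# Verify the node certificate against the Dgraph Root CA
+openssl verify -CAfile tls/ca.crt tls/node.crt
+```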
+
+## TLS options
+
+Starting in release v21.03, pre-existing TLS configuration options have been
+replaced by the `--tls` [superflag](./deploy/cli-command-reference) and its
+options. The following `--tls` configuration options are available for Dgraph
+Alpha and Dgraph Zero nodes:
+
+- `ca-cert <path>` - Path and filename of the Dgraph Root CA (for example,
+  `ca.crt`)
+- `server-cert <path>` - Path and filename of the node certificate (for
+  example, `node.crt`)
+- `server-key <path>` - Path and filename of the node certificate private key
+  (for example, `node.key`)
+- `use-system-ca` - Include System CA with Dgraph Root CA.
+- `client-auth-type <type>` - TLS client authentication used to validate
+  client connections from external ports. To learn more, see
+  [Client Authentication Options](#client-authentication-options).
+
+
+  Dgraph now allows you to specify the path and filename of the CA root
+  certificate, the node certificate, and the node certificate private key. So,
+  these files do not need to have specific filenames or exist in the same
+  directory, as in previous Dgraph versions that used the `--tls_dir` flag.
+
+
+You can configure Dgraph Live Loader with the following `--tls` options:
+
+- `ca-cert <path>` - Dgraph root CA, such as `./tls/ca.crt`
+- `use-system-ca` - Include System CA with Dgraph Root CA.
+- `client-cert` - User cert file provided by the client to Alpha
+- `client-key` - User private key file provided by the client to Alpha
+- `server-name <name>` - Server name, used for validating the server's TLS
+  host name.
+
+### Using TLS with only external ports encrypted
+
+To encrypt communication between Dgraph server nodes and clients over external
+ports, you can configure certificates and run Dgraph Alpha and Dgraph Zero using
+the following commands:
+
+Dgraph Alpha:
+
+```sh
+# First, create the root CA, Alpha node certificate and private keys, if not already created.
+# Note that you must specify in node.crt the host names or IP addresses that clients use to connect:
+$ dgraph cert -n localhost,104.25.165.23,104.25.165.25,104.25.165.27
+# Set up Dgraph Alpha nodes using the following default command (after generating certificates and private keys)
+$ dgraph alpha --tls "ca-cert=/dgraph-tls/ca.crt; server-cert=/dgraph-tls/node.crt; server-key=/dgraph-tls/node.key"
+```
+
+Dgraph Zero:
+
+```sh
+# First, copy the root CA, node certificates and private keys used to set up Dgraph Alpha (above) to the Dgraph Zero node.
+# Optionally, you can generate and use a separate Zero node certificate, where you specify the host name or IP addresses used by Live Loader and Bulk Loader to connect to Dgraph Zero.
+# Next, set up Dgraph Zero nodes using the following default command:
+$ dgraph zero --tls "ca-cert=/dgraph-tls/ca.crt; server-cert=/dgraph-tls/node.crt; server-key=/dgraph-tls/node.key"
+```
+
+You can then run Dgraph Live Loader on a Dgraph Alpha node using the following
+command:
+
+```sh
+# Now, connect to server using TLS
+$ dgraph live --tls "ca-cert=./dgraph-tls/ca.crt; server-name=localhost" -s 21million.schema -f 21million.rdf.gz
+```
+
+### Using TLS with internal and external ports encrypted
+
+If you require client authentication (mutual TLS, or mTLS), you can configure
+certificates and run Dgraph Alpha and Dgraph Zero with settings that encrypt
+both internal ports (those used within the cluster) as well as external ports
+(those used by clients that connect to the cluster, including Bulk Loader and
+Live Loader).
+
+The following example shows how to encrypt both internal and external ports:
+
+Dgraph Alpha:
+
+```sh
+# First create the root CA, node certificates and private keys, if not already created.
+# Note that you must specify the host name or IP address for other nodes that will share node.crt.
+$ dgraph cert -n localhost,104.25.165.23,104.25.165.25,104.25.165.27
+# Set up Dgraph Alpha nodes using the following default command (after generating certificates and private keys)
+$ dgraph alpha \
+  --tls "ca-cert=/dgraph-tls/ca.crt; server-cert=/dgraph-tls/node.crt; server-key=/dgraph-tls/node.key; internal-port=true; client-cert=/dgraph-tls/client.alpha1.crt; client-key=/dgraph-tls/client.alpha1.key"
+```
+
+Dgraph Zero:
+
+```sh
+# First, copy the certificates and private keys used to set up Dgraph Alpha (above) to the Dgraph Zero node.
+# Next, set up Dgraph Zero nodes using the following default command:
+$ dgraph zero \
+  --tls "ca-cert=/dgraph-tls/ca.crt; server-cert=/dgraph-tls/node.crt; server-key=/dgraph-tls/node.key; internal-port=true; client-cert=/dgraph-tls/client.zero1.crt; client-key=/dgraph-tls/client.zero1.key"
+```
+
+You can then run Dgraph Live Loader using the following:
+
+```sh
+# Now, connect to server using mTLS (mutual TLS)
+$ dgraph live \
+  --tls "ca-cert=./tls/ca.crt; client-cert=./tls/client.dgraphuser.crt; client-key=./tls/client.dgraphuser.key; server-name=localhost; internal-port=true" \
+  -s 21million.schema \
+  -f 21million.rdf.gz
+```
+
+### Client Authentication Options
+
+The server will always **request** client authentication. There are four
+different values for the `client-auth-type` option that change the security
+policy of the client certificate.
+
+| Value              | Client Cert/Key | Client Certificate Verified                                    |
+| ------------------ | --------------- | -------------------------------------------------------------- |
+| `REQUEST`          | optional        | Client certificate is not VERIFIED if provided (least secure)  |
+| `REQUIREANY`       | required        | Client certificate is never VERIFIED                           |
+| `VERIFYIFGIVEN`    | optional        | Client certificate is VERIFIED if provided (default)           |
+| `REQUIREANDVERIFY` | required        | Client certificate is always VERIFIED (most secure)            |
+
+`REQUIREANDVERIFY` is the most secure but also the most difficult to configure
+for clients. When using this value, the value of `server-name` is matched
+against the certificate's SAN values and the connection host.
+
+
+  If mTLS is enabled using `internal-port=true`, internal ports (by default,
+  5080 and 7080) use the `REQUIREANDVERIFY` setting. Unless otherwise
+  configured, external ports (by default, 9080, 8080 and 6080) use the
+  `VERIFYIFGIVEN` setting. Changing the `client-auth-type` option to another
+  setting only affects client authentication on external ports.
+
+
+## Using Ratel UI with Client authentication
+
+Ratel UI (and any other JavaScript client built on top of `dgraph-js-http`)
+connects to Dgraph servers via HTTP. When TLS is enabled, servers expect HTTPS
+requests only.
+
+If you haven't already created the CA certificate and the node certificate for
+Alpha servers from the earlier instructions (see
+[Dgraph Certificate Management Tool](#dgraph-certificate-management-tool)), the
+first step is to generate these certificates, which can be done with the
+following command:
+
+```sh
+# Create rootCA and node certificates/keys
+$ dgraph cert -n localhost
+```
+
+If Dgraph Alpha's `client-auth-type` option is set to `REQUEST` or
+`VERIFYIFGIVEN` (default), then a client certificate is not mandatory. The
+steps after generating the CA and node certificates are as follows:
+
+### Step 1. Install Dgraph Root CA into System CA
+
+##### Linux (Debian/Ubuntu)
+
+```sh
+# Copy the generated CA to the ca-certificates directory
+$ cp /path/to/ca.crt /usr/local/share/ca-certificates/ca.crt
+# Update the CA store
+$ sudo update-ca-certificates
+```
+
+### Step 2. Install Dgraph Root CA into Web Browsers Trusted CA List
+
+##### Firefox
+
+- Choose Preferences -> Privacy & Security -> View Certificates -> Authorities
+- Click on Import and import the `ca.crt`
+
+##### Chrome
+
+- Choose Settings -> Privacy and Security -> Security -> Manage Certificates ->
+  Authorities
+- Click on Import and import the `ca.crt`
+
+### Step 3. Point Ratel to the `https://` endpoint of the Alpha server
+
+- Change the Dgraph Alpha server address to `https://` instead of `http://`, for
+  example `https://localhost:8080`.
+
+If the `client-auth-type` option is set to `REQUIREANY` or `REQUIREANDVERIFY`,
+you need to follow the steps above and also install a client certificate in
+your browser:
+
+1. Generate a client certificate: `dgraph cert -c laptopuser`.
+2. Convert it to a `.p12` file:
+
+   ```sh
+   openssl pkcs12 -export \
+     -out laptopuser.p12 \
+     -in tls/client.laptopuser.crt \
+     -inkey tls/client.laptopuser.key
+   ```
+
+   Use any password you like for the export; it is used to encrypt the `.p12`
+   file.
+
+3. Import the client certificate to your browser. It can be done in Chrome as
+   follows:
+   - Choose Settings -> Privacy and Security -> Security -> Manage Certificates
+     -> Your Certificates
+   - Click on Import and import the `laptopuser.p12`.
+
+
+  Mutual TLS may not work in Firefox because Firefox is unable to send
+  privately-signed client certificates; this issue is filed
+  [here](https://bugzilla.mozilla.org/show_bug.cgi?id=1662607).
+
+
+The next time you use Ratel to connect to an Alpha with client authentication
+enabled, the browser will prompt you for a client certificate. Select the
+certificate you imported in the step above, and queries/mutations will succeed.
+
+## Using Curl with Client authentication
+
+When TLS is enabled, `curl` requests to Dgraph will need some specific options
+to work. For instance, to reach the `/admin/draining` endpoint:
+
+```sh
+curl --silent https://localhost:8080/admin/draining
+```
+
+If you are using `curl` with
+[Client Authentication](#client-authentication-options) set to `REQUIREANY` or
+`REQUIREANDVERIFY`, you will need to provide the client certificate and private
+key. For instance (again for the `/admin/draining` endpoint):
+
+```sh
+curl --silent --cacert ./tls/ca.crt --cert ./tls/client.dgraphuser.crt --key ./tls/client.dgraphuser.key https://localhost:8080/admin/draining
+```
+
+Refer to the `curl` documentation for further information on its TLS options.
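+When a `curl` request fails and the cause is unclear, `openssl s_client` can
+help show what certificate the server is actually presenting. A sketch,
+assuming the Alpha is listening on `localhost:8080`:
+
+```sh
+# Print the server certificate chain and the TLS handshake result
+openssl s_client -connect localhost:8080 -CAfile ./tls/ca.crt -brief
+```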
+
+## Access Data Using a Client
+
+Some examples of connecting via a [Client](/clients) when TLS is in use can be
+found below:
+
+- [dgraph4j](https://github.com/dgraph-io/dgraph4j#creating-a-secure-client-using-tls)
+- [dgraph-js](https://github.com/dgraph-io/dgraph-js/tree/master/examples/tls)
+- [dgo](https://github.com/dgraph-io/dgraph/blob/main/tlstest/acl/acl_over_tls_test.go)
+- [pydgraph](https://github.com/dgraph-io/pydgraph/tree/master/examples/tls)
+
+## Troubleshooting Ratel's Client authentication
+
+If you are getting errors in Ratel when TLS is enabled, try opening your Dgraph
+Alpha URL as a web page.
+
+Assuming you are running Dgraph on your local machine, opening
+`https://localhost:8080/` in the browser should produce the message
+`Dgraph browser is available for running separately using the dgraph-ratel binary`.
+
+If you are getting a connection error, try starting the Alpha without the
+`client-auth-type` flag. If you are still getting an error, check that your
+hostname is correct and the port is open; then make sure that the "Dgraph Root
+CA" certificate is installed and trusted correctly.
+
+After that, if things work without passing `client-auth-type` but stop working
+when `REQUIREANY` or `REQUIREANDVERIFY` is set, make sure the `.p12` file is
+installed correctly.
diff --git a/dgraph/reference/deploy/troubleshooting.mdx b/dgraph/reference/deploy/troubleshooting.mdx
new file mode 100644
index 00000000..8407d0b2
--- /dev/null
+++ b/dgraph/reference/deploy/troubleshooting.mdx
@@ -0,0 +1,61 @@
+---
+title: Troubleshooting
+---
+
+This page provides tips on how to troubleshoot issues with running Dgraph.
+
+### Running out of memory (OOM)
+
+When you [bulk load](./bulk-loader) or
+[backup](./enterprise-features/binary-backups) your data, Dgraph can consume
+more memory than usual due to a high volume of writes. This can cause OOM
+crashes.
+
+You can take the following steps to help avoid OOM crashes:
+
+- **Increase the amount of memory available**: If you run Dgraph with
+  insufficient memory, that can result in OOM crashes. The recommended minimum
+  RAM to run Dgraph on desktops and laptops (single-host deployment) is 16GB.
+  For servers in a cluster deployment, the recommended minimum is 8GB per
+  server. This applies to EC2 and GCE instances, as well as on-premises servers.
+- **Reduce the number of Go routines**: You can troubleshoot OOM issues by
+  reducing the number of Go routines (`goroutines`) used by Dgraph from the
+  default value of eight. For example, you can reduce the `goroutines` that
+  Dgraph uses to four by calling the `dgraph alpha` command with the following
+  option:
+
+  `--badger "goroutines=4"`
+
+### "Too many open files" errors
+
+If Dgraph logs "too many open files" errors, you should increase the per-process
+open file descriptor limit to permit more open files. During normal operations,
+Dgraph must be able to open many files. Your operating system may have an open
+file descriptor limit with a low default value that isn't adequate for a
+database like Dgraph. If so, you might need to increase this limit.
+
+On Linux and Mac, you can get file descriptor limit settings with the `ulimit`
+command, as follows:
+
+- Get hard limit: `ulimit -n -H`
+- Get soft limit: `ulimit -n -S`
+
+A soft limit of `1048576` open files is the recommended minimum to use Dgraph in
+production, but you can try increasing this soft limit if you continue to see
+this error. To learn more, see the `ulimit` documentation for your operating
+system.
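+For example, you can check and raise the soft limit for your current shell
+session before starting Dgraph:
+
+```sh
+# Check the current soft limit
+ulimit -n -S
+
+# Raise the soft limit for this session (cannot exceed the hard limit)
+ulimit -n 1048576
+```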
+
+
+  Depending on your OS, your shell session limits might not be the same as the
+  Dgraph process limits.
+
+
+For example, to properly set up the `ulimit` values on Ubuntu 20.04 systems:
+
+```sh
+# Match the commented default regardless of any existing value after "="
+sudo sed -i 's/^#DefaultLimitNOFILE=.*/DefaultLimitNOFILE=1048576/' /etc/systemd/system.conf
+sudo sed -i 's/^#DefaultLimitNOFILE=.*/DefaultLimitNOFILE=1048576/' /etc/systemd/user.conf
+```
+
+This affects the base limits for all processes. After a reboot, your OS will
+pick up the new values.
diff --git a/dgraph/reference/design-concepts/acl-concept.mdx b/dgraph/reference/design-concepts/acl-concept.mdx
new file mode 100644
index 00000000..b66a6b15
--- /dev/null
+++ b/dgraph/reference/design-concepts/acl-concept.mdx
@@ -0,0 +1,20 @@
+---
+title: ACLs
+---
+
+ACLs are a typical mechanism for listing who can access what, specifying
+permissions for either users or roles. ACLs help determine who is "authorized"
+to access which data.
+
+Dgraph Access Control Lists (ACLs) are sets of permissions specifying which
+`Relationships` a user may access. Recall that Dgraph is "predicate based," so
+all data is stored in, and implicit in, relationships. This makes
+relationship-based controls very powerful for restricting a graph based on
+roles (RBAC).
+
+Note that the Dgraph multi-tenancy feature relies on ACLs to ensure each tenant
+can see only their own data in one server.
+
+Using ACLs requires a client to authenticate (log in) differently and specify
+credentials that determine which relationships are visible in their view of the
+graph database.
diff --git a/dgraph/reference/design-concepts/badger-concept.mdx b/dgraph/reference/design-concepts/badger-concept.mdx
new file mode 100644
index 00000000..b863f6ae
--- /dev/null
+++ b/dgraph/reference/design-concepts/badger-concept.mdx
@@ -0,0 +1,15 @@
+---
+title: Badger
+---
+
+[Badger](https://github.com/dgraph-io/badger) is a key-value store developed and
+maintained by Dgraph. It is also open source, and it is the backing store for
+Dgraph data.
+
+It is largely transparent to users that Dgraph uses Badger to store data
+internally. Badger is packaged into the Dgraph binary, and is the persistence
+layer. However, various configuration settings and log messages may reference
+Badger, such as cache sizes.
+
+Badger values are `Posting Lists` and indexes. Badger keys are formed by
+concatenating `<uid>+<predicate>`.
diff --git a/dgraph/reference/design-concepts/clients-concept.mdx b/dgraph/reference/design-concepts/clients-concept.mdx
new file mode 100644
index 00000000..e5ec6313
--- /dev/null
+++ b/dgraph/reference/design-concepts/clients-concept.mdx
@@ -0,0 +1,26 @@
+---
+title: Dgraph Clients
+---
+
+A client is a program that calls Dgraph. Broadly, there are standalone clients
+such as Ratel, which is a graphical web-based application, and programmatic
+client libraries which are embedded in larger programs to efficiently and
+idiomatically call Dgraph.
+
+GraphQL is an open standard with many clients of its own (both graphical tools
+and libraries), and these GraphQL clients work with Dgraph.
+
+Dgraph provides [client libraries](./clients) for many languages. These clients
+send DQL queries, and perform useful functions such as logging in, in idiomatic
+ways in each language.
+
+Note that Dgraph does not force or insist on any particular GraphQL client. Any
+GraphQL client, GUI, tool, or library will work well with Dgraph, and users are
+free to choose whichever they prefer. Dgraph itself only provides clients for
+the proprietary DQL query language.
+GraphQL clients are available for free from many organizations.
+
+However, Dgraph's cloud console does support basic GraphQL querying, so it can
+serve as a simple tool. We recommend using a dedicated GraphQL console for
+serious work, as such tools are more mature. Dgraph's GraphQL GUI is intended
+for quick starts and convenience.
diff --git a/dgraph/reference/design-concepts/consistency-model.mdx b/dgraph/reference/design-concepts/consistency-model.mdx
new file mode 100644
index 00000000..b18013b0
--- /dev/null
+++ b/dgraph/reference/design-concepts/consistency-model.mdx
@@ -0,0 +1,91 @@
+---
+title: Consistency Model
+---
+
+### Dgraph supports MVCC, Read Snapshots and Distributed ACID transactions
+
+Multi-version concurrency control (MVCC) is a technique where many versions of
+data are written (but never modified) on disk, so many versions exist. This
+helps control concurrency because the database is queried at a particular
+"timestamp" for the duration of one query to provide snapshot isolation and
+ensure data is consistent for that transaction. (Note that MVCC is loosely
+related to LSM trees - in LSM parlance, data is "logged" to write-only files,
+which are later merged via Log Compaction.)
+
+Writes are faster with MVCC because data is always written by flushing a larger
+in-memory buffer (a memtable) to new, contiguous files (SST files), and newer
+data obscures or replaces older data. Consistent updates from each transaction
+share a logical commit timestamp (a 64-bit, increasing number loosely
+correlated with wall-clock time), and all reads occur "at a point in time,"
+meaning any read accesses a known, stable set of committed data using these
+same commit timestamps. New or in-process commits are associated with a later
+timestamp so they do not affect running queries at earlier timestamps. This
+allows pure queries (reads) to execute without any locks.
+
+One special set of structures are "memtables" which are also referred to as
+being Level 0 of the LSM tree. These are buffers for fast writes, which later
+are flushed to on-disk files called SSTs.
+
+### Dgraph transactions are cluster-wide (not key-only, or any other non-ACID version of transactions)
+
+Dgraph uses the RAFT protocol to synchronize updates and ensure updates are
+durably written to a majority of Alpha nodes in a cluster before the transaction
+is considered successful. RAFT ensures true, distributed, cluster-wide
+transactions across multiple nodes, keys, edges, indexes and facets. Dgraph
+provides true ACID transactions, and does not impose limitations on what can be
+in a transaction: a transaction can involve multiple predicates, multiple nodes,
+multiple keys and even multiple shards.
+
+### Transactions are lockless
+
+Dgraph transactions do not use locks, allowing fast, distributed transactions.
+
+For reads, queries execute at a particular timestamp based on snapshot
+isolation, which isolates reads from any concurrent write activity. All reads
+access snapshots across the entire cluster, seeing all previously committed
+transactions in full, regardless of which alpha node received earlier queries.
+
+Writes use optimistic lock semantics, where a transaction will be aborted if
+another (concurrent) transaction updates exactly the same data (same edge on the
+same node) first. This will be reported as an "aborted" transaction to the
+caller.
+
+Dgraph ensures monotonically increasing transaction timestamps to sequence all
+updates in the database.
+This provides serializability: if any transaction Tx1 commits before Tx2
+starts, then Ts_commit(Tx1) < Ts_start(Tx2), and in turn a read at any point in
+time can never see Tx2's changes without also seeing Tx1's changes.
+
+Dgraph also ensures proper read-after-write semantics. Any commit at timestamp
+Tc is guaranteed to be seen by a read at timestamp Tr by any client, if Tr >=
+Tc.
+
+### Terminology
+
+- **Snapshot isolation:** all reads see a consistent view of the database at the
+  point in time when the read was submitted
+- **Oracle:** a logical process that tracks timestamps and which data (keys,
+  predicates, etc.) has been committed or is being modified. The oracle hands
+  out timestamps and aborts transactions if another transaction has modified its
+  data.
+- **RAFT:** a well-known consistency algorithm to ensure distributed processes
+  durably store data
+- **Write-Ahead Log:** Also WAL. A fast log of updates on each alpha that
+  ensures buffered in-memory structures are persisted.
+- **Proposal:** A process within the RAFT algorithm to track possible updates
+  during the consensus process.
+- **SST:** Persistent files comprising the LSM tree, together with memtables.
+- **Memtable:** An in-memory version of an SST, supporting fast updates.
+  Memtables are mutable, and SSTs are immutable.
+- **Log Compaction:** The process of combining SSTs into newer SSTs while
+  eliminating obsolete data and reclaiming disk space.
+- **Timestamp:** Or point in time. A numeric counter representing the sequential
+  order of all transactions, and indicating when a transaction became valid and
+  query-able.
+- **Optimistic Lock:** a logical process whereby all transactions execute
+  without blocking on other transactions, and are aborted if there is a
+  conflict. Aborted transactions should typically be retried if they occur.
+- **Pessimistic Lock:** a process, not used in Dgraph, where all concurrent
+  transactions mutating the same data except one block and wait for each other
+  to complete.
+- **ACID:** An acronym representing attributes of true transactions: Atomic,
+  Consistent, Isolated, and Durable.
diff --git a/dgraph/reference/design-concepts/discovery-concept.mdx b/dgraph/reference/design-concepts/discovery-concept.mdx
new file mode 100644
index 00000000..b812849c
--- /dev/null
+++ b/dgraph/reference/design-concepts/discovery-concept.mdx
@@ -0,0 +1,9 @@
+---
+title: Discovery
+---
+
+### New Servers and Discovery
+
+Dgraph clusters will detect new machines allocated to the
+[cluster](./deploy/cluster-setup), establish connections, and transfer data to
+the new server based on the group the new machine is in.
diff --git a/dgraph/reference/design-concepts/dql-concept.mdx b/dgraph/reference/design-concepts/dql-concept.mdx
new file mode 100644
index 00000000..361645b2
--- /dev/null
+++ b/dgraph/reference/design-concepts/dql-concept.mdx
@@ -0,0 +1,11 @@
+---
+title: DQL
+---
+
+DQL is the "Dgraph Query Language" and is based on GraphQL. It is neither a
+superset nor a subset of GraphQL, but is generally more powerful than GraphQL.
+DQL coexists nicely with GraphQL, so many users perform most access using
+GraphQL and only "drop down" into DQL when they need a query mechanism that is
+not supported in the GraphQL spec; e.g., `@recurse` query operations are only
+available in DQL (see the sketch below). Other users simply use DQL. DQL
+supports both queries and mutations, as well as hybrid "upsert" operations.
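+As an illustration, here is a small sketch of a DQL `@recurse` query, which has
+no GraphQL-spec equivalent. The `name` predicate (assumed to have an index) and
+the `manages` relationship are placeholder assumptions:
+
+```
+{
+  reports(func: eq(name, "Alice")) @recurse(depth: 3) {
+    name
+    manages
+  }
+}
+```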
diff --git a/dgraph/reference/design-concepts/dql-graphql-layering-concept.mdx b/dgraph/reference/design-concepts/dql-graphql-layering-concept.mdx
new file mode 100644
index 00000000..2650e617
--- /dev/null
+++ b/dgraph/reference/design-concepts/dql-graphql-layering-concept.mdx
@@ -0,0 +1,27 @@
+---
+title: DQL and GraphQL
+---
+
+## Dgraph Schemas
+
+Dgraph natively supports GraphQL, including `GraphQL Schema`s. GraphQL schemas
+"sit on top of" DQL schemas, in the sense that when a GraphQL schema is added to
+Dgraph, a corresponding `DQL Schema` is automatically created.
+
+Refer to the [GraphQL-DQL interoperability](./graphql-dql) section for details.
+
+## Dgraph Queries, Mutations and Upserts
+
+Similarly, GraphQL queries and mutations are implemented on top of DQL, in the
+sense that each GraphQL operation is converted internally into a DQL operation,
+which is then executed. This translation is not particularly complex, since DQL
+is based on GraphQL, with some syntax changes and some extensions.
+
+This is generally transparent to all callers; however, users should be aware
+that:
+
+1. Anything done in GraphQL can also be done in DQL if needed. Some small
+   exceptions include the enforcement of non-null constraints and other checks
+   done before Dgraph transpiles GraphQL to DQL and executes it.
+2. Some logging including Request Logging and OpenTrace (Jaeger) tracing may
+   show DQL converted from the GraphQL.
diff --git a/dgraph/reference/design-concepts/facets-concept.mdx b/dgraph/reference/design-concepts/facets-concept.mdx
new file mode 100644
index 00000000..e85aba9f
--- /dev/null
+++ b/dgraph/reference/design-concepts/facets-concept.mdx
@@ -0,0 +1,15 @@
+---
+title: Facets
+---
+
+Dgraph allows a set of properties to be associated with any `Relationship`. E.g.
+if there is a "worksFor" relationship between Node "Bob" and Node "Google", this
+relationship may have facet values of "since": 2002-05-05 and "position":
+"Engineer".
+
+Facets can always be replaced by adding a new Node representing the relationship
+and storing the facet data as attributes of the new Node.
+
+The term "facet" is also common in database and search engine technology, and
+indicates a dimension or classification of data. One way to use facets is to
+indicate a relationship type.
diff --git a/dgraph/reference/design-concepts/graphql-concept.mdx b/dgraph/reference/design-concepts/graphql-concept.mdx
new file mode 100644
index 00000000..add2c652
--- /dev/null
+++ b/dgraph/reference/design-concepts/graphql-concept.mdx
@@ -0,0 +1,17 @@
+---
+title: GraphQL
+---
+
+`GraphQL` is a query and update standard defined at
+[GraphQL.org](https://graphql.org/). `GraphQL` is natively supported by Dgraph,
+without requiring additional servers, data mappings or resolvers. Typically,
+"resolving" a data field in GraphQL simply corresponds to walking that
+relationship in Dgraph.
+
+Dgraph also auto-generates access functions for any `GraphQL Schema`, allowing
+users to get up and running in minutes with Dgraph plus a GraphQL schema. The
+APIs are auto-generated.
+
+GraphQL is internally converted to the (similar-but-different) `DQL` query
+language before being executed. We can think of GraphQL as "sitting on top" of
+DQL.
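+As a rough illustration of that layering, a GraphQL query like the following
+(assuming a hypothetical `Post` type with a searchable `title` field):
+
+```graphql
+query {
+  queryPost(filter: { title: { anyofterms: "graph" } }) {
+    title
+  }
+}
+```
+
+might be transpiled into a DQL query along these lines (a sketch, not the exact
+generated form):
+
+```
+{
+  queryPost(func: type(Post)) @filter(anyofterms(Post.title, "graph")) {
+    title: Post.title
+  }
+}
+```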
diff --git a/dgraph/reference/design-concepts/group-concept.mdx b/dgraph/reference/design-concepts/group-concept.mdx
new file mode 100644
index 00000000..782e6adc
--- /dev/null
+++ b/dgraph/reference/design-concepts/group-concept.mdx
@@ -0,0 +1,39 @@
+---
+title: Group
+---
+
+A group is a set of one, three, or more servers that work together and have a
+single `leader` in the sense defined by the RAFT protocol.
+
+## Alpha Group
+
+An Alpha `Group` in Dgraph is a shard of data, and may or may not be
+highly-available (HA). An HA group typically has three Dgraph instances (servers
+or K8s pods), and a non-HA group is a single instance. Every Alpha instance
+belongs to one group, and each group is responsible for serving a particular set
+of tablets (relations). In an HA configuration, the three or more instances in a
+single group replicate the same data to every instance to ensure redundancy of
+data.
+
+In a sharded Dgraph cluster, tablets are automatically assigned to each group,
+and dynamically relocated as sizes change to keep the groups balanced.
+Predicates can also be moved manually if desired.
+
+In a future version, if a tablet gets too big, it will be split across two
+groups, but currently data is balanced by moving each tablet to one group only.
+
+To avoid confusion, remember that you may have many Dgraph alpha instances due
+to either sharding, or due to HA configuration. If you have both sharding and
+HA, you will have 3\*N alphas for N groups:
+
+| config      | Non-HA            | HA                       |
+| ----------- | ----------------- | ------------------------ |
+| Non-sharded | 1 alpha total     | 3 alphas total           |
+| Sharded     | 1 alpha per group | 3\*N alphas for N groups |
+
+## Zero Group
+
+Group Zero is a lightweight server or group of servers that helps control the
+overall cluster. It manages timestamps and UIDs, determines when data should be
+rebalanced among shards, and performs other functions. The servers in this group
+are generally called "Zeros."
diff --git a/dgraph/reference/design-concepts/index-tokenize-concept.mdx b/dgraph/reference/design-concepts/index-tokenize-concept.mdx
new file mode 100644
index 00000000..b4d4ab30
--- /dev/null
+++ b/dgraph/reference/design-concepts/index-tokenize-concept.mdx
@@ -0,0 +1,27 @@
+---
+title: Index and Tokenizer
+---
+
+### Indexing
+
+An index is an optimized data structure, stored on disk and loaded into memory,
+that speeds up query processing. It is created and stored in addition to the
+primary data. E.g. a "hasName" property or relation is the primary storage
+structure for a graph in Dgraph, but may also have an additional index
+structure configured.
+
+Typically, Dgraph query access is optimized for forward access. When other
+access is needed, an index may speed up queries. Indexes are large structures
+that hold all values for some Relation (vs `Posting Lists`, which are typically
+smaller, per-Node structures).
+
+### Tokenizers
+
+Tokenizers are simply small algorithms that create indexed values from some Node
+property. E.g. if a Book Node has a Title attribute, and you add a "term" index,
+each word (term) in the text will be indexed. The word "Tokenizer" derives its
+name from the tokenizing operations used to create this index type.
+
+Similarly, if the Book has a publicationDateTime, you can add a day or year
+index. The "tokenizer" here extracts the value to be indexed, which may be the
+day or hour of the dateTime, or only the year.
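+In DQL schema terms, the Book example above might look like the following
+sketch (the predicate names are placeholder assumptions):
+
+```
+title: string @index(term) .
+publicationDateTime: dateTime @index(year) .
+```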
diff --git a/dgraph/reference/design-concepts/index.mdx b/dgraph/reference/design-concepts/index.mdx
new file mode 100644
index 00000000..23044b15
--- /dev/null
+++ b/dgraph/reference/design-concepts/index.mdx
@@ -0,0 +1,8 @@
+---
+title: Design Concepts
+---
+
+This section of the documentation covers various concepts that are relevant to
+the Dgraph system.
+
+### In this section
diff --git a/dgraph/reference/design-concepts/lambda-concept.mdx b/dgraph/reference/design-concepts/lambda-concept.mdx
new file mode 100644
index 00000000..285c8a11
--- /dev/null
+++ b/dgraph/reference/design-concepts/lambda-concept.mdx
@@ -0,0 +1,8 @@
+---
+title: Lambdas
+---
+
+Dgraph Lambdas are JavaScript functions that can be used during query or
+mutation processing to extend GraphQL or DQL queries and mutations. Lambdas are
+not related at all to AWS Lambdas. They are functions that run in an (optional)
+node.js server that is included in the Dgraph Cloud offering.
diff --git a/dgraph/reference/design-concepts/minimizing-network-calls.mdx b/dgraph/reference/design-concepts/minimizing-network-calls.mdx
new file mode 100644
index 00000000..e2fc1b68
--- /dev/null
+++ b/dgraph/reference/design-concepts/minimizing-network-calls.mdx
@@ -0,0 +1,116 @@
+---
+title: Minimal Network Calls
+---
+
+### Predicate-based storage and sharding
+
+Dgraph is unique in its use of predicate-based sharding, which allows complex
+and deep distributed queries to run without incurring high network overhead and
+associated delays.
+
+Rather than store and shard by putting different _nodes_ (aka
+entities\*) on different servers, Dgraph stores predicates, or triples
+of the form `<subject> <predicate> <object>`. The nodes are therefore
+implicit in the predicate storage, rather than vice versa.
+
+This makes query execution quite different and, in particular, allows network
+optimizations in a distributed database.
+
+### Example
+
+To explain how this works, let's use an example query:
+
+`Find all posts liked by friends of friends of mine over the last year, written by a popular author A.`
+
+### SQL/NoSQL
+
+In a distributed SQL database or (non-graph) NoSQL database, this query requires
+retrieval of a lot of data. Consider two approaches:
+
+Approach 1:
+
+- Find all the friends (~ 338
+  [friends](https://www.pewresearch.org/fact-tank/2014/02/03/what-people-like-dislike-about-facebook/)).
+- Find all their friends (~ 338 \* 338 ≈ 114,000 people).
+- Find all the posts liked by these people over the last year (resulting set in
+  the millions).
+- Intersect these posts with posts authored by person A.
+
+Approach 2:
+
+- Find all posts written by popular author A over the last year (possibly
+  thousands).
+- Find all people who liked those posts (easily millions) (call this
+  `result set 1`).
+- Find all your friends.
+- Find all their friends (call this `result set 2`).
+- Intersect `result set 1` with `result set 2`.
+
+Both approaches would result in a lot of data moving back and forth between the
+database and the application; they would be slow to execute, and might require
+running an offline job.
+
+### Dgraph Approach
+
+This is how it would run in Dgraph:
+
+Sharding assumptions (which predicates live where):
+
+- Assume Server X contains the predicate `friends` representing all friend
+  relations.
+- Assume Server Y contains the predicate `posts_liked` representing who likes
+  each post.
+- Assume Server Z contains the predicate `author` representing all who authored
+  each post.
+- Assume Server W contains the predicate `title` representing the uid->string
+  title property of posts.
+
+Algorithm:
+
+- Server X
+  - If the request was not sent to Server X, route it to Server X where the
+    friends predicate lives. **(1 RPC)**.
+  - Seek to my uid within predicate (tablet) `friends` and retrieve a list of my
+    friends as a list of uids.
+  - Still on Server X, use the friends predicate again to get friends for all of
+    those uids, generating a list of my friends of friends. Call this
+    `result set myFOF`.
+- Server Y
+  - Send result set myFOF to Server Y, which holds the posts_liked predicate
+    **(1 RPC)**.
+  - Retrieve all posts liked by my friends-of-friends. Call this
+    `result set postsMyFOFLiked`.
+- Server Z
+  - Send postsMyFOFLiked result set to Server Z **(1 RPC)**.
+  - Retrieve all posts authored by A. Call this `result set authoredByA`.
+  - Still on Server Z, intersect the two sorted lists to get posts that are both
+    liked and authored by A: `result set postsMyFOFLiked` intersect
+    `result set authoredByA`. Call this `result set postsMyFOFLikedByA`.
+  - At this point we have done the hard work, but have the uids of the posts
+    instead of the post titles.
+- Server W
+  - Send `result set postsMyFOFLikedByA` to Server W, which holds the title
+    predicate **(1 RPC)**.
+  - Convert uids to names by looking up the title for each uid. Call this
+    `result set postUidsAndTitles`.
+- Respond to caller with `result set postUidsAndTitles`.
+
+## Net Result - predictable distributed graph scaling
+
+In at most 4 RPCs, we have figured out all the posts liked by friends of
+friends, written by popular author A, with titles. Typically, all four
+predicates will not live on four different Servers, so this is a worst-case
+scenario. Dgraph network activity is limited by the query join depth, rather
+than increasing arbitrarily with the number of nodes in the graph and how they
+are broken up across servers.
+
+We are not aware of any way for a node-based sharding database to avoid high
+network RPC counts during arbitrary queries, because "node-hopping" does not
+mix well with a graph that is segmented across servers.
+
+---
+
+\* _Throughout this note, we call entities in a graph "nodes," which
+is standard terminology when talking about nodes and predicates. These may be
+confused with RAFT or Kubernetes nodes in some contexts, but generally we mean
+nodes in a graph_.
diff --git a/dgraph/reference/design-concepts/namespace-tenant-concept.mdx b/dgraph/reference/design-concepts/namespace-tenant-concept.mdx
new file mode 100644
index 00000000..754b2781
--- /dev/null
+++ b/dgraph/reference/design-concepts/namespace-tenant-concept.mdx
@@ -0,0 +1,14 @@
+---
+title: Namespace and Tenant
+---
+
+A Dgraph `Namespace` (aka Tenant) is a logically separate database within a
+Dgraph cluster. A Dgraph cluster can host many Namespaces (and this is how the
+Dgraph "shared" cloud offering works). Each user must then log into their own
+namespace using namespace-specific credentials, and sees only their own data.
+Note that this usually requires an extra or specific login.
+
+There is no mechanism to query in a way that combines data from two namespaces,
+which simplifies and enforces security in use cases where that isolation is a
+requirement. An API layer or client would have to pull data from multiple
+namespaces using different authenticated queries if data needed to be combined.
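+For example, with ACLs enabled, a client might log into a specific namespace
+through the `/admin` GraphQL endpoint before querying. A sketch, assuming the
+default `groot` credentials and a namespace numbered `1`:
+
+```sh
+curl -s -X POST localhost:8080/admin -H 'Content-Type: application/json' -d '{
+  "query": "mutation { login(userId: \"groot\", password: \"password\", namespace: 1) { response { accessJWT } } }"
+}'
+```
+
+The returned `accessJWT` is then passed in the `X-Dgraph-AccessToken` header on
+subsequent requests, scoping them to that namespace.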
diff --git a/dgraph/reference/design-concepts/network-call-minimization-concept.mdx b/dgraph/reference/design-concepts/network-call-minimization-concept.mdx
new file mode 100644
index 00000000..ec8a1bdb
--- /dev/null
+++ b/dgraph/reference/design-concepts/network-call-minimization-concept.mdx
@@ -0,0 +1,11 @@
+---
+title: Network Call Minimization
+---
+
+Compared to RAM or SSD access, network calls are slow, so Dgraph is built from
+the ground up to minimize them. For graph databases which store sub-graphs on
+different shards, this is difficult or impossible, but predicate-based
+(relationship-based) sharding allows fast distributed queries with Dgraph.
+
+See [How Dgraph Minimizes Network Calls](./minimizing-network-calls) for more
+details.
diff --git a/dgraph/reference/design-concepts/posting-list-concept.mdx b/dgraph/reference/design-concepts/posting-list-concept.mdx
new file mode 100644
index 00000000..66e0d4d7
--- /dev/null
+++ b/dgraph/reference/design-concepts/posting-list-concept.mdx
@@ -0,0 +1,124 @@
+---
+title: Posting List and Tablet
+---
+
+Posting lists and tablets are internal storage mechanisms and are generally
+hidden from users or developers, but logs, core product code, blog posts and
+discussions about Dgraph may use the terms "posting list" and "tablet."
+
+Posting lists are a form of inverted index. Posting lists correspond closely to
+the RDF concept of a graph, where the entire graph is a collection of triples,
+`<subject> <predicate> <object>`. In this view, a posting list is a list of all
+triples that share a `<subject>+<predicate>` pair.
+
+(Note that in Dgraph docs, we typically use the term "relationship" rather than
+predicate, but here we will refer to predicates explicitly.)
+
+The posting lists are grouped by predicate into `tablets`. A tablet therefore
+has all data for a predicate, for all subject UIDs.
+
+Tablets are the basis for data shards in Dgraph. In the near future, Dgraph may
+split a single tablet into two shards, but currently every data shard is a
+single predicate. Every server then hosts and stores a set of tablets. Dgraph
+will move or allocate different tablets to different servers to achieve balance
+across a sharded cluster.
+
+### Example
+
+If we're storing friendship relationships among four people, we may have four
+posting lists represented by the four tables below:
+
+| Node    | Attribute | Value   |
+| ------- | --------- | ------- |
+| person1 | friend    | person2 |
+| person1 | friend    | person4 |
+
+| Node    | Attribute | Value   |
+| ------- | --------- | ------- |
+| person2 | friend    | person1 |
+
+| Node    | Attribute | Value   |
+| ------- | --------- | ------- |
+| person3 | friend    | person2 |
+| person3 | friend    | person4 |
+
+| Node    | Attribute | Value   |
+| ------- | --------- | ------- |
+| person4 | friend    | person2 |
+| person4 | friend    | person1 |
+| person4 | friend    | person3 |
+
+The corresponding posting lists would be something like:
+
+```
+person1UID+friend->[person2UID, person4UID]
+person2UID+friend->[person1UID]
+person3UID+friend->[person2UID, person4UID]
+person4UID+friend->[person1UID, person2UID, person3UID]
+```
+
+Similarly, a posting list will also hold all literal value properties for every
+node.
+For example, consider the names of people in these three tables:
+
+| Node    | Attribute | Value   |
+| ------- | --------- | ------- |
+| person1 | name      | "James" |
+| person1 | name      | "Jimmy" |
+| person1 | name      | "Jim"   |
+
+| Node    | Attribute | Value   |
+| ------- | --------- | ------- |
+| person2 | name      | "Rajiv" |
+
+| Node    | Attribute | Value    |
+| ------- | --------- | -------- |
+| person3 | name      | "Rachel" |
+
+The posting lists would look like:
+
+```
+person1UID+name->["James", "Jimmy", "Jim"]
+person2UID+name->["Rajiv"]
+person3UID+name->["Rachel"]
+```
+
+Note that person4 has no name attribute specified, so that posting list would
+not exist.
+
+In these examples, two predicates (relations) are defined, and therefore two
+tablets will exist.
+
+The tablet for the `friend` predicate will hold all posting lists for all
+"friend" relationships in the entire graph. The tablet for the `name` property
+will hold all posting lists for `name` in the graph.
+
+If other types such as Pets or Cities also have a name property, their data will
+be in the same tablet as the Person names.
+
+### Performance implications
+
+A key advantage of grouping data into predicate-based shards is that we have all
+the data needed to do one join in one `tablet` on one server/shard. This means
+one RPC to the machine serving that `tablet` will be adequate, as documented in
+[How Dgraph Minimizes Network Calls](./minimizing-network-calls).
+
+Posting lists are the unit of data access and caching in Dgraph. The underlying
+key-value store stores and retrieves posting lists as a unit. Queries that
+access larger posting lists will use more cache and may incur more disk access
+for un-cached posting lists.
diff --git a/dgraph/reference/design-concepts/protocol-buffers-concept.mdx b/dgraph/reference/design-concepts/protocol-buffers-concept.mdx
new file mode 100644
index 00000000..c348e9ad
--- /dev/null
+++ b/dgraph/reference/design-concepts/protocol-buffers-concept.mdx
@@ -0,0 +1,8 @@
+---
+title: Protocol Buffers
+---
+
+All data in Dgraph that is stored or transmitted among the Dgraph instances
+(servers) is converted into space-optimized byte arrays using
+[Protocol Buffers](https://developers.google.com/protocol-buffers/). Protocol
+Buffers are a standard, optimized technology to speed up network communications.
diff --git a/dgraph/reference/design-concepts/queries-process.mdx b/dgraph/reference/design-concepts/queries-process.mdx
new file mode 100644
index 00000000..689b35ba
--- /dev/null
+++ b/dgraph/reference/design-concepts/queries-process.mdx
@@ -0,0 +1,53 @@
+---
+title: Query Process
+---
+
+To understand how query execution works, look at an example.
+
+```
+{
+  me(func: uid(0x1)) {
+    rel_A
+    rel_B {
+      rel_B1
+      rel_B2
+    }
+    rel_C {
+      rel_C1
+      rel_C2 {
+        rel_C2_1
+      }
+    }
+  }
+}
+```
+
+Let's assume we have 3 Alpha instances, and instance id=2 receives this query.
+These are the steps:
+
+- This query specifies the exact UID list (one UID) to start with, so there is
+  no root query clause.
+- Retrieve posting lists using keys = `0x1::rel_A`, `0x1::rel_B`, and
+  `0x1::rel_C`.
+  - At worst, these predicates could belong to 3 different groups if the DB is
+    sharded, so this would incur at most 3 network calls.
+- The above posting lists would include three lists of UIDs or values.
+  - The UID results (id1, id2, ..., idn) for `rel_B` are converted into queries
+    for `id1::rel_B1` `id2::rel_B1`, etc., and for `id1::rel_B2` `id2::rel_B2`,
+    etc.
+  - Similarly, results for `rel_C` will be used to get the next set of UIDs
+    from posting list keys like `id::rel_C1` and `id::rel_C2`.
+- This process continues recursively for `rel_C2_1` as well, and as deep as any
+  query requires.
+
+More complex queries may do filtering operations, or intersections and unions of
+UIDs, but this recursive walk, executing a number of (often parallel) `Tasks` to
+retrieve UIDs, characterizes Dgraph querying.
+
+If the query was run via the HTTP interface `/query`, the resulting subgraph
+then gets converted into JSON for replying back to the client. If the query was
+run via the [gRPC](https://www.grpc.io/) interface using the language
+[clients](./clients), the subgraph gets converted to
+[protocol buffer](https://developers.google.com/protocol-buffers/) format and
+similarly returned to the client.
diff --git a/dgraph/reference/design-concepts/raft.mdx b/dgraph/reference/design-concepts/raft.mdx
new file mode 100644
index 00000000..db984728
--- /dev/null
+++ b/dgraph/reference/design-concepts/raft.mdx
@@ -0,0 +1,212 @@
+---
+title: RAFT
+---
+
+Dgraph uses RAFT whenever consensus among a distributed set of servers is
+required, such as ensuring that a transaction has been properly committed, or
+determining the proper timestamp for a read or write. Each Zero or Alpha
+`group` uses RAFT to elect a leader.
+
+This section aims to explain the RAFT consensus algorithm in simple terms. The
+idea is to give you just enough to understand the basic concepts, without going
+into explanations of why it works correctly. For a detailed explanation of
+RAFT, please read the original thesis paper by
+[Diego Ongaro](https://github.com/ongardie/dissertation).
+
+## Term
+
+Each election cycle is considered a **term**, during which there is a single
+leader _(just like in a democracy)_. When a new election starts, the term number
+is increased. This is straightforward and obvious but is a critical factor for
+the correctness of the algorithm.
+
+In rare cases, if no leader could be elected within an `ElectionTimeout`, that
+term can end without a leader.
+
+## Server States
+
+Each server in the cluster can be in one of the following three states:
+
+- Leader
+- Follower
+- Candidate
+
+Generally, the servers are in leader or follower state. When the leader crashes
+or the communication breaks down, the followers will wait for the election
+timeout before converting to candidates. The election timeout is randomized.
+This would allow one of them to declare candidacy before others. The candidate
+would vote for itself and wait for the majority of the cluster to vote for it
+as well. If a follower hears from a candidate with a higher term than the
+current (_dead in this case_) leader, it would vote for it. The candidate who
+gets majority votes wins the election and becomes the leader.
diff --git a/dgraph/reference/design-concepts/raft.mdx b/dgraph/reference/design-concepts/raft.mdx
new file mode 100644
index 00000000..db984728
--- /dev/null
+++ b/dgraph/reference/design-concepts/raft.mdx
@@ -0,0 +1,212 @@
+---
+title: RAFT
+---
+
+Dgraph uses Raft whenever consensus among a distributed set of servers is
+required, such as ensuring that a transaction has been properly committed, or
+determining the proper timestamp for a read or write. Each Zero or Alpha
+`group` uses Raft to elect a leader.
+
+This section aims to explain the Raft consensus algorithm in simple terms. The
+idea is to give you just enough to understand the basic concepts, without
+going into explanations about why it works correctly. For a detailed
+explanation of Raft, please read the original thesis by
+[Diego Ongaro](https://github.com/ongardie/dissertation).
+
+## Term
+
+Each election cycle is considered a **term**, during which there is a single
+leader _(just like in a democracy)_. When a new election starts, the term
+number is increased. This is straightforward and obvious, but it is a critical
+factor for the correctness of the algorithm.
+
+In rare cases, if no leader could be elected within an `ElectionTimeout`, that
+term can end without a leader.
+
+## Server States
+
+Each server in the cluster can be in one of the following three states:
+
+- Leader
+- Follower
+- Candidate
+
+Generally, the servers are in leader or follower state. When the leader
+crashes or the communication breaks down, the followers wait for an election
+timeout before converting to candidates. The election timeout is randomized,
+which allows one of them to declare candidacy before the others. The candidate
+votes for itself and waits for the majority of the cluster to vote for it as
+well. If a follower hears from a candidate with a higher term than the current
+(_dead in this case_) leader, it votes for it. The candidate who gets majority
+votes wins the election and becomes the leader.
+
+The leader then tells the rest of the cluster about the result (via a
+Heartbeat [Communication](./#communication)), and the other candidates become
+followers. Again, the cluster goes back into the leader-follower model.
+
+A leader could revert to being a follower without an election, if it finds
+another leader in the cluster with a higher [Term](./#term). This might happen
+in rare cases (network partitions).
+
+## Communication
+
+There is unidirectional RPC communication, from the leader to all/any
+followers. The followers never ping the leader. The leader sends
+`AppendEntries` messages to the followers with logs containing state updates.
+When the leader sends `AppendEntries` with zero logs (updates), that's
+considered a Heartbeat. The leader sends all followers Heartbeats at regular
+intervals.
+
+If a follower doesn't receive a Heartbeat for the `ElectionTimeout` duration
+(generally between 150ms and 300ms), the leader may be down, so it converts
+its state to candidate (as mentioned in [Server States](./#server-states)). It
+then requests votes by sending a `RequestVote` call to other servers. If it
+gets votes from the majority, the candidate becomes the leader. On becoming
+leader, it sends Heartbeats to all other servers to establish its authority.
+
+Every communication request contains a term number. If a server receives a
+request with a stale term number, it rejects the request.
+
+## Log Entries
+
+Dgraph uses LSM Trees, so we call commits or updates "Log Entries." Log
+Entries are numbered sequentially and contain a term number. An Entry is
+considered **committed** if it has been replicated (and stored) by a majority
+of the servers.
+
+On being notified of the results of a client request (which is often processed
+on other servers), the leader does four things to coordinate Raft consensus
+(this is also called Log Replication):
+
+- Appends the entry to its log and persists it.
+- Issues `AppendEntries` in parallel to other servers.
+- Monitors for the majority to report that the entry is replicated, after
+  which it considers the entry committed and applies it to the leader's state
+  machine.
+- Notifies followers that the entry is committed so that they can apply it to
+  their state machines.
+
+A leader never overwrites or deletes its entries. Raft guarantees that if an
+entry is committed, all future leaders will have it. A leader can, however,
+force overwrite the followers' logs, so that they match the leader's log if
+necessary.
+
+## Voting
+
+Each server persists its current term and vote, so it doesn't end up voting
+twice in the same term. On receiving a `RequestVote` RPC, the server denies
+its vote if its log is more up-to-date than the candidate's. It would also
+deny a vote if a minimum `ElectionTimeout` hasn't passed since the last
+Heartbeat from the leader. Otherwise, it gives a vote and resets its
+`ElectionTimeout` timer.
+
+The up-to-dateness of logs is determined by:
+
+- Term number comparison
+- Index number or log length comparison
+
+  To understand the above sections better, you can see this [interactive
+  visualization](http://thesecretlivesofdata.com/raft).
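+
+As a toy model of the randomized election timeout (the timings, channels, and
+messages below are invented for illustration and are not Dgraph internals):
+
+```go
+package main
+
+import (
+	"fmt"
+	"math/rand"
+	"time"
+)
+
+// electionTimeout picks a random duration in the 150-300ms band mentioned
+// above; the jitter makes simultaneous candidacies unlikely.
+func electionTimeout() time.Duration {
+	return time.Duration(150+rand.Intn(150)) * time.Millisecond
+}
+
+func main() {
+	heartbeats := make(chan struct{})
+
+	// Toy leader: send two heartbeats, then go silent, as if it crashed.
+	go func() {
+		for i := 0; i < 2; i++ {
+			time.Sleep(100 * time.Millisecond)
+			heartbeats <- struct{}{}
+		}
+	}()
+
+	// Follower loop: reset the timer on every heartbeat; if the timer fires
+	// first, convert to candidate (a real server would now send RequestVote
+	// RPCs to the rest of the cluster).
+	for {
+		select {
+		case <-heartbeats:
+			fmt.Println("follower: heartbeat received, timer reset")
+		case <-time.After(electionTimeout()):
+			fmt.Println("follower: timeout, becoming candidate")
+			return
+		}
+	}
+}
+```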
+
+## Cluster membership
+
+Raft only allows single-server changes, i.e., only one server can be added or
+removed at a time. This is achieved by cluster configuration changes. Cluster
+configurations are communicated using special entries in `AppendEntries`.
+
+The significant difference in how cluster configuration changes are applied
+compared to how typical [Log Entries](./#log-entries) are applied is that the
+followers don't wait for a commitment confirmation from the leader before
+enabling them.
+
+A server can respond to both `AppendEntries` and `RequestVote` without
+checking the current configuration. This mechanism allows new servers to
+participate without officially being part of the cluster; without it, new
+servers could never be brought into the cluster.
+
+When a new server joins, it won't have any logs, and they need to be streamed
+to it. To ensure cluster availability, Raft allows this server to join the
+cluster as a non-voting member. Once it's caught up, voting can be enabled.
+This also allows the cluster to remove this server in case it's too slow to
+catch up, before giving voting rights _(sort of like getting a green card to
+allow assimilation before citizenship is awarded providing voting rights)_.
+
+  If you want to add a few servers and remove a few servers, do the addition
+  before the removal. To bootstrap a cluster, start with one server to allow
+  it to become the leader, and then add servers to the cluster one-by-one.
+
+## Snapshots
+
+The Raft log cannot be allowed to grow without bound, so it must be compacted
+periodically. One of the ways to do this is snapshotting: as soon as the state
+machine is synced to disk, the logs can be discarded.
+
+Snapshots are taken by default after 10000 Raft entries, with a frequency of
+30 minutes. The frequency indicates the time between two subsequent snapshots.
+These numbers can be adjusted using the `--raft`
+[superflag](./deploy/cli-command-reference)'s `snapshot-after-entries` and
+`snapshot-after-duration` options respectively. Snapshots are created only
+when the conditions set by both of these options have been met.
+
+## Clients
+
+Clients must locate the cluster to interact with it. Various approaches can be
+used for discovery.
+
+A client can randomly pick any server in the cluster. If the server isn't the
+leader, the request should be rejected, and the leader information passed
+along. The client can then re-route its query to the leader. Alternatively,
+the server can proxy the client's request to the leader.
+
+When a client first starts up, it can register itself with the cluster using
+the `RegisterClient` RPC. This creates a new client id, which is used for all
+subsequent RPCs.
+
+## Linearizable Semantics
+
+Servers must filter out duplicate requests. They can do this via session
+tracking, where they use the client id and another request UID set by the
+client to avoid reprocessing duplicate requests. Raft also suggests storing
+responses along with the request UIDs, to reply back in case a duplicate
+request is received.
+
+Linearizability requires the results of a read to reflect the latest committed
+write. Serializability, on the other hand, allows stale reads.
+
+## Read-only queries
+
+To ensure linearizability of read-only queries run via the leader, the leader
+must take these steps:
+
+1. The leader must have at least one committed entry in its term. This
+   confirms that its state is up to date. _(C'mon! Now that you're in power do
+   something at least!)_
+2. The leader stores its latest commit index (the readIndex).
+3. The leader sends Heartbeats to the cluster and waits for an ACK from the
+   majority. Now it knows that it's still the leader. _(No successful coup.
+   Yup, still the democratically elected dictator I was before!)_
+4. The leader waits for its state machine to advance to the readIndex.
+5. The leader can now run the queries against the state machine and reply to
+   clients.
+
+Read-only queries can also be serviced by followers to reduce the load on the
+leader. But this could lead to stale results unless the follower confirms that
+its leader is still the real leader (guarding against network partitions). To
+do so, it would have to send a query to the leader, and the leader would have
+to do steps 1-3. Then the follower can do steps 4-5.
+
+Read-only queries would have to be batched up, and then RPCs would have to go
+to the leader for each batch, who in turn would have to send further RPCs to
+the whole cluster. _(This is not scalable without considerable optimizations
+to deal with latency.)_
+
+**An alternative approach** would be to have the servers return the index
+corresponding to their state machine. The client can then keep track of the
+maximum index it has received from replies so far, and pass it along to the
+server with the next request. If a server's state machine hasn't reached the
+index provided by the client, it will not service the request. This approach
+avoids inter-server communication and is a lot more scalable. _(This approach
+does not guarantee linearizability, but should converge quickly to the latest
+write.)_
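+
+A client-side sketch of this alternative approach, using invented types rather
+than any real Raft or Dgraph API:
+
+```go
+package main
+
+import "fmt"
+
+// Response carries the server's applied index along with the payload; these
+// types are hypothetical, purely to illustrate the bookkeeping.
+type Response struct {
+	AppliedIndex uint64
+	Data         string
+}
+
+// Client remembers the highest applied index it has observed and sends it
+// with every request; a server behind that index must not answer.
+type Client struct {
+	maxSeen uint64
+}
+
+func (c *Client) Read(server func(minIndex uint64) (Response, bool)) (string, bool) {
+	resp, ok := server(c.maxSeen)
+	if !ok {
+		return "", false // server's state machine lagged behind maxSeen
+	}
+	if resp.AppliedIndex > c.maxSeen {
+		c.maxSeen = resp.AppliedIndex
+	}
+	return resp.Data, true
+}
+
+func main() {
+	c := &Client{}
+	// Toy server whose state machine has applied up to index 7.
+	server := func(minIndex uint64) (Response, bool) {
+		const applied = 7
+		if applied < minIndex {
+			return Response{}, false
+		}
+		return Response{AppliedIndex: applied, Data: "value"}, true
+	}
+	fmt.Println(c.Read(server))
+}
+```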
diff --git a/dgraph/reference/design-concepts/relationships-concept.mdx b/dgraph/reference/design-concepts/relationships-concept.mdx
new file mode 100644
index 00000000..af009e73
--- /dev/null
+++ b/dgraph/reference/design-concepts/relationships-concept.mdx
@@ -0,0 +1,34 @@
+---
+title: Relationships
+---
+
+Dgraph stores `relationships` among `nodes` to represent graph structures, and
+also stores literal properties of `nodes`.
+
+This makes it easy for Dgraph to ingest the RDF
+[N-Quad](https://www.w3.org/TR/n-quads/) format, where each line represents
+
+- `Node, RelationName, Node, Label` or
+- `Node, RelationName, ValueLiteral, Label`
+
+The first represents relations among entities (nodes in graph terminology) and
+the second represents the relationship of a Node to one of its named
+attributes.
+
+Often, the optional `Label` is omitted, and therefore the N-Quad data is also
+referred to as "triples." When it is included, it represents which `Tenant` or
+`Namespace` the data lives in within Dgraph.
+
+  Dgraph can automatically generate a reverse relation. If the user wants to
+  run queries in that direction, they would need to define the [reverse
+  relationship](./dql-schema.md#reverse-edges).
+
+For `Relationships`, the subject and object are represented as 64-bit numeric
+UIDs and the relationship name itself links them:
+`<subject UID> <relationship name> <object UID>`.
+
+For literal attributes of a `Node`, the subject must still (and always) be a
+numeric UID, but the object will be a primitive value. These can be thought of
+as `<subject UID> <relationship name> <value>`, where the value is not a
+64-bit UID, and is instead a string, float, int, dateTime, geopoint, or
+boolean.
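+
+As a small illustration of the two triple shapes, here is a toy Go type. It is
+not Dgraph's internal representation; it merely distinguishes a UID object
+from a literal object:
+
+```go
+package main
+
+import "fmt"
+
+// NQuad is a toy model of one parsed line of N-Quad input; Dgraph's real
+// internal format differs. It only illustrates the two object shapes.
+type NQuad struct {
+	SubjectUID uint64
+	Relation   string
+	ObjectUID  uint64      // set when the object is another node
+	ObjectVal  interface{} // set when the object is a literal value
+}
+
+func (q NQuad) String() string {
+	if q.ObjectVal != nil {
+		// Literal attribute: <subject UID> <relationship name> <value> .
+		return fmt.Sprintf("<0x%x> <%s> %q .", q.SubjectUID, q.Relation, q.ObjectVal)
+	}
+	// Relationship: <subject UID> <relationship name> <object UID> .
+	return fmt.Sprintf("<0x%x> <%s> <0x%x> .", q.SubjectUID, q.Relation, q.ObjectUID)
+}
+
+func main() {
+	fmt.Println(NQuad{SubjectUID: 0x1, Relation: "name", ObjectVal: "Alice"})
+	fmt.Println(NQuad{SubjectUID: 0x1, Relation: "knows", ObjectUID: 0x2})
+}
+```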
diff --git a/dgraph/reference/design-concepts/replication-concept.mdx b/dgraph/reference/design-concepts/replication-concept.mdx
new file mode 100644
index 00000000..9610d9fd
--- /dev/null
+++ b/dgraph/reference/design-concepts/replication-concept.mdx
@@ -0,0 +1,15 @@
+---
+title: High Availability Replication
+---
+
+Each Highly-Available (HA) group will be served by at least 3 instances (or
+two if one is temporarily unavailable). In the case of an Alpha instance
+failure, the other Alpha instances in the same group still handle the load for
+data in that group. In the case of a Zero instance failure, the remaining two
+Zeros in the Zero group will continue to hand out timestamps and perform other
+Zero functions.
+
+In addition, Dgraph `Learner Nodes` are Alpha instances that hold replicas of
+data, but this replication is to support read replicas, often in a different
+geography from the master cluster. This replication is implemented the same
+way as HA replication, but the learner nodes do not participate in quorum, and
+do not take over from failed nodes to provide high availability.
diff --git a/dgraph/reference/design-concepts/transaction-mutation-concept.mdx b/dgraph/reference/design-concepts/transaction-mutation-concept.mdx
new file mode 100644
index 00000000..f69f4290
--- /dev/null
+++ b/dgraph/reference/design-concepts/transaction-mutation-concept.mdx
@@ -0,0 +1,12 @@
+---
+title: Transaction and Mutation
+---
+
+Borrowing from GraphQL, Dgraph calls writes to the database `Mutations`. As
+noted elsewhere (MVCC, LSM Trees and Write Ahead Log sections), writes are
+written persistently to the Write Ahead Log, and ephemerally to a memtable.
+
+Data is queried from the combination of persistent SST files and ephemeral
+memtable data structures. Mutations therefore always go into the memtables
+first (though they are also written durably to the WAL). The memtables are
+"Level 0" in the LSM Tree, and conceptually sit on top of the immutable SST
+files.
diff --git a/dgraph/reference/design-concepts/transactions-concept.mdx b/dgraph/reference/design-concepts/transactions-concept.mdx
new file mode 100644
index 00000000..6597908c
--- /dev/null
+++ b/dgraph/reference/design-concepts/transactions-concept.mdx
@@ -0,0 +1,32 @@
+---
+title: ACID Transactions
+---
+
+ACID is an acronym for
+
+- Atomic
+- Consistent
+- Isolated
+- Durable
+
+If these properties are maintained, there is a guarantee that data updates
+will not be lost or corrupted, and that their effects are predictable.
+Broadly, an ACID database safely and reliably stores data, while other
+databases have failure modes in which data can be lost or corrupted.
+
+### ACID in Dgraph
+
+Dgraph supports distributed ACID transactions through snapshot isolation and
+the Raft consensus protocol. Dgraph is fully transactional, and is verified
+via Jepsen tests, a gold standard for verifying transactional consistency.
+
+Dgraph ensures snapshot isolation plus realtime safety: if transaction T1
+commits before T2 begins, then the commit timestamp of T1 is strictly less
+than the start timestamp of T2. This ensures that the sequence of writes on
+shared data by many processes is reflected in the database state.
+
+Snapshot isolation is ensured by maintaining a consistent view of the database
+at any (relatively recent) point in time. Every read (query) takes place at
+the point in time it was submitted, and accesses a consistent snapshot that
+does not change and never includes partial updates from concurrent writes
+that are processing or committing.
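+
+The timestamp rule can be sketched in a few lines of Go. The types below are
+invented for illustration (this is not Dgraph's transaction code): a version
+written at `commitTs` is visible to a read at `readTs` only if
+`commitTs <= readTs`:
+
+```go
+package main
+
+import "fmt"
+
+// version is one committed value of a key; invented for illustration only.
+type version struct {
+	commitTs uint64
+	value    string
+}
+
+// visible returns the latest version committed at or before readTs, giving
+// the reader a stable snapshot regardless of later, concurrent commits.
+// It assumes history is sorted by ascending commitTs.
+func visible(history []version, readTs uint64) (string, bool) {
+	var out string
+	found := false
+	for _, v := range history {
+		if v.commitTs <= readTs {
+			out, found = v.value, true
+		}
+	}
+	return out, found
+}
+
+func main() {
+	history := []version{{commitTs: 5, value: "v1"}, {commitTs: 9, value: "v2"}}
+	fmt.Println(visible(history, 7)) // v1 true: the commit at ts=9 is invisible
+	fmt.Println(visible(history, 9)) // v2 true
+}
+```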
diff --git a/dgraph/reference/design-concepts/wal-memtable-concept.mdx b/dgraph/reference/design-concepts/wal-memtable-concept.mdx
new file mode 100644
index 00000000..155b981c
--- /dev/null
+++ b/dgraph/reference/design-concepts/wal-memtable-concept.mdx
@@ -0,0 +1,14 @@
+---
+title: WAL and Memtable
+---
+
+Per the Raft (and MVCC) approach, transactions write data to a `Write-Ahead
+Log` (WAL) to ensure it is durably stored. Soon after commit, data is also
+updated in the `memtables`, which are memory buffers holding recently-updated
+data. The `memtables` are mutable, unlike the SST files written to disk which
+hold most data. Once full, memtables are flushed to disk and become SST files.
+See Log Compaction for more details on this process.
+
+In the event of a system crash, the persistent data in the Write Ahead Logs is
+replayed to rebuild the memtables and restore the full system state from
+before the crash.
diff --git a/dgraph/reference/design-concepts/workers-concept.mdx b/dgraph/reference/design-concepts/workers-concept.mdx
new file mode 100644
index 00000000..890dc36f
--- /dev/null
+++ b/dgraph/reference/design-concepts/workers-concept.mdx
@@ -0,0 +1,13 @@
+---
+title: Workers
+---
+
+### Workers and Worker Pools
+
+Dgraph maintains a fixed set of worker processes (much like threads or
+goroutines) that retrieve and execute queries in parallel as they are sent
+over HTTP or gRPC. Dgraph also parallelizes `Tasks` within a single query
+execution, to maximize parallelism and more fully utilize system resources.
+Dgraph is written in the Go language, which supports high numbers of parallel
+goroutines, enabling this approach without creating the large numbers of OS
+threads that would otherwise slow things down.
diff --git a/dgraph/reference/dgraph-glossary.mdx b/dgraph/reference/dgraph-glossary.mdx
new file mode 100644
index 00000000..74a95080
--- /dev/null
+++ b/dgraph/reference/dgraph-glossary.mdx
@@ -0,0 +1,154 @@
+---
+title: Dgraph Glossary
+description: Dgraph terms
+---
+
+### Alpha
+
+A Dgraph cluster consists of [Zero](#zero) and Alpha nodes. Alpha nodes host
+relationships (also known as predicates) and indexes. Dgraph scales
+horizontally by adding more Alphas.
+
+### Badger
+
+Badger is a fast, open-source key-value database written in pure Go that
+provides the storage layer for Dgraph. More at the
+[Badger documentation](https://dgraph.io/docs/badger).
+
+### DQL
+
+Dgraph Query Language is Dgraph's proprietary language to insert, update,
+delete and query data. It is based on GraphQL, but is more expressive. (See
+also: [GraphQL](#graphql))
+
+### Edge
+
+In the mental picture of a graph (bubbles connected by lines), the bubbles are
+nodes and the lines are edges. In Dgraph terminology, edges are
+[relationships](#relationship), i.e., information about the relation between
+two nodes.
+
+### Facet
+
+A facet represents a property of a [relationship](#relationship).
+
+### Graph
+
+A graph is a simple structure that maps relations between objects. In Dgraph
+terminology, the objects are [nodes](#node) and the connections between them
+are [relationships](#relationship).
+
+### GraphQL
+
+[GraphQL](https://graphql.org/) is a declarative language for querying data,
+used by application developers to get the data they need using GraphQL APIs.
+GraphQL is an open standard with a robust ecosystem. Dgraph supports the
+deployment of a GraphQL data model (GraphQL schema) and automatically exposes
+a GraphQL API endpoint accepting GraphQL queries.
+
+### gRPC
+
+[gRPC](https://grpc.io/) is a high performance Remote Procedure Call (RPC)
+framework used by Dgraph to interface with clients. Dgraph has official gRPC
+clients for Go, C#, Java, JavaScript and Python. Applications written in those
+languages can perform mutations and queries inside transactions using Dgraph
+clients.
+
+### Lambda
+
+A Lambda Resolver (Lambda for short) is a GraphQL resolver supported within
+Dgraph. A Lambda is a user-defined JavaScript function that performs custom
+actions over the GraphQL types, interfaces, queries, and mutations. Dgraph
+Lambdas are unrelated to AWS Lambdas.
+
+### Mutation
+
+A mutation is a request to modify the database. Mutations include insert,
+update, or delete operations. A Mutation can be combined with a query to form
+an [Upsert](#upsert).
+
+### Node
+
+Conceptually, a node is "a thing" or an object of the business domain.
+For every node, Dgraph stores and maintains a universal identifier
+[UID](#uid), a list of properties, and the [relationships](#relationship) the
+node has with other nodes.
+
+The term "node" is also used in software architecture to reference a physical
+computer or a virtual machine running a module of Dgraph in a cluster. See
+[Alpha node](#alpha) and [Zero node](#zero).
+
+### Predicate
+
+In [RDF](#rdf) terminology, a predicate is the smallest piece of information
+about an object. A predicate can hold a literal value or can describe a
+relation to another entity:
+
+- when we store that an entity's name is "Alice", the predicate is `name` and
+  the predicate value is the string "Alice". It becomes a node property.
+- when we store that Alice knows Bob, we may use a predicate `knows` with the
+  node representing Alice. The value of this predicate would be the
+  [uid](#uid) of the node representing Bob. In that case, `knows` is a
+  [relationship](#relationship).
+
+### Ratel
+
+Ratel is an open source GUI tool for data visualization and cluster management
+that’s designed to work with Dgraph and DQL. See also:
+[Ratel Overview](./ratel/overview).
+
+### RDF
+
+RDF 1.1 is a Semantic Web Standard for data interchange. It allows us to make
+statements about resources. The format of these statements is simple and of
+the form `<subject> <predicate> <object>`. Dgraph supports the RDF format to
+create, import and export data. Note that Dgraph also supports the JSON
+format.
+
+### Relationship
+
+A relationship is a named, directed link relating one [node](#node) to
+another. It is the Dgraph term similar to [edge](#edge) and
+[predicate](#predicate). In Dgraph a relationship may itself have properties
+representing information about the relation, such as weight, cost, timeframe,
+or type. In Dgraph the properties of a relationship are called
+[facets](#facet).
+
+### Sharding
+
+Sharding is a database architecture pattern to achieve horizontal scale by
+distributing data among many servers. Dgraph shards data per relationship, so
+all data for one relationship forms a single shard and is stored on one (group
+of) servers, an approach referred to as "predicate-based sharding."
+
+### Triple
+
+Because RDF statements consist of three elements,
+`<subject> <predicate> <object>`, they are called triples. A triple represents
+a single atomic statement about a node. The object in an RDF triple can be a
+literal value or can point to another node. See [DQL RDF Syntax](./dql-rdf)
+for more details.
+
+### UID
+
+A UID is the Universal Identifier of a node. `uid` is a reserved property
+holding the UID value for every node. UIDs can either be generated by Dgraph
+when creating nodes, or can be set explicitly.
+
+### Upsert
+
+An upsert operation combines a query with a [Mutation](#mutation). Typically,
+a node is searched for, and then, depending on whether it is found, a new node
+is created with associated predicates or the existing node's relationships are
+updated. Upsert operations are important for implementing uniqueness of
+predicates.
+
+### Zero
+
+Dgraph consists of Zero and [Alpha](#alpha) nodes. Zero nodes control the
+Dgraph database cluster.
+Zero nodes assign Alpha nodes to groups, re-balance data between groups, and
+handle transaction timestamp and UID assignment.
diff --git a/dgraph/reference/dgraph-overview.mdx b/dgraph/reference/dgraph-overview.mdx
new file mode 100644
index 00000000..9a706b7b
--- /dev/null
+++ b/dgraph/reference/dgraph-overview.mdx
@@ -0,0 +1,206 @@
+---
+title: Dgraph Database Overview
+description:
+  Introduction to Dgraph Database.
+  Dgraph is a horizontally scalable and
+  distributed graph database that supports GraphQL natively. You can run
+  Dgraph on-premise, in your cloud infrastructure, or as a service
+  fully-managed by Dgraph.
+---
+
+## Dgraph
+
+Designed from day one to be distributed for scale and speed, **Dgraph** is the
+native graph database with native GraphQL support. It is open-source,
+scalable, distributed, highly-available, and lightning fast.
+
+Dgraph is different from other graph databases in a number of ways, including:
+
+- **Distributed Scale**: _Built from day 1 to be distributed, to handle larger
+  data sets._
+
+- **GraphQL Support**: _GraphQL is built in to make data access simple and
+  standards-compliant. Unlike most GraphQL solutions, no resolvers are
+  needed - Dgraph resolves queries automatically through graph navigation._
+
+- **Fully Transactional and ACID Compliant**: _Dgraph satisfies demanding
+  OLTP workloads that require frequent inserts and updates._
+
+- **Language support & Text Search**: _Full-text searching is included, and
+  strings can be expressed in multiple languages._
+
+- **Geo data and geo queries**: _Dgraph supports points and shapes data, and
+  queries can use near, within, contains, or intersects geo functions._
+
+- **True Free Open Source Software (FOSS)**: _Dgraph is free to use, and
+  [available on GitHub](https://github.com/dgraph-io/dgraph)._
+
+## Dgraph and GraphQL
+
+In Dgraph, GraphQL is not an afterthought or an add-on; it is core to the
+product. GraphQL developers can get started in minutes, and need not concern
+themselves with the powerful graph database running in the background.
+
+The difference with Dgraph is that no resolvers or custom queries are needed.
+Simply update a GraphQL schema, and all APIs are ready to go. The "resolvers"
+are transparently implemented by simply following graph relationships from
+node to node and node to field, with native graph performance.
+
+For complex queries that the GraphQL specification does not support, Dgraph
+provides a query language called "DQL", which is inspired by GraphQL but
+includes more features. With GraphQL, simple use cases remain simple, and with
+DQL, more complex cases become possible.
+
+{/\* TODO: too long. move this part below to GraphQL page Our GraphQL feature
+allows GraphQL users to get started immediately - simply define a schema and
+Dgraph automatically builds out CRUD and query APIs. Dgraph works as a
+standards-compliant GraphQL server, so many web and app developers may not
+know (or care) that Dgraph is a powerful graph database as well.
+
+As a native GraphQL database, Dgraph doesn’t have a relational database
+running in the background, or complex resolvers to map between database and
+GraphQL schemas. We often call this "single-schema development." The big time
+savings is that there are no GraphQL resolvers or custom queries needed to get
+started. All GraphQL fields are "resolved" simply by following our graph
+database edges to required fields. With single-schema development, you can
+change your GraphQL schema, insert data, and call your new APIs in seconds,
+not hours.
+
+If you are a SQL user, check out:
+
+[Dgraph for SQL Users](https://dgraph.io/learn/courses/datamodel/sql-to-dgraph/overview/introduction/).
+
+Read more about the motivations for GraphQL and how Facebook still uses it to
+provide generic yet efficient data access in
+[the original announcement of the spec from Facebook](https://engineering.fb.com/2015/09/14/core-data/graphql-a-data-query-language/).
+\*/}
+
+## The Graph Model - Nodes, Relationships and Values
+
+Dgraph is fundamentally a
+[**property-graph database**](https://www.dataversity.net/what-is-a-property-graph/)
+because it stores nodes, relations among those nodes, and associated
+properties for any relation.
+
+{/\* TODO move this to some JSON format page
+
+**Dgraph JSON input example with a facet:**
+
+    {
+      "name": "Bob",
+      "Address": {
+        "street": "123 Main St."
+      },
+      "Address|since": "2022-02-22"
+    }
+
+This JSON structure succinctly represents rich data:
+
+- **Nodes**: A Person node and Address node are included
+- **Relation**: The Person node is related to the Address node via an
+  "Address" directed relationship
+- **Values**: the person's name is "Bob" and the Address street component is
+  "123 Main St."
+- **Facet** metadata: the Address relation is qualified with a property
+  specifying the Address relationship started on February 22, 2022. \*/}
+
+Dgraph supports [JSON](./json-mutation-format) data as both a return structure
+and an insert/update format. In Dgraph JSON, nesting represents relations
+among nodes, so `{ "name":"Bob", "homeAddress": { "Street":"123 Main st" } }`
+efficiently and intuitively represents a Person node, an Address node, and a
+relation (called "homeAddress") between them.
+
+In addition, Dgraph supports [RDF triples](./dql-rdf) as an input and output
+format.
+
+Dgraph relationships are directed links between nodes, allowing optimized
+traversal from node to node. Dgraph allows a bidirectional relation via
+directed relationships in both directions if desired.
+
+## Application developers and data engineers work together seamlessly
+
+Dgraph allows a particularly smooth interaction among data teams or experts
+and data consumers. GraphQL's flexibility empowers data consumers to get
+exactly the data they want, in the format they want it, at the speed they
+need, without writing custom REST APIs or understanding a new graph query
+language.
+
+Database experts can focus on the data, schema and indexes, without
+maintaining a sprawling set of REST APIs, views, or optimized queries tailored
+to each data consumer or app.
+
+### Dgraph Cloud Cluster Types
+
+- **Shared Instance**: Dgraph Cloud with
+  [shared instances](https://cloud.dgraph.io/pricing?type=shared) is a
+  fully-managed graph database service that lets you focus on building apps,
+  not managing infrastructure. This is a fast and easy way to get started with
+  GraphQL, and does not require any graph database knowledge to start and run.
+
+  Shared instances run in a common database using Dgraph multi-tenancy. Your
+  data is protected, but you share resources and will have limited scale.
+
+  Try the [Introduction to GraphQL](https://dgraph.io/tour/graphqlintro/2/)
+  tutorial to define a GraphQL schema, insert and query data in just a few
+  minutes.
+
+- **Dedicated instances** run on their own dedicated hardware to ensure
+  consistent performance. This option extends the capabilities of the
+  lower-cost shared instances to support enterprise, production workloads,
+  and includes a high availability option.
+
+## Dgraph Architecture
+
+Dgraph scales to larger data sizes than other graph databases because it is
+designed from the ground up to be distributed.
+Therefore, Dgraph runs as a cluster of server nodes which communicate to form
+a single logical data store. There are two main types of processes (nodes)
+running: Zeros and Alphas.
+
+- **Dgraph Zero** server nodes hold metadata for the Dgraph cluster,
+  coordinate distributed transactions, and re-balance data among server
+  groups.
+
+- **Dgraph Alpha** server nodes store the graph data and indices. Unlike
+  non-distributed graph databases, Dgraph Alphas store and index "predicates"
+  which represent the relations among data elements. This unique indexing
+  approach allows Dgraph to perform a database query with depth N in only N
+  network hops, making it faster and more scalable for distributed (sharded)
+  data sets.
+  ![Dgraph Internal Architecture](/images/overview/dgraph-architecture.png)
+
+In addition, people use common tools to define schemas, load data, and query
+the database:
+
+- **GraphQL IDEs**: A number of GraphQL IDEs are available to update GraphQL
+  schemas and run GraphQL updates and queries.
+  [One of these IDEs is GraphiQL](https://github.com/graphql/graphiql)
+
+- **Ratel**: Ratel is a GUI app from Dgraph that runs DQL queries and
+  mutations, and allows schema viewing and editing (as well as some cluster
+  management operations).
+
+- **Dgraph lambdas**: A Dgraph lambda is a data function written in JavaScript
+  that can augment results of a query. Lambdas implement database triggers and
+  custom GraphQL resolvers, and run in an optional Node.js server (included in
+  any cloud deployment).
+
+### Scale, replication and sharding
+
+Every cluster will have at least one Dgraph Zero node and one Dgraph Alpha
+node. Databases are then expanded in two main ways (replication and sharding),
+with self-healing keeping cloud deployments at full capacity:
+
+- **High Availability Replication**: For high availability, Dgraph runs with
+  three Zeros and three Alphas instead of one of each. This configuration is
+  recommended for the scale and reliability required by most production
+  applications. Having three servers both triples the capacity of the overall
+  cluster, and also provides redundancy.
+
+- **Sharding**: When data sizes approach or exceed 1 TB, Dgraph databases are
+  typically sharded so that full data replicas are not kept on any single
+  Alpha node. With sharding, data is distributed across many nodes (or node
+  groups) to achieve higher scale. Sharding and high availability combine when
+  desired to provide massive scale and ideal reliability.
+
+- **Self-healing**: In Dgraph's cloud offering, Kubernetes is used to
+  automatically detect, restart and heal any cluster (HA, sharded, both or
+  neither) to keep things running smoothly and at full capacity.
+
+## What's Next
+
+- **Get Started** with a [free database instance](https://cloud.dgraph.io)
+- Get familiar with some terms in our [Glossary](/dgraph-glossary)
+- Take the [Dgraph tour](https://dgraph.io/tour/)
+- Go through some [tutorials](./learn)
diff --git a/dgraph/reference/dql/clients/csharp.mdx b/dgraph/reference/dql/clients/csharp.mdx
new file mode 100644
index 00000000..ce2d31a9
--- /dev/null
+++ b/dgraph/reference/dql/clients/csharp.mdx
@@ -0,0 +1,196 @@
+---
+title: C#
+---
+
+An implementation of a Dgraph client in C#, using [gRPC](https://grpc.io/).
+This client follows the [Dgraph Go client](./go) closely.
+
+  The official C# client [can be found
+  here](https://github.com/dgraph-io/dgraph.net). Follow the [install
+  instructions](https://github.com/dgraph-io/dgraph.net#install) to get it up
+  and running.
+
+## Supported Versions
+
+More details on the supported versions can be found at
+[this link](https://github.com/dgraph-io/dgraph.net#supported-versions).
+
+## Using a Client
+
+### Creating a Client
+
+Make a new client by passing in one or more gRPC channels pointing to Alpha
+instances.
+
+```c#
+var client = new DgraphClient(new Channel("127.0.0.1:9080", ChannelCredentials.Insecure));
+```
+
+### Multi-tenancy
+
+In [multi-tenancy](./multitenancy) environments, Dgraph allows users to log in
+to a specific namespace by passing a `LoginRequest` to the `Login` method.
+
+To create a Dgraph client and make the client log into namespace `123`:
+
+```c#
+var lr = new Api.LoginRequest() {
+    UserId = "userId",
+    Password = "password",
+    Namespace = 123
+};
+client.Login(lr);
+```
+
+In the example above, the client logs into namespace `123` using username
+`userId` and password `password`. Once logged in, the client can perform all
+the operations allowed to the `userId` user of namespace `123`.
+
+### Creating a Client for Dgraph Cloud Endpoint
+
+If you want to connect to Dgraph running on your
+[Dgraph Cloud](https://cloud.dgraph.io) instance, then all you need is the URL
+of your Dgraph Cloud endpoint and the API key. You can get a client using them
+as follows:
+
+```c#
+var client = new DgraphClient(SlashChannel.Create("https://frozen-mango.eu-central-1.aws.cloud.dgraph.io/graphql", "api-key-here"));
+```
+
+### Altering the Database
+
+To set the schema, pass the schema into the `DgraphClient.Alter` function, as
+seen below:
+
+```c#
+var schema = "name: string @index(exact) .";
+var result = client.Alter(new Operation{ Schema = schema });
+```
+
+The returned result object is based on the FluentResults library. You can
+check the status using `result.IsSuccess` or `result.IsFailed`. More
+information on the result object can be found
+[here](https://github.com/altmann/FluentResults).
+
+### Creating a Transaction
+
+To create a transaction, call the `DgraphClient.NewTransaction` method, which
+returns a new `Transaction` object. This operation incurs no network overhead.
+
+It is good practice to wrap the `Transaction` in a `using` block, so that the
+`Transaction.Dispose` function is called after running the transaction.
+
+```c#
+using(var transaction = client.NewTransaction()) {
+    ...
+}
+```
+
+You can also create read-only transactions. Read-only transactions only allow
+querying, and can be created using `DgraphClient.NewReadOnlyTransaction`.
+
+### Running a Mutation
+
+`Transaction.Mutate(RequestBuilder)` runs a mutation. It takes in a JSON
+mutation string.
+
+We define a person object to represent a person and serialize it to a JSON
+mutation string. In this example, we are using the
+[JSON.NET](https://www.newtonsoft.com/json) library, but you can use any JSON
+serialization library you prefer.
+
+```c#
+using(var txn = client.NewTransaction()) {
+    var alice = new Person{ Name = "Alice" };
+    var json = JsonConvert.SerializeObject(alice);
+
+    var transactionResult = await txn.Mutate(new RequestBuilder().WithMutations(new MutationBuilder{ SetJson = json }));
+}
+```
+
+You can also set mutations using RDF format, if you so prefer, as seen below:
+
+```c#
+var mutation = "_:alice <name> \"Alice\" .";
+var transactionResult = await txn.Mutate(new RequestBuilder().WithMutations(new MutationBuilder{ SetNquads = mutation }));
+```
+
+Check out the example in `source/Dgraph.tests.e2e/TransactionTest.cs`.
+
+### Running a Query
+
+You can run a query by calling `Transaction.Query(string)`. You will need to
+pass in a DQL query string. If you want to pass an additional map of any
+variables that you might want to set in the query, call
+`Transaction.QueryWithVars(string, Dictionary<string, string>)` with the
+variables dictionary as the second argument.
+
+The response contains the result as a JSON string.
+
+Let’s run the following query with a variable `$a`:
+
+```console
+query all($a: string) {
+  all(func: eq(name, $a))
+  {
+    name
+  }
+}
+```
+
+Run the query and print out the JSON result:
+
+```c#
+// Run query.
+var query = @"query all($a: string) {
+  all(func: eq(name, $a))
+  {
+    name
+  }
+}";
+
+var vars = new Dictionary<string, string> { { "$a", "Alice" } };
+var res = await dgraphClient.NewReadOnlyTransaction().QueryWithVars(query, vars);
+
+// Print results.
+Console.Write(res.Value.Json);
+```
+
+### Running an Upsert: Query + Mutation
+
+The `Transaction.Mutate` function allows you to run upserts consisting of one
+query and one mutation.
+
+```c#
+var query = @"
+  query {
+      user as var(func: eq(email, ""wrong_email@dgraph.io""))
+  }";
+
+var mutation = new MutationBuilder{ SetNquads = "uid(user) <email> \"correct_email@dgraph.io\" ." };
+
+var request = new RequestBuilder{ Query = query, CommitNow = true }.WithMutations(mutation);
+
+// Upsert: If wrong_email found, update the existing data
+// or else perform a new mutation.
+await txn.Mutate(request);
+```
+
+### Committing a Transaction
+
+A transaction can be committed using the `Transaction.Commit` method. If your
+transaction consisted solely of calls to `Transaction.Query` or
+`Transaction.QueryWithVars`, and no calls to `Transaction.Mutate`, then
+calling `Transaction.Commit` is not necessary.
+
+An error will be returned if other transactions running concurrently modify
+the same data that was modified in this transaction. It is up to the user to
+retry transactions when they fail.
+
+```c#
+using(var txn = client.NewTransaction()) {
+    var result = txn.Commit();
+}
+```
diff --git a/dgraph/reference/dql/clients/go.mdx b/dgraph/reference/dql/clients/go.mdx
new file mode 100644
index 00000000..4c7ace9e
--- /dev/null
+++ b/dgraph/reference/dql/clients/go.mdx
@@ -0,0 +1,526 @@
+---
+title: Go
+---
+
+[![GoDoc](https://godoc.org/github.com/dgraph-io/dgo?status.svg)](https://godoc.org/github.com/dgraph-io/dgo)
+
+The Go client communicates with the server on the gRPC port (default value
+9080).
+
+The client can be obtained in the usual way via `go get`:
+
+```sh
+# Requires at least Go 1.11
+export GO111MODULE=on
+go get -u -v github.com/dgraph-io/dgo/v210
+```
+
+The full [GoDoc](https://godoc.org/github.com/dgraph-io/dgo) contains
+documentation for the client API along with examples showing how to use it.
+
+More details on the supported versions can be found at
+[this link](https://github.com/dgraph-io/dgo#supported-versions).
+
+## Create the client
+
+To create a client, dial a connection to Dgraph's external gRPC port
+(typically `9080`). The following code snippet shows just one connection. You
+can connect to multiple Dgraph Alphas to distribute the workload evenly.
+
+```go
+func newClient() *dgo.Dgraph {
+	// Dial a gRPC connection. The address to dial to can be configured when
+	// setting up the dgraph cluster.
+	d, err := grpc.Dial("localhost:9080", grpc.WithInsecure())
+	if err != nil {
+		log.Fatal(err)
+	}
+
+	return dgo.NewDgraphClient(
+		api.NewDgraphClient(d),
+	)
+}
+```
+
+The client can be configured to use gRPC compression:
+
+```go
+func newClient() *dgo.Dgraph {
+	// Dial a gRPC connection. The address to dial to can be configured when
+	// setting up the dgraph cluster.
+	dialOpts := append([]grpc.DialOption{},
+		grpc.WithInsecure(),
+		grpc.WithDefaultCallOptions(grpc.UseCompressor(gzip.Name)))
+	d, err := grpc.Dial("localhost:9080", dialOpts...)
+
+	if err != nil {
+		log.Fatal(err)
+	}
+
+	return dgo.NewDgraphClient(
+		api.NewDgraphClient(d),
+	)
+}
+
+```
+
+### Multi-tenancy
+
+In [multi-tenancy](./multitenancy) environments, Dgraph provides a new method
+`LoginIntoNamespace()`, which allows users to log in to a specific namespace.
+
+To create a dgo client and make the client log into namespace `123`:
+
+```go
+conn, err := grpc.Dial("127.0.0.1:9080", grpc.WithInsecure())
+if err != nil {
+	glog.Error("While trying to dial gRPC, got error", err)
+}
+dc := dgo.NewDgraphClient(api.NewDgraphClient(conn))
+ctx := context.Background()
+// Login to namespace 123
+if err := dc.LoginIntoNamespace(ctx, "groot", "password", 123); err != nil {
+	glog.Error("Failed to login: ", err)
+}
+```
+
+In the example above, the client logs into namespace `123` using username
+`groot` and password `password`. Once logged in, the client can perform all
+the operations allowed to the `groot` user of namespace `123`.
+
+### Creating a Client for Dgraph Cloud Endpoint
+
+If you want to connect to Dgraph running on your
+[Dgraph Cloud](https://cloud.dgraph.io) instance, then all you need is the URL
+of your Dgraph Cloud endpoint and the API key. You can get a client using them
+as follows:
+
+```go
+// This example uses dgo
+conn, err := dgo.DialCloud("https://frozen-mango.eu-central-1.aws.cloud.dgraph.io/graphql", "<api-key>")
+if err != nil {
+	log.Fatal(err)
+}
+defer conn.Close()
+dgraphClient := dgo.NewDgraphClient(api.NewDgraphClient(conn))
+```
+
+  The `dgo.DialSlashEndpoint()` method has been deprecated and will be removed
+  in v21.07. Please use `dgo.DialCloud()` instead.
+
+## Alter the database
+
+To set the schema, set it on an `api.Operation` object, and pass it down to
+the `Alter` method.
+
+```go
+func setup(c *dgo.Dgraph) {
+	// Install a schema into dgraph. Accounts have a `name` and a `balance`.
+	err := c.Alter(context.Background(), &api.Operation{
+		Schema: `
+			name: string @index(term) .
+			balance: int .
+		`,
+	})
+}
+```
+
+`api.Operation` contains other fields as well, including drop predicate and
+drop all. Drop all is useful if you wish to discard all the data, and start
+from a clean slate, without bringing the instance down.
+
+```go
+	// Drop all data including schema from the dgraph instance. This is useful
+	// for small examples such as this, since it puts dgraph into a clean
+	// state.
+	err := c.Alter(context.Background(), &api.Operation{DropOp: api.Operation_ALL})
+```
+
+The old way to send a drop all operation is still supported but will
+eventually be deprecated. It's shown below for reference.
+
+```go
+	// Drop all data including schema from the dgraph instance. This is useful
+	// for small examples such as this, since it puts dgraph into a clean
+	// state.
+	err := c.Alter(context.Background(), &api.Operation{DropAll: true})
+```
+
+Starting with version 1.1, `api.Operation` also supports a drop data
+operation.
+This operation drops all the data but preserves the schema. This is useful
+when the schema is large and needs to be reused, such as in between unit
+tests.
+
+```go
+	// Drop all data, but preserve the schema. This is useful when the schema
+	// is large and needs to be reused, such as in between unit tests.
+	err := c.Alter(context.Background(), &api.Operation{DropOp: api.Operation_DATA})
+```
+
+## Create a transaction
+
+Dgraph supports running distributed ACID transactions. To create a
+transaction, just call `c.NewTxn()`. This operation doesn't incur any network
+calls. Typically, you'd also want to call a `defer txn.Discard(ctx)` to let it
+automatically roll back in case of errors. Calling `Discard` after `Commit`
+would be a no-op.
+
+```go
+func runTxn(c *dgo.Dgraph) {
+	txn := c.NewTxn()
+	defer txn.Discard(ctx)
+	...
+}
+```
+
+### Read-Only Transactions
+
+Read-only transactions can be created by calling `c.NewReadOnlyTxn()`.
+Read-only transactions are useful to increase read speed because they can
+circumvent the usual consensus protocol. Read-only transactions cannot contain
+mutations, and trying to call `txn.Commit()` will result in an error. Calling
+`txn.Discard()` will be a no-op.
+
+Read-only queries can optionally be set as best-effort. Using this flag will
+ask the Dgraph Alpha to try to get timestamps from memory on a best-effort
+basis to reduce the number of outbound requests to Zero. This may yield
+improved latencies in read-bound workloads where linearizable reads are not
+strictly needed.
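+
+For example, a brief sketch assuming `c` is a `*dgo.Dgraph` as in the earlier
+snippets:
+
+```go
+	// A plain read-only transaction; calling Commit on it returns an error.
+	txn := c.NewReadOnlyTxn()
+
+	// The same kind of transaction, additionally marked best-effort, which
+	// may serve a slightly older snapshot to avoid a timestamp round trip
+	// to Zero.
+	txn = c.NewReadOnlyTxn().BestEffort()
+
+	resp, err := txn.Query(context.Background(), `{ q(func: has(name)) { count(uid) } }`)
+	if err != nil {
+		log.Fatal(err)
+	}
+	fmt.Println(string(resp.GetJson()))
+```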
+
+## Run a query
+
+You can run a query by calling `txn.Query`. The response would contain a
+`JSON` field, which has the JSON encoded result. You can unmarshal it into a
+Go struct via `json.Unmarshal`.
+
+```go
+	// Query the balance for Alice and Bob.
+	const q = `
+		{
+			all(func: anyofterms(name, "Alice Bob")) {
+				uid
+				balance
+			}
+		}
+	`
+	resp, err := txn.Query(context.Background(), q)
+	if err != nil {
+		log.Fatal(err)
+	}
+
+	// After we get the balances, we have to decode them into structs so that
+	// we can manipulate the data.
+	var decode struct {
+		All []struct {
+			Uid     string
+			Balance int
+		}
+	}
+	if err := json.Unmarshal(resp.GetJson(), &decode); err != nil {
+		log.Fatal(err)
+	}
+```
+
+## Query with RDF response
+
+You can get a query result as an RDF response by calling `txn.QueryRDF`. The
+response would contain an `Rdf` field, which has the RDF encoded result.
+
+  If you are querying only for `uid` values, use a JSON format response.
+
+```go
+	// Query the balance for Alice and Bob.
+	const q = `
+		{
+			all(func: anyofterms(name, "Alice Bob")) {
+				name
+				balance
+			}
+		}
+	`
+	resp, err := txn.QueryRDF(context.Background(), q)
+	if err != nil {
+		log.Fatal(err)
+	}
+
+	// <0x17> <name> "Alice" .
+	// <0x17> <balance> 100 .
+	fmt.Println(resp.Rdf)
+```
+
+## Run a mutation
+
+`txn.Mutate` runs the mutation. It takes in an `api.Mutation` object, which
+provides two main ways to set data: JSON and RDF N-Quad. You can choose
+whichever way is convenient.
+
+To use JSON, use the fields SetJson and DeleteJson, which accept a string
+representing the nodes to be added or removed respectively (either as a JSON
+map or a list). To use RDF, use the fields SetNquads and DeleteNquads, which
+accept a string representing the valid RDF triples (one per line) to be added
+or removed respectively. This protobuf object also contains the Set and Del
+fields, which accept a list of RDF triples that have already been parsed into
+our internal format. As such, these fields are mainly used internally, and
+users should use SetNquads and DeleteNquads instead if they are planning on
+using RDF.
+
+We're going to continue using JSON. You could modify the Go structs parsed
+from the query, and marshal them back into JSON.
+
+```go
+	// Move $5 between the two accounts.
+	decode.All[0].Balance += 5
+	decode.All[1].Balance -= 5
+
+	out, err := json.Marshal(decode.All)
+	if err != nil {
+		log.Fatal(err)
+	}
+
+	_, err = txn.Mutate(context.Background(), &api.Mutation{SetJson: out})
+```
+
+Sometimes, you only want to commit a mutation, without querying anything
+further. In such cases, you can use the `CommitNow` field in `api.Mutation` to
+indicate that the mutation must be immediately committed.
+
+## Commit the transaction
+
+Once all the queries and mutations are done, you can commit the transaction.
+It returns an error in case the transaction could not be committed.
+
+```go
+	// Finally, we can commit the transactions. An error will be returned if
+	// other transactions running concurrently modify the same data that was
+	// modified in this transaction. It is up to the library user to retry
+	// transactions when they fail.
+
+	err := txn.Commit(context.Background())
+```
+
+## Complete Example
+
+This is an example from the [GoDoc](https://godoc.org/github.com/dgraph-io/dgo).
+It shows how to create a `Node` with name `Alice`, while also creating her
+relationships with other nodes.
+
+  The `loc` predicate is of type `geo` and can be easily marshaled and
+  unmarshaled into a Go struct. More such examples are present as part of the
+  GoDoc.
+
+  You can also download this complete example file from our [GitHub
+  repository](https://github.com/dgraph-io/dgo/blob/master/example_set_object_test.go).
+
+```go
+package dgo_test
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"log"
+	"time"
+
+	"github.com/dgraph-io/dgo/v200/protos/api"
+)
+
+type School struct {
+	Name  string   `json:"name,omitempty"`
+	DType []string `json:"dgraph.type,omitempty"`
+}
+
+type loc struct {
+	Type   string    `json:"type,omitempty"`
+	Coords []float64 `json:"coordinates,omitempty"`
+}
+
+// If omitempty is not set, then edges with empty values (0 for int/float, ""
+// for string, false for bool) would be created for values not specified
+// explicitly.
+
+type Person struct {
+	Uid      string     `json:"uid,omitempty"`
+	Name     string     `json:"name,omitempty"`
+	Age      int        `json:"age,omitempty"`
+	Dob      *time.Time `json:"dob,omitempty"`
+	Married  bool       `json:"married,omitempty"`
+	Raw      []byte     `json:"raw_bytes,omitempty"`
+	Friends  []Person   `json:"friend,omitempty"`
+	Location loc        `json:"loc,omitempty"`
+	School   []School   `json:"school,omitempty"`
+	DType    []string   `json:"dgraph.type,omitempty"`
+}
+
+func Example_setObject() {
+	dg, cancel := getDgraphClient()
+	defer cancel()
+
+	dob := time.Date(1980, 01, 01, 23, 0, 0, 0, time.UTC)
+	// While setting an object, if a struct has a Uid then its properties in
+	// the graph are updated; else a new node is created.
+	// In the example below, new nodes for Alice, Bob, Charlie and school are
+	// created (since they don't have a Uid).
+ p := Person{ + Uid: "_:alice", + Name: "Alice", + Age: 26, + Married: true, + DType: []string{"Person"}, + Location: loc{ + Type: "Point", + Coords: []float64{1.1, 2}, + }, + Dob: &dob, + Raw: []byte("raw_bytes"), + Friends: []Person{{ + Name: "Bob", + Age: 24, + DType: []string{"Person"}, + }, { + Name: "Charlie", + Age: 29, + DType: []string{"Person"}, + }}, + School: []School{{ + Name: "Crown Public School", + DType: []string{"Institution"}, + }}, + } + + op := &api.Operation{} + op.Schema = ` + name: string @index(exact) . + age: int . + married: bool . + loc: geo . + dob: datetime . + Friend: [uid] . + type: string . + coords: float . + type Person { + name: string + age: int + married: bool + Friend: [Person] + loc: Loc + } + type Institution { + name: string + } + type Loc { + type: string + coords: float + } + ` + + ctx := context.Background() + if err := dg.Alter(ctx, op); err != nil { + log.Fatal(err) + } + + mu := &api.Mutation{ + CommitNow: true, + } + pb, err := json.Marshal(p) + if err != nil { + log.Fatal(err) + } + + mu.SetJson = pb + response, err := dg.NewTxn().Mutate(ctx, mu) + if err != nil { + log.Fatal(err) + } + + // Assigned uids for nodes which were created would be returned in the response.Uids map. + variables := map[string]string{"$id1": response.Uids["alice"]} + q := `query Me($id1: string){ + me(func: uid($id1)) { + name + dob + age + loc + raw_bytes + married + dgraph.type + friend @filter(eq(name, "Bob")){ + name + age + dgraph.type + } + school { + name + dgraph.type + } + } + }` + + resp, err := dg.NewTxn().QueryWithVars(ctx, q, variables) + if err != nil { + log.Fatal(err) + } + + type Root struct { + Me []Person `json:"me"` + } + + var r Root + err = json.Unmarshal(resp.Json, &r) + if err != nil { + log.Fatal(err) + } + + out, _ := json.MarshalIndent(r, "", "\t") + fmt.Printf("%s\n", out) +} +``` + +Example output result: + +```json + Output: { + "me": [ + { + "name": "Alice", + "age": 26, + "dob": "1980-01-01T23:00:00Z", + "married": true, + "raw_bytes": "cmF3X2J5dGVz", + "friend": [ + { + "name": "Bob", + "age": 24, + "loc": {}, + "dgraph.type": [ + "Person" + ] + } + ], + "loc": { + "type": "Point", + "coordinates": [ + 1.1, + 2 + ] + }, + "school": [ + { + "name": "Crown Public School", + "dgraph.type": [ + "Institution" + ] + } + ], + "dgraph.type": [ + "Person" + ] + } + ] + } +``` diff --git a/dgraph/reference/dql/clients/index.mdx b/dgraph/reference/dql/clients/index.mdx new file mode 100644 index 00000000..0b8a783d --- /dev/null +++ b/dgraph/reference/dql/clients/index.mdx @@ -0,0 +1,58 @@ +--- +title: Client Libraries +description: Dgraph client libraries in various programming languages. +--- + +Dgraph client libraries allow you to run DQL transactions, queries and mutations +in various programming languages. + +If you are interested in clients for GraphQL endpoint, please refer to +[GraphQL clients](./graphql-clients) section. + +Go, python, Java, C# and JavaScript clients are using +**[gRPC](http://www.grpc.io/):** protocol and +[Protocol Buffers](https://developers.google.com/protocol-buffers) (the proto +file used by Dgraph is located at +[api.proto](https://github.com/dgraph-io/dgo/blob/master/protos/api.proto)). + +A JavaScript client using **HTTP:** is also available. + +It's possible to interface with Dgraph directly via gRPC or HTTP. However, if a +client library exists for your language, that will be an easier option. + + + For multi-node setups, predicates are assigned to the group that first sees + that predicate. 
Dgraph also automatically moves predicate data to different
+  groups in order to balance predicate distribution. This occurs automatically
+  every 10 minutes. It's possible for clients to aid this process by
+  communicating with all Dgraph instances. For the Go client, this means
+  passing in one `*grpc.ClientConn` per Dgraph instance, or routing traffic
+  through a load balancer. Mutations will be made in a round-robin fashion,
+  resulting in a semi-random initial predicate distribution.
+
+
+### Transactions
+
+Dgraph clients perform mutations and queries using transactions. A transaction
+bounds a sequence of queries and mutations that are committed by Dgraph as a
+single unit: that is, on commit, either all the changes are accepted by Dgraph
+or none are.
+
+A transaction always sees the database state at the moment it began, plus any
+changes it makes --- changes from concurrent transactions aren't visible.
+
+On commit, Dgraph will abort a transaction, rather than committing changes, when
+a conflicting, concurrently running transaction has already been committed. Two
+transactions conflict when both transactions:
+
+- write values to the same scalar predicate of the same node (e.g., both
+  attempting to set a particular node's `address` predicate); or
+- write to a singular `uid` predicate of the same node (changes to `[uid]`
+  predicates can be concurrently written); or
+- write a value that conflicts on an index for a predicate with `@upsert` set in
+  the schema (see [upserts](./howto/upserts.md)).
+
+When a transaction is aborted, all its changes are discarded. Transactions can
+be manually aborted.
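+
+Client code is expected to retry a transaction that was aborted on commit. A
+minimal retry loop in Go might look like the following sketch (the helper and
+its `maxRetries` parameter are illustrative, not part of any client API; the
+important point is that the whole transaction is re-run, not just the commit):
+
+```go
+// retryTxn re-runs fn in a fresh transaction until a commit succeeds or
+// maxRetries is exhausted. fn should contain the complete query-and-mutate
+// logic for the transaction.
+func retryTxn(ctx context.Context, c *dgo.Dgraph, maxRetries int,
+  fn func(txn *dgo.Txn) error) error {
+  var err error
+  for i := 0; i < maxRetries; i++ {
+    txn := c.NewTxn()
+    if err = fn(txn); err == nil {
+      // An aborted transaction surfaces as an error from Commit.
+      err = txn.Commit(ctx)
+    }
+    txn.Discard(ctx) // no-op if the commit already succeeded
+    if err == nil {
+      return nil
+    }
+  }
+  return err
+}
+```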
+
+### In this section
diff --git a/dgraph/reference/dql/clients/java.mdx b/dgraph/reference/dql/clients/java.mdx
new file mode 100644
index 00000000..5eb98e94
--- /dev/null
+++ b/dgraph/reference/dql/clients/java.mdx
@@ -0,0 +1,595 @@
+---
+title: Java
+---
+
+A minimal implementation of a Dgraph client for Java 1.8 and above, using
+[gRPC](https://grpc.io/). This client follows the [Dgraph Go client](./go)
+closely.
+
+
+  The official Java client [can be found
+  here](https://github.com/dgraph-io/dgraph4j). Follow the [install
+  instructions](https://github.com/dgraph-io/dgraph4j#download) to get it up and
+  running.
+
+
+## Supported Versions
+
+More details on the supported versions can be found at
+[this link](https://github.com/dgraph-io/dgraph4j#supported-versions).
+
+## Quickstart
+
+Build and run the
+[DgraphJavaSample](https://github.com/dgraph-io/dgraph4j/tree/master/samples/DgraphJavaSample)
+project in the `samples` folder, which contains an end-to-end example of using
+the Dgraph Java client. Follow the instructions in the
+[README](https://github.com/dgraph-io/dgraph4j/tree/master/samples/DgraphJavaSample/README.md)
+of that project.
+
+## Intro
+
+This library supports two styles of clients: the synchronous client
+`DgraphClient` and the async client `DgraphAsyncClient`. A `DgraphClient` or
+`DgraphAsyncClient` can be initialized by passing it a list of
+`DgraphBlockingStub` clients. The `anyClient()` API can randomly pick a stub,
+which can then be used for gRPC operations. In the next section, we explain how
+to create a synchronous client and use it to mutate or query Dgraph. For the
+async client, more details can be found in the
+[Using the Asynchronous Client](#using-the-asynchronous-client) section.
+
+## Using the Synchronous Client
+
+
+  You can find a
+  [DgraphJavaSample](https://github.com/dgraph-io/dgraph4j/tree/master/samples/DgraphJavaSample)
+  project, which contains an end-to-end working example of how to use the Java
+  client.
+
+
+### Creating a Client
+
+The following code snippet shows how to create a synchronous client using three
+connections.
+
+```java
+ManagedChannel channel1 = ManagedChannelBuilder
+    .forAddress("localhost", 9080)
+    .usePlaintext().build();
+DgraphStub stub1 = DgraphGrpc.newStub(channel1);
+
+ManagedChannel channel2 = ManagedChannelBuilder
+    .forAddress("localhost", 9082)
+    .usePlaintext().build();
+DgraphStub stub2 = DgraphGrpc.newStub(channel2);
+
+ManagedChannel channel3 = ManagedChannelBuilder
+    .forAddress("localhost", 9083)
+    .usePlaintext().build();
+DgraphStub stub3 = DgraphGrpc.newStub(channel3);
+
+DgraphClient dgraphClient = new DgraphClient(stub1, stub2, stub3);
+```
+
+### Login using ACL
+
+If [ACL](./access-control-lists) is enabled, then you can log in to the default
+namespace (`0`) with the following method:
+
+```java
+dgraphClient.login(USER_ID, USER_PASSWORD);
+```
+
+### Multi-tenancy
+
+If [multi-tenancy](./multitenancy) is enabled, the login method on the client
+logs in to namespace `0` by default. In order to log in to another namespace,
+use the `loginIntoNamespace` method on the client:
+
+```java
+dgraphClient.loginIntoNamespace(USER_ID, USER_PASSWORD, NAMESPACE);
+```
+
+Once logged in, the `dgraphClient` object can be used to do any further
+operations.
+
+### Creating a Client for Dgraph Cloud Endpoint
+
+If you want to connect to Dgraph running on your
+[Dgraph Cloud](https://cloud.dgraph.io) instance, then all you need is the URL
+of your Dgraph Cloud endpoint and the API key. You can get a client using them
+as follows:
+
+```java
+DgraphStub stub = DgraphClient.clientStubFromCloudEndpoint("https://civic-wine.us-west-2.aws.cloud.dgraph.io/graphql", "your-api-key");
+DgraphClient dgraphClient = new DgraphClient(stub);
+```
+
+
+  The `DgraphClient.clientStubFromSlashEndpoint()` method has been deprecated
+  and will be removed in v21.07. Please use
+  `DgraphClient.clientStubFromCloudEndpoint()` instead.
+
+
+### Creating a Secure Client using TLS
+
+To set up a client using TLS, you could use the following code snippet. The
+server needs to be set up using the instructions provided
+[here](./tls-configuration).
+
+If you are doing client verification, you need to convert the client key from
+PKCS#1 format to PKCS#8 format. By default, gRPC doesn't support reading PKCS#1
+format keys. To convert the format, you could use the `openssl` tool.
+
+First, let's install the `openssl` tool:
+
+```sh
+apt install openssl
+```
+
+Now, use the following command to convert the key:
+
+```sh
+openssl pkcs8 -in client.name.key -topk8 -nocrypt -out client.name.java.key
+```
+
+Now, you can use the following code snippet to connect to Alpha over TLS:
+
+```java
+SslContextBuilder builder = GrpcSslContexts.forClient();
+builder.trustManager(new File("<path to CA cert>"));
+// Skip the keyManager call below if you are not performing client verification.
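+// The first argument is the client certificate chain, and the second is the
+// client private key in PKCS#8 format (the one produced by the openssl
+// command above).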
+builder.keyManager(new File("<path to client cert>"), new File("<path to converted client key>"));
+SslContext sslContext = builder.build();
+
+ManagedChannel channel = NettyChannelBuilder.forAddress("localhost", 9080)
+    .sslContext(sslContext)
+    .build();
+DgraphGrpc.DgraphStub stub = DgraphGrpc.newStub(channel);
+DgraphClient dgraphClient = new DgraphClient(stub);
+```
+
+### Check Dgraph version
+
+Checking the version of the Dgraph server this client is interacting with is as
+easy as:
+
+```java
+Version v = dgraphClient.checkVersion();
+System.out.println(v.getTag());
+```
+
+Checking the version before doing anything else can be used as a test to find
+out if the client is able to communicate with the Dgraph server. This also
+helps reduce the latency of the first query/mutation, which results from some
+dynamic library loading and linking that happens in the JVM (see
+[this issue](https://github.com/dgraph-io/dgraph4j/issues/108) for more
+details).
+
+### Altering the Database
+
+To set the schema, create an `Operation` object, set the schema, and pass it to
+the `DgraphClient#alter` method.
+
+```java
+String schema = "name: string @index(exact) .";
+Operation operation = Operation.newBuilder().setSchema(schema).build();
+dgraphClient.alter(operation);
+```
+
+Starting with Dgraph version 20.03.0, indexes can be computed in the
+background. You can call the function `setRunInBackground(true)` as shown below
+before calling `alter`. You can find more details
+[here](./dql-schema.md#indexes-in-background).
+
+```java
+String schema = "name: string @index(exact) .";
+Operation operation = Operation.newBuilder()
+    .setSchema(schema)
+    .setRunInBackground(true)
+    .build();
+dgraphClient.alter(operation);
+```
+
+`Operation` contains other fields as well, including drop predicate and drop
+all. Drop all is useful if you wish to discard all the data and start from a
+clean slate, without bringing the instance down.
+
+```java
+// Drop all data including schema from the dgraph instance. This is useful
+// for small examples such as this, since it puts dgraph into a clean
+// state.
+dgraphClient.alter(Operation.newBuilder().setDropAll(true).build());
+```
+
+### Creating a Transaction
+
+There are two types of transactions in Dgraph: read-only transactions, which
+only include queries, and read-write transactions, which change data in Dgraph
+with mutate operations. Both the synchronous client `DgraphClient` and the
+async client `DgraphAsyncClient` support the two types of transactions by
+providing the `newTransaction` and the `newReadOnlyTransaction` APIs. Creating
+a transaction is a local operation and incurs no network overhead.
+
+In most cases, a normal read-write transaction is used, which can have any
+number of query or mutate operations. However, if a transaction only has
+queries, you might benefit from a read-only transaction, which can share the
+same read timestamp across multiple such read-only transactions and can result
+in lower latencies.
+
+For normal read-write transactions, it is a good practice to call
+`Transaction#discard()` in a `finally` block after running the transaction.
+Calling `Transaction#discard()` after `Transaction#commit()` is a no-op and you
+can call `discard()` multiple times with no additional side-effects.
+
+```java
+Transaction txn = dgraphClient.newTransaction();
+try {
+  // Do something here
+  // ...
+} finally {
+  txn.discard();
+}
+```
+
+For read-only transactions, there is no need to call `Transaction#discard()`,
+which is equivalent to a no-op.
+
+```java
+Transaction readOnlyTxn = dgraphClient.newReadOnlyTransaction();
+```
+
+Read-only transactions can be set as best-effort. Best-effort queries relax the
+requirement of linearizable reads. This is useful when running queries that do
+not require a result from the latest timestamp.
+
+```java
+Transaction bestEffortTxn = dgraphClient.newReadOnlyTransaction()
+    .setBestEffort(true);
+```
+
+### Running a Mutation
+
+`Transaction#mutate` runs a mutation. It takes in a `Mutation` object, which
+provides two main ways to set data: JSON and RDF N-Quad. You can choose
+whichever way is convenient.
+
+We're going to use JSON. First we define a `Person` class to represent a
+person. This data will be serialized into JSON.
+
+```java
+class Person {
+  String name;
+  Person() {}
+}
+```
+
+Next, we initialize a `Person` object, serialize it and use it in a `Mutation`
+object.
+
+```java
+// Create data
+Person person = new Person();
+person.name = "Alice";
+
+// Serialize it
+Gson gson = new Gson();
+String json = gson.toJson(person);
+// Run mutation
+Mutation mu = Mutation.newBuilder()
+    .setSetJson(ByteString.copyFromUtf8(json))
+    .build();
+txn.mutate(mu);
+```
+
+Sometimes, you only want to commit a mutation, without querying anything
+further. In such cases, you can set the `CommitNow` field in the `Mutation`
+object to indicate that the mutation must be immediately committed.
+
+A mutation can be run using the `doRequest` function as well.
+
+```java
+Request request = Request.newBuilder()
+    .addMutations(mu)
+    .build();
+txn.doRequest(request);
+```
+
+### Committing a Transaction
+
+A transaction can be committed using the `Transaction#commit()` method. If your
+transaction consisted solely of calls to `Transaction#query()`, and no calls to
+`Transaction#mutate()`, then calling `Transaction#commit()` is not necessary.
+
+An error will be returned if other transactions running concurrently modify the
+same data that was modified in this transaction. It is up to the user to retry
+transactions when they fail.
+
+```java
+Transaction txn = dgraphClient.newTransaction();
+
+try {
+  // …
+  // Perform any number of queries and mutations
+  // …
+  // and finally …
+  txn.commit();
+} catch (TxnConflictException ex) {
+  // Retry or handle exception.
+} finally {
+  // Clean up. Calling this after txn.commit() is a no-op
+  // and hence safe.
+  txn.discard();
+}
+```
+
+### Running a Query
+
+You can run a query by calling `Transaction#query()`. You will need to pass in
+a DQL (formerly GraphQL+-) query string, and a map (optional, could be empty)
+of any variables that you might want to set in the query.
+
+The response contains a `JSON` field, which has the JSON encoded result. You
+will need to decode it before you can do anything useful with it.
+
+Let’s run the following query:
+
+```
+query all($a: string) {
+  all(func: eq(name, $a)) {
+    name
+  }
+}
+```
+
+First we must create a `People` class that will help us deserialize the JSON
+result:
+
+```java
+class People {
+  List<Person> all;
+  People() {}
+}
+```
+
+Then we run the query, deserialize the result and print it out:
+
+```java
+// Query
+String query =
+"query all($a: string){\n" +
+"  all(func: eq(name, $a)) {\n" +
+"    name\n" +
+"  }\n" +
+"}\n";
+
+Map<String, String> vars = Collections.singletonMap("$a", "Alice");
+Response response = dgraphClient.newReadOnlyTransaction().queryWithVars(query, vars);
+
+// Deserialize
+People ppl = gson.fromJson(response.getJson().toStringUtf8(), People.class);
+
+// Print results
+System.out.printf("people found: %d\n", ppl.all.size());
+ppl.all.forEach(person -> System.out.println(person.name));
+```
+
+This should print:
+
+```
+people found: 1
+Alice
+```
+
+You can also use the `doRequest` function to run the query.
+
+```java
+Request request = Request.newBuilder()
+    .setQuery(query)
+    .build();
+txn.doRequest(request);
+```
+
+### Running a Query with RDF response
+
+You can get query results as an RDF response by calling either `queryRDF()` or
+`queryRDFWithVars()`. The response contains the `getRdf()` method, which will
+provide the RDF encoded output.
+
+**Note**: If you are querying only for `uid` values, use a JSON format
+response.
+
+```java
+// Query
+String query = "query me($a: string) { me(func: eq(name, $a)) { name }}";
+Map<String, String> vars = Collections.singletonMap("$a", "Alice");
+Response response =
+    dgraphAsyncClient.newReadOnlyTransaction().queryRDFWithVars(query, vars).join();
+
+// Print results
+System.out.println(response.getRdf().toStringUtf8());
+```
+
+This should print (assuming Alice's `uid` is `0x2`):
+
+```
+<0x2> <name> "Alice" .
+```
+
+### Running an Upsert: Query + Mutation
+
+The `txn.doRequest` function allows you to run upserts consisting of one query
+and one mutation. Variables can be defined in the query and used in the
+mutation. You could also use `txn.doRequest` to perform a query followed by a
+mutation.
+
+```java
+String query = "query {\n" +
+  "user as var(func: eq(email, \"wrong_email@dgraph.io\"))\n" +
+  "}\n";
+Mutation mu = Mutation.newBuilder()
+    .setSetNquads(ByteString.copyFromUtf8("uid(user) <email> \"correct_email@dgraph.io\" ."))
+    .build();
+Request request = Request.newBuilder()
+    .setQuery(query)
+    .addMutations(mu)
+    .setCommitNow(true)
+    .build();
+txn.doRequest(request);
+```
+
+### Running a Conditional Upsert
+
+The upsert block also allows specifying a conditional mutation block using an
+`@if` directive. The mutation is executed only when the specified condition is
+true. If the condition is false, the mutation is silently ignored.
+
+See more about Conditional Upsert [here](./dql-mutation.md#conditional-upsert).
+
+```java
+String query = "query {\n" +
+  "user as var(func: eq(email, \"wrong_email@dgraph.io\"))\n" +
+  "}\n";
+Mutation mu = Mutation.newBuilder()
+    .setSetNquads(ByteString.copyFromUtf8("uid(user) <email> \"correct_email@dgraph.io\" ."))
+    .setCond("@if(eq(len(user), 1))")
+    .build();
+Request request = Request.newBuilder()
+    .setQuery(query)
+    .addMutations(mu)
+    .setCommitNow(true)
+    .build();
+txn.doRequest(request);
+```
+
+### Setting Deadlines
+
+It is recommended that you always set a deadline for each client call, after
+which the call is terminated. This is in line with the recommendation for any
+gRPC client. Read [this forum post][deadline-post] for more details.
+
+```java
+ManagedChannel channel = ManagedChannelBuilder.forAddress("localhost", 9080).usePlaintext(true).build();
+DgraphGrpc.DgraphStub stub = DgraphGrpc.newStub(channel);
+ClientInterceptor timeoutInterceptor = new ClientInterceptor(){
+  @Override
+  public <ReqT, RespT> ClientCall<ReqT, RespT> interceptCall(
+      MethodDescriptor<ReqT, RespT> method, CallOptions callOptions, Channel next) {
+    return next.newCall(method, callOptions.withDeadlineAfter(500, TimeUnit.MILLISECONDS));
+  }
+};
+stub = stub.withInterceptors(timeoutInterceptor);
+DgraphClient dgraphClient = new DgraphClient(stub);
+```
+
+[deadline-post]:
+  https://discuss.dgraph.io/t/dgraph-java-client-setting-deadlines-per-call/3056
+
+### Setting Metadata Headers
+
+Certain headers such as authentication tokens need to be set globally for all
+subsequent calls. Below is an example of setting a header with the name
+"auth-token":
+
+```java
+// create the stub first
+ManagedChannel channel =
+ManagedChannelBuilder.forAddress(TEST_HOSTNAME, TEST_PORT).usePlaintext(true).build();
+DgraphStub stub = DgraphGrpc.newStub(channel);
+
+// use MetadataUtils to augment the stub with headers
+Metadata metadata = new Metadata();
+metadata.put(
+    Metadata.Key.of("auth-token", Metadata.ASCII_STRING_MARSHALLER), "the-auth-token-value");
+stub = MetadataUtils.attachHeaders(stub, metadata);
+
+// create the DgraphClient wrapper around the stub
+DgraphClient dgraphClient = new DgraphClient(stub);
+
+// trigger a RPC call using the DgraphClient
+dgraphClient.alter(Operation.newBuilder().setDropAll(true).build());
+```
+
+### Helper Methods
+
+#### Delete multiple edges
+
+The example below uses the helper method `Helpers#deleteEdges` to delete
+multiple edges corresponding to predicates on a node with the given uid. The
+helper method takes an existing mutation, and returns a new mutation with the
+deletions applied.
+
+```java
+Mutation mu = Mutation.newBuilder().build();
+mu = Helpers.deleteEdges(mu, uid, "friends", "loc");
+dgraphClient.newTransaction().mutate(mu);
+```
+
+### Closing the DB Connection
+
+To disconnect from Dgraph, call `ManagedChannel#shutdown` on the gRPC channel
+object created when [creating a Dgraph client](#creating-a-client).
+
+```java
+channel.shutdown();
+```
+
+## Using the Asynchronous Client
+
+Dgraph Client for Java also bundles an asynchronous API, which can be used by
+instantiating the `DgraphAsyncClient` class. The usage is almost exactly the
+same as the `DgraphClient` class (shown in the previous section). The main
+difference is that `DgraphAsyncClient#newTransaction()` returns an
+`AsyncTransaction` object. The API for `AsyncTransaction` is exactly the same
+as `Transaction`. The only difference is that instead of returning the results
+directly, it returns immediately with a corresponding `CompletableFuture`
+object. This object represents the computation which runs asynchronously to
+yield the result in the future. Read more about `CompletableFuture` in the
+[Java 8 documentation][futuredocs].
+
+[futuredocs]:
+  https://docs.oracle.com/javase/8/docs/api/java/util/concurrent/CompletableFuture.html
+
+Here is the asynchronous version of the code above, which runs a query.
+
+```java
+// Query
+String query =
+"query all($a: string){\n" +
+"  all(func: eq(name, $a)) {\n" +
+"    name\n" +
+"  }\n" +
+"}\n";
+
+Map<String, String> vars = Collections.singletonMap("$a", "Alice");
+
+AsyncTransaction txn = dgraphAsyncClient.newTransaction();
+txn.queryWithVars(query, vars).thenAccept(response -> {
+  // Deserialize
+  People ppl = gson.fromJson(response.getJson().toStringUtf8(), People.class);
+
+  // Print results
+  System.out.printf("people found: %d\n", ppl.all.size());
+  ppl.all.forEach(person -> System.out.println(person.name));
+});
+```
+
+## Checking the request latency
+
+If you would like to see the latency for either a mutation or query request,
+the latency field in the returned result can be helpful. Here is an example to
+log the latency of a query request:
+
+```java
+Response resp = txn.query(query);
+Latency latency = resp.getLatency();
+logger.info("parsing latency:" + latency.getParsingNs());
+logger.info("processing latency:" + latency.getProcessingNs());
+logger.info("encoding latency:" + latency.getEncodingNs());
+```
+
+Similarly you can get the latency of a mutation request:
+
+```java
+Assigned assignedIds = dgraphClient.newTransaction().mutate(mu);
+Latency latency = assignedIds.getLatency();
+```
diff --git a/dgraph/reference/dql/clients/javascript/grpc.mdx b/dgraph/reference/dql/clients/javascript/grpc.mdx
new file mode 100644
index 00000000..3f6a8fb9
--- /dev/null
+++ b/dgraph/reference/dql/clients/javascript/grpc.mdx
@@ -0,0 +1,443 @@
+---
+title: gRPC Client
+---
+
+The official Dgraph client implementation for JavaScript, using
+[gRPC-js](https://www.npmjs.com/package/@grpc/grpc-js) (the original
+[gRPC](https://grpc.io/) client for JavaScript is now deprecated).
+
+This client follows the [Dgraph Go client](./go) closely.
+
+
+  You can find the official Dgraph JavaScript gRPC client at:
+  https://github.com/dgraph-io/dgraph-js. Follow the [installation
+  instructions](https://github.com/dgraph-io/dgraph-js#install) to get it up and
+  running.
+
+
+## Supported Versions
+
+More details on the supported versions can be found at
+[this link](https://github.com/dgraph-io/dgraph-js#supported-versions).
+
+## Quickstart
+
+Build and run the
+[simple project](https://github.com/dgraph-io/dgraph-js/tree/master/examples/simple),
+which contains an end-to-end example of using the Dgraph JavaScript client.
+Follow the instructions in the
+[README](https://github.com/dgraph-io/dgraph-js/tree/master/examples/simple/README.md)
+of that project.
+
+### Examples
+
+- [simple](https://github.com/dgraph-io/dgraph-js/tree/master/examples/simple):
+  Quickstart example of using dgraph-js.
+- [tls](https://github.com/dgraph-io/dgraph-js/tree/master/examples/tls):
+  Example of using dgraph-js with a Dgraph cluster secured with TLS.
+
+## Using a Client
+
+
+  You can find a [simple
+  example](https://github.com/dgraph-io/dgraph-js/tree/master/examples/simple)
+  project, which contains an end-to-end working example of how to use the
+  JavaScript gRPC client, for Node.js >= v6.
+
+
+### Creating a Client
+
+A `DgraphClient` object can be initialized by passing it a list of
+`DgraphClientStub` clients as variadic arguments. Connecting to multiple Dgraph
+servers in the same cluster allows for better distribution of workload.
+
+The following code snippet shows just one connection.
+ +```js +const dgraph = require("dgraph-js") +const grpc = require("grpc") + +const clientStub = new dgraph.DgraphClientStub( + // addr: optional, default: "localhost:9080" + "localhost:9080", + // credentials: optional, default: grpc.credentials.createInsecure() + grpc.credentials.createInsecure(), +) +const dgraphClient = new dgraph.DgraphClient(clientStub) +``` + +To facilitate debugging, [debug mode](#debug-mode) can be enabled for a client. + +### Multi-tenancy + +In [multi-tenancy](./multitenancy) environments, `dgraph-js` provides a new +method `loginIntoNamespace()`, which will allow the users to login to a specific +namespace. + +In order to create a JavaScript client, and make the client login into namespace +`123`: + +```js +const dgraphClientStub = new dgraph.DgraphClientStub("localhost:9080") +await dgraphClientStub.loginIntoNamespace("groot", "password", 123) // where 123 is the namespaceId +``` + +In the example above, the client logs into namespace `123` using username +`groot` and password `password`. Once logged in, the client can perform all the +operations allowed to the `groot` user of namespace `123`. + +### Creating a Client for Dgraph Cloud Endpoint + +If you want to connect to Dgraph running on your +[Dgraph Cloud](https://cloud.dgraph.io) instance, then all you need is the URL +of your Dgraph Cloud endpoint and the API key. You can get a client using them +as follows: + +```js +const dgraph = require("dgraph-js") + +const clientStub = dgraph.clientStubFromSlashGraphQLEndpoint( + "https://frozen-mango.eu-central-1.aws.cloud.dgraph.io/graphql", + "", +) +const dgraphClient = new dgraph.DgraphClient(clientStub) +``` + + + The `dgraph.clientStubFromSlashGraphQLEndpoint()` method has been deprecated + and will be removed in v21.07. + + +### Altering the Database + +To set the schema, create an `Operation` object, set the schema and pass it to +`DgraphClient#alter(Operation)` method. + +```js +const schema = "name: string @index(exact) ." +const op = new dgraph.Operation() +op.setSchema(schema) +await dgraphClient.alter(op) +``` + +Starting Dgraph version 20.03.0, indexes can be computed in the background. You +can set `setRunInBackground` field of the `Operation` object to `true` before +passing it to the `DgraphClient#alter(Operation)` method. You can find more +details [here](./dql-schema.md#indexes-in-background). + +```js +const schema = "name: string @index(exact) ." +const op = new dgraph.Operation() +op.setSchema(schema) +op.setRunInBackground(true) +await dgraphClient.alter(op) +``` + +> NOTE: Many of the examples here use the `await` keyword which requires +> `async/await` support which is available on Node.js >= v7.6.0. For prior +> versions, the expressions following `await` can be used just like normal +> `Promise`: +> +> ```js +> dgraphClient.alter(op) +> .then(function(result) { ... }, function(err) { ... }) +> ``` + +`Operation` contains other fields as well, including drop predicate and drop +all. Drop all is useful if you wish to discard all the data, and start from a +clean slate, without bringing the instance down. + +```js +// Drop all data including schema from the Dgraph instance. This is useful +// for small examples such as this, since it puts Dgraph into a clean +// state. +const op = new dgraph.Operation() +op.setDropAll(true) +await dgraphClient.alter(op) +``` + +### Creating a Transaction + +To create a transaction, call `DgraphClient#newTxn()` method, which returns a +new `Txn` object. This operation incurs no network overhead. 
+ +It is good practice to call `Txn#discard()` in a `finally` block after running +the transaction. Calling `Txn#discard()` after `Txn#commit()` is a no-op and you +can call `Txn#discard()` multiple times with no additional side-effects. + +```js +const txn = dgraphClient.newTxn() +try { + // Do something here + // ... +} finally { + await txn.discard() + // ... +} +``` + +To create a read-only transaction, set `readOnly` boolean to `true` while +calling `DgraphClient#newTxn()` method. Read-only transactions cannot contain +mutations and trying to call `Txn#mutate()` or `Txn#commit()` will result in an +error. Calling `Txn.Discard()` will be a no-op. + +You can optionally set the `bestEffort` boolean to `true`. This may yield +improved latencies in read-bound workloads where linearizable reads are not +strictly needed. + +```js +const txn = dgraphClient.newTxn({ + readOnly: true, + bestEffort: false, +}) +// ... +const res = await txn.queryWithVars(query, vars) +``` + +### Running a Mutation + +`Txn#mutate(Mutation)` runs a mutation. It takes in a `Mutation` object, which +provides two main ways to set data: JSON and RDF N-Quad. You can choose +whichever way is convenient. + +We define a person object to represent a person and use it in a `Mutation` +object. + +```js +// Create data. +const p = { + name: "Alice", +} + +// Run mutation. +const mu = new dgraph.Mutation() +mu.setSetJson(p) +await txn.mutate(mu) +``` + +For a more complete example with multiple fields and relationships, look at the +[simple] project in the `examples` folder. + +Sometimes, you only want to commit a mutation, without querying anything +further. In such cases, you can use `Mutation#setCommitNow(true)` to indicate +that the mutation must be immediately committed. + +`Mutation#setIgnoreIndexConflict(true)` can be applied on a `Mutation` object to +not run conflict detection over the index, which would decrease the number of +transaction conflicts and aborts. However, this would come at the cost of +potentially inconsistent upsert operations. + +Mutation can be run using `txn.doRequest` as well. + +```js +const mu = new dgraph.Mutation() +mu.setSetJson(p) + +const req = new dgraph.Request() +req.setCommitNow(true) +req.setMutationsList([mu]) + +await txn.doRequest(req) +``` + +### Running a Query + +You can run a query by calling `Txn#query(string)`. You will need to pass in a +GraphQL+- query string. If you want to pass an additional map of any variables +that you might want to set in the query, call +`Txn#queryWithVars(string, object)` with the variables object as the second +argument. + +The response would contain the method `Response#getJSON()`, which returns the +response JSON. + +Let’s run the following query with a variable $a: + +```console +query all($a: string) { + all(func: eq(name, $a)) + { + name + } +} +``` + +Run the query, deserialize the result from Uint8Array (or base64) encoded JSON +and print it out: + +```js +// Run query. +const query = `query all($a: string) { + all(func: eq(name, $a)) + { + name + } +}` +const vars = { $a: "Alice" } +const res = await dgraphClient.newTxn().queryWithVars(query, vars) +const ppl = res.getJson() + +// Print results. +console.log(`Number of people named "Alice": ${ppl.all.length}`) +ppl.all.forEach((person) => console.log(person.name)) +``` + +This should print: + +```console +Number of people named "Alice": 1 +Alice +``` + +You can also use `txn.doRequest` function to run the query. 
+
+```js
+const req = new dgraph.Request()
+const vars = req.getVarsMap()
+vars.set("$a", "Alice")
+req.setQuery(query)
+
+const res = await txn.doRequest(req)
+console.log(JSON.stringify(res.getJson()))
+```
+
+### Running an Upsert: Query + Mutation
+
+The `txn.doRequest` function allows you to run upserts consisting of one query
+and one mutation. Query variables can be defined in the query and then used in
+the mutation. You can also use the `txn.doRequest` function to perform just a
+query or a mutation.
+
+```js
+const query = `
+  query {
+    user as var(func: eq(email, "wrong_email@dgraph.io"))
+  }`
+
+const mu = new dgraph.Mutation()
+mu.setSetNquads(`uid(user) <email> "correct_email@dgraph.io" .`)
+
+const req = new dgraph.Request()
+req.setQuery(query)
+req.setMutationsList([mu])
+req.setCommitNow(true)
+
+// Upsert: If wrong_email found, update the existing data
+// or else perform a new mutation.
+await dgraphClient.newTxn().doRequest(req)
+```
+
+### Running a Conditional Upsert
+
+The upsert block allows specifying a conditional mutation block using an `@if`
+directive. The mutation is executed only when the specified condition is true.
+If the condition is false, the mutation is silently ignored.
+
+See more about Conditional Upsert [here](./dql-mutation.md#conditional-upsert).
+
+```js
+const query = `
+  query {
+    user as var(func: eq(email, "wrong_email@dgraph.io"))
+  }`
+
+const mu = new dgraph.Mutation()
+mu.setSetNquads(`uid(user) <email> "correct_email@dgraph.io" .`)
+mu.setCond(`@if(eq(len(user), 1))`)
+
+const req = new dgraph.Request()
+req.setQuery(query)
+req.addMutations(mu)
+req.setCommitNow(true)
+
+await dgraphClient.newTxn().doRequest(req)
+```
+
+### Committing a Transaction
+
+A transaction can be committed using the `Txn#commit()` method. If your
+transaction consisted solely of calls to `Txn#query` or `Txn#queryWithVars`, and
+no calls to `Txn#mutate`, then calling `Txn#commit()` is not necessary.
+
+An error will be returned if other transactions running concurrently modify the
+same data that was modified in this transaction. It is up to the user to retry
+transactions when they fail.
+
+```js
+const txn = dgraphClient.newTxn()
+try {
+  // ...
+  // Perform any number of queries and mutations
+  // ...
+  // and finally...
+  await txn.commit()
+} catch (e) {
+  if (e === dgraph.ERR_ABORTED) {
+    // Retry or handle exception.
+  } else {
+    throw e
+  }
+} finally {
+  // Clean up. Calling this after txn.commit() is a no-op
+  // and hence safe.
+  await txn.discard()
+}
+```
+
+### Cleanup Resources
+
+To cleanup resources, you have to call `DgraphClientStub#close()` individually
+for all the instances of `DgraphClientStub`.
+
+```js
+const SERVER_ADDR = "localhost:9080"
+const SERVER_CREDENTIALS = grpc.credentials.createInsecure()
+
+// Create instances of DgraphClientStub.
+const stub1 = new dgraph.DgraphClientStub(SERVER_ADDR, SERVER_CREDENTIALS)
+const stub2 = new dgraph.DgraphClientStub(SERVER_ADDR, SERVER_CREDENTIALS)
+
+// Create an instance of DgraphClient.
+const dgraphClient = new dgraph.DgraphClient(stub1, stub2)
+
+// ...
+// Use dgraphClient
+// ...
+
+// Cleanup resources by closing all client stubs.
+stub1.close()
+stub2.close()
+```
+
+### Debug mode
+
+Debug mode can be used to print helpful debug messages while performing alters,
+queries and mutations. It can be set using the
+`DgraphClient#setDebugMode(boolean?)` method.
+
+```js
+// Create a client.
+const dgraphClient = new dgraph.DgraphClient(...);
+
+// Enable debug mode.
+dgraphClient.setDebugMode(true); +// OR simply dgraphClient.setDebugMode(); + +// Disable debug mode. +dgraphClient.setDebugMode(false); +``` + +### Setting Metadata Headers + +Metadata headers such as authentication tokens can be set through the context of +gRPC methods. Below is an example of how to set a header named "auth-token". + +```js +// The following piece of code shows how one can set metadata with +// auth-token, to allow Alter operation, if the server requires it. + +var meta = new grpc.Metadata() +meta.add("auth-token", "mySuperSecret") + +await dgraphClient.alter(op, meta) +``` diff --git a/dgraph/reference/dql/clients/javascript/http.mdx b/dgraph/reference/dql/clients/javascript/http.mdx new file mode 100644 index 00000000..e3e95c23 --- /dev/null +++ b/dgraph/reference/dql/clients/javascript/http.mdx @@ -0,0 +1,365 @@ +--- +title: HTTP Client +--- + +A Dgraph client implementation for JavaScript using HTTP. It supports both +browser and Node.js environments. This client follows the +[Dgraph JavaScript gRPC client](./grpc) closely. + + + The official JavaScript HTTP client [can be found + here](https://github.com/dgraph-io/dgraph-js-http). Follow the [install + instructions](https://github.com/dgraph-io/dgraph-js-http#install) to get it + up and running. + + +## Supported Versions + +More details on the supported versions can be found at +[this link](https://github.com/dgraph-io/dgraph-js-http#supported-versions). + +## Quickstart + +Build and run the +[simple project](https://github.com/dgraph-io/dgraph-js-http/tree/master/examples/simple), +which contains an end-to-end example of using the Dgraph javascript HTTP client. +Follow the instructions in the +[README](https://github.com/dgraph-io/dgraph-js-http/tree/master/examples/simple/README.md) +of that project. + +## Using a client + + + You can find a [simple + example](https://github.com/dgraph-io/dgraph-js-http/tree/master/examples/simple) + project, which contains an end-to-end working example of how to use the + JavaScript HTTP client, for Node.js >= v6. + + +### Create a client + +A `DgraphClient` object can be initialized by passing it a list of +`DgraphClientStub` clients as variadic arguments. Connecting to multiple Dgraph +servers in the same cluster allows for better distribution of workload. + +The following code snippet shows just one connection. + +```js +const dgraph = require("dgraph-js-http") + +const clientStub = new dgraph.DgraphClientStub( + // addr: optional, default: "http://localhost:8080" + "http://localhost:8080", + // legacyApi: optional, default: false. Set to true when connecting to Dgraph v1.0.x + false, +) +const dgraphClient = new dgraph.DgraphClient(clientStub) +``` + +To facilitate debugging, [debug mode](#debug-mode) can be enabled for a client. + +### Create a Client for Dgraph Cloud Endpoint + +If you want to connect to Dgraph running on your +[Dgraph Cloud](https://cloud.dgraph.io) instance, then all you need is the URL +of your Dgraph Cloud endpoint and the API key. You can get a client using them +as follows: + +```js +const dgraph = require("dgraph-js-http") + +//here we pass the cloud endpoint +const clientStub = new dgraph.DgraphClientStub( + "https://super-pail.us-west-2.aws.cloud.dgraph.io", +) + +const dgraphClient = new dgraph.DgraphClient(clientStub) + +//here we pass the API key +dgraphClient.setSlashApiKey("") +``` + + + You need to remove the `/graphql` path when copying the endpoint URL from the + Dgraph Cloud dashboard. 
+
+### Log in to Dgraph
+
+If your Dgraph server has Access Control Lists enabled (Dgraph v1.1 or above),
+the `clientStub` must be logged in to access data:
+
+```js
+await clientStub.login("groot", "password")
+```
+
+Calling `login` will obtain and remember the access and refresh JWT tokens. All
+subsequent operations via the logged-in `clientStub` will send along the stored
+access token.
+
+Access tokens expire after 6 hours, so in long-lived apps (e.g., business-logic
+servers) you need to call `login` again periodically:
+
+```js
+// When no parameters are specified the clientStub uses existing refresh token
+// to obtain a new access token.
+await clientStub.login()
+```
+
+### Configure access tokens
+
+Some Dgraph configurations require extra access tokens.
+
+1. Alpha servers can be configured with
+   [Secure Alter Operations](/dgraph-administration). In this case the token
+   needs to be set on the client instance:
+
+```js
+dgraphClient.setAlphaAuthToken("My secret token value")
+```
+
+2. [Dgraph Cloud](https://cloud.dgraph.io/) requires an API key for HTTP
+   access:
+
+```js
+dgraphClient.setSlashApiKey("Copy the Api Key from Dgraph Cloud admin page")
+```
+
+### Create an HTTPS connection
+
+If your cluster is using TLS/mTLS, you can pass a Node.js `https.Agent`
+configured with your certificates, as follows:
+
+```js
+const https = require("https")
+const fs = require("fs")
+// read your certificates
+const cert = fs.readFileSync("./certs/client.crt", "utf8")
+const ca = fs.readFileSync("./certs/ca.crt", "utf8")
+const key = fs.readFileSync("./certs/client.key", "utf8")
+
+// create your https.Agent
+const agent = new https.Agent({
+  cert,
+  ca,
+  key,
+})
+
+const clientStub = new dgraph.DgraphClientStub(
+  "https://localhost:8080",
+  false,
+  { agent },
+)
+const dgraphClient = new dgraph.DgraphClient(clientStub)
+```
+
+### Alter the database
+
+To set the schema, pass the schema to the `DgraphClient#alter(Operation)`
+method.
+
+```js
+const schema = "name: string @index(exact) ."
+await dgraphClient.alter({ schema: schema })
+```
+
+> NOTE: Many of the examples here use the `await` keyword, which requires
+> `async/await` support that is not available in all JavaScript environments.
+> For unsupported environments, the expressions following `await` can be used
+> just like normal `Promise` instances.
+
+`Operation` contains other fields as well, including drop predicate and drop
+all. Drop all is useful if you wish to discard all the data, and start from a
+clean slate, without bringing the instance down.
+
+```js
+// Drop all data including schema from the Dgraph instance. This is useful
+// for small examples such as this, since it puts Dgraph into a clean
+// state.
+await dgraphClient.alter({ dropAll: true })
+```
+
+### Create a transaction
+
+To create a transaction, call the `DgraphClient#newTxn()` method, which returns
+a new `Txn` object. This operation incurs no network overhead.
+
+It is good practice to call `Txn#discard()` in a `finally` block after running
+the transaction. Calling `Txn#discard()` after `Txn#commit()` is a no-op and you
+can call `Txn#discard()` multiple times with no additional side-effects.
+
+```js
+const txn = dgraphClient.newTxn()
+try {
+  // Do something here
+  // ...
+} finally {
+  await txn.discard()
+  // ...
+}
+```
+
+You can make queries read-only and best effort by passing `options` to
+`DgraphClient#newTxn`.
For example: + +```js +const options = { readOnly: true, bestEffort: true } +const res = await dgraphClient.newTxn(options).query(query) +``` + +Read-only transactions are useful to increase read speed because they can +circumvent the usual consensus protocol. Best effort queries can also increase +read speed in read bound system. Please note that best effort requires readonly. + +### Run a mutation + +`Txn#mutate(Mutation)` runs a mutation. It takes in a `Mutation` object, which +provides two main ways to set data: JSON and RDF N-Quad. You can choose +whichever way is convenient. + +We define a person object to represent a person and use it in a `Mutation` +object. + +```js +// Create data. +const p = { + name: "Alice", +} + +// Run mutation. +await txn.mutate({ setJson: p }) +``` + +For a more complete example with multiple fields and relationships, look at the +[simple] project in the `examples` folder. + +For setting values using N-Quads, use the `setNquads` field. For delete +mutations, use the `deleteJson` and `deleteNquads` fields for deletion using +JSON and N-Quads respectively. + +Sometimes, you only want to commit a mutation, without querying anything +further. In such cases, you can use `Mutation#commitNow = true` to indicate that +the mutation must be immediately committed. + +```js +// Run mutation. +await txn.mutate({ setJson: p, commitNow: true }) +``` + +### Run a query + +You can run a query by calling `Txn#query(string)`. You will need to pass in a +GraphQL+- query string. If you want to pass an additional map of any variables +that you might want to set in the query, call +`Txn#queryWithVars(string, object)` with the variables object as the second +argument. + +The response would contain the `data` field, `Response#data`, which returns the +response JSON. + +Let’s run the following query with a variable \$a: + +```console +query all($a: string) { + all(func: eq(name, $a)) + { + name + } +} +``` + +Run the query and print out the response: + +```js +// Run query. +const query = `query all($a: string) { + all(func: eq(name, $a)) + { + name + } +}` +const vars = { $a: "Alice" } +const res = await dgraphClient.newTxn().queryWithVars(query, vars) +const ppl = res.data + +// Print results. +console.log(`Number of people named "Alice": ${ppl.all.length}`) +ppl.all.forEach((person) => console.log(person.name)) +``` + +This should print: + +```console +Number of people named "Alice": 1 +Alice +``` + +### Commit a transaction + +A transaction can be committed using the `Txn#commit()` method. If your +transaction consisted solely of calls to `Txn#query` or `Txn#queryWithVars`, and +no calls to `Txn#mutate`, then calling `Txn#commit()` is not necessary. + +An error will be returned if other transactions running concurrently modify the +same data that was modified in this transaction. It is up to the user to retry +transactions when they fail. + +```js +const txn = dgraphClient.newTxn() +try { + // ... + // Perform any number of queries and mutations + // ... + // and finally... + await txn.commit() +} catch (e) { + if (e === dgraph.ERR_ABORTED) { + // Retry or handle exception. + } else { + throw e + } +} finally { + // Clean up. Calling this after txn.commit() is a no-op + // and hence safe. + await txn.discard() +} +``` + +### Check request latency + +To see the server latency information for requests, check the +`extensions.server_latency` field from the Response object for queries or from +the Assigned object for mutations. 
These latencies show the amount of time the +Dgraph server took to process the entire request. It does not consider the time +over the network for the request to reach back to the client. + +```js +// queries +const res = await txn.queryWithVars(query, vars) +console.log(res.extensions.server_latency) +// { parsing_ns: 29478, +// processing_ns: 44540975, +// encoding_ns: 868178 } + +// mutations +const assigned = await txn.mutate({ setJson: p }) +console.log(assigned.extensions.server_latency) +// { parsing_ns: 132207, +// processing_ns: 84100996 } +``` + +### Debug mode + +Debug mode can be used to print helpful debug messages while performing alters, +queries and mutations. It can be set using +the`DgraphClient#setDebugMode(boolean?)` method. + +```js +// Create a client. +const dgraphClient = new dgraph.DgraphClient(...); + +// Enable debug mode. +dgraphClient.setDebugMode(true); +// OR simply dgraphClient.setDebugMode(); + +// Disable debug mode. +dgraphClient.setDebugMode(false); +``` diff --git a/dgraph/reference/dql/clients/javascript/index.mdx b/dgraph/reference/dql/clients/javascript/index.mdx new file mode 100644 index 00000000..63df0307 --- /dev/null +++ b/dgraph/reference/dql/clients/javascript/index.mdx @@ -0,0 +1,31 @@ +--- +title: JavaScript +--- + +## gRPC JS Client + +The official JavaScript gRPC client documentation [can be found here](./grpc). + +More details on the supported versions can be found at +[this link](https://github.com/dgraph-io/dgraph-js#supported-versions). + + + You can find a [simple + example](https://github.com/dgraph-io/dgraph-js/tree/master/examples/simple) + project, which contains an end-to-end working example of how to use the + JavaScript gRPC client, for Node.js >= v6. + + +## HTTP JS Client + +The official JavaScript HTTP client documentation [can be found here](./http). + +More details on the supported versions can be found at +[this link](https://github.com/dgraph-io/dgraph-js-http#supported-versions). + + + You can find a [simple + example](https://github.com/dgraph-io/dgraph-js-http/tree/master/examples/simple) + project, which contains an end-to-end working example of how to use the + JavaScript HTTP client, for Node.js >= v6. + diff --git a/dgraph/reference/dql/clients/python.mdx b/dgraph/reference/dql/clients/python.mdx new file mode 100644 index 00000000..174fb24d --- /dev/null +++ b/dgraph/reference/dql/clients/python.mdx @@ -0,0 +1,454 @@ +--- +title: Python +--- + +Official Dgraph client implementation for Python (Python >= v2.7 and >= v3.5), +using [gRPC](https://grpc.io/). This client follows the [Dgraph Go client](/go) +closely. + + + The official Python client [can be found + here](https://github.com/dgraph-io/pydgraph). Follow the [install + instructions](https://github.com/dgraph-io/pydgraph#install) to get it up and + running. + + +## Supported Versions + +More details on the supported versions can be found at +[this link](https://github.com/dgraph-io/pydgraph#supported-versions). + +## Using a client + + + You can get a [simple + example](https://github.com/dgraph-io/pydgraph/tree/master/examples/simple) + project, which contains an end-to-end working example of how to use the Python + client. + + +### Creating a Client + +You can initialize a `DgraphClient` object by passing it a list of +`DgraphClientStub` clients as variadic arguments. Connecting to multiple Dgraph +servers in the same cluster allows for better distribution of workload. + +The following code snippet shows just one connection. 
+
+```python
+import pydgraph
+
+client_stub = pydgraph.DgraphClientStub('localhost:9080')
+client = pydgraph.DgraphClient(client_stub)
+```
+
+### Multi-tenancy
+
+In [multi-tenancy](./multitenancy) environments, PyDgraph provides a new method
+`login_into_namespace()`, which allows users to log in to a specific namespace.
+
+To create a Python client and log it in to namespace `123`:
+
+```python
+client_stub = pydgraph.DgraphClientStub('localhost:9080')
+client = pydgraph.DgraphClient(client_stub)
+# Log in as the groot user of namespace 123
+client.login_into_namespace("groot", "password", "123")
+```
+
+In the example above, the client logs into namespace `123` using username
+`groot` and password `password`. Once logged in, the client can perform all the
+operations allowed to the `groot` user of namespace `123`.
+
+### Creating a Client for Dgraph Cloud Endpoint
+
+If you want to connect to Dgraph running on your
+[Dgraph Cloud](https://cloud.dgraph.io) instance, then all you need is the URL
+of your Dgraph Cloud endpoint and the API key. You can get a client using them
+as follows:
+
+```python
+import pydgraph
+
+client_stub = pydgraph.DgraphClientStub.from_cloud("https://frozen-mango.eu-central-1.aws.cloud.dgraph.io/graphql", "<api-key>")
+client = pydgraph.DgraphClient(client_stub)
+```
+
+
+  The `DgraphClientStub.from_slash_endpoint()` method has been deprecated and
+  will be removed in v21.07. Please use `DgraphClientStub.from_cloud()` instead.
+
+
+### Altering the Database
+
+To set the schema, create an `Operation` object, set the schema and pass it to
+the `DgraphClient#alter(Operation)` method.
+
+```python
+schema = 'name: string @index(exact) .'
+op = pydgraph.Operation(schema=schema)
+client.alter(op)
+```
+
+Starting with Dgraph version 20.03.0, indexes can be computed in the background.
+You can set the `run_in_background` field of `pydgraph.Operation` to `True`
+before passing it to the `Alter` function. You can find more details
+[here](./dql-schema.md#indexes-in-background).
+
+```python
+schema = 'name: string @index(exact) .'
+op = pydgraph.Operation(schema=schema, run_in_background=True)
+client.alter(op)
+```
+
+`Operation` contains other fields as well, including the `drop` predicate and
+`drop all`. Drop all is useful if you wish to discard all the data, and start
+with a clean slate, without bringing the instance down.
+
+```python
+# Drop all data including schema from the Dgraph instance. This is useful
+# for small examples such as this, since it puts Dgraph into a clean state.
+op = pydgraph.Operation(drop_all=True)
+client.alter(op)
+```
+
+### Creating a Transaction
+
+To create a transaction, call the `DgraphClient#txn()` method, which returns a
+new `Txn` object. This operation incurs no network overhead.
+
+It is good practice to call `Txn#discard()` in a `finally` block after running
+the transaction. Calling `Txn#discard()` after `Txn#commit()` is a no-op and you
+can call `Txn#discard()` multiple times with no additional side-effects.
+
+```python
+txn = client.txn()
+try:
+    # Do something here
+    # ...
+finally:
+    txn.discard()
+    # ...
+```
+
+To create a read-only transaction, call `DgraphClient#txn(read_only=True)`.
+Read-only transactions are ideal for transactions which only involve queries.
+Mutations and commits are not allowed.
+
+```python
+txn = client.txn(read_only=True)
+try:
+    # Do some queries here
+    # ...
+finally:
+    txn.discard()
+    # ...
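+    # Note: this transaction is read-only, so calling txn.mutate() or
+    # txn.commit() on it is not allowed.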
+```
+
+To create a read-only transaction that executes best-effort queries, call
+`DgraphClient#txn(read_only=True, best_effort=True)`. Best-effort queries are
+faster than normal queries because they bypass the normal consensus protocol.
+For this same reason, best-effort queries cannot guarantee to return the latest
+data. Best-effort queries are only supported by read-only transactions.
+
+### Running a Mutation
+
+`Txn#mutate(mu=Mutation)` runs a mutation. It takes in a `Mutation` object,
+which provides two main ways to set data: JSON and RDF N-Quad. You can choose
+whichever way is convenient.
+
+`Txn#mutate()` provides convenience keyword arguments `set_obj` and `del_obj`
+for setting JSON values and `set_nquads` and `del_nquads` for setting N-Quad
+values. See examples below for usage.
+
+We define a person object to represent a person and use it in a transaction.
+
+```python
+# Create data.
+p = {
+    'name': 'Alice',
+}
+
+# Run mutation.
+txn.mutate(set_obj=p)
+
+# If you want to use a mutation object, use this instead:
+# mu = pydgraph.Mutation(set_json=json.dumps(p).encode('utf8'))
+# txn.mutate(mu)
+
+# If you want to use N-Quads, use this instead:
+# txn.mutate(set_nquads='_:alice <name> "Alice" .')
+```
+
+```python
+# Delete data.
+
+query = """query all($a: string)
+  {
+    all(func: eq(name, $a))
+    {
+       uid
+    }
+  }"""
+
+variables = {'$a': 'Bob'}
+
+res = txn.query(query, variables=variables)
+ppl = json.loads(res.json)
+
+# For a mutation to delete each matching node, use this:
+for person in ppl['all']:
+    txn.mutate(del_obj=person)
+```
+
+For a complete example with multiple fields and relationships, look at the
+[simple project][simple] in the `examples` folder.
+
+Sometimes, you only want to commit a mutation, without querying anything
+further. In such cases, you can set the keyword argument `commit_now=True` to
+indicate that the mutation must be immediately committed.
+
+A mutation can be executed using `txn.do_request` as well.
+
+```python
+mutation = txn.create_mutation(set_nquads='_:alice <name> "Alice" .')
+request = txn.create_request(mutations=[mutation], commit_now=True)
+txn.do_request(request)
+```
+
+### Committing a Transaction
+
+A transaction can be committed using the `Txn#commit()` method. If your
+transaction consists solely of calls to `Txn#query`, and no calls to
+`Txn#mutate`, then calling `Txn#commit()` is not necessary.
+
+An error is raised if other transactions running concurrently modify the same
+data that was modified in the current transaction. It is up to the user to
+retry transactions when they fail.
+
+```python
+txn = client.txn()
+try:
+    # ...
+    # Perform any number of queries and mutations
+    # ...
+    # and finally...
+    txn.commit()
+except pydgraph.AbortedError:
+    # Retry or handle exception.
+    pass
+finally:
+    # Clean up. Calling this after txn.commit() is a no-op
+    # and hence safe.
+    txn.discard()
+```
+
+### Running a Query
+
+You can run a query by calling `Txn#query(string)`. You will need to pass in a
+[DQL](https://dgraph.io/docs/query-language/) query string. If you want to pass
+an additional dictionary of any variables that you might want to set in the
+query, call `Txn#query(string, variables=d)` with the variables dictionary `d`.
+
+The query response contains the `json` field, which returns the JSON response.
+
+Let’s run a query with a variable `$a`, deserialize the result from JSON and
+print it out:
+
+```python
+# Run query.
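+# The value for $a is supplied separately through the variables dict, so the
+# same query string can be reused with different values.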
+query = """query all($a: string) { + all(func: eq(name, $a)) + { + name + } +}""" +variables = {'$a': 'Alice'} + +res = txn.query(query, variables=variables) + +# If not doing a mutation in the same transaction, simply use: +# res = client.txn(read_only=True).query(query, variables=variables) + +ppl = json.loads(res.json) + +# Print results. +print('Number of people named "Alice": {}'.format(len(ppl['all']))) +for person in ppl['all']: + print(person) +``` + +This should print: + +```console +Number of people named "Alice": 1 +Alice +``` + +You can also use `txn.do_request` function to run the query. + +```python +request = txn.create_request(query=query) +txn.do_request(request) +``` + +### Running an Upsert: Query + Mutation + +The `txn.do_request` function allows you to use upsert blocks. An upsert block +contains one query block and one or more mutation blocks, so it lets you perform +queries and mutations in a single request. Variables defined in the query block +can be used in the mutation blocks using the `uid` and `val` functions +implemented by DQL. + +To learn more about upsert blocks, see the +[Upsert Block documentation](https://dgraph.io/docs/mutations/upsert-block/). + +```python +query = """{ + u as var(func: eq(name, "Alice")) +}""" +nquad = """ + uid(u) "Alice" . + uid(u) "25" . +""" +mutation = txn.create_mutation(set_nquads=nquad) +request = txn.create_request(query=query, mutations=[mutation], commit_now=True) +txn.do_request(request) +``` + +### Running a Conditional Upsert + +The upsert block also allows specifying a conditional mutation block using an +`@if` directive. The mutation is executed only when the specified condition is +true. If the condition is false, the mutation is silently ignored. + +See more about Conditional Upserts [here](./dql-mutation.md#conditional-upsert). + +```python +query = """ + { + user as var(func: eq(email, "wrong_email@dgraph.io")) + } +""" +cond = "@if(eq(len(user), 1))" +nquads = """ + uid(user) "correct_email@dgraph.io" . +""" +mutation = txn.create_mutation(cond=cond, set_nquads=nquads) +request = txn.create_request(mutations=[mutation], query=query, commit_now=True) +txn.do_request(request) +``` + +### Cleaning Up Resources + +To clean up resources, you have to call `DgraphClientStub#close()` individually +for all the instances of `DgraphClientStub`. + +```python +SERVER_ADDR = "localhost:9080" + +# Create instances of DgraphClientStub. +stub1 = pydgraph.DgraphClientStub(SERVER_ADDR) +stub2 = pydgraph.DgraphClientStub(SERVER_ADDR) + +# Create an instance of DgraphClient. +client = pydgraph.DgraphClient(stub1, stub2) + +# ... +# Use client +# ... + +# Clean up resources by closing all client stubs. +stub1.close() +stub2.close() +``` + +### Setting Metadata Headers + +Metadata headers such as authentication tokens can be set through the metadata +of gRPC methods. Below is an example of how to set a header named "auth-token". + +```python +# The following piece of code shows how one can set metadata with +# auth-token, to allow Alter operation, if the server requires it. +# metadata is a list of arbitrary key-value pairs. +metadata = [("auth-token", "the-auth-token-value")] +dg.alter(op, metadata=metadata) +``` + +### Setting a timeout + +A timeout value representing the number of seconds can be passed to the `login`, +`alter`, `query`, and `mutate` methods using the `timeout` keyword argument. 
+
+For example, the following alters the schema with a timeout of ten seconds:
+`dg.alter(op, timeout=10)`
+
+### Passing credentials
+
+A `CallCredentials` object can be passed to the `login`, `alter`, `query`, and
+`mutate` methods using the `credentials` keyword argument.
+
+### Authenticating to a reverse TLS proxy
+
+If the Dgraph instance is behind a reverse TLS proxy, credentials can also be
+passed through the methods available in the gRPC library. Note that in this case
+every request will need to include the credentials. In the example below, we are
+adding authentication for a proxy that requires an API key. This value is
+expected to be included in the metadata using the key "authorization".
+
+```python
+creds = grpc.ssl_channel_credentials()
+call_credentials = grpc.metadata_call_credentials(
+    lambda context, callback: callback((("authorization", ""),), None))
+composite_credentials = grpc.composite_channel_credentials(creds, call_credentials)
+client_stub = pydgraph.DgraphClientStub(
+    '{host}:{port}'.format(host=GRPC_HOST, port=GRPC_PORT), composite_credentials)
+client = pydgraph.DgraphClient(client_stub)
+```
+
+### Async methods
+
+The `alter` method in the client has an asynchronous version called
+`async_alter`. The async methods return a future. You can directly call the
+`result` method on the future; however, the `DgraphClient` class provides a
+static method `handle_alter_future` to handle any possible exception.
+
+```python
+alter_future = self.client.async_alter(pydgraph.Operation(
+    schema="name: string @index(term) ."))
+response = pydgraph.DgraphClient.handle_alter_future(alter_future)
+```
+
+The `query` and `mutate` methods in the `Txn` class also have async versions
+called `async_query` and `async_mutate` respectively. These functions work
+just like `async_alter`.
+
+You can use the `handle_query_future` and `handle_mutate_future` static methods
+in the `Txn` class to retrieve the result. A short example is given below:
+
+```python
+txn = client.txn()
+query = "query body here"
+future = txn.async_query(query)
+response = pydgraph.Txn.handle_query_future(future)
+```
+
+A working example can be found in the `test_async.py` test file.
+
+Keep in mind that due to the nature of async calls, the async functions cannot
+retry the request if the login is invalid. You will have to check for this error
+and retry the login (with the function `retry_login` in both the `Txn` and
+`Client` classes). A short example is given below:
+
+```python
+client = DgraphClient(client_stubs)  # client_stubs is a list of gRPC stubs.
+alter_future = client.async_alter(op)  # op is a pydgraph.Operation defined earlier.
+try:
+    response = alter_future.result()
+except Exception as e:
+    # You can use this function in the util package to check for JWT
+    # expired errors.
+    if pydgraph.util.is_jwt_expired(e):
+        client.retry_login()
+        # Retry your request here.
+```
diff --git a/dgraph/reference/dql/clients/raw-http.mdx b/dgraph/reference/dql/clients/raw-http.mdx
new file mode 100644
index 00000000..5c348f4a
--- /dev/null
+++ b/dgraph/reference/dql/clients/raw-http.mdx
@@ -0,0 +1,469 @@
+---
+title: Raw HTTP
+---
+
+It's also possible to interact with Dgraph directly via its HTTP endpoints. This
+allows clients to be built for languages that don't have access to a working
+gRPC implementation.
+
+In the examples shown here, regular command line tools such as `curl` and
+[`jq`](https://stedolan.github.io/jq/) are used.
However, the real intention
+here is to show other programmers how they could implement a client in their
+language on top of the HTTP API.
+
+For an example of how to build a client on top of gRPC, refer to the
+implementation of the Go client.
+
+Similar to the Go client example, we use a bank account transfer example.
+
+## Create the Client
+
+A client built on top of the HTTP API will need to track three pieces of state
+for each transaction.
+
+1. A start timestamp (`start_ts`). This uniquely identifies a transaction, and
+   doesn't change over the transaction lifecycle.
+
+2. The set of keys modified by the transaction (`keys`). This aids in
+   transaction conflict detection.
+
+   Every mutation would send back a new set of keys. The client must merge them
+   with the existing set. Optionally, a client can de-dup these keys while
+   merging.
+
+3. The set of predicates modified by the transaction (`preds`). This aids in
+   predicate move detection.
+
+   Every mutation would send back a new set of preds. The client must merge them
+   with the existing set. Optionally, a client can de-dup these preds while
+   merging.
+
+## Alter the DQL Schema
+
+You may need to alter the DQL schema to declare predicate types, to add
+predicate search indexes and to declare the predicates expected in entities of
+a specific type.
+
+Updating the DQL schema is done by posting schema data to the `/alter` endpoint:
+
+```sh
+curl "localhost:8080/alter" --silent --request POST \
+  --data $'
+name: string @index(term) .
+release_date: datetime @index(year) .
+revenue: float .
+running_time: int .
+starring: [uid] .
+director: [uid] .
+
+type Person {
+  name
+}
+
+type Film {
+  name
+  release_date
+  revenue
+  running_time
+  starring
+  director
+}
+' | python -m json.tool
+```
+
+_Success response_
+
+```
+{
+    "data": {
+        "code": "Success",
+        "message": "Done"
+    }
+}
+```
+
+_Error response_
+
+In case of errors, the API will reply with an error message such as:
+
+```
+{
+    "errors": [
+        {
+            "extensions": {
+                "code": "Error"
+            },
+            "message": "line 5 column 18: Invalid ending"
+        }
+    ]
+}
+```
+
+
+  The request will update or create the predicates and types present in the
+  request. It will not modify or delete other schema information that may be
+  present.
+
+
+## Query current DQL schema
+
+Obtain the DQL schema by issuing a DQL query on the `/query` endpoint.
+
+```sh
+$ curl -X POST \
+  -H "Content-Type: application/dql" \
+  localhost:8080/query -d $'schema {}' | python -m json.tool
+```
+
+## Start a transaction
+
+Assume some initial accounts with balances have been populated. We now want to
+transfer money from one account to the other. This is done in four steps:
+
+1. Create a new transaction.
+
+1. Inside the transaction, run a query to determine the current balances.
+
+1. Perform a mutation to update the balances.
+
+1. Commit the transaction.
+
+Starting a transaction doesn't require any interaction with Dgraph itself. Some
+state needs to be set up for the transaction to use. The `start_ts` can
+initially be set to 0. `keys` can start as an empty set.
+
+**For both queries and mutations, if `start_ts` is provided as a query
+parameter, then the operation is performed as part of the ongoing transaction.
+Otherwise, a new transaction is initiated.**
+
+## Run a query
+
+To query the database, the `/query` endpoint is used. Remember to set the
+`Content-Type` header to `application/dql` to ensure that the body of the
+request is parsed correctly.
+
+
+  GraphQL+- has been renamed to Dgraph Query Language (DQL).
While
+  `application/dql` is the preferred value for the `Content-Type` header, we
+  will continue to support `Content-Type: application/graphql+-` to avoid making
+  breaking changes.
+
+
+To get the balances for both accounts:
+
+```sh
+$ curl -H "Content-Type: application/dql" -X POST localhost:8080/query -d $'
+{
+  balances(func: anyofterms(name, "Alice Bob")) {
+    uid
+    name
+    balance
+  }
+}' | jq
+
+```
+
+The result should look like this:
+
+```json
+{
+  "data": {
+    "balances": [
+      {
+        "uid": "0x1",
+        "name": "Alice",
+        "balance": "100"
+      },
+      {
+        "uid": "0x2",
+        "name": "Bob",
+        "balance": "70"
+      }
+    ]
+  },
+  "extensions": {
+    "server_latency": {
+      "parsing_ns": 70494,
+      "processing_ns": 697140,
+      "encoding_ns": 1560151
+    },
+    "txn": {
+      "start_ts": 4
+    }
+  }
+}
+```
+
+Notice that along with the query result under the `data` field is additional
+data in the `extensions -> txn` field. This data will have to be tracked by the
+client.
+
+For queries, there is a `start_ts` in the response. This `start_ts` will need to
+be used in all subsequent interactions with Dgraph for this transaction, and so
+should become part of the transaction state.
+
+## Run a Mutation
+
+Mutations can be done over HTTP by making a `POST` request to an Alpha's
+`/mutate` endpoint. Now that we have the current balances, we need to send a
+mutation to Dgraph with the updated balances. If Bob transfers $10 to Alice,
+then the RDFs to send are:
+
+```
+<0x1> <balance> "110" .
+<0x1> <dgraph.type> "Balance" .
+<0x2> <balance> "60" .
+<0x2> <dgraph.type> "Balance" .
+```
+
+Note that we have to refer to the Alice and Bob nodes by UID in the RDF format.
+
+We now send the mutations via the `/mutate` endpoint. We need to provide our
+transaction start timestamp as a query parameter, so that Dgraph knows which
+transaction the mutation should be part of. We also need to set the
+`Content-Type` header to `application/rdf` in order to specify that the
+mutation is written in RDF format.
+
+```sh
+$ curl -H "Content-Type: application/rdf" -X POST localhost:8080/mutate?startTs=4 -d $'
+{
+  set {
+    <0x1> <balance> "110" .
+    <0x1> <dgraph.type> "Balance" .
+    <0x2> <balance> "60" .
+    <0x2> <dgraph.type> "Balance" .
+  }
+}
+' | jq
+```
+
+The result:
+
+```json
+{
+  "data": {
+    "code": "Success",
+    "message": "Done",
+    "uids": {}
+  },
+  "extensions": {
+    "server_latency": {
+      "parsing_ns": 50901,
+      "processing_ns": 14631082
+    },
+    "txn": {
+      "start_ts": 4,
+      "keys": ["2ahy9oh4s9csc", "3ekeez23q5149"],
+      "preds": ["1-balance"]
+    }
+  }
+}
+```
+
+The result contains `keys` and `preds`, which should be added to the
+transaction state.
+
+## Committing the transaction
+
+
+  It's possible to commit immediately after a mutation is made (without
+  requiring to use the `/commit` endpoint as explained in this section). To do
+  this, add the parameter `commitNow` in the URL `/mutate?commitNow=true`.
+
+
+Finally, we can commit the transaction using the `/commit` endpoint. We need the
+`start_ts` we've been using for the transaction along with the list of `keys`
+and the list of predicates. If we had performed multiple mutations in the
+transaction instead of just one, then the keys and predicates provided during
+the commit would be the union of all keys and predicates returned in the
+responses from the `/mutate` endpoint.
+
+The `preds` field is used to abort the transaction in cases where some of the
+predicates are moved. This field is not required and the `/commit` endpoint also
+accepts the old format, which was a single array of keys.
+
+```sh
+$ curl -X POST localhost:8080/commit?startTs=4 -d $'
+{
+  "keys": [
+    "2ahy9oh4s9csc",
+    "3ekeez23q5149"
+  ],
+  "preds": [
+    "1-balance"
+  ]
+}' | jq
+```
+
+The result:
+
+```json
+{
+  "data": {
+    "code": "Success",
+    "message": "Done"
+  },
+  "extensions": {
+    "txn": {
+      "start_ts": 4,
+      "commit_ts": 5
+    }
+  }
+}
+```
+
+The transaction is now complete.
+
+If another client were to perform another transaction concurrently affecting the
+same keys, then it's possible that the transaction would _not_ be successful.
+This is indicated in the response when the commit is attempted.
+
+```json
+{
+  "errors": [
+    {
+      "code": "Error",
+      "message": "Transaction has been aborted. Please retry."
+    }
+  ]
+}
+```
+
+In this case, it should be up to the user of the client to decide if they wish
+to retry the transaction.
+
+## Aborting the transaction
+
+To abort a transaction, use the same `/commit` endpoint with the `abort=true`
+parameter while specifying the `startTs` value for the transaction.
+
+```sh
+$ curl -X POST "localhost:8080/commit?startTs=4&abort=true" | jq
+```
+
+The result:
+
+```json
+{
+  "code": "Success",
+  "message": "Done"
+}
+```
+
+## Running read-only queries
+
+You can set the query parameter `ro=true` on `/query` to mark it as a
+[read-only](./go.md#read-only-transactions) query.
+
+```sh
+$ curl -H "Content-Type: application/dql" -X POST "localhost:8080/query?ro=true" -d $'
+{
+  balances(func: anyofterms(name, "Alice Bob")) {
+    uid
+    name
+    balance
+  }
+}' | jq
+```
+
+## Running best-effort queries
+
+You can set the query parameter `be=true` on `/query` to mark it as a
+[best-effort](./go.md#read-only-transactions) query.
+
+```sh
+$ curl -H "Content-Type: application/dql" -X POST "localhost:8080/query?be=true" -d $'
+{
+  balances(func: anyofterms(name, "Alice Bob")) {
+    uid
+    name
+    balance
+  }
+}' | jq
+```
+
+## Compression via HTTP
+
+Dgraph supports gzip-compressed requests to and from Dgraph Alphas for `/query`,
+`/mutate`, and `/alter`.
+
+Compressed requests: To send compressed requests, set the HTTP request header
+`Content-Encoding: gzip` along with the gzip-compressed payload.
+
+Compressed responses: To receive gzipped responses, set the HTTP request header
+`Accept-Encoding: gzip` and Alpha will return gzipped responses.
+
+Example of a compressed request via curl:
+
+```sh
+$ curl -X POST \
+  -H 'Content-Encoding: gzip' \
+  -H "Content-Type: application/rdf" \
+  localhost:8080/mutate?commitNow=true --data-binary @mutation.gz
+```
+
+Example of a compressed response via curl:
+
+```sh
+$ curl -X POST \
+  -H 'Accept-Encoding: gzip' \
+  -H "Content-Type: application/dql" \
+  localhost:8080/query -d $'schema {}' | gzip --decompress
+```
+
+Example of a compressed request and response via curl:
+
+```sh
+$ zcat query.gz # query.gz is gzipped compressed
+{
+  all(func: anyofterms(name, "Alice Bob")) {
+    uid
+    balance
+  }
+}
+```
+
+```sh
+$ curl -X POST \
+  -H 'Content-Encoding: gzip' \
+  -H 'Accept-Encoding: gzip' \
+  -H "Content-Type: application/dql" \
+  localhost:8080/query --data-binary @query.gz | gzip --decompress
+```
+
+
+  Curl has a `--compressed` option that automatically
+  requests a compressed response (`Accept-Encoding` header) and decompresses
+  the compressed response.
+
+```sh
+curl -X POST --compressed -H "Content-Type: application/dql" localhost:8080/query -d $'schema {}'
+```
+
+
+
+## Run a query in JSON format
+
+The HTTP API also accepts requests in JSON format. For queries you have the keys
+"query" and "variables".
The JSON format is required to set
+[GraphQL Variables](./query-language/graphql-variables) with the HTTP API.
+
+This query:
+
+```
+{
+  balances(func: anyofterms(name, "Alice Bob")) {
+    uid
+    name
+    balance
+  }
+}
+```
+
+Should be escaped to this:
+
+```sh
+curl -H "Content-Type: application/json" localhost:8080/query -XPOST -d '{
+  "query": "{\n balances(func: anyofterms(name, \"Alice Bob\")) {\n uid\n name\n balance\n }\n }"
+}' | python -m json.tool | jq
+```
diff --git a/dgraph/reference/dql/clients/unofficial-clients.mdx b/dgraph/reference/dql/clients/unofficial-clients.mdx
new file mode 100644
index 00000000..ba20ba51
--- /dev/null
+++ b/dgraph/reference/dql/clients/unofficial-clients.mdx
@@ -0,0 +1,30 @@
+---
+title: Unofficial Dgraph Clients
+---
+
+
+  These third-party clients are contributed by the community and are not
+  officially supported by Dgraph.
+
+
+## Apache Spark Connector
+
+- https://github.com/G-Research/spark-dgraph-connector
+
+## Dart
+
+- https://github.com/marceloneppel/dgraph
+
+## Elixir
+
+- https://github.com/liveforeverx/dlex
+- https://github.com/ospaarmann/exdgraph
+
+## Rust
+
+- https://github.com/Swoorup/dgraph-rs
+- https://github.com/selmeci/dgraph-tonic
+
+## C#
+
+- https://github.com/schivei/dgraph4net - DQL Client with migration management
diff --git a/dgraph/reference/dql/dql-get-started.mdx b/dgraph/reference/dql/dql-get-started.mdx
new file mode 100644
index 00000000..c3a45777
--- /dev/null
+++ b/dgraph/reference/dql/dql-get-started.mdx
@@ -0,0 +1,194 @@
+---
+title: Quick Start
+---
+
+This is a quick start guide to run [DQL](./dgraph-glossary.md#RDF) queries and
+mutations. For an interactive walkthrough, take the
+[tour](https://dgraph.io/tour/).
+
+This guide helps you:
+
+- Understand how JSON data is represented as a graph
+- Query the graph using DQL
+- Use indexes
+
+## Step 1: Run Dgraph
+
+The easiest way to get Dgraph up and running is using
+[Dgraph Cloud](https://cloud.dgraph.io).
+You can log in to Dgraph Cloud using **Sign in with Google**, **Sign in with
+GitHub** or any other email account that you prefer to use.
+
+1. In the Dgraph Cloud console, click **Launch new backend**.
+1. Select a plan, cloud provider, and region that meets your requirements.
+1. Type a name for your Dgraph Cloud instance.
+1. Click **Launch**.
+1. Click **Ratel** to access the UI that provides browser-based queries,
+   mutations and visualizations.
+
+## Step 2: Run Mutation
+
+The create, update, and delete operations in Dgraph are called mutations.
+
+Ratel makes it easier to run queries and mutations.
+
+1. In the **Console** page, select the **Mutate** tab.
+2. Paste the following:
+
+```dql
+  {
+    "set": [
+      {
+        "name":"Star Wars: Episode IV - A New Hope",
+        "release_date": "1977-05-25",
+        "director": {
+          "name": "George Lucas",
+          "dgraph.type": "Person"
+        },
+        "starring" : [
+          {
+            "name": "Luke Skywalker"
+          },
+          {
+            "name": "Princess Leia"
+          },
+          {
+            "name": "Han Solo"
+          }
+        ]
+      },
+      {
+        "name":"Star Trek: The Motion Picture",
+        "release_date": "1979-12-07"
+      }
+    ]
+  }
+```
+
+The input data is in JSON format. Dgraph also supports
+[RDF](./dgraph-glossary.md#RDF) notation.
+
+The sample JSON data is an array of two movies with some attributes. These are
+stored as [Nodes](./dgraph-glossary.md#node) in Dgraph.
+
+The "Star Wars" movie has a `director` field which is a JSON object and a
+`starring` field which is an array of JSON objects.
Each object is also stored
+as a node in Dgraph. The `director` and `starring` predicates are stored as
+[relations](./dgraph-glossary.md#relation).
+
+3. Click **Run** to execute the mutation.
+
+View the Dgraph response in the JSON tab:
+
+```dql
+{
+  "data": {
+    "code": "Success",
+    "message": "Done",
+    "queries": null,
+    "uids": {
+      "dg.1119451236.100": "0xfffd8d726c1de414",
+      "dg.1119451236.101": "0xfffd8d726c1de40f",
+      "dg.1119451236.102": "0xfffd8d726c1de410",
+      "dg.1119451236.103": "0xfffd8d726c1de411",
+      "dg.1119451236.104": "0xfffd8d726c1de412",
+      "dg.1119451236.99": "0xfffd8d726c1de413"
+    }
+  }, ...
+```
+
+Dgraph displays the universal identifiers ([UID](./dgraph-glossary.md#uid)) of
+the nodes that were created.
+
+## Step 3: First query
+
+1. In the **Console** page, select the **Query** tab and run this query:
+
+```dql
+  {
+    movies(func: has(release_date)) {
+      name
+      director { name }
+      starring { name }
+    }
+  }
+```
+
+The query lists all movies that have a `release_date` and, for each, it looks
+for the `director` and `starring` relations and provides the `name` attribute
+of the related nodes, if any.
+
+2. In the response panel, select **Graph** to view a graph output:
+
+![Query result](/images/dql-quickstart/img1.png)
+
+## Step 4: Alter Schema
+
+Alter the schema to add indexes on some of the data so queries can use term
+matching, filtering and sorting.
+
+1. In the **Schema** page, select **Predicates**. Dgraph creates and displays
+   the predicates `name`, `release_date`, `director` and `starring`. A
+   [predicate](./dgraph-glossary.md#predicate) is Dgraph's internal
+   representation of a node attribute or a relation.
+2. Select the `name` predicate. Ratel displays details about the predicate type
+   and indexes.
+3. Select **index** and select **term** for the `name` predicate.
+4. Click **Update** to apply the index.
+
+![Adding an index](/images/dql-quickstart/predicate-name.png)
+
+Set the index for the `release_date`:
+
+1. Select the `release_date` predicate.
+2. Change the type to **datetime**.
+3. Select **index** and choose **year** for the index type.
+4. Click **Update** to apply the index on the `release_date` predicate.
+
+## Step 5: Queries using indexes
+
+Let's get the movies having the term "Star" in their name and released before
+"1979".
+
+In the **Console** page select the **Query** tab and run this query:
+
+```dql
+{
+  me(func: allofterms(name, "Star"), orderasc: release_date) @filter(lt(release_date, "1979")) {
+    name
+    release_date
+    revenue
+    running_time
+    director {
+      name
+    }
+    starring (orderasc: name) {
+      name
+    }
+  }
+}
+```
+
+Observe the JSON result and the graph result.
+
+You can play with the release date and the search term conditions to see Dgraph
+search and filtering in action.
+
+In these five steps, you set up Dgraph, added some data, visualized it as a
+graph, added indexes and queried the data.
+
+## Where to go from here
+
+- Take the [Tour](https://dgraph.io/tour/) for a guided tour of how to write
+  queries in Dgraph.
+- A wider range of queries can also be found in the
+  [Query Language](/query-language/index) reference.
+- Go to [Clients](/clients) to see how to communicate with Dgraph from your
+  application.
+
+## Need Help
+
+- Please use [discuss.dgraph.io](https://discuss.dgraph.io) for questions,
+  issues, feature requests, and discussions.
diff --git a/dgraph/reference/dql/dql-schema.mdx b/dgraph/reference/dql/dql-schema.mdx new file mode 100644 index 00000000..dda25530 --- /dev/null +++ b/dgraph/reference/dql/dql-schema.mdx @@ -0,0 +1,512 @@ +--- +title: Dgraph types schema +--- + +Here is an example of Dgraph types schema: + +``` +name: string @index(term) . +release_date: datetime @index(year) . +revenue: float . +running_time: int . +starring: [uid] . +director: [uid] . +description: string . + +description_vector: float32vector @index(hnsw(metric:"cosine")) . + +type Person { + name +} + +type Film { + name + release_date + revenue + running_time + starring + director + description + description_vector +} +``` + +The schema contains information about [predicate types](#predicate-types) and +[node types](#node-types). + +A [predicate](./dgraph-glossary.md#Predicate) is the smallest piece of +information about an object. A predicate can hold a literal value or a relation +to another entity : + +- when we store that an entity name is "Alice". The predicate is `name` and + predicate value is the string "Alice". +- when we store that Alice knows Bob, we may use a predicate `knows` with the + node representing Alice. The value of this predicate would be the + [uid](./dgraph-glossary.md#uid) of the node representing Bob. In that case, + `knows` is a [relationship](#relationship). + +Dgraph maintains a list of all predicates names and their type in the **Dgraph +types schema**. + +## Predicates declaration + +The Dgraph Cluster [schema mode](./schema-modes) defines if the Dgraph types +must be declared before allowing mutations or not: + +- In `strict` mode, you must declare the predicates + ([Update Dgraph types](./update-dgraph-types.md) ) before you can run a + mutation using those predicates. +- In `flexible` mode (which is the default behavior), you can run a mutation + without declaring the predicate in the DQL Schema. + + + When you deploy a [GraphQL API schema](./graphql), Dgraph generates all the underlying Dgraph types. + +Refer to [GraphQL and DQL schemas](./graphql-dql-schema) in the +[GraphQL - DQL interoperability](./graphql-dql) section for use cases using both +approaches. + + + +For example, you can run the following mutation (using the [RDF](./dql-rdf) +notation): + +```graphql +{ + set { + <_:jedi1> "Luke Skywalker" . + <_:leia> "Leia" . + <_:sith1> "Anakin" (aka="Darth Vador",villain=true). + <_:sith1> <_:jedi1> . + <_:sith1> <_:leia> . + } +} +``` + +In `strict` mode, the mutation will return an error if the predicates are not +present in the Dgraph types schema. + +In `flexible` mode, Dgraph will execute the mutation and adds the predicates +“character_name” and “has_for_child” to the Dgraph types. + +## Predicate types + +All predicate types used in a Dgraph cluster are declared in the Dgraph schema. + +The Dgraph types schema is the way to specify predicates types and cardinality +(if it is a list or not), to instruct Dgraph how to index predicates, and to +declare if Dgraph needs to maintain different languages for a string predicate. + +A predicate type is either created + +- by altering the Dgraph types schema (See + [Update Dgraph types](./update-dgraph-types.md) ) or +- during a mutation, if the Dgraph Cluster [schema mode](./schema-modes) is + `flexible` and the predicate used is not yet declared. + + If a predicate type isn't declared in the schema, then the type is inferred + from the first mutation and added to the schema. 
+
+  If the mutation is using [RDF format](./#rdf-types) with an RDF type, Dgraph
+  uses this information to infer the predicate type.
+
+  If no type can be inferred, the predicate type is set to `default`.
+
+A predicate can hold a literal value ([Scalar type](#scalar-types)) or a
+relation to another entity ([UID type](#uid-type)).
+
+### Scalar Types
+
+For all triples with a predicate of scalar types, the object is a literal.
+
+| Dgraph Type | Go type |
+| ----------- | :---------------------------------------------------------------------------------------------------------------------- |
+| `default`   | string |
+| `int`       | int64 |
+| `float`     | float |
+| `string`    | string |
+| `bool`      | bool |
+| `dateTime`  | time.Time (RFC3339 format [Optional timezone] eg: 2006-01-02T15:04:05.999999999+10:00 or 2006-01-02T15:04:05.999999999) |
+| `geo`       | [go-geom](https://github.com/twpayne/go-geom) |
+| `password`  | string (encrypted) |
+
+
+  Dgraph supports date and time formats for the `dateTime` scalar type only if
+  they are RFC 3339 compatible, which is different from ISO 8601 (as defined in
+  the RDF spec). You should convert your values to RFC 3339 format before
+  sending them to Dgraph.
+
+
+### Vector Type
+
+The `float32vector` type denotes a vector of floating point numbers, i.e. an
+ordered array of float32. A node type can contain more than one vector
+predicate.
+
+Vectors are normally used to store embeddings obtained from other information
+through an ML model. When a `float32vector` is
+[indexed](./dql/predicate-indexing.md), the DQL
+[similar_to](./query-language/functions#vector-similarity-search) function can
+be used for similarity search.
+
+### UID Type
+
+The `uid` type denotes a relationship; internally each node is identified by
+its UID, which is a `uint64`.
+
+### Predicate name rules
+
+Any alphanumeric combination of a predicate name is permitted. Dgraph also
+supports
+[Internationalized Resource Identifiers](https://en.wikipedia.org/wiki/Internationalized_Resource_Identifier)
+(IRIs). You can read more in [Predicates i18n](#predicates-i18n).
+
+
+  You can't define type names starting with `dgraph.`, it is reserved as the
+  namespace for Dgraph's internal types/predicates. For example, defining
+  `dgraph.Student` as a type is invalid.
+
+
+### Special characters
+
+The following characters are accepted if prefixed/suffixed with alphanumeric
+characters.
+
+```
+][&*()_-+=!#$%
+```
+
+_Note: You are not restricted from using an `@` suffix, but the suffix character
+gets ignored._
+
+The special characters below are not accepted.
+
+```
+^}|{`\~
+```
+
+### Predicates i18n
+
+Dgraph supports
+[Internationalized Resource Identifiers](https://en.wikipedia.org/wiki/Internationalized_Resource_Identifier)
+(IRIs) for predicate names and values.
+
+If your predicate is a URI or has language-specific characters, then enclose it
+with angle brackets `<>` when executing the schema mutation.
+
+Schema syntax:
+
+```
+<职业>: string @index(exact) .
+<年龄>: int @index(int) .
+<地点>: geo @index(geo) .
+<公司>: string .
+```
+
+This syntax allows for internationalized predicate names, but full-text indexing
+still defaults to English. To use the right tokenizer for your language, you
+need to use the `@lang` directive and enter values using your language tag.
+
+Schema:
+
+```
+<公司>: string @index(fulltext) @lang .
+```
+
+Mutation:
+
+```
+{
+  set {
+    _:a <公司> "Dgraph Labs Inc"@en .
+    _:b <公司> "夏新科技有限责任公司"@zh .
+    _:a <dgraph.type> "Company" .
+  }
+}
+```
+
+Query:
+
+```
+{
+  q(func: alloftext(<公司>@zh, "夏新科技有限责任公司")) {
+    uid
+    <公司>@.
+  }
+}
+```
+
+### Unique Directive
+
+The unique constraint enables us to guarantee that all values of a predicate are
+distinct. To implement the `@unique` directive for a predicate, you should define
+it in the schema and create an index on the predicate based on its type. If a
+user does not add the proper index to the predicate, then Dgraph will return an
+error.
+
+Dgraph will automatically include the `@upsert` directive for the predicate. To
+enforce this uniqueness constraint, a predicate must have an index, as explained
+below. Currently, we only support the `@unique` directive on newly created
+predicates with the data types `string` and `int`.
+
+| Data Type | Index |
+| --------- | ----------- |
+| string    | hash, exact |
+| int       | int |
+
+This is how you define the unique directive for a predicate.
+
+```
+email: string @unique @index(exact) .
+```
+
+### Upsert directive
+
+To use [upsert operations](./howto/upserts.md) on a predicate, specify the
+`@upsert` directive in the schema.
+
+When committing transactions involving predicates with the `@upsert` directive,
+Dgraph checks index keys for conflicts, helping to enforce uniqueness
+constraints when running concurrent upserts.
+
+This is how you specify the upsert directive for a predicate.
+
+```
+email: string @index(exact) @upsert .
+```
+
+### Noconflict directive
+
+The NoConflict directive prevents conflict detection at the predicate level.
+This is an experimental feature and not a recommended directive, but exists to
+help avoid conflicts for predicates that don't have high correctness
+requirements. This can cause data loss, especially when used for predicates with
+a count index.
+
+This is how you specify the `@noconflict` directive for a predicate.
+
+```
+email: string @index(exact) @noconflict .
+```
+
+### Predicate types from RDF Types
+
+As well as implying a schema type for a first mutation, an RDF type can override
+a schema type for storage. Dgraph supports a number of [RDF](./dql-rdf) types.
+
+If a predicate has a schema type and a mutation has an RDF type with a different
+underlying Dgraph type, the convertibility to schema type is checked, and an
+error is thrown if they are incompatible, but the value is stored in the RDF
+type's corresponding Dgraph type. Query results are always returned in schema
+type.
+
+For example, suppose no schema is set for the `age` predicate. Given the mutation
+
+```
+{
+  set {
+    _:a <age> "15"^^<xs:int> .
+    _:b <age> "13" .
+    _:c <age> "14"^^<xs:string> .
+    _:d <age> "14.5"^^<xs:string> .
+    _:e <age> "14.5" .
+  }
+}
+```
+
+Dgraph:
+
+- sets the schema type to `int`, as implied by the first triple,
+- converts `"13"` to `int` on storage,
+- checks `"14"` can be converted to `int`, but stores as `string`,
+- throws an error for the remaining two triples, because `"14.5"` can't be
+  converted to `int`.
+
+### Password type
+
+A password for an entity is set by setting the schema for the attribute to be
+of type `password`. Passwords cannot be queried directly, only checked for a
+match using the `checkpwd` function. The passwords are encrypted using
+[bcrypt](https://en.wikipedia.org/wiki/Bcrypt).
+
+For example: to set a password, first set the schema, then the password:
+
+```
+pass: password .
+```
+
+```
+{
+  set {
+    <0x123> <name> "Password Example" .
+    <0x123> <pass> "ThePassword" .
+  }
+}
+```
+
+to check a password:
+
+```
+{
+  check(func: uid(0x123)) {
+    name
+    checkpwd(pass, "ThePassword")
+  }
+}
+```
+
+output:
+
+```
+{
+  "data": {
+    "check": [
+      {
+        "name": "Password Example",
+        "checkpwd(pass)": true
+      }
+    ]
+  }
+}
+```
+
+You can also use an alias with the password type.
+
+```
+{
+  check(func: uid(0x123)) {
+    name
+    secret: checkpwd(pass, "ThePassword")
+  }
+}
+```
+
+output:
+
+```
+{
+  "data": {
+    "check": [
+      {
+        "name": "Password Example",
+        "secret": true
+      }
+    ]
+  }
+}
+```
+
+## Predicate indexing
+
+The schema is also used to set [predicate indexes](./predicate-indexing.md),
+which are required to apply [filtering functions](/query-language/functions) in
+DQL queries.
+
+## Node types
+
+Node types are declared along with [predicate types](#predicate-types) in the
+Dgraph types schema.
+
+Node types are optional.
+
+### Node type definition
+
+A node type declares the list of predicates that could be present in a node of
+this type. Node types are defined using the following syntax:
+
+```
+name: string @index(term) .
+dob: datetime .
+home_address: string .
+friends: [uid] .
+
+type Student {
+  name
+  dob
+  home_address
+  friends
+}
+```
+
+
+  All predicates used in a type must be defined in the Dgraph types schema
+  itself.
+
+
+Different node types can use the same predicates.
+
+### Reverse predicates
+
+Reverse predicates can also be included inside a type definition. For example,
+the following schema declares that a node of type Child may have a `~children`
+inverse relationship.
+
+```
+children: [uid] @reverse .
+name: string @index(term) .
+type Parent {
+  name
+  children
+}
+type Child {
+  name
+  <~children>
+}
+```
+
+ Predicates with special characters are enclosed with angle
+brackets `<>`.
+
+### Node type attribution
+
+A node is given a type by setting the `dgraph.type` predicate value to the type
+name.
+
+A node may be given many types; `dgraph.type` is an array of strings.
+
+
+  DQL types are only declarative and are not enforced by Dgraph. In DQL:
+  - you can always add a node without a `dgraph.type` predicate, that is,
+    without a type.
+  - you can always add a predicate to a node that is not declared in the
+    predicate list of the node type.
+
+
+Here's an example of a mutation to set the types of a node:
+
+```
+{
+  set {
+    _:a <name> "Garfield" .
+    _:a <dgraph.type> "Pet" .
+    _:a <dgraph.type> "Animal" .
+  }
+}
+```
+
+### When to use node types
+
+Node types are optional, but there are two use cases where actually knowing the
+list of potential predicates of a node is necessary:
+
+- deleting all the information about a node: this is the
+  `delete { <uid> * * . }` mutation.
+- retrieving all the predicates of a given node: this is done using the
+  [expand(_all_)](./query-language/expand-predicates) feature of DQL.
+
+The Dgraph node types are used in those 2 use cases: when executing the
+`delete all predicates` mutation or the `expand all` query, Dgraph will check if
+the node has a `dgraph.type` predicate. If so, the engine uses the declared
+type to find the list of predicates and applies the delete or the expand to all
+of them.
+
+When nodes have a type (i.e., have a `dgraph.type` predicate), then you can use
+the function
+[type()](./dql-query#node-criteria-used-by-root-function-or-by-filter) in
+queries.
+
+
+`delete { <uid> * * . }` will only delete the
+predicates declared in the type. You may have added other predicates by running
+DQL mutations on this node: the node may still exist after the operation if it
+holds predicates not declared in the node type.
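+
+As an illustration, here is a minimal sketch of both use cases, assuming the
+`Pet` type and the Garfield node created in the example above:
+
+```dql
+{
+  # Fetch every node typed as Pet and expand all predicates
+  # declared in the Pet type.
+  pets(func: type(Pet)) {
+    uid
+    expand(_all_)
+  }
+}
+```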
+
diff --git a/dgraph/reference/dql/dql-syntax/dql-mutation.mdx b/dgraph/reference/dql/dql-syntax/dql-mutation.mdx
new file mode 100644
index 00000000..64fa5350
--- /dev/null
+++ b/dgraph/reference/dql/dql-syntax/dql-mutation.mdx
@@ -0,0 +1,280 @@
+---
+title: DQL mutation
+---
+
+Dgraph Query Language (DQL) is Dgraph's proprietary language to add, modify,
+delete and fetch data.
+
+Fetching data is done through [DQL Queries](./dql-query). Adding, modifying or
+deleting data is done through **_DQL Mutations_**.
+
+This overview explains the structure of DQL Mutations and provides links to the
+appropriate DQL reference documentation.
+
+DQL mutations support JSON or [RDF](./dql-rdf) format.
+
+## set block
+
+In DQL, you add data using a set mutation, identified by the `set` keyword.
+
+
+
+```dql
+  {
+    "set": [
+      {
+        "name":"Star Wars: Episode IV - A New Hope",
+        "release_date": "1977-05-25",
+        "director": {
+          "name": "George Lucas",
+          "dgraph.type": "Person"
+        },
+        "starring" : [
+          {
+            "name": "Luke Skywalker"
+          },
+          {
+            "name": "Princess Leia"
+          },
+          {
+            "name": "Han Solo"
+          }
+        ]
+      },
+      {
+        "name":"Star Trek: The Motion Picture",
+        "release_date": "1979-12-07"
+      }
+    ]
+  }
+```
+
+
+
+```
+{
+  set {
+    # triples in here
+    _:n1 <name> "Star Wars: Episode IV - A New Hope" .
+    _:n1 <release_date> "1977-05-25" .
+    _:n1 <director> _:n2 .
+    _:n2 <name> "George Lucas" .
+
+  }
+}
+```
+
+Triples are in [RDF](./dql-rdf) format.
+
+### Node reference
+
+A mutation can include a blank node as an identifier for the subject or object,
+or a known UID.
+
+```
+{
+  set {
+    # triples in here
+    <0x632ea2> <release_date> "1977-05-25" .
+  }
+}
+```
+
+will add the `release_date` information to the node identified by UID
+`0x632ea2`.
+
+### Language support
+
+```
+{
+  set {
+    # triples in here
+    <0x632ea2> <name> "Star Wars, épisode IV : Un nouvel espoir"@fr .
+  }
+}
+```
+
+
+
+## delete block
+
+A delete mutation, identified by the `delete` keyword, removes
+[triples](/dql-rdf) from the store.
+
+For example, if the store contained the following:
+
+```RDF
+<0xf11168064b01135b> <name> "Lewis Carrol"
+<0xf11168064b01135b> <died> "1998"
+<0xf11168064b01135b> <dgraph.type> "Person" .
+```
+
+Then, the following delete mutation deletes the specified erroneous data, and
+removes it from any indexes:
+
+```sh
+{
+  delete {
+    <0xf11168064b01135b> <died> "1998" .
+  }
+}
+```
+
+### Wildcard delete
+
+In many cases you will need to delete multiple types of data for a predicate.
+For a particular node `N`, all data for predicate `P` (and all corresponding
+indexing) is removed with the pattern `S P *`.
+
+```sh
+{
+  delete {
+    <0xf11168064b01135b> <died> * .
+  }
+}
+```
+
+The pattern `S * *` deletes all the known edges out of a node, any reverse edges
+corresponding to the removed edges, and any indexing for the removed data.
+
+
+  For mutations that fit the `S * *` pattern, only predicates that are among the
+  types associated with a given node (using `dgraph.type`) are deleted. Any
+  predicates that don't match one of the node's types will remain after an `S *
+  *` delete mutation.
+
+
+```sh
+{
+  delete {
+    <0xf11168064b01135b> * * .
+  }
+}
+```
+
+If the node `S` in the delete pattern `S * *` has only a few predicates with a
+type defined by `dgraph.type`, then only those triples with typed predicates are
+deleted. A node that contains untyped predicates will still exist after a
+`S * *` delete mutation.
+
+
+  The patterns `* P O` and `* * O` are not supported because it's inefficient to
+  store and find all the incoming edges.
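+
+As a consolidated sketch, both supported wildcard patterns can appear in a
+single delete mutation (assuming the UID used in the examples above):
+
+```dql
+{
+  delete {
+    # S P * : drop all values of one predicate.
+    <0xf11168064b01135b> <died> * .
+    # S * * : drop all predicates declared by the node's type(s).
+    <0xf11168064b01135b> * * .
+  }
+}
+```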
+
+
+### Deletion of non-list predicates
+
+Deleting the value of a non-list predicate (i.e. a 1-to-1 relationship) can be
+done in two ways.
+
+- Using the [wildcard delete](#wildcard-delete) (star notation) mentioned in the
+  last section.
+- Setting the object to a specific value. If the value passed is not the current
+  value, the mutation will succeed but will have no effect. If the value passed
+  is the current value, the mutation will succeed and will delete the non-list
+  predicate.
+
+For language-tagged values, the following special syntax is supported:
+
+```
+{
+  delete {
+    <0x12345> <name@es> * .
+  }
+}
+```
+
+In this example, the value of the `name` field that is tagged with the language
+tag `es` is deleted. Other tagged values are left untouched.
+
+## upsert block
+
+Upsert is an operation where:
+
+1. A node is searched for, and then
+2. Depending on whether it is found or not, either:
+   - some of its attributes are updated, or
+   - a new node is created with those attributes.
+
+The upsert block allows performing queries and mutations in a single request.
+The upsert block contains one query block and one or more mutation blocks.
+
+The structure of the upsert block is as follows:
+
+```
+upsert {
+  query <query block>
+  mutation <mutation block 1>
+  [mutation <mutation block 2>]
+  ...
+}
+```
+
+Execution of an upsert block also returns the response of the query executed on
+the state of the database _before the mutation was executed_. To get the latest
+result, you have to execute another query after the transaction is committed.
+
+Variables defined in the query block can be used in the mutation blocks using
+the [uid](./uid-upsert) and [val](./val-upsert) functions.
+
+## conditional upsert
+
+The upsert block also allows specifying conditional mutation blocks using an
+`@if` directive. The mutation is executed only when the specified condition is
+true. If the condition is false, the mutation is silently ignored. The general
+structure of a Conditional Upsert looks as follows:
+
+```
+upsert {
+  query <query block>
+  [fragment <fragment>]
+  mutation [@if(<condition>)] <mutation block 1>
+  [mutation [@if(<condition>)] <mutation block 2>]
+  ...
+}
+```
+
+The `@if` directive accepts a condition on variables defined in the query block
+and can be connected using `AND`, `OR` and `NOT`.
+
+## Example of Conditional Upsert
+
+Let's say in our previous example, we know that `company1` has less than 100
+employees. For safety, we want the mutation to execute only when the variable
+`v` stores less than 100 but greater than 50 UIDs in it. This can be achieved as
+follows:
+
+```sh
+curl -H "Content-Type: application/rdf" -X POST localhost:8080/mutate?commitNow=true -d $'
+upsert {
+  query {
+    v as var(func: regexp(email, /.*@company1.io$/))
+  }
+
+  mutation @if(lt(len(v), 100) AND gt(len(v), 50)) {
+    delete {
+      uid(v) <name> * .
+      uid(v) <email> * .
+      uid(v) <age> * .
+    }
+  }
+}' | jq
+```
+
+We can achieve the same result using a `json` dataset as follows:
+
+```sh
+curl -H "Content-Type: application/json" -X POST localhost:8080/mutate?commitNow=true -d '{
+  "query": "{ v as var(func: regexp(email, /.*@company1.io$/)) }",
+  "cond": "@if(lt(len(v), 100) AND gt(len(v), 50))",
+  "delete": {
+    "uid": "uid(v)",
+    "name": null,
+    "email": null,
+    "age": null
+  }
+}' | jq
+```
diff --git a/dgraph/reference/dql/dql-syntax/dql-query.mdx b/dgraph/reference/dql/dql-syntax/dql-query.mdx
new file mode 100644
index 00000000..1d7b2179
--- /dev/null
+++ b/dgraph/reference/dql/dql-syntax/dql-query.mdx
@@ -0,0 +1,218 @@
+---
+title: DQL query
+---
+
+Fetching data with Dgraph Query Language (DQL) is done through **DQL Queries**.
+Adding, modifying or deleting data is done through
+[DQL Mutations](./dql-mutation).
+
+This overview explains the structure of DQL Queries and provides links to the
+appropriate DQL reference documentation.
+
+### DQL query structure
+
+DQL is **declarative**, which means that queries return a response back in a
+similar shape to the query. It gives the client application control of what
+it gets: the request returns exactly what you ask for, nothing less and nothing
+more. In this, DQL is similar to GraphQL, from which it is inspired.
+
+A DQL query finds nodes based on search criteria, matches patterns in the graph
+and returns the node attributes and relationships specified in the query.
+
+A DQL query has
+
+- an optional parameterization, i.e. a name and a list of parameters
+- an opening curly bracket
+- at least one [query block](./#query-block), but can contain many blocks
+- optional var blocks
+- a closing curly bracket
+
+![DQL Query with parameterization](/images/dql-syntax/query-syntax-1.png)
+
+### Query parameterization
+
+**Parameters**
+
+- must have a name starting with a `$` symbol.
+- must have a type `int`, `float`, `bool` or `string`.
+- may have a default value. In the example below, `$age` has a default value of
+  `95`
+- may be mandatory by suffixing the type with a `!`. Mandatory parameters can't
+  have a default value.
+
+Variables can be used in the query where a string, float, int or bool value is
+needed.
+
+You can also use a variable holding `uids` by using a string variable and by
+providing the value as a quoted list in square brackets:
+`query title($uidsParam: string = "[0x1, 0x2, 0x3]") { ... }`.
+
+**Error handling** When submitting a query using parameters, Dgraph responds
+with errors if
+
+- A parameter value is not parsable to the given type.
+- The query is using a parameter that is not declared.
+- A mandatory parameter is not provided.
+
+The query parameterization is optional. If you don't use parameters you can omit
+it and send only the query blocks.
+
+![DQL Query without parameters](/images/dql-syntax/query-syntax-2.png)
+
+
+  The current documentation usually uses examples of queries without
+  parameters.
+
+
+If you execute this query in our [Movies demo database](./graphql-fundamentals)
+you can see that Dgraph will return a JSON structure similar to the request:
+![DQL response structure](/images/dql-syntax/query-syntax-3.png)
+
+### Query block
+
+A query block specifies information to retrieve from Dgraph.
+
+A query block
+
+- must have a name
+- must have a node criteria defined by the keyword `func:`
+- may have ordering and pagination information
+- may have a combination of filters (to apply to the root nodes)
+- must provide the list of attributes and relationships to fetch for each node
+  matching the root nodes.
+
+Refer to [pagination](./pagination), [ordering](./sorting), and
+[connecting filters](./connecting-filters) for more information.
+
+For each relationship to fetch, the query uses a nested block.
+
+A nested block
+
+- may specify filters to apply on the related nodes
+- may specify criteria on the relationship attributes using
+  [filtering on facets](./query-language/facets.md#filtering-on-facets)
+- provides the list of relationship attributes
+  ([facets](/query-language/facets)) to fetch.
+- provides the list of attributes and relationships to fetch for the related
+  nodes.
+
+A nested block may contain another nested block, and so on, at any level.
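+
+For example, the following sketch of a query (assuming the movie data used
+elsewhere in this documentation) shows a named query block with a root
+criteria, a filter, pagination, and a nested block for the `director`
+relationship:
+
+```dql
+{
+  movies(func: has(release_date), orderasc: release_date, first: 10)
+    @filter(ge(release_date, "1977")) {
+    name
+    release_date
+    director {
+      name
+    }
+  }
+}
+```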
+
+### Escape characters in predicate names
+
+If your predicate has special characters, wrap it with angular brackets `< >` in
+the query.
+
+E.g. `<first:name>`
+
+### Formatting options
+
+Dgraph returns the attributes and relationships that you specified in the query.
+You can specify an alternate name for the result by using
+[aliases](./query-language/alias).
+
+You can flatten the response structure at any level using the
+[@normalize](/query-language/normalize-directive) directive.
+
+Entering the list of all the attributes you want to fetch can be tedious
+for large queries or repeating blocks: you may take advantage of
+[fragments](./query-language/fragments) and the
+[expand function](./query-language/expand-predicates).
+
+### Node criteria (used by root function or by filter)
+
+Root criteria and filters use [functions](./functions) applied to node
+attributes or variables.
+
+Dgraph offers functions for
+
+- testing string attributes
+  - term matching : [allofterms](./functions.md#allofterms),
+    [anyofterms](./functions.md#anyofterms)
+  - regular expression : [regexp](./functions.md#regular-expressions)
+  - fuzzy match : [match](./functions.md#fuzzy-matching)
+  - full-text search : [alloftext](./functions.md#full-text-search)
+- testing attribute value
+  - equality : [eq](./functions.md#equal-to)
+  - inequalities :
+    [le,lt,ge,gt](./functions.md#less-than-less-than-or-equal-to-greater-than-and-greater-than-or-equal-to)
+  - range : [between](./functions.md#between)
+- testing if a node
+  - has a particular predicate (an attribute or a relation) :
+    [has](./functions.md#has)
+  - has a given UID : [uid](./functions.md#uid)
+  - has a relationship to a given node : [uid_in](./functions.md#uid_in)
+  - is of a given type : type()
+- testing the number of node relationships
+  - equality : [eq](./functions.md#equal-to)
+  - inequalities :
+    [le,lt,ge,gt](./functions.md#less-than-less-than-or-equal-to-greater-than-and-greater-than-or-equal-to)
+- testing geolocation attributes
+  - if a geo location is within distance : [near](./functions.md#near)
+  - if a geo location lies within a given area : [within](./functions.md#within)
+  - if a geo area contains a given location : [contains](./functions.md#contains)
+  - if a geo area intersects a given area : [intersects](./functions.md#intersects)
+
+### Variable (`var`) block
+
+Variable blocks (`var` blocks) start with the keyword `var` instead of a block
+name.
+
+var blocks are not reflected in the query result. They are used to compute
+[query-variables](./query-variables), which are lists of node UIDs, or
+[value-variables](./value-variables), which are maps from node UIDs to the
+corresponding scalar values.
+
+Note that query-variables and value-variables can also be computed in query
+blocks. In that case, the query block is used to fetch and return data, and to
+define some variables which must be used in other blocks of the same query.
+
+Variables may be used as function parameters in filters or root criteria in
+other blocks.
+
+### Summarizing functions
+
+When dealing with array attributes or with relationships to many nodes, the
+query may use summary functions [count](./query-language/count),
+[min](./query-language/aggregation.md#min),
+[max](./query-language/aggregation.md#max),
+[avg](./query-language/aggregation.md#sum-and-avg) or
+[sum](./query-language/aggregation.md#sum-and-avg).
+
+The query may also contain
+[mathematical functions](./query-language/math-on-value-variables) on value
+variables.
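+
+As a sketch, assuming the `revenue` predicate from the schema examples in this
+documentation, a value variable can be aggregated in an empty-parentheses
+block:
+
+```dql
+{
+  # Collect revenue into a value variable without returning data.
+  var(func: has(revenue)) {
+    r as revenue
+  }
+
+  # An aggregation block returning computed values only.
+  stats() {
+    total: sum(val(r))
+    average: avg(val(r))
+  }
+}
+```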
+
+Summary functions can be used in conjunction with the
+[@groupby](./query-language/groupby) directive to create aggregated value
+variables.
+
+The query may contain an **anonymous block** to return computed values.
+**Anonymous blocks** don't have a root criteria as they are not used to search
+for nodes but only to return computed values.
+
+### Graph traversal
+
+When you specify nested blocks and filters you basically describe a way to
+traverse the graph.
+
+[@recurse](./query-language/recurse-query) and
+[@ignorereflex](./query-language/ignorereflex-directive) are directives used to
+optionally configure the graph traversal.
+
+### Pattern matching
+
+Queries with nested blocks with filters may be turned into pattern matching
+using the [@cascade](./query-language/cascade-directive) directive: nodes that
+don’t have all attributes and all relationships specified in the query at any
+sub level are not considered in the result. So only nodes "matching" the
+complete query structure are returned.
+
+### Graph algorithms
+
+The query can ask for the shortest path between a source (from) node and
+destination (to) node using the
+[shortest](./query-language/kshortest-path-queries) query block.
+
+### Comments
+
+Anything on a line following a `#` is a comment
diff --git a/dgraph/reference/dql/dql-syntax/dql-rdf.mdx b/dgraph/reference/dql/dql-syntax/dql-rdf.mdx
new file mode 100644
index 00000000..32fba16c
--- /dev/null
+++ b/dgraph/reference/dql/dql-syntax/dql-rdf.mdx
@@ -0,0 +1,185 @@
+---
+title: RDF
+---
+
+Dgraph natively supports Resource Description Framework (RDF) when creating,
+importing and exporting data. Dgraph client libraries can be used to query RDF
+as well.
+
+[RDF 1.1](https://www.w3.org/RDF/) is a Semantic Web Standard for data
+interchange defined by the W3C. It expresses statements about resources. The
+format of these statements is simple and in the form of triples.
+
+A triple has the form
+
+```
+<subject> <predicate> <object> .
+```
+
+In RDF terminology, each triple represents one fact about a node.
+
+In Dgraph, the `<subject>` of a triple is always a node, and must be a numeric
+UID. The `<object>` of a triple may be another node or a literal value:
+
+```
+<0x01> <name> "Alice" .
+<0x01> <knows> <0x02> .
+```
+
+The first triple specifies that a node has a name property of “Alice”. The
+subject is the UID of the first node, the predicate is `name`, and the object is
+the literal value string: `"Alice"`. The second triple specifies that Alice
+knows Bob. The subject is again the UID of a node (the "alice" node), the
+predicate is `knows`, and the object of this triple is the uid of the other node
+(the "bob" node). When the object is a UID, the triple represents a relationship
+in Dgraph.
+
+Each triple representation in RDF ends with a period.
+
+### Blank nodes in mutations
+
+When creating nodes in Dgraph, you often let Dgraph assign the node
+[UID](./dgraph-glossary.md#uid) by specifying a blank node starting with "_:".
+All references to the same blank node, such as `_:identifier123`, will identify
+the same node within a mutation. Dgraph creates a UID identifying each blank
+node.
+
+### Language for string values
+
+Languages are written using `@lang`. For example
+
+```
+<0x01> <name> "Adelaide"@en .
+<0x01> <name> "Аделаида"@ru .
+<0x01> <name> "Adélaïde"@fr .
+<0x01> <dgraph.type> "Person" .
+```
+
+See also
+[how language strings are handled in queries](./query-language/graphql-fundamentals.md#language-support).
+
+### Types
+
+Dgraph understands standard RDF types specified in RDF using the `^^` separator.
+For example
+
+```
+<0x01> <age> "32"^^<xs:int> .
+<0x01> "1985-06-08"^^ . +``` + +The supported +[RDF datatypes](https://www.w3.org/TR/rdf11-concepts/#section-Datatypes) and the +corresponding internal Dgraph type are as follows. + +| Storage Type | Dgraph type | +| -------------------------------------------------------------- | :---------: | +| <xs:string> | `string` | +| <xs:dateTime> | `dateTime` | +| <xs:date> | `datetime` | +| <xs:int> | `int` | +| <xs:integer> | `int` | +| <xs:boolean> | `bool` | +| <xs:double> | `float` | +| <xs:float> | `float` | +| <geo:geojson> | `geo` | +| <xs:password> | `password` | +| <http://www.w3.org/2001/XMLSchema#string> | `string` | +| <http://www.w3.org/2001/XMLSchema#dateTime> | `dateTime` | +| <http://www.w3.org/2001/XMLSchema#date> | `dateTime` | +| <http://www.w3.org/2001/XMLSchema#int> | `int` | +| <http://www.w3.org/2001/XMLSchema#positiveInteger> | `int` | +| <http://www.w3.org/2001/XMLSchema#integer> | `int` | +| <http://www.w3.org/2001/XMLSchema#boolean> | `bool` | +| <http://www.w3.org/2001/XMLSchema#double> | `float` | +| <http://www.w3.org/2001/XMLSchema#float> | `float` | + +### Facets + +Dgraph is more expressive than RDF in that it allows properties to be stored on +every relation. These properties are called Facets in Dgraph, and dgraph allows +an extension to RDF where facet values are incuded in any triple. + +#### Creating a list with facets + +The following set operation uses a sequence of RDF statements with additional +facet information: + +```sh +{ + set { + _:Julian "Julian" . + _:Julian "Jay-Jay" (kind="first") . + _:Julian "Jules" (kind="official") . + _:Julian "JB" (kind="CS-GO") . + } +} +``` + +```graphql +{ + q(func: eq(name,"Julian")){ + name + nickname @facets + } +} +``` + +Result: + +```JSON +{ + "data": { + "q": [ + { + "name": "Julian", + "nickname|kind": { + "0": "first", + "1": "official", + "2": "CS-GO" + }, + "nickname": [ + "Jay-Jay", + "Jules", + "JB" + ] + } + ] + } +} +``` + + + Dgraph can automatically generate a reverse relation. If the user wants to run + queries in that direction, they would define the [reverse + relationship](./dql-schema.md#reverse-edges) + + +## N-quads format + +While most RDF data uses only triples (with three parts) an optional fourth part +is allowed. This fourth component in RDF is called a graph label, and in Dgraph +it must be the UID of the namespace that the data should go into as described in +[Multi-tenancy](./cloud-multitenancy). + +## Processing RDF to comply with Dgraph syntax for subjects + +While it is valid RDF to specify subjects that are IRI strings, Dgraph requires +a numeric UID or a blank node as the subject. If a string IRI is required, +Dgraph support them via [xid properties](./external-ids-upsert-block). When +importing RDF from another source that does not use numeric UID subjects, it +will be required to replace arbitrary subject IRIs with blank node IRIs. + +Typically this is done simply by prepending "\_:" to the start of the original +IRI. So a triple such as: + +` "somevalue"^^xs:string` + +may be rewritten as + +`<_:http://abc.org/schema/foo#item1> "somevalue"^^xs:string` + +Dgraph will create a consistent UID for all references to the uniquely-named +blank node. To maintain this uniqueness over multiple data loads, use the +[dgraph live](./dgraph-glossary.md#uid) utility with the xid option, or use +specific UIDs such as the hash of the IRI in the source RDF directly. 
+## Processing RDF to comply with Dgraph syntax for subjects
+
+While it is valid RDF to specify subjects that are IRI strings, Dgraph requires
+a numeric UID or a blank node as the subject. If a string IRI is required,
+Dgraph supports them via [xid properties](./external-ids-upsert-block). When
+importing RDF from another source that does not use numeric UID subjects, you
+must replace the arbitrary subject IRIs with blank node labels.
+
+Typically this is done simply by prepending "\_:" to the start of the original
+IRI. So a triple such as (the predicate here is illustrative):
+
+`<http://abc.org/schema/foo#item1> <somePredicate> "somevalue"^^xs:string`
+
+may be rewritten as
+
+`<_:http://abc.org/schema/foo#item1> <somePredicate> "somevalue"^^xs:string`
+
+Dgraph will create a consistent UID for all references to the uniquely-named
+blank node. To maintain this uniqueness over multiple data loads, use the
+[dgraph live](./dgraph-glossary.md#uid) utility with the xid option, or use
+specific UIDs such as the hash of the IRI in the source RDF directly.
diff --git a/dgraph/reference/dql/dql-syntax/index.mdx b/dgraph/reference/dql/dql-syntax/index.mdx
new file mode 100644
index 00000000..bfb8e2cf
--- /dev/null
+++ b/dgraph/reference/dql/dql-syntax/index.mdx
@@ -0,0 +1,10 @@
+---
+title: DQL syntax
+---
+
+Dgraph Query Language (DQL) is Dgraph’s proprietary language to add, modify,
+delete and fetch data.
+
+Fetching data is done through [DQL Queries](./dql-query). Adding, modifying or
+deleting data is done through [DQL Mutations](./dql-mutation).
diff --git a/dgraph/reference/dql/dql-syntax/json-mutation-format.mdx b/dgraph/reference/dql/dql-syntax/json-mutation-format.mdx
new file mode 100644
index 00000000..1741e463
--- /dev/null
+++ b/dgraph/reference/dql/dql-syntax/json-mutation-format.mdx
@@ -0,0 +1,436 @@
+---
+title: JSON Mutation Format
+---
+
+Dgraph supports [Mutations](./dql-mutation) in JSON or [RDF](./dql-rdf) format.
+When using the JSON format, Dgraph creates nodes and relationships from the
+JSON structure and assigns UIDs to nodes.
+
+## Specifying node UIDs
+
+For example, if you run this mutation:
+
+```dql
+ {
+  "set": [
+    {
+      "name": "diggy",
+      "dgraph.type": "Mascot"
+    }
+  ]
+ }
+```
+
+You will see that Dgraph responds with
+
+```json
+{
+  "data": {
+    "code": "Success",
+    "message": "Done",
+    "queries": null,
+    "uids": {
+      "dg.3162278161.22055": "0xfffd8d72745f0650"
+    }
+  }
+}
+```
+
+This means that Dgraph created one node from the JSON, used the identifier
+`dg.3162278161.22055` during the transaction, and assigned the node the final
+UID value `0xfffd8d72745f0650`.
+
+You can control the identifier name by specifying a `uid` field in your JSON
+data and using the notation: `"uid" : "_:<identifier>"`
+
+In this mutation, there are two JSON objects, and because they refer to the
+same identifier, Dgraph creates only one node:
+
+```dql
+ {
+  "set": [
+    {
+      "uid": "_:diggy",
+      "name": "diggy",
+      "dgraph.type": "Mascot"
+    },
+    {
+      "uid": "_:diggy",
+      "specie": "badger"
+    }
+  ]
+ }
+```
+
+When you run this mutation, you can see that Dgraph returns the UID of the node
+that was created with the `diggy` identifier:
+
+```json
+{
+  "data": {
+    "code": "Success",
+    "message": "Done",
+    "queries": null,
+    "uids": { "diggy": "0xfffd8d72745f0691" }
+  }
+}
+```
+
+Note that the `specie` field is added to the node already created with the
+`name` and `dgraph.type` information.
+
+### Referencing existing nodes
+
+You can use the `"uid"` field to reference an existing node. To do so, you must
+specify the UID value of the node.
+
+For example:
+
+```dql
+ {
+  "set": [
+    {
+      "uid": "0xfffd8d72745f0650",
+      "specie": "badger"
+    }
+  ]
+ }
+```
+
+This adds the `specie` information to the node that was created earlier.
+
+## Language support
+
+To set a string value for a specific language, append the language tag to the
+field name. If the `specie` predicate has the `@lang` directive, given the JSON
+mutation
+
+```dql
+ {
+  "set": [
+    {
+      "uid": "_:diggy",
+      "name": "diggy",
+      "dgraph.type": "Mascot",
+      "specie@en" : "badger",
+      "specie@fr" : "blaireau"
+    }
+  ]
+ }
+```
+
+Dgraph sets the `specie` string predicate in English and in French.
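+You can then fetch a specific language value in a query, for example (a sketch
+that assumes `name` carries an index supporting `eq`):
+
+```graphql
+{
+  q(func: eq(name, "diggy")) {
+    specie@fr
+    specie@en
+  }
+}
+```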
+## Geolocation support
+
+Geo-location data must be specified using the keys `type` and `coordinates` in
+the JSON document. The supported types are `Point`, `Polygon`, and
+`MultiPolygon`.
+
+```dql
+ {
+  "set": [
+    {
+      "name": "diggy",
+      "dgraph.type": "Mascot",
+      "home" : {
+        "type": "Point",
+        "coordinates": [-122.475537, 37.769229 ]
+      }
+    }
+  ]
+ }
+```
+
+## Relationships
+
+Relationships are simply created from the nested structure of JSON.
+
+For example:
+
+```dql
+ {
+  "set": [
+    {
+      "uid": "_:diggy",
+      "name": "diggy",
+      "dgraph.type": "Mascot",
+      "food" : [
+        {
+          "uid":"_:f1",
+          "name": "earthworms"
+        },
+        {
+          "uid":"_:f2",
+          "name": "apples"
+        }]
+    }
+  ]
+ }
+```
+
+This results in the creation of three nodes, with the `food` predicate as a
+relationship:
+
+```json
+{
+  "data": {
+    "code": "Success",
+    "message": "Done",
+    "queries": null,
+    "uids": {
+      "diggy": "0xfffd8d72745f06d7",
+      "f1": "0xfffd8d72745f06d8",
+      "f2": "0xfffd8d72745f06d9"
+    }
+  }
+}
+```
+
+You can use references to existing nodes at any level of your nested JSON.
+
+## Deleting literal values
+
+To delete node predicates, specify the UID of the node you are changing and set
+the predicates to delete to the JSON value `null`.
+
+For example, to remove the predicate `name` from node `0xfffd8d72745f0691`:
+
+```dql
+{
+  "delete": [
+    {
+      "uid": "0xfffd8d72745f0691",
+      "name": null
+    }
+  ]
+}
+```
+
+## Deleting relationships
+
+A relationship can be defined with a cardinality of 1 or many (list). Setting a
+relationship to `null` removes all the relationships:
+
+```JSON
+{
+  "uid": "0xfffd8d72745f06d7",
+  "food": null
+}
+```
+
+To delete a single relationship in a list, you must specify the target node of
+the relationship. The mutation
+
+```dql
+{
+  "delete": [
+    {
+      "uid": "0xfffd8d72745f06d7",
+      "food": {
+        "uid": "0xfffd8d72745f06d9"
+      }
+    }
+  ]
+}
+```
+
+deletes only one `food` relationship.
+
+To delete all predicates of a given node:
+
+- make sure the node has a `dgraph.type` predicate
+- make sure the type is defined in the [Dgraph types schema](./dql-schema)
+- run a delete mutation specifying only the `uid` field
+
+```JSON
+{
+  "delete": [
+    {
+      "uid": "0x123"
+    }
+  ]
+}
+```
+
+## Handling arrays
+
+To create a predicate as a list of strings:
+
+```JSON
+{
+  "set": [
+    {
+      "testList": [
+        "Grape",
+        "Apple",
+        "Strawberry",
+        "Banana",
+        "watermelon"
+      ]
+    }
+  ]
+}
+```
+
+Suppose `0x6` is the UID of the node created.
+
+To remove one value from the list:
+
+```JSON
+{
+  "delete": {
+    "uid": "0x6", #UID of the list.
+    "testList": "Apple"
+  }
+}
+```
+
+To remove multiple values:
+
+```JSON
+{
+  "delete": {
+    "uid": "0x6",
+    "testList": [
+      "Strawberry",
+      "Banana",
+      "watermelon"
+    ]
+  }
+}
+```
+
+To add a value:
+
+```JSON
+{
+  "uid": "0x6", #UID of the list.
+  "testList": "Pineapple"
+}
+```
+
+## Adding Facets
+
+Facets can be created by using the `|` character to separate the predicate and
+facet key in a JSON object field name. This is the same encoding schema used to
+show facets in query results. E.g.
+
+```JSON
+{
+  "name": "Carol",
+  "name|initial": "C",
+  "dgraph.type": "Person",
+  "friend": {
+    "name": "Daryl",
+    "friend|close": "yes",
+    "dgraph.type": "Person"
+  }
+}
+```
+
+Facets do not contain type information, but Dgraph will try to guess a type
+from the input. If the value of a facet can be parsed as a number, it will be
+converted to either a float or an int. If it can be parsed as a Boolean, it
+will be stored as a Boolean. If the value is a string, it will be stored as a
+datetime if the string matches one of the time formats that Dgraph recognizes
+(YYYY, MM-YYYY, DD-MM-YYYY, RFC3339, etc.) and as a double-quoted string
+otherwise.
+If you do not want to risk your facet data being misinterpreted as a time
+value, it is best to store numeric data as either an int or a float.
+
+## Deleting Facets
+
+To delete a facet, overwrite it. When you run a mutation for the same entity
+without a facet, the existing facet is deleted automatically.
+
+## Facets in List
+
+Schema:
+
+```sh
+<name>: string @index(exact) .
+<nickname>: [string] .
+```
+
+To create a list-type predicate, you need to specify all the values in a single
+list. Facets for all predicate values should be specified together, in map
+format: the map key is the index of each predicate value inside the list, and
+the map value is the corresponding facet value. Predicate values that do not
+have a facet are simply missing from the facet map. E.g.
+
+```JSON
+{
+  "set": [
+    {
+      "uid": "_:Julian",
+      "name": "Julian",
+      "nickname": ["Jay-Jay", "Jules", "JB"],
+      "nickname|kind": {
+        "0": "first",
+        "1": "official",
+        "2": "CS-GO"
+      }
+    }
+  ]
+}
+```
+
+Above, we insert three values into the list with their respective facets. You
+can run this query to check the list with facets:
+
+```graphql
+{
+  q(func: eq(name,"Julian")) {
+    uid
+    nickname @facets
+  }
+}
+```
+
+Later, if you want to add more values with facets, just follow the same
+procedure, but this time instead of using a blank node you must use the actual
+node's UID.
+
+```JSON
+{
+  "set": [
+    {
+      "uid": "0x3",
+      "nickname|kind": "Internet",
+      "nickname": "@JJ"
+    }
+  ]
+}
+```
+
+And the final result is:
+
+```JSON
+{
+  "data": {
+    "q": [
+      {
+        "uid": "0x3",
+        "nickname|kind": {
+          "0": "first",
+          "1": "Internet",
+          "2": "official",
+          "3": "CS-GO"
+        },
+        "nickname": [
+          "Jay-Jay",
+          "@JJ",
+          "Jules",
+          "JB"
+        ]
+      }
+    ]
+  }
+}
+```
+
+## Reserved values
+
+The string values `uid(...)` and `val(...)` are not accepted.
diff --git a/dgraph/reference/dql/dql-syntax/to-sort.md.txt b/dgraph/reference/dql/dql-syntax/to-sort.md.txt
new file mode 100644
index 00000000..f4e3206f
--- /dev/null
+++ b/dgraph/reference/dql/dql-syntax/to-sort.md.txt
@@ -0,0 +1,631 @@
+
+TO DO
+--> what if a non-mandatory parameter is not provided but is used in the query!
+--> what if a parameter is declared but never used in the query
+
+TO-DO : which notation should be used to describe the grammar (BNF ?)
+* `query title($name: string!, $age: int = "95") { ... }`
+
+Example of a query using variables:
+
+{{< runnable vars="{\"$a\": \"5\", \"$b\": \"10\", \"$name\": \"Steven Spielberg\"}
+query test($a: int, $b: int, $name: string) {
+  me(func: allofterms(name@en, $name)) {
+    name@en
+    director.film (first: $a, offset: $b) {
+      name @en
+      genre(first: $a) {
+        name@en
+      }
+    }
+  }
+}
+```
+
+Example of variables used in an array:
+
+{{< runnable vars="{\"$b\": \"10\", \"$aName\": \"Steven Spielberg\", \"$bName\": \"Quentin Tarantino\"}
+query test($a: int = 2, $b: int!, $aName: string, $bName: string) {
+  me(func: eq(name@en, [$aName, $bName])) {
+    director.film (first: $a, offset: $b) {
+      genre(first: $a) {
+        name@en
+      }
+    }
+  }
+}
+```
+
+## Submitting queries to Dgraph server
+< TO DO : explain http and grpc endpoints and clients - give links to client page >
+### Error Codes
+
+When running a DQL query you might get an error message from the `/query` endpoint.
+Here we will be focusing on the error `"code"` returned in the JSON error object.
+
+You can usually get two types of error codes:
+- [`ErrorInvalidRequest`](#errorinvalidrequest): this error can be either a bad request (`400`) or an internal server error (`500`).
+- [`Error`](#error): this is an internal server error (`500`) + +For example, if you submit a query with a syntax error, you'll get: + +```json +{ + "errors": [ + { + "message": "while lexing {\nq(func: has(\"test)){\nuid\n}\n} at line 2 column 12: Unexpected end of input.", + "extensions": { + "code": "ErrorInvalidRequest" + } + } + ], + "data": null +} +``` +The error `"code"` value is returned with the query response. +In this case, it's a syntax error and the error `code` is `ErrorInvalidRequest`. + +##### `Error` + +This is a rare code to get and it's always an internal server error (`500`). +This can happen when JSON marsharling is failing (it's returned when the system tries to marshal a Go struct to JSON) + +##### `ErrorInvalidRequest` + +This is the most common error code that you can get from the `/query` endpoint. This error can be either a bad request (`400`) or an internal server error (`500`). + +For example, you can get this error: +- If the query parameter is not being parsed correctly. The query parameter could be: + - `debug` + - `timeout` + - `startTs` + - `be` (best effort) + - `ro` (read-only) + - If the value of these query parameters is incorrect you would get this error code. This is basically a bad request (`400`) +- If the header's `Content-Type` value is not parsed correctly. The only allowed content types in the header are: + - `application/json` + - `application/dql` + - `application/graphql+-` (deprecated) + - Anything else will be wrongly parsed and end up in a bad request (`400`) +- Query timeout (deadline exceeded). This is an internal server error (`500`) +- Any error in query processing like: + - syntax error - bad request (`400`) + - health failing (server not healthy) - internal server error (`500`) + - Alpha not able to reach zero because of network issue - internal server error (`500`) + - ACL error (user not found or user does not have privileges) - unauthenticated/unauthorized request (`401` or `403`) + - if you set `be=true` and `ro=false` - bad request (`400`) + - any error related to JSON formatting the response - internal server error (`500`) + + +## Submitting queries to Dgraph +### Grpc +### HTTP Raw + + + + +For **HTTP requests** with parameters, we must use `Content-Type: application/json` header and pass data with a JSON object containing `query` and `variables`. + +```sh +curl -H "Content-Type: application/json" localhost:8080/query -XPOST -d $'{ + "query": "query test($a: string) { test(func: eq(name, $a)) { \n uid \n name \n } }", + "variables": { "$a": "Alice" } +}' | python -m json.tool | less +``` + +{{< runnable vars="{\"$a\": \"5\", \"$b\": \"10\", \"$name\": \"Steven Spielberg\"} +query test($a: int, $b: int, $name: string) { + me(func: allofterms(name@en, $name)) { + name@en + director.film (first: $a, offset: $b) { + name @en + genre(first: $a) { + name@en + } + } + } +} +``` + + +* Any variable that is being used must be declared in the named query clause in the beginning. + +{{< runnable vars="{\"$b\": \"10\", \"$name\": \"Steven Spielberg\"} +query test($a: int = 2, $b: int!, $name: string) { + me(func: allofterms(name@en, $name)) { + director.film (first: $a, offset: $b) { + genre(first: $a) { + name@en + } + } + } +} +``` +### Clients + + + +### conditional upsert +## Example of Conditional Upsert + +Let's say in our previous example, we know the `company1` has less than 100 employees. +For safety, we want the mutation to execute only when the variable `v` stores less than +100 but greater than 50 UIDs in it. 
This can be achieved as follows:
+
+```sh
+curl -H "Content-Type: application/rdf" -X POST localhost:8080/mutate?commitNow=true -d $'
+upsert {
+  query {
+    v as var(func: regexp(email, /.*@company1.io$/))
+  }
+
+  mutation @if(lt(len(v), 100) AND gt(len(v), 50)) {
+    delete {
+      uid(v) <name> * .
+      uid(v) <email> * .
+      uid(v) <age> * .
+    }
+  }
+}' | jq
+```
+
+We can achieve the same result using `json` dataset as follows:
+
+```sh
+curl -H "Content-Type: application/json" -X POST localhost:8080/mutate?commitNow=true -d '{
+  "query": "{ v as var(func: regexp(email, /.*@company1.io$/)) }",
+  "cond": "@if(lt(len(v), 100) AND gt(len(v), 50))",
+  "delete": {
+    "uid": "uid(v)",
+    "name": null,
+    "email": null,
+    "age": null
+  }
+}' | jq
+```
+
+## Example of Multiple Mutation Blocks
+
+Consider an example with the following schema:
+
+```sh
+curl localhost:8080/alter -X POST -d $'
+  name: string @index(term) .
+  email: [string] @index(exact) @upsert .' | jq
+```
+
+Let's say we have many users stored in our database, each having one or more
+email addresses. Now, we get two email addresses that belong to the same user.
+If the email addresses belong to different nodes in the database, we want to
+delete the existing nodes and create a new node with both emails attached to
+it. Otherwise, we create or update the existing node with both emails.
+
+```sh
+curl -H "Content-Type: application/rdf" -X POST localhost:8080/mutate?commitNow=true -d $'
+upsert {
+  query {
+    # filter is needed to ensure that we do not get same UIDs in u1 and u2
+    q1(func: eq(email, "user_email1@company1.io")) @filter(not(eq(email, "user_email2@company1.io"))) {
+      u1 as uid
+    }
+
+    q2(func: eq(email, "user_email2@company1.io")) @filter(not(eq(email, "user_email1@company1.io"))) {
+      u2 as uid
+    }
+
+    q3(func: eq(email, "user_email1@company1.io")) @filter(eq(email, "user_email2@company1.io")) {
+      u3 as uid
+    }
+  }
+
+  # case when both emails do not exist
+  mutation @if(eq(len(u1), 0) AND eq(len(u2), 0) AND eq(len(u3), 0)) {
+    set {
+      _:user <name> "user" .
+      _:user <dgraph.type> "Person" .
+      _:user <email> "user_email1@company1.io" .
+      _:user <email> "user_email2@company1.io" .
+    }
+  }
+
+  # case when email1 exists but email2 does not
+  mutation @if(eq(len(u1), 1) AND eq(len(u2), 0) AND eq(len(u3), 0)) {
+    set {
+      uid(u1) <email> "user_email2@company1.io" .
+    }
+  }
+
+  # case when email1 does not exist but email2 exists
+  mutation @if(eq(len(u1), 0) AND eq(len(u2), 1) AND eq(len(u3), 0)) {
+    set {
+      uid(u2) <email> "user_email1@company1.io" .
+    }
+  }
+
+  # case when both emails exist and need merging
+  mutation @if(eq(len(u1), 1) AND eq(len(u2), 1) AND eq(len(u3), 0)) {
+    set {
+      _:user <name> "user" .
+      _:user <dgraph.type> "Person" .
+      _:user <email> "user_email1@company1.io" .
+      _:user <email> "user_email2@company1.io" .
+    }
+
+    delete {
+      uid(u1) <name> * .
+      uid(u1) <email> * .
+      uid(u2) <name> * .
+      uid(u2) <email> * .
+    }
+  }
+}' | jq
+```
+
+Result (when database is empty):
+
+```json
+{
+  "data": {
+    "q1": [],
+    "q2": [],
+    "q3": [],
+    "code": "Success",
+    "message": "Done",
+    "uids": {
+      "user": "0x1"
+    }
+  },
+  "extensions": {...}
+}
+```
+
+Result (both emails exist and are attached to different nodes):
+
+```json
+{
+  "data": {
+    "q1": [
+      {
+        "uid": "0x2"
+      }
+    ],
+    "q2": [
+      {
+        "uid": "0x3"
+      }
+    ],
+    "q3": [],
+    "code": "Success",
+    "message": "Done",
+    "uids": {
+      "user": "0x4"
+    }
+  },
+  "extensions": {...}
+}
+```
+
+Result (when both emails exist and are already attached to the same node):
+
+```json
+{
+  "data": {
+    "q1": [],
+    "q2": [],
+    "q3": [
+      {
+        "uid": "0x4"
+      }
+    ],
+    "code": "Success",
+    "message": "Done",
+    "uids": {}
+  },
+  "extensions": {...}
+}
+```
+
+We can achieve the same result using `json` dataset as follows:
+
+```sh
+curl -H "Content-Type: application/json" -X POST localhost:8080/mutate?commitNow=true -d '{
+  "query": "{q1(func: eq(email, \"user_email1@company1.io\")) @filter(not(eq(email, \"user_email2@company1.io\"))) {u1 as uid} \n q2(func: eq(email, \"user_email2@company1.io\")) @filter(not(eq(email, \"user_email1@company1.io\"))) {u2 as uid} \n q3(func: eq(email, \"user_email1@company1.io\")) @filter(eq(email, \"user_email2@company1.io\")) {u3 as uid}}",
+  "mutations": [
+    {
+      "cond": "@if(eq(len(u1), 0) AND eq(len(u2), 0) AND eq(len(u3), 0))",
+      "set": [
+        {
+          "uid": "_:user",
+          "name": "user",
+          "dgraph.type": "Person"
+        },
+        {
+          "uid": "_:user",
+          "email": "user_email1@company1.io",
+          "dgraph.type": "Person"
+        },
+        {
+          "uid": "_:user",
+          "email": "user_email2@company1.io",
+          "dgraph.type": "Person"
+        }
+      ]
+    },
+    {
+      "cond": "@if(eq(len(u1), 1) AND eq(len(u2), 0) AND eq(len(u3), 0))",
+      "set": [
+        {
+          "uid": "uid(u1)",
+          "email": "user_email2@company1.io",
+          "dgraph.type": "Person"
+        }
+      ]
+    },
+    {
+      "cond": "@if(eq(len(u1), 0) AND eq(len(u2), 1) AND eq(len(u3), 0))",
+      "set": [
+        {
+          "uid": "uid(u2)",
+          "email": "user_email1@company1.io",
+          "dgraph.type": "Person"
+        }
+      ]
+    },
+    {
+      "cond": "@if(eq(len(u1), 1) AND eq(len(u2), 1) AND eq(len(u3), 0))",
+      "set": [
+        {
+          "uid": "_:user",
+          "name": "user",
+          "dgraph.type": "Person"
+        },
+        {
+          "uid": "_:user",
+          "email": "user_email1@company1.io",
+          "dgraph.type": "Person"
+        },
+        {
+          "uid": "_:user",
+          "email": "user_email2@company1.io",
+          "dgraph.type": "Person"
+        }
+      ],
+      "delete": [
+        {
+          "uid": "uid(u1)",
+          "name": null,
+          "email": null
+        },
+        {
+          "uid": "uid(u2)",
+          "name": null,
+          "email": null
+        }
+      ]
+    }
+  ]
+}' | jq
+```
+
+## reverse edge ??
+Any outgoing edge in Dgraph can be reversed using the `@reverse` directive in
+the schema and queried using the tilde as the prefix of the edge name, e.g.
+`<~myEdge>`.
+
+Dgraph serializes directed graphs. This means that all properties always point
+from an entity to another entity or value in a single direction: `S P -> O`.
+
+Reverse edges are automatically generated edges and are not part of your
+dataset. This means that you cannot run mutations directly on the reverse
+edges. Mutating the forward edge will automatically update the reverse edge.
+
+**Using Reverse Edges correctly**
+
+In RDF the arrangement of the triples already defines what can be reversed.
+
+```RDF
+_:MyObject <myEdge> _:BlankNode . #That's the right syntax of a reverse edge.
+_:BlankNode <dgraph.type> "Person" .
+```
+
+The easiest way to create and use reverse edges is using JSON: simply add the
+`@reverse` directive to the desired edge in the schema, as in the sketch below.
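+A minimal schema sketch (the edge name is illustrative):
+
+```
+myEdge: uid @reverse .
+```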
When building your mutations, remember that there is no reverse syntax in JSON.
+What you should do is similar to RDF: change the arrangement of the JSON
+objects.
+
+Since `MyObject` is above the `Person` entity, `MyObject` must come first when
+formatting the mutation.
+
+```JSON
+{
+  "set": [
+    {
+      "uid": "_:MyObject",
+      "dgraph.type": "Object",
+      "myEdge": {
+        "uid": "_:BlankNode",
+        "dgraph.type": "Person"
+      }
+    }
+  ]
+}
+```
+
+Another way to do this is to separate the mutation into small chunks/batches
+and use blank nodes as references. This facilitates the organization and reuse
+of references.
+
+```JSON
+{
+  "set": [
+    {
+      "uid": "_:MyObject",
+      "dgraph.type": "Object",
+      "myEdge": [{"uid": "_:BlankNode"}]
+    },
+    {
+      "uid": "_:BlankNode",
+      "dgraph.type": "Person"
+    }
+  ]
+}
+```
+
+### More reverse examples
+
+In RDF the correct way to apply reverse edges is straightforward.
+
+```RDF
+name: string .
+husband: uid @reverse .
+wife: uid @reverse .
+parent: [uid] @reverse .
+```
+
+```RDF
+{
+  set {
+    _:Megalosaurus <name> "Earl Sneed Sinclair" .
+    _:Megalosaurus <dgraph.type> "Dinosaur" .
+    _:Megalosaurus <wife> _:Allosaurus .
+    _:Allosaurus <name> "Francis Johanna Phillips Sinclair" (short="Fran") .
+    _:Allosaurus <dgraph.type> "Dinosaur" .
+    _:Allosaurus <husband> _:Megalosaurus .
+    _:Hypsilophodon <name> "Robert Mark Sinclair" (short="Robbie") .
+    _:Hypsilophodon <dgraph.type> "Dinosaur" .
+    _:Hypsilophodon <parent> _:Allosaurus (role="son") .
+    _:Hypsilophodon <parent> _:Megalosaurus (role="son") .
+    _:Protoceratops <name> "Charlene Fiona Sinclair" .
+    _:Protoceratops <dgraph.type> "Dinosaur" .
+    _:Protoceratops <parent> _:Allosaurus (role="daughter") .
+    _:Protoceratops <parent> _:Megalosaurus (role="daughter") .
+    _:MegalosaurusBaby <name> "Baby Sinclair" (short="Baby") .
+    _:MegalosaurusBaby <dgraph.type> "Dinosaur" .
+    _:MegalosaurusBaby <parent> _:Allosaurus (role="son") .
+    _:MegalosaurusBaby <parent> _:Megalosaurus (role="son") .
+  }
+}
+```
+
+The directions are like:
+
+```rdf
+Exchanged hierarchy:
+  Object -> Parent;
+  Object <~ Parent; #Reverse
+  Children to parents via "parent" edge.
+  wife and husband bidirectional using reverse.
+Normal hierarchy:
+  Parent -> Object;
+  Parent <~ Object; #Reverse
+  This hierarchy is not part of the example, but is generally used in all graph models.
+  To make this hierarchy we need to bring the hierarchical relationship starting from the parents and not from the children. Instead of using the edges "wife" and "husband" we switch to a single edge called "married" to simplify the model.
+  _:Megalosaurus <name> "Earl Sneed Sinclair" .
+  _:Megalosaurus <dgraph.type> "Dinosaur" .
+  _:Megalosaurus <married> _:Allosaurus .
+  _:Megalosaurus <children> _:Hypsilophodon (role="son") .
+  _:Megalosaurus <children> _:Protoceratops (role="daughter") .
+  _:Megalosaurus <children> _:MegalosaurusBaby (role="son") .
+  _:Allosaurus <name> "Francis Johanna Phillips Sinclair" (short="Fran") .
+  _:Allosaurus <dgraph.type> "Dinosaur" .
+  _:Allosaurus <married> _:Megalosaurus .
+  _:Allosaurus <children> _:Hypsilophodon (role="son") .
+  _:Allosaurus <children> _:Protoceratops (role="daughter") .
+  _:Allosaurus <children> _:MegalosaurusBaby (role="son") .
+```
+
+### Queries
+
+1. `wife_husband` is the reversed `wife` edge.
+2. `husband` is an actual edge.
+
+```graphql
+{
+  q(func: has(wife)) {
+    name
+    WF as wife {
+      name
+    }
+  }
+  reverseIt(func: uid(WF)) {
+    name
+    wife_husband : ~wife {
+      name
+    }
+    husband {
+      name
+    }
+  }
+}
+```
+
+1. `Children` is the reversed `parent` edge.
+
+```graphql
+{
+  q(func: has(name)) @filter(eq(name, "Earl Sneed Sinclair")){
+    name
+    Children : ~parent @facets {
+      name
+    }
+  }
+}
+```
+
+### Reverse Edges and Facets
+
+Facets on reverse edges are the same as on the forward edge.
That is, if you set or update a facet on an edge, its reverse will have the
+same facets.
+
+```rdf
+{
+  set {
+    _:Megalosaurus <name> "Earl Sneed Sinclair" .
+    _:Megalosaurus <dgraph.type> "Dinosaur" .
+    _:Megalosaurus <wife> _:Allosaurus .
+    _:Megalosaurus <parent> _:MegalosaurusBaby (role="parent -> child") .
+    _:MegalosaurusBaby <name> "Baby Sinclair" (short="Baby -> parent") .
+    _:MegalosaurusBaby <dgraph.type> "Dinosaur" .
+    _:MegalosaurusBaby <parent> _:Megalosaurus (role="child -> parent") .
+  }
+}
+```
+
+Using a similar query from the previous example:
+
+```graphql
+{
+  Parent(func: has(name)) @filter(eq(name, "Earl Sneed Sinclair")){
+    name
+    C as Children : parent @facets {
+      name
+    }
+  }
+  Child(func: uid(C)) {
+    name
+    parent @facets {
+      name
+    }
+  }
+}
+```
+
+```json
+{
+  "data": {
+    "Parent": [
+      {
+        "name": "Earl Sneed Sinclair",
+        "Children": [
+          {
+            "name": "Baby Sinclair",
+            "Children|role": "parent -> child"
+          }
+        ]
+      }
+    ],
+    "Child": [
+      {
+        "name": "Baby Sinclair",
+        "parent": [
+          {
+            "name": "Earl Sneed Sinclair",
+            "parent|role": "child -> parent"
+          }
+        ]
+      }
+    ]
+  }
+}
+```
+## bulk delete to put in drop data
+
+Drop data in admin ?
+Individual triples, patterns of triples and predicates can be deleted as
+described in the [DQL docs](./mutations/delete).
diff --git a/dgraph/reference/dql/index.mdx b/dgraph/reference/dql/index.mdx
new file mode 100644
index 00000000..96c1c496
--- /dev/null
+++ b/dgraph/reference/dql/index.mdx
@@ -0,0 +1,9 @@
+---
+title: Dgraph Query Language
+description:
+  Dgraph Query Language (DQL) is Dgraph’s proprietary language to add, modify,
+  delete and fetch data
+---
+
+Dgraph Query Language (DQL) is Dgraph’s proprietary language to add, modify,
+delete and fetch data.
diff --git a/dgraph/reference/dql/mutations/external-ids-upsert-block.mdx b/dgraph/reference/dql/mutations/external-ids-upsert-block.mdx
new file mode 100644
index 00000000..6221dd3a
--- /dev/null
+++ b/dgraph/reference/dql/mutations/external-ids-upsert-block.mdx
@@ -0,0 +1,88 @@
+---
+title: External IDs and Upsert Block
+---
+
+The upsert block makes managing external IDs easy.
+
+Set the schema:
+
+```
+xid: string @index(exact) .
+<http://schema.org/name>: string @index(exact) .
+<http://schema.org/type>: [uid] @reverse .
+```
+
+Set the type first of all:
+
+```
+{
+  set {
+    _:blank <xid> "http://schema.org/Person" .
+    _:blank <dgraph.type> "ExternalType" .
+  }
+}
+```
+
+Now you can create a new person and attach its type using the upsert block.
+
+```
+ upsert {
+   query {
+     var(func: eq(xid, "http://schema.org/Person")) {
+       Type as uid
+     }
+     var(func: eq(<http://schema.org/name>, "Robin Wright")) {
+       Person as uid
+     }
+   }
+   mutation {
+     set {
+       uid(Person) <xid> "https://www.themoviedb.org/person/32-robin-wright" .
+       uid(Person) <http://schema.org/type> uid(Type) .
+       uid(Person) <http://schema.org/name> "Robin Wright" .
+       uid(Person) <dgraph.type> "Person" .
+     }
+   }
+ }
+```
+
+You can also delete a person and detach the relation between the Type and the
+Person node. It's the same as above, but you use the keyword "delete" instead
+of "set". "`http://schema.org/Person`" will remain but "`Robin Wright`" will be
+deleted.
+
+```
+ upsert {
+   query {
+     var(func: eq(xid, "http://schema.org/Person")) {
+       Type as uid
+     }
+     var(func: eq(<http://schema.org/name>, "Robin Wright")) {
+       Person as uid
+     }
+   }
+   mutation {
+     delete {
+       uid(Person) <xid> "https://www.themoviedb.org/person/32-robin-wright" .
+       uid(Person) <http://schema.org/type> uid(Type) .
+       uid(Person) <http://schema.org/name> "Robin Wright" .
+       uid(Person) <dgraph.type> "Person" .
+     }
+   }
+ }
+```
+
+Query by user.
+
+```
+{
+  q(func: eq(<http://schema.org/name>, "Robin Wright")) {
+    uid
+    xid
+    <http://schema.org/name>
+    <http://schema.org/type> {
+      uid
+      xid
+    }
+  }
+}
+```
diff --git a/dgraph/reference/dql/mutations/index.mdx b/dgraph/reference/dql/mutations/index.mdx
new file mode 100644
index 00000000..98af14dc
--- /dev/null
+++ b/dgraph/reference/dql/mutations/index.mdx
@@ -0,0 +1,5 @@
+---
+title: Mutations
+---
+
+Adding or removing data in Dgraph is called a mutation.
diff --git a/dgraph/reference/dql/mutations/uid-upsert.mdx b/dgraph/reference/dql/mutations/uid-upsert.mdx
new file mode 100644
index 00000000..ac40be65
--- /dev/null
+++ b/dgraph/reference/dql/mutations/uid-upsert.mdx
@@ -0,0 +1,212 @@
+---
+title: uid function in upsert
+---
+
+The upsert block contains one query block and one or more mutation blocks.
+Variables defined in the query block can be used in the mutation blocks using
+the `uid` and `val` functions.
+
+The `uid` function allows extracting UIDs from variables defined in the query
+block. There are two possible outcomes based on the results of executing the
+query block:
+
+- If the variable is empty, i.e. no node matched the query, the `uid` function
+  returns a new UID in case of a `set` operation and is thus treated similarly
+  to a blank node. On the other hand, for a `delete/del` operation, it returns
+  no UID, and thus the operation becomes a no-op and is silently ignored. A
+  blank node gets the same UID across all the mutation blocks.
+- If the variable stores one or more than one UIDs, the `uid` function returns
+  all the UIDs stored in the variable. In this case, the operation is performed
+  on all the UIDs returned, one at a time.
+
+## Example of `uid` Function
+
+Consider an example with the following schema:
+
+```sh
+curl localhost:8080/alter -X POST -d $'
+  name: string @index(term) .
+  email: string @index(exact, trigram) @upsert .
+  age: int @index(int) .' | jq
+```
+
+Now, let's say we want to create a new user with `email` and `name`
+information. We also want to make sure that one email has exactly one
+corresponding user in the database. To achieve this, we need to first query
+whether a user exists in the database with the given email. If a user exists,
+we use its UID to update the `name` information. If the user doesn't exist, we
+create a new user and update the `email` and `name` information.
+
+We can do this using the upsert block as follows:
+
+```sh
+curl -H "Content-Type: application/rdf" -X POST localhost:8080/mutate?commitNow=true -d $'
+upsert {
+  query {
+    q(func: eq(email, "user@company1.io")) {
+      v as uid
+      name
+    }
+  }
+
+  mutation {
+    set {
+      uid(v) <name> "first last" .
+      uid(v) <email> "user@company1.io" .
+    }
+  }
+}' | jq
+```
+
+Result:
+
+```json
+{
+  "data": {
+    "q": [],
+    "code": "Success",
+    "message": "Done",
+    "uids": {
+      "uid(v)": "0x1"
+    }
+  },
+  "extensions": {...}
+}
+```
+
+The query part of the upsert block stores the UID of the user with the provided
+email in the variable `v`. The mutation part then extracts the UID from the
+variable `v`, and stores the `name` and `email` information in the database. If
+the user exists, the information is updated. If the user doesn't exist, `uid(v)`
+is treated as a blank node and a new user is created as explained above.
+
+If we run the same mutation again, the data would just be overwritten, and no
+new UID is created. Note that the `uids` map is empty in the result when the
+mutation is executed again, and the `data` map (key `q`) contains the UID that
+was created in the previous upsert.
+
+```json
+{
+  "data": {
+    "q": [
+      {
+        "uid": "0x1",
+        "name": "first last"
+      }
+    ],
+    "code": "Success",
+    "message": "Done",
+    "uids": {}
+  },
+  "extensions": {...}
+}
+```
+
+We can achieve the same result using `json` dataset as follows:
+
+```sh
+curl -H "Content-Type: application/json" -X POST localhost:8080/mutate?commitNow=true -d '
+{
+  "query": "{ q(func: eq(email, \"user@company1.io\")) {v as uid, name} }",
+  "set": {
+    "uid": "uid(v)",
+    "name": "first last",
+    "email": "user@company1.io"
+  }
+}' | jq
+```
+
+Now, we want to add the `age` information for the same user having the same
+email `user@company1.io`. We can use the upsert block to do the same as follows:
+
+```sh
+curl -H "Content-Type: application/rdf" -X POST localhost:8080/mutate?commitNow=true -d $'
+upsert {
+  query {
+    q(func: eq(email, "user@company1.io")) {
+      v as uid
+    }
+  }
+
+  mutation {
+    set {
+      uid(v) <age> "28" .
+    }
+  }
+}' | jq
+```
+
+Result:
+
+```json
+{
+  "data": {
+    "q": [
+      {
+        "uid": "0x1"
+      }
+    ],
+    "code": "Success",
+    "message": "Done",
+    "uids": {}
+  },
+  "extensions": {...}
+}
+```
+
+Here, the query block queries for a user with `email` as `user@company1.io`. It
+stores the `uid` of the user in variable `v`. The mutation block then updates
+the `age` of the user by extracting the uid from the variable `v` using the
+`uid` function.
+
+We can achieve the same result using `json` dataset as follows:
+
+```sh
+curl -H "Content-Type: application/json" -X POST localhost:8080/mutate?commitNow=true -d $'
+{
+  "query": "{ q(func: eq(email, \\"user@company1.io\\")) {v as uid} }",
+  "set":{
+    "uid": "uid(v)",
+    "age": "28"
+  }
+}' | jq
+```
+
+If we want to execute the mutation only when the user exists, we could use
+[Conditional Upsert](./dql-mutation.md#conditional-upsert).
+
+## Bulk Delete Example
+
+Let's say we want to delete all the users of `company1` from the database. This
+can be achieved in just one query using the upsert block as follows:
+
+```sh
+curl -H "Content-Type: application/rdf" -X POST localhost:8080/mutate?commitNow=true -d $'
+upsert {
+  query {
+    v as var(func: regexp(email, /.*@company1.io$/))
+  }
+
+  mutation {
+    delete {
+      uid(v) <name> * .
+      uid(v) <email> * .
+      uid(v) <age> * .
+    }
+  }
+}' | jq
+```
+
+We can achieve the same result using `json` dataset as follows:
+
+```sh
+curl -H "Content-Type: application/json" -X POST localhost:8080/mutate?commitNow=true -d '{
+  "query": "{ v as var(func: regexp(email, /.*@company1.io$/)) }",
+  "delete": {
+    "uid": "uid(v)",
+    "name": null,
+    "email": null,
+    "age": null
+  }
+}' | jq
+```
diff --git a/dgraph/reference/dql/mutations/val-upsert.mdx b/dgraph/reference/dql/mutations/val-upsert.mdx
new file mode 100644
index 00000000..abd4ffbe
--- /dev/null
+++ b/dgraph/reference/dql/mutations/val-upsert.mdx
@@ -0,0 +1,77 @@
+---
+title: val function in upsert
+---
+
+The upsert block allows performing queries and mutations in a single request.
+The upsert block contains one query block and one or more mutation blocks.
+Variables defined in the query block can be used in the mutation blocks using
+the `uid` and `val` functions.
+
+The `val` function allows extracting values from value variables. Value
+variables store a mapping from UIDs to their corresponding values. Hence,
+`val(v)` is replaced by the value stored in the mapping for the UID (subject)
+in the N-Quad. If the variable `v` has no value for a given UID, the mutation
+is silently ignored.
The `val` function can be used with the result of aggregate
+variables as well, in which case all the UIDs in the mutation are updated with
+the aggregate value.
+
+## Example of `val` Function
+
+Let's say we want to migrate the predicate `age` to `other`. We can do this
+using the following mutation:
+
+```sh
+curl -H "Content-Type: application/rdf" -X POST localhost:8080/mutate?commitNow=true -d $'
+upsert {
+  query {
+    v as var(func: has(age)) {
+      a as age
+    }
+  }
+
+  mutation {
+    # we copy the values from the old predicate
+    set {
+      uid(v) <other> val(a) .
+    }
+
+    # and we delete the old predicate
+    delete {
+      uid(v) <age> * .
+    }
+  }
+}' | jq
+```
+
+Result:
+
+```json
+{
+  "data": {
+    "code": "Success",
+    "message": "Done",
+    "uids": {}
+  },
+  "extensions": {...}
+}
+```
+
+Here, variable `a` will store a mapping from all the UIDs to their `age`. The
+mutation block then stores the corresponding value of `age` for each UID in the
+`other` predicate and deletes the `age` predicate.
+
+We can achieve the same result using `json` dataset as follows (note that the
+query block must define both `v` and `a`):
+
+```sh
+curl -H "Content-Type: application/json" -X POST localhost:8080/mutate?commitNow=true -d $'{
+  "query": "{ v as var(func: has(age)) { a as age } }",
+  "delete": {
+    "uid": "uid(v)",
+    "age": null
+  },
+  "set": {
+    "uid": "uid(v)",
+    "other": "val(a)"
+  }
+}' | jq
+```
diff --git a/dgraph/reference/dql/predicate-indexing.mdx b/dgraph/reference/dql/predicate-indexing.mdx
new file mode 100644
index 00000000..d16bb5ca
--- /dev/null
+++ b/dgraph/reference/dql/predicate-indexing.mdx
@@ -0,0 +1,283 @@
+---
+title: Predicate indexes
+---
+
+Filtering on a predicate by applying a [function](./query-language/functions)
+requires an index.
+
+Indices are defined in the [Dgraph types schema](./dql/dql-schema) using the
+`@index` directive.
+
+Here are some examples:
+
+```
+name: string @index(term) .
+release_date: dateTime @index(year) .
+description_vector: float32vector @index(hnsw(metric:"cosine")) .
+```
+
+When filtering by applying a function, Dgraph uses the index to make the search
+through a potentially large dataset efficient.
+
+All scalar types can be indexed.
+
+Types `int`, `float`, `bool` and `geo` have only a default index each, with
+tokenizers named `int`, `float`, `bool` and `geo`.
+
+Types `string` and `dateTime` have a number of indices.
+
+Type `float32vector` supports the `hnsw` index.
+
+## String Indices
+
+The indices available for strings are as follows.
+
+| Dgraph function | Required index / tokenizer | Notes |
+| :------------------------- | :------------------------------------- | :----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `eq` | `hash`, `exact`, `term`, or `fulltext` | The most performant index for `eq` is `hash`. Only use `term` or `fulltext` if you also require term or full-text search. If you're already using `term`, there is no need to use `hash` or `exact` as well. |
+| `le`, `ge`, `lt`, `gt` | `exact` | Allows faster sorting. |
+| `allofterms`, `anyofterms` | `term` | Allows searching by a term in a sentence. |
+| `alloftext`, `anyoftext` | `fulltext` | Matching with language specific stemming and stopwords. |
+| `regexp` | `trigram` | Regular expression matching. Can also be used for equality checking. |
+
+
+ Incorrect index choice can impose performance penalties and an increased
+ transaction conflict rate. Use only the fewest and simplest indexes that your
+ application needs.
+
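+For example, to filter people by words in their name, a `term` index is
+sufficient (a minimal sketch; the predicate and value are illustrative):
+
+```
+name: string @index(term) .
+```
+
+```
+{
+  q(func: allofterms(name, "Steven Spielberg")) {
+    name
+  }
+}
+```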
+
+## Vector Indices
+
+The indices available for `float32vector` are as follows.
+
+| Dgraph function | Required index / tokenizer | Notes |
+| :-------------- | :------------------------- | :------------------------------------------------------ |
+| `similar_to` | `hnsw` | HNSW index supports parameters `metric` and `exponent`. |
+
+The `hnsw` (**Hierarchical Navigable Small World**) index supports the
+following parameters:
+
+- metric: indicates the metric used to compute vector similarity. One of
+  `cosine`, `euclidean`, and `dotproduct`. Default is `euclidean`.
+
+- exponent: an integer, represented as a string, roughly representing the
+  number of vectors expected in the index as a power of 10. The exponent value
+  is used to set "reasonable defaults" for HNSW internal tuning parameters.
+  Default is "4" (10^4 vectors).
+
+Here are some examples:
+
+```
+simple_vector: float32vector @index(hnsw) .
+description_vector: float32vector @index(hnsw(metric:"cosine")) .
+large_vector: float32vector @index(hnsw(metric:"euclidean",exponent:"6")) .
+```
+
+## DateTime Indices
+
+The indices available for `dateTime` are as follows.
+
+| Index name / Tokenizer | Part of date indexed |
+| :--------------------- | :--------------------------------- |
+| `year` | index on year (default) |
+| `month` | index on year and month |
+| `day` | index on year, month and day |
+| `hour` | index on year, month, day and hour |
+
+The choice of `dateTime` index allows selecting the precision of the index.
+Applications, such as the movies examples in these docs, that require searching
+over dates but have relatively few nodes per year may prefer the `year`
+tokenizer; applications that depend on fine-grained date searches, such as
+real-time sensor readings, may prefer the `hour` index.
+
+All the `dateTime` indices are sortable.
+
+## Sortable Indices
+
+Not all indices establish a total order among the values that they index.
+Sortable indices allow inequality functions and sorting.
+
+- Indexes `int` and `float` are sortable.
+- `string` index `exact` is sortable.
+- All `dateTime` indices are sortable.
+
+For example, given an edge `name` of `string` type, to sort by `name` or
+perform inequality filtering on names, the `exact` index must have been
+specified, in which case a schema query returns at least the following
+tokenizers:
+
+```
+{
+  "predicate": "name",
+  "type": "string",
+  "index": true,
+  "tokenizer": [
+    "exact"
+  ]
+}
+```
+
+## Count index
+
+For predicates with `@count` specified in the schema, Dgraph indexes the number
+of edges out of each node. This enables fast queries of the form:
+
+```
+{
+  q(func: gt(count(pred), threshold)) {
+    ...
+  }
+}
+```
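+Here the counted predicate must be declared with `@count`, for example (a
+minimal schema sketch; the predicate name is illustrative):
+
+```
+friend: [uid] @count .
+```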
For example: `["e1", "e1", "e2"]` may + get stored as `["e2", "e1"]`, i.e., duplicate values will not be stored and + order may not be preserved. + +## Filtering on list + +Dgraph supports filtering based on the list. Filtering works similarly to how it +works on edges and has the same available functions. + +For example, `@filter(eq(occupations, "Teacher"))` at the root of the query or +the parent edge will display all the occupations from a list of each node in an +array but will only include nodes which have `Teacher` as one of the +occupations. However, filtering on value edge is not supported. + +## Reverse Edges + +A graph edge is unidirectional. For node-node edges, sometimes modeling requires +reverse edges. If only some subject-predicate-object triples have a reverse, +these must be manually added. But if a predicate always has a reverse, Dgraph +computes the reverse edges if `@reverse` is specified in the schema. + +The reverse edge of `anEdge` is `~anEdge`. + +For existing data, Dgraph computes all reverse edges. For data added after the +schema mutation, Dgraph computes and stores the reverse edge for each added +triple. + +``` +type Person { + name +} +type Car { + regnbr + owner +} +owner: uid @reverse . +regnbr: string @index(exact) . +name: string @index(exact) . +``` + +This makes it possible to query Persons and their cars by using: + +``` +q(func: type(Person)) { + name + ~owner { regnbr } +} +``` + +To get a different key than `~owner` in the result, the query can be written +with the wanted label (`cars` in this case): + +``` +q(func: type(Person)) { + name + cars: ~owner { regnbr } +} +``` + +This also works if there are multiple "owners" of a `car`: + +``` +owner [uid] @reverse . +``` + +In both cases the `owner` edge should be set on the `Car`: + +``` +_:p1 "Mary" . +_:p1 "Person" . +_:c1 "ABC123" . +_:c1 "Car" . +_:c1 _:p1 . +``` + +## Querying Schema + +A schema query queries for the whole schema: + +``` +schema {} +``` + + + Unlike regular queries, the schema query is not surrounded by curly braces. + Also, schema queries and regular queries cannot be combined. + + +You can query for particular schema fields in the query body. + +``` +schema { + type + index + reverse + tokenizer + list + count + upsert + lang +} +``` + +You can also query for particular predicates: + +``` +schema(pred: [name, friend]) { + type + index + reverse + tokenizer + list + count + upsert + lang +} +``` + + + If ACL is enabled, then the schema query returns only the predicates for which + the logged-in ACL user has read access. + + +Types can also be queried. Below are some example queries. + +``` +schema(type: Movie) {} +schema(type: [Person, Animal]) {} +``` + +Note that type queries do not contain anything between the curly braces. The +output will be the entire definition of the requested types. diff --git a/dgraph/reference/dql/tips/index.mdx b/dgraph/reference/dql/tips/index.mdx new file mode 100644 index 00000000..4ce7d7c1 --- /dev/null +++ b/dgraph/reference/dql/tips/index.mdx @@ -0,0 +1,76 @@ +--- +title: "DQL: Tips and Tricks" +--- + +## Get Sample Data + +Use the `has` function to get some sample nodes. + +````json { result(func: has(director.film), first: 10) { uid +expand(_all_) } } ``` + +## Count number of connecting nodes + +Use `expand(_all_)` to expand the nodes' edges, then assign them to a variable. +The variable can now be used to iterate over the unique neighboring nodes. Then +use `count(uid)` to count the number of nodes in a block. 
+
+## Count number of connecting nodes
+
+Use `expand(_all_)` to expand the nodes' edges, then assign them to a variable.
+The variable can now be used to iterate over the unique neighboring nodes. Then
+use `count(uid)` to count the number of nodes in a block.
+
+```dql
+{
+  uids(func: has(director.film), first: 1) {
+    uid
+    expand(_all_) {
+      u as uid
+    }
+  }
+
+  result(func: uid(u)) {
+    count(uid)
+  }
+}
+```
+
+## Search on non-indexed predicates
+
+Use the `has` function among the value variables to search on non-indexed
+predicates.
+
+```dql
+{
+  var(func: has(festival.date_founded)) {
+    p as festival.date_founded
+  }
+  query(func: eq(val(p), "1961-01-01T00:00:00Z")) {
+    uid
+    name@en
+    name@ru
+    name@pl
+    festival.date_founded
+    festival.focus {
+      name@en
+    }
+    festival.individual_festivals {
+      total : count(uid)
+    }
+  }
+}
+```
+
+## Sort edge by nested node values
+
+Dgraph [sorting](./query-language/sorting) is based on a single level of the
+subgraph. To sort a level by the values of a deeper level, use
+[query variables](./query-language/query-variables) to bring nested values up to
+the level of the edge to be sorted.
+
+Example: Get all actors from a Steven Spielberg movie sorted alphabetically. The
+actor's name is not accessed from a single traversal from the `starring` edge;
+the name is accessible via `performance.actor`.
+
+```dql
+{
+  spielbergMovies as var(func: allofterms(name@en, "steven spielberg")) {
+    name@en
+    director.film (orderasc: name@en, first: 1) {
+      starring {
+        performance.actor {
+          ActorName as name@en
+        }
+        # Stars is a uid-to-value map mapping
+        # starring edges to performance.actor names
+        Stars as min(val(ActorName))
+      }
+    }
+  }
+
+  movies(func: uid(spielbergMovies)) @cascade {
+    name@en
+    director.film (orderasc: name@en, first: 1) {
+      name@en
+      starring (orderasc: val(Stars)) {
+        performance.actor {
+          name@en
+        }
+      }
+    }
+  }
+}
+```
+
+## Obtain unique results by using variables
+
+To obtain unique results, assign the node's edge to a variable. The variable can
+now be used to iterate over the unique nodes.
+
+Example: Get all unique genres from all of the movies directed by Steven
+Spielberg.
+
+```dql
+{
+  var(func: eq(name@en, "Steven Spielberg")) {
+    director.film {
+      genres as genre
+    }
+  }
+
+  q(func: uid(genres)) {
+    name@.
+  }
+}
+```
+
+## Usage of checkpwd boolean
+
+Store the result of `checkpwd` in a query variable and then match it against `1`
+(`checkpwd` is `true`) or `0` (`checkpwd` is `false`).
+
+```dql
+{
+  exampleData(func: has(email)) {
+    uid
+    email
+    check as checkpwd(pass, "1bdfhJHb!fd")
+  }
+  userMatched(func: eq(val(check), 1)) {
+    uid
+    email
+  }
+  userIncorrect(func: eq(val(check), 0)) {
+    uid
+    email
+  }
+}
+```
diff --git a/dgraph/reference/enterprise-features/access-control-lists.mdx b/dgraph/reference/enterprise-features/access-control-lists.mdx
new file mode 100644
index 00000000..a2789357
--- /dev/null
+++ b/dgraph/reference/enterprise-features/access-control-lists.mdx
@@ -0,0 +1,877 @@
+---
+title: Access Control Lists
+---
+
+
+ This feature was introduced in
+ [v1.1.0](https://github.com/dgraph-io/dgraph/releases/tag/v1.1.0). The `dgraph
+ acl` command is deprecated and will be removed in a future release. ACL
+ changes can be made by using the `/admin` GraphQL endpoint on any Alpha node.
+
+
+Access Control List (ACL) provides access protection to your data stored in
+Dgraph. When the ACL feature is enabled, a client, e.g.
+[dgo](https://github.com/dgraph-io/dgo) or
+[dgraph4j](https://github.com/dgraph-io/dgraph4j), must authenticate with a
+username and password before executing any transactions, and is only allowed to
+access the data permitted by the ACL rules.
+
+## Enable enterprise ACL feature
+
+1.
Generate a data encryption key that is 32 bytes long: + + ```bash + tr -dc 'a-zA-Z0-9' < /dev/urandom | dd bs=1 count=32 of=enc_key_file + ``` + + + On a macOS you may have to use `LC_CTYPE=C; tr -dc 'a-zA-Z0-9' < /dev/urandom | dd bs=1 count=32 of=enc_key_file`. + + +1. To view the secret key value use `cat enc_key_file`. +1. Create a plain text file named `hmac_secret_file`, and store a randomly + generated `` in it. The secret key is used by Dgraph Alpha + nodes to sign JSON Web Tokens (JWT). + + ```bash + echo '' > hmac_secret_file + ``` + +1. Start all the Dgraph Alpha nodes in your cluster with the option + `--acl secret-file="/path/to/secret"`, and make sure that they are all using + the same secret key file created in Step 1. Alternatively, you can + [store the secret in Hashicorp Vault](#storing-acl-secret-in-hashicorp-vault). + + ```bash + dgraph alpha --acl "secret-file=/path/to/secret" --security "whitelist=" + ``` + + +In addition to command line flags +`--acl secret-file="/path/to/secret"` and +`--security "whitelist="`, you can also configure Dgraph +using a configuration file (`config.yaml`, `config.json`). You can also use +environment variables, i.e. `DGRAPH_ALPHA_ACL="secret-file="` +and `DGRAPH_ALPHA_SECURITY="whitelist="`. See +[Config](./deploy/config) for more information in general about configuring +Dgraph. + + +### Example using Dgraph CLI + +Here is an example that starts a Dgraph Zero node and a Dgraph Alpha node with +the ACL feature turned on. You can run these commands in a separate terminal +tab: + +```bash +## Create ACL secret key file with 32 ASCII characters +echo '' > hmac_secret_file + +## Start Dgraph Zero in different terminal tab or window +dgraph zero --my=localhost:5080 --replicas 1 --raft idx=1 + +## Start Dgraph Alpha in different terminal tab or window +dgraph alpha --my=localhost:7080 --zero=localhost:5080 \ + --acl secret-file="./hmac_secret_file" \ + --security whitelist="10.0.0.0/8,172.0.0.0/8,192.168.0.0/16" +``` + +### Example using Docker Compose + +If you are using [Docker Compose](https://docs.docker.com/compose/), you can set +up a sample Dgraph cluster using this `docker-compose.yaml` configuration: + +```yaml +version: "3.5" +services: + alpha1: + command: dgraph alpha --my=alpha1:7080 --zero=zero1:5080 + container_name: alpha1 + environment: + DGRAPH_ALPHA_ACL: secret-file=/dgraph/acl/hmac_secret_file + DGRAPH_ALPHA_SECURITY: whitelist=10.0.0.0/8,172.0.0.0/8,192.168.0.0/16 + image: dgraph/dgraph:latest + ports: + - "8080:8080" + volumes: + - ./hmac_secret_file:/dgraph/acl/hmac_secret_file + zero1: + command: dgraph zero --my=zero1:5080 --replicas 1 --raft idx=1 + container_name: zero1 + image: dgraph/dgraph:latest +``` + +You can run this with: + +```bash +## Create ACL secret key file with 32 ASCII characters +echo '' > hmac_secret_file + +## Start Docker Compose +docker-compose up +``` + +### Example using Kubernetes Helm Chart + +If you deploy Dgraph on [Kubernetes](https://kubernetes.io/), you can configure +the ACL feature using the +[Dgraph Helm Chart](https://artifacthub.io/packages/helm/dgraph/dgraph). + +The first step is to encode the secret with base64: + +```bash +## encode a secret without newline character and copy to the clipboard +printf '' | base64 +``` + +The next step is that we need to create a [Helm](https://helm.sh/) chart config +values file, e.g. `dgraph_values.yaml`. 
We want to copy the results of encoded +secret as paste this into the `hmac_secret_file` like the example below: + +```yaml +## dgraph_values.yaml +alpha: + acl: + enabled: true + file: + hmac_secret_file: + configFile: + config.yaml: | + acl: + secret_file: /dgraph/acl/hmac_secret_file + security: + whitelist: 10.0.0.0/8,172.0.0.0/8,192.168.0.0/16 +``` + +Now with the Helm chart config values created, we can deploy Dgraph: + +```bash +helm repo add "dgraph" https://charts.dgraph.io +helm install "my-release" --values ./dgraph_values.yaml dgraph/dgraph +``` + +## Storing ACL secret in Hashicorp Vault + +You can save the ACL secret on [Hashicorp Vault](https://www.vaultproject.io/) +server instead of saving the secret on the local file system. + +### Configuring a Hashicorp Vault Server + +Do the following to set up on the +[Hashicorp Vault](https://www.vaultproject.io/) server for use with Dgraph: + +1. Ensure that the Vault server is accessible from Dgraph Alpha and configured + using URL `http://fqdn[ip]:port`. +2. Enable [AppRole Auth method](https://www.vaultproject.io/docs/auth/approle) + and enable [KV Secrets Engine](https://www.vaultproject.io/docs/secrets/kv). +3. Save the 256-bits (32 ASCII characters) long ACL secret in a KV Secret path + ([K/V Version 1](https://www.vaultproject.io/docs/secrets/kv/kv-v1) or + [K/V Version 2](https://www.vaultproject.io/docs/secrets/kv/kv-v2)). For + example, you can upload this below to KV Secrets Engine Version 2 path of + `secret/data/dgraph/alpha`: + ```json + { + "options": { + "cas": 0 + }, + "data": { + "hmac_secret_file": "" + } + } + ``` +4. Create or use a role with an attached policy that grants access to the + secret. For example, the following policy would grant access to + `secret/data/dgraph/alpha`: + ```hcl + path "secret/data/dgraph/*" { + capabilities = [ "read", "update" ] + } + ``` +5. Using the `role_id` generated from the previous step, create a corresponding + `secret_id`, and copy the `role_id` and `secret_id` over to local files, like + `./dgraph/vault/role_id` and `./dgraph/vault/secret_id`, that will be used by + Dgraph Alpha nodes. + + + To learn more about the above steps, see [Dgraph Vault Integration: + Docker](https://github.com/dgraph-io/dgraph/blob/main/contrib/config/vault/docker/README.md). + + + + The key format for the `acl-field` option can be defined using `acl-format` + with the values `base64` (default) or `raw`. 
+ + +### Example using Dgraph CLI with Hashicorp Vault configuration + +Here is an example of using Dgraph with a Vault server that holds the secret +key: + +```bash +## Start Dgraph Zero in different terminal tab or window +dgraph zero --my=localhost:5080 --replicas 1 --raft "idx=1" + +## Start Dgraph Alpha in different terminal tab or window +dgraph alpha \ + --security whitelist="10.0.0.0/8,172.0.0.0/8,192.168.0.0/16" \ + --vault addr="http://localhost:8200";acl-field="hmac_secret_file";acl-format="raw";path="secret/data/dgraph/alpha";role-id-file="./role_id";secret-id-file="./secret_id" + +``` + +### Example using Docker Compose with Hashicorp Vault configuration + +If you are using [Docker Compose](https://docs.docker.com/compose/), you can set +up a sample Dgraph cluster using this `docker-compose.yaml` configuration: + +```yaml +version: "3.5" +services: + alpha1: + command: dgraph alpha --my=alpha1:7080 --zero=zero1:5080 + container_name: alpha1 + environment: + DGRAPH_ALPHA_VAULT: addr=http://vault:8200;acl-field=hmac_secret_file;acl-format=raw;path=secret/data/dgraph/alpha;role-id-file=/dgraph/vault/role_id;secret-id-file=/dgraph/vault/secret_id + DGRAPH_ALPHA_SECURITY: whitelist=10.0.0.0/8,172.0.0.0/8,192.168.0.0/16 + image: dgraph/dgraph:latest + ports: + - "8080:8080" + volumes: + - ./role_id:/dgraph/vault/role_id + - ./secret_id:/dgraph/vault/secret_id + zero1: + command: dgraph zero --my=zero1:5080 --replicas 1 --raft idx=1 + container_name: zero1 + image: dgraph/dgraph:latest +``` + +In this example, you will also need to configure a +[Hashicorp Vault](https://www.vaultproject.io/) service named `vault` in the +above `docker-compose.yaml`, and then run through this sequence: + +1. Launch `vault` service: `docker-compose up --detach vault` +2. Unseal and Configure `vault` with the required prerequisites (see + [Configuring a Hashicorp Vault Server](#configuring-a-hashicorp-vault-server)). +3. Save role-id and secret-id as `./role_id` and `secret_id` +4. Launch Dgraph Zero and Alpha: `docker-compose up --detach` + +### Example using Kubernetes Helm Chart with Hashicorp Vault configuration + +If you deploy Dgraph on [Kubernetes](https://kubernetes.io/), you can configure +the ACL feature using the +[Dgraph Helm Chart](https://artifacthub.io/packages/helm/dgraph/dgraph). + +The next step is that we need to create a [Helm](https://helm.sh/) chart config +values file, such as `dgraph_values.yaml`. + +```yaml +## dgraph_values.yaml +alpha: + configFile: + config.yaml: | + vault: + addr: http://vault-headless.default.svc.cluster.local:9200 + acl_field: hmac_secret_file + acl_format: raw + path: secret/data/dgraph/alpha + role_id_file: /dgraph/vault/role_id + secret_id_file: /dgraph/vault/secret_id + security: + whitelist: 10.0.0.0/8,172.0.0.0/8,192.168.0.0/16‘ +``` + +To set up this chart, the [Hashicorp Vault](https://www.vaultproject.io/) +service must be installed and available. You can use the +[Hashicorp Vault Helm Chart](https://www.vaultproject.io/docs/platform/k8s/helm) +and configure it to +[auto unseal](https://learn.hashicorp.com/collections/vault/auto-unseal) so that +the service is immediately available after deployment. + +## Accessing secured Dgraph + +Before managing users and groups and configuring ACL rules, you will need to +login in order to get a token that is needed to access Dgraph. You will use this +token with the `X-Dgraph-AccessToken` header field. + +### Logging In + +To login, send a POST request to `/admin` with the GraphQL mutation. 
For example, to log in as the root user `groot`:

```graphql
mutation {
  login(userId: "groot", password: "password") {
    response {
      accessJWT
      refreshJWT
    }
  }
}
```

Response:

```json
{
  "data": {
    "login": {
      "response": {
        "accessJWT": "<accessJWT-value>",
        "refreshJWT": "<refreshJWT-value>"
      }
    }
  }
}
```

#### Access Token

The response includes the access and refresh JWTs, which are used for
authentication itself and for refreshing the authentication token,
respectively. Save the JWTs from the response for later HTTP requests.

You can run authenticated requests by passing the access JWT to a request via
the `X-Dgraph-AccessToken` header. Add the header `X-Dgraph-AccessToken` with
the `accessJWT` value from the login response in whichever GraphQL tool you're
using to make the request.

For example, if you were using the GraphQL Playground, you would add this in the
headers section:

```json
{ "X-Dgraph-AccessToken": "<accessJWT-value>" }
```

And in the main code section, you can add a mutation, such as:

```graphql
mutation {
  addUser(input: [{ name: "alice", password: "whiterabbit" }]) {
    user {
      name
    }
  }
}
```

#### Refresh Token

The refresh token can be used in an `/admin` POST GraphQL mutation to receive
new access and refresh JWTs, which is useful to renew the authenticated session
once the ACL access TTL expires (controlled by Dgraph Alpha's flag
`--acl_access_ttl`, which is set to 6h0m0s by default).

```graphql
mutation {
  login(userId: "groot", password: "password", refreshToken: "<refreshJWT-value>") {
    response {
      accessJWT
      refreshJWT
    }
  }
}
```

### Login using a client

With ACL configured, you need to log in as a user to access data protected by
ACL rules. You can do this using the client's `.login(USER_ID, USER_PASSWORD)`
method.

Here are some code samples using a client:

- **Go** ([dgo client](https://github.com/dgraph-io/dgo)): example
  `acl_over_tls_test.go`
  ([here](https://github.com/dgraph-io/dgraph/blob/main/tlstest/acl/acl_over_tls_test.go))
- **Java** ([dgraph4j](https://github.com/dgraph-io/dgraph4j)): example
  `AclTest.java`
  ([here](https://github.com/dgraph-io/dgraph4j/blob/master/src/test/java/io/dgraph/AclTest.java))

### Login using curl

If you are using `curl` from the command line, you can use the following with
the above [login mutation](#logging-in) saved to `login.graphql`:

```bash
## Login and save results
JSON_RESULT=$(curl http://localhost:8080/admin --silent --request POST \
  --header "Content-Type: application/graphql" \
  --upload-file login.graphql
)

## Extract the token using GNU grep, perl, the silver searcher, or jq
TOKEN=$(grep -oP '(?<=accessJWT":")[^"]*' <<< $JSON_RESULT)
TOKEN=$(perl -wln -e '/(?<=accessJWT":")[^"]*/ and print $&;' <<< $JSON_RESULT)
TOKEN=$(ag -o '(?<=accessJWT":")[^"]*' <<< $JSON_RESULT)
TOKEN=$(jq -r '.data.login.response.accessJWT' <<< $JSON_RESULT)

## Run a GraphQL query using the token
curl http://localhost:8080/admin --silent --request POST \
  --header "Content-Type: application/graphql" \
  --header "X-Dgraph-AccessToken: $TOKEN" \
  --upload-file some_other_query.graphql
```

  Parsing JSON results on the command line can be challenging, so this snippet
  includes some alternatives to extract the desired data using popular tools,
  such as [the silver searcher](https://github.com/ggreer/the_silver_searcher)
  or the JSON query tool [jq](https://stedolan.github.io/jq).
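Once you have `$TOKEN`, the same header also authenticates requests to other
Dgraph endpoints. For instance, a minimal sketch of an authenticated DQL query
against `/query` (the query itself is illustrative and assumes a `name`
predicate exists):

```bash
curl http://localhost:8080/query --silent --request POST \
  --header "Content-Type: application/dql" \
  --header "X-Dgraph-AccessToken: $TOKEN" \
  --data '{ q(func: has(name)) { name } }'
```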
## User and group administration

The default configuration comes with a user `groot`, with the password
`password`. The `groot` user is part of an administrative group called
`guardians`, which has access to everything. You can add more users to the
`guardians` group as needed.

### Reset the root password

You can reset the root password as in this example:

```graphql
mutation {
  updateUser(
    input: {
      filter: { name: { eq: "groot" } }
      set: { password: "$up3r$3cr3t1337p@$$w0rd" }
    }
  ) {
    user {
      name
    }
  }
}
```

### Create a regular user

To create a user `alice` with the password `whiterabbit`, execute the following
GraphQL mutation:

```graphql
mutation {
  addUser(input: [{ name: "alice", password: "whiterabbit" }]) {
    user {
      name
    }
  }
}
```

### Create a group

To create a group `dev`, execute:

```graphql
mutation {
  addGroup(input: [{ name: "dev" }]) {
    group {
      name
      users {
        name
      }
    }
  }
}
```

### Assign a user to a group

To assign the user `alice` to both the group `dev` and the group `sre`, the
mutation should be:

```graphql
mutation {
  updateUser(
    input: {
      filter: { name: { eq: "alice" } }
      set: { groups: [{ name: "dev" }, { name: "sre" }] }
    }
  ) {
    user {
      name
      groups {
        name
      }
    }
  }
}
```

### Remove a user from a group

To remove `alice` from the `dev` group, the mutation should be:

```graphql
mutation {
  updateUser(
    input: {
      filter: { name: { eq: "alice" } }
      remove: { groups: [{ name: "dev" }] }
    }
  ) {
    user {
      name
      groups {
        name
      }
    }
  }
}
```

### Delete a User

To delete the user `alice`, execute:

```graphql
mutation {
  deleteUser(filter: { name: { eq: "alice" } }) {
    msg
    numUids
  }
}
```

### Delete a Group

To delete the group `sre`, the mutation should be:

```graphql
mutation {
  deleteGroup(filter: { name: { eq: "sre" } }) {
    msg
    numUids
  }
}
```

## ACL rules configuration

You can set up ACL rules using the Dgraph Ratel UI or by using a GraphQL tool,
such as [Insomnia](https://insomnia.rest/),
[GraphQL Playground](https://github.com/prisma/graphql-playground),
[GraphiQL](https://github.com/skevy/graphiql-app), etc. You can set the
permissions on a predicate for a group using a pattern similar to the UNIX
file permission conventions shown below:

| Permission                  | Value | Binary |
| --------------------------- | ----- | ------ |
| `READ`                      | `4`   | `100`  |
| `WRITE`                     | `2`   | `010`  |
| `MODIFY`                    | `1`   | `001`  |
| `READ` + `WRITE`            | `6`   | `110`  |
| `READ` + `WRITE` + `MODIFY` | `7`   | `111`  |

These permissions represent the following:

- `READ` - the group has permission to read the predicate
- `WRITE` - the group has permission to write or update the predicate
- `MODIFY` - the group has permission to change the predicate's schema

The following examples grant full permissions on predicates to the group
`dev`. If there are no rules for a predicate, the default behavior is to block
all (`READ`, `WRITE` and `MODIFY`) operations.
### Assign predicate permissions to a group

Here we assign a permission rule for the `friend` predicate to the group:

```graphql
mutation {
  updateGroup(
    input: {
      filter: { name: { eq: "dev" } }
      set: { rules: [{ predicate: "friend", permission: 7 }] }
    }
  ) {
    group {
      name
      rules {
        permission
        predicate
      }
    }
  }
}
```

If you have [reverse edges](./dql-schema.md#reverse-edges), the group must be
given permission to them as well:

```graphql
mutation {
  updateGroup(
    input: {
      filter: { name: { eq: "dev" } }
      set: { rules: [{ predicate: "~friend", permission: 7 }] }
    }
  ) {
    group {
      name
      rules {
        permission
        predicate
      }
    }
  }
}
```

In some cases, it may be desirable to manage permissions for all the predicates
together rather than individually. This can be achieved using the
`dgraph.all` keyword.

The following example provides `read+write` access to the `dev` group over all
the predicates of a given namespace using the `dgraph.all` keyword.

```graphql
mutation {
  updateGroup(
    input: {
      filter: { name: { eq: "dev" } }
      set: { rules: [{ predicate: "dgraph.all", permission: 6 }] }
    }
  ) {
    group {
      name
      rules {
        permission
        predicate
      }
    }
  }
}
```

  The permissions assigned to a group `dev` are the union of the permissions
  from `dgraph.all` and the permissions for a specific predicate `name`. So if
  the group is assigned `READ` permission for `dgraph.all` and `WRITE`
  permission for the predicate `name`, it will have both `READ` and `WRITE`
  permissions for the `name` predicate, as a result of the union.

### Remove a rule from a group

To remove a rule or rules from the group `dev`, the mutation should be:

```graphql
mutation {
  updateGroup(
    input: {
      filter: { name: { eq: "dev" } }
      remove: { rules: ["friend", "~friend"] }
    }
  ) {
    group {
      name
      rules {
        predicate
        permission
      }
    }
  }
}
```

## Querying users and groups

You can query and get information for users and groups. The following sections
show output for the user `alice` and the `dev` group, along with rules for the
`friend` and `~friend` predicates.

### Query for users

Let's query for the user `alice`:

```graphql
query {
  queryUser(filter: { name: { eq: "alice" } }) {
    name
    groups {
      name
    }
  }
}
```

The output should show the groups that the user has been added to, e.g.

```json
{
  "data": {
    "queryUser": [
      {
        "name": "alice",
        "groups": [
          {
            "name": "dev"
          }
        ]
      }
    ]
  }
}
```

### Get user information

We can obtain information about a user with the following query:

```graphql
query {
  getUser(name: "alice") {
    name
    groups {
      name
    }
  }
}
```

The output should show the groups that the user has been added to, e.g.
```json
{
  "data": {
    "getUser": {
      "name": "alice",
      "groups": [
        {
          "name": "dev"
        }
      ]
    }
  }
}
```

### Query for groups

Let's query for the `dev` group:

```graphql
query {
  queryGroup(filter: { name: { eq: "dev" } }) {
    name
    users {
      name
    }
    rules {
      permission
      predicate
    }
  }
}
```

The output should include the users in the group, as well as the group's ACL
rules and permissions, e.g.

```json
{
  "data": {
    "queryGroup": [
      {
        "name": "dev",
        "users": [
          {
            "name": "alice"
          }
        ],
        "rules": [
          {
            "permission": 7,
            "predicate": "friend"
          },
          {
            "permission": 7,
            "predicate": "~friend"
          }
        ]
      }
    ]
  }
}
```

### Get group information

To check the `dev` group information:

```graphql
query {
  getGroup(name: "dev") {
    name
    users {
      name
    }
    rules {
      permission
      predicate
    }
  }
}
```

The output should include the users in the group, as well as the group's ACL
rules and permissions, e.g.

```json
{
  "data": {
    "getGroup": {
      "name": "dev",
      "users": [
        {
          "name": "alice"
        }
      ],
      "rules": [
        {
          "permission": 7,
          "predicate": "friend"
        },
        {
          "permission": 7,
          "predicate": "~friend"
        }
      ]
    }
  }
}
```

## Reset Groot Password

If you have forgotten the password to the `groot` user, then you may reset the
`groot` password (or the password for any user) by following these steps.

1. Stop Dgraph Alpha.
2. Turn off ACLs by removing the `--acl_hmac_secret` config flag in the Alpha
   config. This leaves the Alpha open with no ACL rules, so be sure to restrict
   access, including stopping request traffic to this Alpha.
3. Start Dgraph Alpha.
4. Connect to Dgraph Alpha using Ratel and run the following upsert mutation to
   update the `groot` password to `newpassword` (choose your own secure
   password):
   ```
   upsert {
     query {
       groot as var(func: eq(dgraph.xid, "groot"))
     }
     mutation {
       set {
         uid(groot) <dgraph.password> "newpassword" .
       }
     }
   }
   ```
5. Restart Dgraph Alpha with ACLs turned on by setting the `--acl_hmac_secret`
   config flag.
6. Login as groot with your new password.
diff --git a/dgraph/reference/enterprise-features/audit-logs.mdx b/dgraph/reference/enterprise-features/audit-logs.mdx
new file mode 100644
index 00000000..fdc62718
--- /dev/null
+++ b/dgraph/reference/enterprise-features/audit-logs.mdx
@@ -0,0 +1,151 @@
---
title: Audit Logging
description:
  With an Enterprise license, Dgraph can generate audit logs that let you track
  and audit all requests (queries and mutations).
---

As a database administrator, you count on being able to audit access to your
database. With a Dgraph [enterprise license](./enterprise-features/license), you
can enable audit logging so that all requests are tracked and available for use
in security audits. When audit logging is enabled, the following information is
recorded about the queries and mutations (requests) sent to your database:

- Endpoint
- Logged-in user name
- Server host address
- Client host address
- Request body (truncated at 4KB)
- Timestamp
- Namespace
- Query parameters (if provided)
- Response status

## Audit log scope

Most queries and mutations sent to Dgraph Alpha and Dgraph Zero are logged.
Specifically, the following are logged:

- HTTP requests sent over Dgraph Zero's 6080 port and Dgraph Alpha's 8080 port
  (except as noted below)
- gRPC requests sent over Dgraph Zero's 5080 port and Dgraph Alpha's 9080 port
  (except the Raft, health and Dgraph Zero stream endpoints noted below)

The following aren't logged:

- Responses to queries and mutations
- HTTP requests to `/health`, `/state` and `/jemalloc` endpoints
- gRPC requests to Raft endpoints (see [RAFT](./design-concepts/raft))
- gRPC requests to health endpoints (`Check` and `Watch`)
- gRPC requests to Dgraph Zero stream endpoints (`StreamMembership`,
  `UpdateMembership`, `Oracle`, `Timestamps`, `ShouldServe` and `Connect`)

{/* We don't have any docs to link to for the endpoints described in the last two bullets. TBD fix this so we aren't referencing something not described elsewhere */}

## Audit log files

All audit logs are in JSON format. Dgraph has a "rolling-file" policy for audit
logs, where the current log file is used until it reaches a configurable size
(default: 100 MB), and is then replaced by a new current audit log file. Older
audit log files are retained for a configurable number of days (default: 10
days).

For example, by sending this query:

```graphql
{
  q(func: has(actor.film)){
    count(uid)
  }
}
```

You'll get the following JSON audit log entry:

```json
{
  "ts": "2021-03-22T15:03:19.165Z",
  "endpoint": "/query",
  "level": "AUDIT",
  "user": "",
  "namespace": 0,
  "server": "localhost:7080",
  "client": "[::1]:60118",
  "req_type": "Http",
  "req_body": "{\"query\":\"{\\n q(func: has(actor.film)){\\n count(uid)\\n }\\n}\",\"variables\":{}}",
  "query_param": {
    "timeout": ["20s"]
  },
  "status": "OK"
}
```

## Enable audit logging

You can enable audit logging on a Dgraph Alpha or Dgraph Zero node by using the
`--audit` flag to specify semicolon-separated options for audit logging. When
you enable audit logging, a few options are available for you to configure:

- `compress=true` tells Dgraph to use compression on older audit log files
- `days=20` tells Dgraph to retain older audit logs for 20 days, rather than the
  default of 10 days
- `output=/path/to/audit/logs` tells Dgraph which path to use for storing audit
  logs
- `encrypt-file=/encryption/key/path` tells Dgraph to encrypt older log files
  with the specified key
- `size=200` tells Dgraph to store audit logs in 200 MB files, rather than the
  default of 100 MB files

You can see how to use these options in the example commands below.

## Example commands

The commands in this section show you how to enable and configure audit logging.

### Enable audit logging

In the simplest scenario, you can enable audit logging by specifying only the
directory to store audit logs on a Dgraph Alpha node:

```bash
dgraph alpha --audit output=audit-log-dir
```

You could extend this command a bit to specify larger log files (200 MB, instead
of 100 MB) and retain them for longer (15 days instead of 10 days):

```bash
dgraph alpha --audit "output=audit-log-dir;size=200;days=15"
```

### Enable audit logging with compression

In many cases you will want to compress older audit logs to save storage space.
You can do this with a command like the following:

```bash
dgraph alpha --audit "output=audit-log-dir;compress=true"
```

### Enable audit logging with encryption

You can also enable encryption of audit logs to protect sensitive information
that might exist in logged requests. You can do this, along with compression,
with a command like the following:

```bash
dgraph alpha --audit "output=audit-log-dir;compress=true;encrypt-file=/path/to/encrypt/key/file"
```

### Decrypt audit logs

To decrypt encrypted audit logs, you can use the `dgraph audit decrypt` command,
as follows:

```bash
dgraph audit decrypt --encryption_key_file=/path/encrypt/key/file --in /path/to/encrypted/log/file --out /path/to/output/file
```

## Next steps

To learn more about the logging features of Dgraph, see [Logging](./log-format).
diff --git a/dgraph/reference/enterprise-features/binary-backups.mdx b/dgraph/reference/enterprise-features/binary-backups.mdx
new file mode 100644
index 00000000..cfbda3a1
--- /dev/null
+++ b/dgraph/reference/enterprise-features/binary-backups.mdx
@@ -0,0 +1,850 @@
---
title: Binary Backups
---

  This feature was introduced in
  [v1.1.0](https://github.com/dgraph-io/dgraph/releases/tag/v1.1.0).

Binary backups are full backups of Dgraph that are backed up directly to cloud
storage such as Amazon S3 or any Minio storage backend. Backups can also be
saved to an on-premise network file system shared by all Alpha servers. These
backups can be used to restore a new Dgraph cluster to the previous state from
the backup. Unlike [exports](./dgraph-administration.md#exporting-database),
binary backups are Dgraph-specific and can be used to restore a cluster quickly.

## Configure Backup

Backup is enabled only when a valid license file is supplied to a Zero server,
or within the thirty (30) day trial period; there are no exceptions.

### Configure Amazon S3 Credentials

To backup to Amazon S3, the Alpha server must have the following AWS credentials
set via environment variables:

| Environment Variable | Description |
| -------------------- | ----------- |
| `AWS_ACCESS_KEY_ID` or `AWS_ACCESS_KEY` | AWS access key with permissions to write to the destination bucket. |
| `AWS_SECRET_ACCESS_KEY` or `AWS_SECRET_KEY` | AWS secret key with permissions to write to the destination bucket. |
| `AWS_SESSION_TOKEN` | AWS session token (if required). |

Starting with
[v20.07.0](https://github.com/dgraph-io/dgraph/releases/tag/v20.07.0), if the
system has access to the S3 bucket, you no longer need to explicitly include
these environment variables.

In AWS, you can accomplish this by doing the following:

1. Create an
   [IAM Role](https://docs.aws.amazon.com/IAM/latest/UserGuide/id_roles_create.html)
   with an IAM Policy that grants access to the S3 bucket.
2. Depending on whether you want to grant access to an EC2 instance, or to a pod
   running on [EKS](https://aws.amazon.com/eks/), you can do one of these
   options:
   - An [Instance Profile](https://docs.aws.amazon.com/IAM/latest/UserGuide/id_roles_use_switch-role-ec2_instance-profiles.html)
     can pass the IAM Role to an EC2 Instance
   - [IAM Roles for Amazon EC2](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/iam-roles-for-amazon-ec2.html)
     to attach the IAM Role to a running EC2 Instance
   - [IAM roles for service accounts](https://docs.aws.amazon.com/eks/latest/userguide/iam-roles-for-service-accounts.html)
     to associate the IAM Role to a
     [Kubernetes Service Account](https://kubernetes.io/docs/tasks/configure-pod-container/configure-service-account/).

### Configure Minio Credentials

To backup to Minio, the Alpha server must have the following Minio credentials
set via environment variables:

| Environment Variable | Description |
| -------------------- | ----------- |
| `MINIO_ACCESS_KEY` | Minio access key with permissions to write to the destination bucket. |
| `MINIO_SECRET_KEY` | Minio secret key with permissions to write to the destination bucket. |

## Create a Backup

To create a backup, make an HTTP POST request to `/admin` on a Dgraph Alpha HTTP
address and port (default, "localhost:8080"). Like with all `/admin` endpoints,
this is only accessible on the same machine as the Alpha server unless
[whitelisted for admin operations](./dgraph-administration.md#whitelisting-admin-operations).
See `BackupInput` below for all the possible options.

```graphql
input BackupInput {
  """
  Destination for the backup: e.g. Minio or S3 bucket.
  """
  destination: String!

  """
  Access key credential for the destination.
  """
  accessKey: String

  """
  Secret key credential for the destination.
  """
  secretKey: String

  """
  AWS session token, if required.
  """
  sessionToken: String

  """
  Set to true to allow backing up to S3 or Minio bucket that requires no credentials.
  """
  anonymous: Boolean

  """
  Force a full backup instead of an incremental backup.
  """
  forceFull: Boolean
}
```

Execute the following mutation on the `/admin` endpoint using any
GraphQL-compatible client, like Insomnia, GraphQL Playground or GraphiQL.

### Backup to Amazon S3

```graphql
mutation {
  backup(
    input: { destination: "s3://s3.us-west-2.amazonaws.com/<bucket-name>" }
  ) {
    response {
      message
      code
    }
    taskId
  }
}
```

### Backup to Minio

```graphql
mutation {
  backup(input: { destination: "minio://127.0.0.1:9000/<bucket-name>" }) {
    response {
      message
      code
    }
    taskId
  }
}
```

### Backup using a MinIO Gateway

#### Azure Blob Storage

You can use
[Azure Blob Storage](https://azure.microsoft.com/services/storage/blobs/)
through the
[MinIO Azure Gateway](https://docs.min.io/docs/minio-gateway-for-azure.html).
You need to configure a
[storage account](https://docs.microsoft.com/azure/storage/common/storage-account-overview)
and a
[container](https://docs.microsoft.com/azure/storage/blobs/storage-blobs-introduction#containers)
to organize the blobs.

For MinIO configuration, you will need to
[retrieve storage account keys](https://docs.microsoft.com/azure/storage/common/storage-account-keys-manage).
The [MinIO Azure Gateway](https://docs.min.io/docs/minio-gateway-for-azure.html)
will use `MINIO_ACCESS_KEY` and `MINIO_SECRET_KEY` to correspond to the Azure
Storage Account `AccountName` and `AccountKey`.

Once you have the `AccountName` and `AccountKey`, you can access Azure Blob
Storage locally using one of these methods:

- Run the
  [MinIO Azure Gateway](https://docs.min.io/docs/minio-gateway-for-azure.html)
  using Docker
  ```bash
  docker run --publish 9000:9000 --name gateway \
    --env "MINIO_ACCESS_KEY=<azure-storage-account-name>" \
    --env "MINIO_SECRET_KEY=<azure-storage-account-key>" \
    minio/minio gateway azure
  ```
- Run the
  [MinIO Azure Gateway](https://docs.min.io/docs/minio-gateway-for-azure.html)
  using the MinIO Binary
  ```bash
  export MINIO_ACCESS_KEY="<azure-storage-account-name>"
  export MINIO_SECRET_KEY="<azure-storage-account-key>"
  minio gateway azure
  ```

#### Google Cloud Storage

You can use [Google Cloud Storage](https://cloud.google.com/storage) through the
[MinIO GCS Gateway](https://docs.min.io/docs/minio-gateway-for-gcs.html). You
will need to
[create storage buckets](https://cloud.google.com/storage/docs/creating-buckets),
create a Service Account key for GCS and get a credentials file. See
[Create a Service Account key](https://github.com/minio/minio/blob/master/docs/gateway/gcs.md#11-create-a-service-account-key-for-gcs-and-get-the-credentials-file)
for further information.

Once you have a `credentials.json`, you can access GCS locally using one of
these methods:

- Run the [MinIO GCS Gateway](https://docs.min.io/docs/minio-gateway-for-gcs.html)
  using Docker
  ```bash
  docker run --publish 9000:9000 --name gateway \
    --volume /path/to/credentials.json:/credentials.json \
    --env "GOOGLE_APPLICATION_CREDENTIALS=/credentials.json" \
    --env "MINIO_ACCESS_KEY=minioaccountname" \
    --env "MINIO_SECRET_KEY=minioaccountkey" \
    minio/minio gateway gcs
  ```
- Run the [MinIO GCS Gateway](https://docs.min.io/docs/minio-gateway-for-gcs.html)
  using the MinIO Binary
  ```bash
  export GOOGLE_APPLICATION_CREDENTIALS=/path/to/credentials.json
  export MINIO_ACCESS_KEY=minioaccesskey
  export MINIO_SECRET_KEY=miniosecretkey
  minio gateway gcs
  ```

#### Test Using MinIO Browser

MinIO Gateway comes with an embedded web-based object browser. After using one
of the aforementioned methods to run the MinIO Gateway, you can test that it is
running: open a web browser, navigate to http://127.0.0.1:9000, and ensure that
the object browser is displayed and can access the remote object storage.

### Disabling HTTPS for S3 and Minio backups

By default, Dgraph assumes the destination bucket is using HTTPS. If that is not
the case, the backup will fail. To send a backup to a bucket using HTTP
(insecure), set the query parameter `secure=false` with the destination endpoint
in the `destination` field:

```graphql
mutation {
  backup(
    input: { destination: "minio://127.0.0.1:9000/<bucket-name>?secure=false" }
  ) {
    response {
      message
      code
    }
    taskId
  }
}
```

### Overriding Credentials

The `accessKey`, `secretKey`, and `sessionToken` parameters can be used to
override the default credentials. Please note that unless HTTPS is used, the
credentials will be transmitted in plain text, so use these parameters with
discretion. The environment variables should be used by default, but these
options are there to allow for greater flexibility.

The `anonymous` parameter can be set to "true" to allow backing up to an S3 or
MinIO bucket that requires no credentials (i.e., a public bucket).
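For example, based on the `BackupInput` fields shown earlier, a mutation that
overrides the default credentials might look like this sketch (the bucket and
key values are placeholders):

```graphql
mutation {
  backup(
    input: {
      destination: "s3://s3.us-west-2.amazonaws.com/<bucket-name>"
      accessKey: "<access-key>"
      secretKey: "<secret-key>"
      sessionToken: "<session-token-if-required>"
    }
  ) {
    response {
      message
      code
    }
    taskId
  }
}
```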
### Backup to NFS

```graphql
mutation {
  backup(input: { destination: "/path/to/local/directory" }) {
    response {
      message
      code
    }
    taskId
  }
}
```

A local filesystem will work only if all the Alpha servers have access to it
(e.g., all the Alpha servers are running on the same filesystem as a normal
process, not a Docker container). However, an NFS is recommended so that backups
work seamlessly across multiple machines and/or containers.

### Forcing a Full Backup

By default, an incremental backup will be created if there's another full backup
in the specified location. To create a full backup, set the `forceFull` field to
`true` in the mutation. Each series of backups can be identified by a unique ID,
and each backup in the series is assigned a monotonically increasing number. The
following section contains more details on how to restore a backup series.

```graphql
mutation {
  backup(input: { destination: "/path/to/local/directory", forceFull: true }) {
    response {
      message
      code
    }
    taskId
  }
}
```

## Listing Backups

The GraphQL admin interface includes the `listBackups` endpoint that lists the
backups in the given location along with the information included in the
`manifest.json` file. An example of a request to list the backups in the
`/data/backup` location is included below:

```
query backup {
  listBackups(input: {location: "/data/backup"}) {
    backupId
    backupNum
    encrypted
    groups {
      groupId
      predicates
    }
    path
    since
    type
  }
}
```

The `listBackups` input can contain the following fields. Only the `location`
field is required.

```
input ListBackupsInput {
  """
  Destination for the backup: e.g. Minio or S3 bucket.
  """
  location: String!

  """
  Access key credential for the destination.
  """
  accessKey: String

  """
  Secret key credential for the destination.
  """
  secretKey: String

  """
  AWS session token, if required.
  """
  sessionToken: String

  """
  Whether the destination doesn't require credentials (e.g. S3 public bucket).
  """
  anonymous: Boolean
}
```

The output is of `[Manifest]` type. The fields inside the `Manifest` type
correspond to the fields in the `manifest.json` file.

```
type Manifest {
  """
  Unique ID for the backup series.
  """
  backupId: String

  """
  Number of this backup within the backup series. The full backup always has a value of one.
  """
  backupNum: Int

  """
  Whether this backup was encrypted.
  """
  encrypted: Boolean

  """
  List of groups and the predicates they store in this backup.
  """
  groups: [BackupGroup]

  """
  Path to the manifest file.
  """
  path: String

  """
  The timestamp at which this backup was taken. The next incremental backup will
  start from this timestamp.
  """
  since: Int

  """
  The type of backup, either full or incremental.
  """
  type: String
}

type BackupGroup {
  """
  The ID of the cluster group.
  """
  groupId: Int

  """
  List of predicates assigned to the group.
  """
  predicates: [String]
}
```

### Automating Backups

You can use the provided endpoint to automate backups; however, there are a few
things to keep in mind.

- The requests should go to a single Alpha server. The Alpha server that
  receives the request is responsible for looking up the location and
  determining from which point the backup should resume.
- Versions of Dgraph starting with v20.07.1, v20.03.5, and v1.2.7 have a way to
  block multiple backup requests going to the same Alpha server. For previous
  versions, keep this in mind and avoid sending multiple requests at once. This
  is for the same reason as the point above.

- You can have multiple backup series in the same location, although the feature
  works best if you set up a unique location for each series.

## Export Backups

The `export_backup` tool lets you convert a binary backup into an exported
folder.

If you need to upgrade between two major Dgraph versions that have incompatible
changes, you can use the `export_backup` tool to apply changes (either to the
exported `.rdf` file or to the schema file), and then import the dataset back
into the new Dgraph version.

### Using exports instead of binary backups

An example of this use case would be to migrate existing schemas from Dgraph
v1.0 to the latest Dgraph version. You need to update the schema file from an
export so all predicates of type `uid` are changed to `[uid]`. Then use the
updated schema when loading data into the latest Dgraph version.

For example, the following schema:

```
name: string .
friend: uid .
```

becomes

```
name: string .
friend: [uid] .
```

Because you have to modify the schema itself, you need an export. You can use
the `export_backup` tool to convert your binary backup into an export folder.

### Binary Backups and Exports folders

A Binary Backup directory has the following structure:

```sh
backup
├── dgraph.20210102.204757.509
│   └── r9-g1.backup
├── dgraph.20210104.224757.707
│   └── r9-g1.backup
└── manifest.json
```

An Export directory has the following structure:

```sh
dgraph.r9.u0108.1621
├── g01.gql_schema.gz
├── g01.rdf.gz
└── g01.schema.gz
```

To make the changes described above, you need to edit the `g01.schema.gz` file.

### Benefits

With the `export_backup` tool you get the speed benefit of binary backups, which
are faster than regular exports. So if you have a big dataset, you don't need to
wait a long time until an export is completed. Instead, just take a binary
backup and convert it to an export only when needed.

### How to use it

Ensure that you have created a binary backup. The directory tree of a binary
backup usually looks like this:

```sh
backup
├── dgraph.20210104.224757.709
│   └── r9-g1.backup
└── manifest.json
```

Then run the following command:

```sh
dgraph export_backup --location "<backup-location>" --destination "<destination-path>"
```

Once completed, you will find your export folder (in this case
`dgraph.r9.u0108.1621`). The tree of the directory should look like this:

```sh
dgraph.r9.u0108.1621
├── g01.gql_schema.gz
├── g01.rdf.gz
└── g01.schema.gz
```

## Encrypted Backups

Encrypted backups are an Enterprise feature that is available from `v20.03.1`
and `v1.2.3` and allows you to encrypt your backups and restore them. This
documentation describes how to use encryption with your binary backups.

Starting with `v20.07.0`, we also added support for Encrypted Backups using
encryption keys sitting on [Hashicorp Vault](https://www.vaultproject.io/).

### New `Encrypted` flag in manifest.json

A new `Encrypted` flag is added to the `manifest.json`. This flag indicates if
the corresponding binary backup is encrypted or not. To be backward compatible,
if this flag is absent, it is presumed that the corresponding backup is not
encrypted.
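You can inspect this flag without opening the file, for example via the
`listBackups` endpoint shown earlier. An illustrative response for an encrypted
full backup might look like this sketch (all values are placeholders):

```json
{
  "data": {
    "listBackups": [
      {
        "backupId": "<unique-series-id>",
        "backupNum": 1,
        "encrypted": true,
        "groups": [{ "groupId": 1, "predicates": ["friend", "name"] }],
        "path": "/data/backup/dgraph.20210102.204757.509",
        "since": 2280,
        "type": "full"
      }
    ]
  }
}
```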
For a series of full and incremental backups, per the current design, we don't
allow mixing encrypted and unencrypted backups. As a result, all full and
incremental backups in a series must either be encrypted fully or not at all.
This flag helps with checking this restriction.

### AES And Chaining with Gzip

If encryption is turned on for an Alpha server, then we use the configured
encryption key. The key size (16, 24, or 32 bytes) determines the AES cipher
chosen, i.e., AES-128, AES-192, or AES-256. We use the AES CTR mode. Currently,
the binary backup is already gzipped. With encryption, we encrypt the gzipped
data.

During **backup**, the 16-byte IV is prepended to the ciphertext after
encryption.

### Backup

Backup is an online tool, meaning it is available when the Dgraph Alpha server
is running. For encrypted backups, the Dgraph Alpha server must be configured
with the `--encryption key-file=value` flag. Starting with v20.07.0, the Dgraph
Alpha server can alternatively be configured to interface with a
[Hashicorp Vault](https://www.vaultproject.io/) server to obtain keys.

  The `--encryption key-file=value` flag or the `--vault` superflag, used for
  encryption-at-rest, is now also used for encrypted backups.

## Online restore

To restore from a backup to a live cluster, execute a mutation on the `/admin`
endpoint with the following format:

```graphql
mutation {
  restore(
    input: {
      location: "/path/to/backup/directory"
      backupId: "id_of_backup_to_restore"
    }
  ) {
    message
    code
  }
}
```

Online restores only require you to send this request. The `UID` and timestamp
leases are updated accordingly. The latest backup to be restored should contain
the same number of groups in its `manifest.json` file as the cluster to which it
is being restored.

  When using backups made from a Dgraph cluster that uses encryption (so backups
  are encrypted), you need to use the same key from that original cluster when
  doing a restore process. Dgraph's [Encryption at
  Rest](./enterprise-features/encryption-at-rest) uses a symmetric-key algorithm
  where the same key is used for both encryption and decryption, so the
  encryption key from that cluster is needed for the restore process.

Online restore can be performed from Amazon S3 / Minio or from a local
directory. Below is the documentation for the fields inside `RestoreInput` that
can be passed into the mutation.

```graphql
input RestoreInput {
  """
  Destination for the backup: e.g. Minio or S3 bucket.
  """
  location: String!

  """
  Backup ID of the backup series to restore. This ID is included in the manifest.json file.
  If missing, it defaults to the latest series.
  """
  backupId: String

  """
  Number of the backup within the backup series to be restored. Backups with a greater value
  will be ignored. If the value is zero or is missing, the entire series will be restored.
  """
  backupNum: Int

  """
  Path to the key file needed to decrypt the backup. This file should be accessible
  by all Alpha servers in the group. The backup will be written using the encryption key
  with which the cluster was started, which might be different than this key.
  """
  encryptionKeyFile: String

  """
  Vault server address where the key is stored. This server must be accessible
  by all Alpha servers in the group. Default "http://localhost:8200".
  """
  vaultAddr: String

  """
  Path to the Vault RoleID file.
  """
  vaultRoleIDFile: String

  """
  Path to the Vault SecretID file.
+ """ + vaultSecretIDFile: String + + """ + Vault kv store path where the key lives. Default "secret/data/dgraph". + """ + vaultPath: String + + """ + Vault kv store field whose value is the key. Default "enc_key". + """ + vaultField: String + + """ + Vault kv store field's format. Must be "base64" or "raw". Default "base64". + """ + vaultFormat: String + + """ + Access key credential for the destination. + """ + accessKey: String + + """ + Secret key credential for the destination. + """ + secretKey: String + + """ + AWS session token, if required. + """ + sessionToken: String + + """ + Set to true to allow backing up to S3 or Minio bucket that requires no credentials. + """ + anonymous: Boolean + + """ + All the backups with num >= incrementalFrom will be restored. + """ + incrementalFrom: Int + + """ + If `isPartial` is set to true then the cluster is kept in draining mode after + restore to ensure that the database is not corrupted by any mutations or tablet moves in + between two restores. + """ + isPartial: Boolean +} +``` + +Restore requests returns immediately without waiting for the operation to +finish. + +## Incremental Restore + +You can use incremental restore to restore a set of incremental backups on a +cluster with a part of the backup already restored. The system does not accept +any mutations made between a normal restore and an incremental restore, because +the cluster is in the draining mode. When the cluster is in a draining mode only +an admin request to bring the cluster back to normal mode is accepted. + +Note: Before you start an incremental restore ensure that you set `isPartial` to +`true` in your normal restore. + +To incrementally restore from a backup to a live cluster, execute a mutation on +the `/admin` endpoint with the following format: + +```graphql +mutation{ + restore(input:{ + incrementalFrom:"incremental_backup_from", + location: "/path/to/backup/directory", + backupId: "id_of_backup_to_restore"' + }){ + message + code + } +} +``` + +## Namespace Aware Restore + +You can use namespace-aware restore to restore a single namespace from a backup +that contains multiple namespaces. The created restore will be available in the +default namespace. For example, if you restore namespace 2 using the +restoreTenant API, then after the restore operation is completed, the cluster +will have only the default namespace, and it will contain the data from +namespace 2. Namespace aware restore supports incremental restore. + +To restore from a backup to a live cluster, execute a mutation on the `/admin` +endpoint with the following format: + +```graphql +mutation { + restoreTenant( + input: { + restoreInput: { + incrementalFrom: "incremental_backup_from" + location: "/path/to/backup/directory" + backupId: "id_of_backup_to_restore" + } + fromNamespace: namespaceToBeRestored + } + ) { + message + code + } +} +``` + +Documentation of restoreTenant inputs + +``` +input RestoreTenantInput { + """ + restoreInput contains fields that are required for the restore operation, + i.e., location, backupId, and backupNum + """ + restoreInput: RestoreInput + + """ + fromNamespace is the namespace of the tenant that needs to be restored into namespace 0 of the new cluster. + """ + fromNamespace: Int! +} +``` + +## Offline restore (DEPRECATED) + +The restore utility is now a standalone tool. A new flag, +`--encryption key-file=value`, is now part of the restore utility, so you can +use it to decrypt the backup. 
The file specified using this flag must contain
the same key that was used for encryption during backup. Alternatively, starting
with `v20.07.0`, the `vault` superflag can be used to restore a backup.

You can use the `dgraph restore` command to restore the postings directory from
a previously-created backup to a directory in the local filesystem. This command
restores a backup to a new Dgraph cluster, so it is not designed to restore a
backup to a Dgraph cluster that is currently live. During a restore operation, a
new Dgraph Zero server might run to fully restore the backup state.

You can use the `--location` (`-l`) flag to specify a source URI with Dgraph
backup objects. This URI supports all the schemes used for backup.

You can use the `--postings` (`-p`) flag to set the directory where restored
posting directories are saved. This directory contains a posting directory for
each group in the restored backup.

You can use the `--zero` (`-z`) flag to specify a Dgraph Zero server address to
update the start timestamp and UID lease using the restored version. If no
Dgraph Zero server address is passed, the command will complain unless you set
the value of the `--force_zero` flag to false. If you do not pass a Zero address
to this command, you need to manually update the timestamp and UID lease using
the Dgraph Zero server's HTTP 'assign' endpoint. The updated values should be
those that are printed near the end of the command's output.

You can use the optional `--backup_id` flag to specify the ID of the backup
series to restore. A backup series consists of a full backup and all of the
incremental backups built on top of it. Each time a new full backup is created,
a new backup series with a different ID is started. The backup series ID is
stored in each `manifest.json` file stored in each backup folder.

You use the `--encryption key-file=value` flag in cases where you took the
backup in an encrypted cluster. The string for this flag must point to the
location of the same key file used to run the cluster.

You use the `--vault` [superflag](./deploy/cli-command-reference) to specify the
[Hashicorp Vault](https://www.vaultproject.io/) server address (`addr`), role id
(`role-id-file`), secret id (`secret-id-file`) and the field that contains the
encryption key (`enc-field`) that was used to encrypt the backup.

The restore feature creates a cluster with as many groups as the original
cluster had at the time of the last backup. For each group, `dgraph restore`
creates a posting directory (`p`) that corresponds to the backup group ID.
For example, a backup for Dgraph Alpha group 2 would have the name
`.../r32-g2.backup` and would be loaded to posting directory `p2`.

After running the restore command, the directories inside the `postings`
directory need to be manually copied over to the machines/containers running the
Dgraph Alpha servers before running the `dgraph alpha` command. For example, in
a database cluster with two Dgraph Alpha groups and one replica each, `p1` needs
to be moved to the location of the first Dgraph Alpha node and `p2` needs to be
moved to the location of the second Dgraph Alpha node.

By default, Dgraph will look for a posting directory with the name `p`, so make
sure to rename the directories after moving them. You can also use the `-p`
option of the `dgraph alpha` command to specify a different path from the
default.
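For example, an offline restore of an encrypted backup series might combine the
flags described above like this (the backup series ID is a placeholder):

```sh
dgraph restore --postings "/var/db/dgraph" --location "/var/backups/dgraph" \
  --zero "localhost:5080" --backup_id "<backup-series-id>" \
  --encryption key-file="./enc_key_file"
```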
### Restore from Amazon S3

```sh
dgraph restore --postings "/var/db/dgraph" --location "s3://s3.<region>.amazonaws.com/<bucket-name>"
```

### Restore from MinIO

```sh
dgraph restore --postings "/var/db/dgraph" --location "minio://127.0.0.1:9000/<bucket-name>"
```

### Restore from Local Directory or NFS

```sh
dgraph restore --postings "/var/db/dgraph" --location "/var/backups/dgraph"
```

### Restore and Update Timestamp

Specify the Zero server address and port for the new cluster with `--zero`/`-z`
to update the timestamp.

```sh
dgraph restore --postings "/var/db/dgraph" --location "/var/backups/dgraph" --zero "localhost:5080"
```
diff --git a/dgraph/reference/enterprise-features/change-data-capture.mdx b/dgraph/reference/enterprise-features/change-data-capture.mdx
new file mode 100644
index 00000000..c95967a3
--- /dev/null
+++ b/dgraph/reference/enterprise-features/change-data-capture.mdx
@@ -0,0 +1,157 @@
---
title: Change Data Capture
description:
  With a Dgraph enterprise license, you can use Dgraph's change data capture
  (CDC) capabilities to track data changes over time.
---

With a Dgraph [enterprise license](./enterprise-features/license), you can use
change data capture (CDC) to track data changes over time, including mutations
and drops in your database. Dgraph's CDC implementation lets you use Kafka or a
local file as a _sink_ to store CDC updates streamed by Dgraph Alpha leader
nodes.

When CDC is enabled, Dgraph streams events for all `set` and `delete` mutations
(except those that affect password fields), along with any drop events. Live
Loader events are recorded by CDC, but Bulk Loader events aren't.

CDC events are based on changes to Raft logs. So, if the sink is not reachable
by the Alpha leader node, then Raft logs grow as events are collected on that
node until the sink is available again. You should enable CDC on all Dgraph
Alpha nodes to avoid interruptions in the stream of CDC events.

## Enable CDC with Kafka sink

Kafka records CDC events under the `dgraph-cdc` topic. The topic must be created
before events are sent to the broker. To enable CDC and sink events to Kafka,
start Dgraph Alpha with the `--cdc` flag and the sub-options shown below, as
follows:

```bash
dgraph alpha --cdc "kafka=kafka-hostname:port; sasl-user=tstark; sasl-password=m3Ta11ic"
```

If you use Kafka on the localhost without SASL authentication, you can just
specify the hostname and port used by Kafka, as follows:

```bash
dgraph alpha --cdc "localhost:9092"
```

If the Kafka cluster to which you are connecting requires TLS, the `ca-cert`
option is required. Note that this certificate can be self-signed.

## Enable CDC with file sink

To enable CDC and sink results to a local unencrypted file, start Dgraph Alpha
with the `--cdc` flag and the sub-option shown below, as follows:

```bash
dgraph alpha --cdc "file=local-file-path"
```

## CDC command reference

The `--cdc` option includes several sub-options that you can use to configure
CDC when running the `dgraph alpha` command:

| Sub-option | Example `dgraph alpha` command option | Notes |
| ---------- | ------------------------------------- | ----- |
| `tls` | `--cdc "tls=true"` | Boolean flag to enable or disable TLS while connecting to Kafka |
| `ca-cert` | `--cdc "ca-cert=/cert-dir/ca.crt"` | Path and filename of the CA root certificate used for TLS encryption; if not specified, Dgraph uses system certs when `tls=true` |
| `client-cert` | `--cdc "client-cert=/c-certs/client.crt"` | Path and filename of the client certificate used for TLS encryption |
| `client-key` | `--cdc "client-key=/c-certs/client.key"` | Path and filename of the client certificate private key |
| `file` | `--cdc "file=/sink-dir/cdc-file"` | Path and filename of a local file sink (alternative to Kafka sink) |
| `kafka` | `--cdc "kafka=kafka-hostname; sasl-user=tstark; sasl-password=m3Ta11ic"` | Hostname(s) of the Kafka hosts. May require authentication using the `sasl-user` and `sasl-password` sub-options |
| `sasl-user` | `--cdc "kafka=kafka-hostname; sasl-user=tstark; sasl-password=m3Ta11ic"` | SASL username for Kafka. Requires the `kafka` and `sasl-password` sub-options |
| `sasl-password` | `--cdc "kafka=kafka-hostname; sasl-user=tstark; sasl-password=m3Ta11ic"` | SASL password for Kafka. Requires the `kafka` and `sasl-user` sub-options |
| `sasl-mechanism` | `--cdc "kafka=kafka-hostname; sasl-mechanism=PLAIN"` | The SASL mechanism for Kafka (PLAIN, SCRAM-SHA-256 or SCRAM-SHA-512). The default is PLAIN |

## CDC data format

CDC events are in JSON format. Most CDC events look like the following example:

```json
{
  "key": "0",
  "value": {
    "meta": { "commit_ts": 5 },
    "type": "mutation",
    "event": {
      "operation": "set",
      "uid": 2,
      "attr": "counter.val",
      "value": 1,
      "value_type": "int"
    }
  }
}
```

The `Meta.Commit_Ts` value (shown above as `"meta":{"commit_ts":5}`) increases
with each CDC event, so you can use this value to find duplicate events if those
occur due to Raft leadership changes in your Dgraph Alpha group.

### Mutation event examples

A set mutation event updating `counter.val` to 10 would look like the following:

```json
{
  "meta": { "commit_ts": 29 },
  "type": "mutation",
  "event": {
    "operation": "set",
    "uid": 3,
    "attr": "counter.val",
    "value": 10,
    "value_type": "int"
  }
}
```

Similarly, a delete mutation event that removes all values for the `Author.name`
field for a specified node would look like the following:

```json
{
  "meta": { "commit_ts": 44 },
  "type": "mutation",
  "event": {
    "operation": "del",
    "uid": 7,
    "attr": "Author.name",
    "value": "_STAR_ALL",
    "value_type": "default"
  }
}
```

### Drop event examples

CDC drop events look like the following example event for "drop all":

```json
{ "meta": { "commit_ts": 13 }, "type": "drop", "event": { "operation": "all" } }
```

The `operation` field specifies which drop operation (`attribute`, `type`,
specified `data`, or `all` data) is tracked by the CDC event.

## CDC and multi-tenancy

When you enable CDC in a [multi-tenant environment](./multitenancy), CDC events
streamed to Kafka are distributed among the available Kafka partitions by the
Kafka client, based on each event's multi-tenancy namespace.
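Whether or not multi-tenancy is enabled, you can verify that events are flowing
by reading the topic with any Kafka consumer. For example, a sketch using
Kafka's console consumer against a broker on localhost (script name and path
vary by Kafka installation):

```bash
kafka-console-consumer.sh --bootstrap-server localhost:9092 \
  --topic dgraph-cdc --from-beginning
```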
## Known limitations

CDC has the following known limitations:

- CDC events do not track old values that are updated or removed by mutation or
  drop operations; only new values are tracked
- CDC does not currently track schema updates
- You can only configure or enable CDC when starting Alpha nodes using the
  `dgraph alpha` command
- If a node crashes or the leadership of a Raft group changes, CDC might have
  duplicate events, but no data loss
diff --git a/dgraph/reference/enterprise-features/encryption-at-rest.mdx b/dgraph/reference/enterprise-features/encryption-at-rest.mdx
new file mode 100644
index 00000000..f7b72d48
--- /dev/null
+++ b/dgraph/reference/enterprise-features/encryption-at-rest.mdx
@@ -0,0 +1,176 @@
---
title: Encryption at Rest
---

  This feature was introduced in
  [v1.1.1](https://github.com/dgraph-io/dgraph/releases/tag/v1.1.1). For
  migrating unencrypted data to a new Dgraph cluster with encryption enabled,
  you need to [export the database](./dgraph-administration.md#export-database)
  and [import data](./about_import), preferably using the [bulk
  loader](./bulk-loader).

Encryption at rest refers to the encryption of data that is stored physically in
any digital form. It ensures that sensitive data on disks is not readable by any
user or application without a valid key that is required for decryption. Dgraph
provides encryption at rest as an enterprise feature. If encryption is enabled,
Dgraph uses the
[Advanced Encryption Standard (AES)](https://en.wikipedia.org/wiki/Advanced_Encryption_Standard)
algorithm to encrypt the data and secure it.

Prior to v20.07.0, the encryption key file had to be present on the local file
system. Starting with
[v20.07.0](https://github.com/dgraph-io/dgraph/releases/tag/v20.07.0), we have
added support for encryption keys sitting on Vault servers. This allows an
alternate way to configure the encryption keys needed for encrypting the data at
rest.

## Set up Encryption

To enable encryption, we need to pass a file that stores the data encryption key
with the option `--encryption key-file=value`. The key size must be 16, 24, or
32 bytes long, and the key size determines the corresponding block size for AES
encryption, i.e., AES-128, AES-192, and AES-256, respectively.

You can use the following command to create the encryption key file (set _count_
to the desired key size):

```bash
tr -dc 'a-zA-Z0-9' < /dev/urandom | dd bs=1 count=32 of=enc_key_file
```

  On macOS you may have to use
  `LC_CTYPE=C; tr -dc 'a-zA-Z0-9' < /dev/urandom | dd bs=1 count=32 of=enc_key_file`.
  To view the key, use `cat enc_key_file`. Alternatively, you can
  use the `--vault` [superflag's](./deploy/cli-command-reference) options to
  enable encryption, as
  [explained below](#example-using-dgraph-cli-with-hashicorp-vault-configuration).

## Turn on Encryption

Here is an example that starts one Zero server and one Alpha server with the
encryption feature turned on:

```bash
dgraph zero --my="localhost:5080" --replicas 1 --raft "idx=1"
dgraph alpha --encryption key-file="./enc_key_file" --my="localhost:7080" --zero="localhost:5080"
```

If multiple Alpha nodes are part of the cluster, you will need to pass the
`--encryption key-file` option to each of the Alphas.

Once an Alpha has encryption enabled, the encryption key must be provided in
order to start the Alpha server.
If the Alpha server restarts, the
`--encryption key-file` option must be set along with the key in order to
restart successfully.

### Storing encryption key secret in Hashicorp Vault

You can save the encryption key secret in a
[Hashicorp Vault](https://www.vaultproject.io/) K/V Secret instead of as a file
on Dgraph Alpha.

To use [Hashicorp Vault](https://www.vaultproject.io/), meet the following
prerequisites for the Vault server.

1. Ensure that the Vault server is accessible from Dgraph Alpha and configured
   using URL `http://fqdn[ip]:port`.
2. Enable the [AppRole Auth method](https://www.vaultproject.io/docs/auth/approle)
   and enable the [KV Secrets Engine](https://www.vaultproject.io/docs/secrets/kv).
3. Save the value of the key (16, 24, or 32 bytes long) that Dgraph Alpha will
   use in a KV Secret path
   ([K/V Version 1](https://www.vaultproject.io/docs/secrets/kv/kv-v1) or
   [K/V Version 2](https://www.vaultproject.io/docs/secrets/kv/kv-v2)). For
   example, you can upload the following to the KV Secrets Engine Version 2 path
   `secret/data/dgraph/alpha`:
   ```json
   {
     "options": {
       "cas": 0
     },
     "data": {
       "enc_key": "qIvHQBVUpzsOp74PmMJjHAOfwIA1e6zm"
     }
   }
   ```
4. Create or use a role with an attached policy that grants access to the
   secret. For example, the following policy would grant access to
   `secret/data/dgraph/alpha`:
   ```hcl
   path "secret/data/dgraph/*" {
     capabilities = [ "read", "update" ]
   }
   ```
5. Using the `role_id` generated from the previous step, create a corresponding
   `secret_id`, and copy the `role_id` and `secret_id` over to local files, like
   `./dgraph/vault/role_id` and `./dgraph/vault/secret_id`, that will be used by
   Dgraph Alpha nodes.

   To learn more about the above steps, see [Dgraph Vault Integration:
   Docker](https://github.com/dgraph-io/dgraph/blob/main/contrib/config/vault/docker/README.md).

   The key format for the `enc-field` option can be defined using `enc-format`
   with the values `base64` (default) or `raw`.

### Example using Dgraph CLI with Hashicorp Vault configuration

The following example shows how to use Dgraph with a Vault server that holds the
encryption key:

```bash
## Start Dgraph Zero in different terminal tab or window
dgraph zero --my=localhost:5080 --replicas 1 --raft "idx=1"

## Start Dgraph Alpha in different terminal tab or window
dgraph alpha --my="localhost:7080" --zero="localhost:5080" \
  --vault "addr=http://localhost:8200; enc-field=enc_key; enc-format=raw; path=secret/data/dgraph/alpha; role-id-file=./role_id; secret-id-file=./secret_id"
```

If multiple Dgraph Alpha nodes are part of the cluster, you must pass the
`--encryption key-file` flag or the `--vault` superflag with appropriate
superflag options to each of the Dgraph Alpha nodes.

After an Alpha node has encryption enabled, you must provide the encryption key
to start the Alpha server. If the Alpha server restarts, the
`--encryption key-file` or the `--vault` superflag's options must be set along
with the key to restart successfully.

## Turn off Encryption

You can use [live loader](./live-loader) or [bulk loader](./bulk-loader) to
decrypt the data while importing.

## Change Encryption Key

The master encryption key set by the `--encryption key-file` option (or one used
in the Vault KV store) does not change automatically.
The master encryption key +encrypts underlying _data keys_ which are changed on a regular basis +automatically (more info about this is covered on the encryption-at-rest +[blog][encblog] post). + +[encblog]: + https://dgraph.io/blog/post/encryption-at-rest-dgraph-badger#one-key-to-rule-them-all-many-keys-to-find-them + +Changing the existing key to a new one is called key rotation. You can rotate +the master encryption key by using the `badger rotate` command on both p and w +directories for each Alpha. To maintain availability in HA cluster +configurations, you can do this rotate the key one Alpha at a time in a rolling +manner. + +You'll need both the current key and the new key in two different files. Specify +the directory you rotate ("p" or "w") for the `--dir` flag, the old key for the +`--old-key-path` flag, and the new key with the `--new-key-path` flag. + +``` +badger rotate --dir p --old-key-path enc_key_file --new-key-path new_enc_key_file +badger rotate --dir w --old-key-path enc_key_file --new-key-path new_enc_key_file +``` + +Then, you can start Alpha with the `new_enc_key_file` key file to use the new +key. diff --git a/dgraph/reference/enterprise-features/index.mdx b/dgraph/reference/enterprise-features/index.mdx new file mode 100644 index 00000000..84d565f6 --- /dev/null +++ b/dgraph/reference/enterprise-features/index.mdx @@ -0,0 +1,76 @@ +--- +title: Enterprise Features +--- + + diff --git a/dgraph/reference/enterprise-features/learner-nodes.mdx b/dgraph/reference/enterprise-features/learner-nodes.mdx new file mode 100644 index 00000000..9baa0f8e --- /dev/null +++ b/dgraph/reference/enterprise-features/learner-nodes.mdx @@ -0,0 +1,92 @@ +--- +title: Learner Nodes +description: + Learner nodes let you spin-up read-only replica instance to serve best-effort + queries faster +--- + +A Learner node is an enterprise-only feature that allows a user to spin-up a +read-only replica instance across the world without paying a latency cost. When +enabled, a Dgraph cluster using learner nodes can serve best-effort queries +faster. + +A "learner node" can still accept write operations. The node forwards them over +to the Alpha group leader and does the writing just like a typical Alpha node. +It will just be slower, depending on the latency between the Alpha node and the +learner node. + + + A learner node instance can forward `/admin` operations and perform both read + and write operations, but writing will incur in network call latency to the + main cluster. + + +## Set up a Learner node + +The learner node feature works at the Dgraph Alpha group level. To use it, first +you need to set up an Alpha instance as a learner node. Once the learner +instance is up, this replica can be used to run best-effort queries with zero +latency overhead. Because it's an Enterprise feature, a learner node won't be +able to connect to a Dgraph Zero node until the Zero node has a valid license. + +To spin up a learner node, first make sure that you start all the nodes, +including the Dgraph Zero leader and the Dgraph Alpha leader, with the `--my` +flag so that these nodes will be accessible to the learner node. Then, start an +Alpha instance as follows: + +```sh +dgraph alpha --raft="learner=true; group=N" --my :5080 +``` + +This allows the new Alpha instance to get all the updates from the group "N" +leader without participating in the Raft elections. + + + You must specify the `--my` flag to set the IP address and port of Dgraph + Zero, the Dgraph Alpha leader node, and the learner node. 
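Putting the pieces together, a minimal sketch of a main cluster plus a remote learner replica; the hostnames `main-host` and `remote-host` and group `1` are illustrative assumptions, not values from the documentation:

```sh
# Main cluster: Zero and a regular Alpha, each reachable via --my
dgraph zero --my=main-host:5080 --replicas 1 --raft "idx=1"
dgraph alpha --my=main-host:7080 --zero=main-host:5080

# Remote read replica: joins group 1 as a learner, without voting in Raft elections
dgraph alpha --raft="learner=true; group=1" --my=remote-host:7080 --zero=main-host:5080
```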
## Best-effort Queries

Regular queries use the strict consistency model: any write operation to the cluster, anywhere, can be read immediately.

Best-effort queries apply the eventual consistency model: a write to the cluster will eventually be seen by the node. Under normal conditions, eventual consistency is usually achieved quickly.

A best-effort query to a learner node returns any data that is already available in that learner node. The response is still a valid data snapshot, but at a timestamp that is not the latest one.

Best-effort queries won't be forwarded to a Zero node to get the latest timestamp.

You can still send typical read queries (strict consistency) to a learner node. They just incur an extra latency cost due to having to reach out to the Zero leader.

If the learner node needs to serve normal queries, at least one Alpha leader must be available.

## Use-case examples

### Geographic distribution

Consider this scenario:

_You want to achieve low latency for clients in a remote geographical region, distant from your Dgraph cluster._

You can address this need by using a learner node to run best-effort queries. This read-only replica instance can live in a distant geography, and you can use best-effort queries to get fast responses.

Because learner nodes support read and write operations, users in the remote location can do everything with this learner node, as if they were working with the full cluster.

diff --git a/dgraph/reference/enterprise-features/license.mdx b/dgraph/reference/enterprise-features/license.mdx
new file mode 100644
index 00000000..49a58bcd
--- /dev/null
+++ b/dgraph/reference/enterprise-features/license.mdx
@@ -0,0 +1,56 @@
---
title: License
---

Dgraph enterprise features are proprietary, licensed under the [Dgraph Community License][dcl]. All Dgraph releases contain proprietary code for enterprise features. Enabling these features requires an enterprise contract from [contact@dgraph.io](mailto:contact@dgraph.io) or the [discuss forum](https://discuss.dgraph.io).

**Dgraph enterprise features are enabled by default for 30 days in a new cluster**. After the trial period of thirty (30) days, the cluster must obtain a license from Dgraph to continue using the enterprise features released in the proprietary code.

At the conclusion of your 30-day trial period, if a license has not been applied to the cluster, access to the enterprise features is suspended. The cluster continues to operate without enterprise features.

When you have an enterprise license key, the license can be applied to the cluster by including it as the body of a POST request to the `/enterpriseLicense` HTTP endpoint on any Zero server.

```sh
curl -X POST localhost:6080/enterpriseLicense --upload-file ./licensekey.txt
```

It can also be applied by passing the path to the enterprise license file (using the flag `--enterprise_license`) to the `dgraph zero` command used to start the server. The second option is useful when the process needs to be automated.

```sh
dgraph zero --enterprise_license ./licensekey.txt
```

**Warning messages related to license expiry**

Dgraph prints a warning message in the logs when your license is about to expire. If you are planning to implement a log monitoring solution, you may note this pattern and configure suitable alerts. You can find an example of this message below:

```sh
Your enterprise license will expire in 6 days from now. To continue using enterprise features after 6 days from now, apply a valid license. To get a new license, contact us at https://dgraph.io/contact.
```

Once your license has expired, you will see the following warning message in the logs:

```sh
Your enterprise license has expired and enterprise features are disabled. To continue using enterprise features, apply a valid license. To receive a new license, contact us at https://dgraph.io/contact.
```

[dcl]: https://github.com/dgraph-io/dgraph/blob/main/licenses/DCL.txt

diff --git a/dgraph/reference/enterprise-features/lsbackup.mdx b/dgraph/reference/enterprise-features/lsbackup.mdx
new file mode 100644
index 00000000..82a7a81e
--- /dev/null
+++ b/dgraph/reference/enterprise-features/lsbackup.mdx
@@ -0,0 +1,179 @@
---
title: Backup List Tool
---

The `lsbackup` command-line tool prints information about the stored backups in a user-defined location.

## Parameters

The `lsbackup` command has two flags:

```txt
Flags:
  -h, --help              help for lsbackup
  -l, --location string   Sets the source location URI (required).
      --verbose           Outputs additional info in backup list.
```

- `--location`: indicates a [source URI](#source-uri) with Dgraph backup objects. This URI supports all the schemes used for backup.
- `--verbose`: if enabled, prints additional information about the selected backup.

For example, you can execute the `lsbackup` command as follows:

```sh
dgraph lsbackup -l <location>
```

### Source URI

Source URI formats:

- `[scheme]://[host]/[path]?[args]`
- `[scheme]:///[path]?[args]`
- `/[path]?[args]` (only for local or NFS)

Source URI parts:

- `scheme`: service handler, one of: `s3`, `minio`, `file`
- `host`: remote address; e.g.: `dgraph.s3.amazonaws.com`
- `path`: directory, bucket, or container at the target; e.g.: `/dgraph/backups/`
- `args`: specific arguments that are ok to appear in logs

## Output

The following snippet is an example output of `lsbackup`:

```json
[
  {
    "path": "/home/user/Dgraph/20.11/backup/manifest.json",
    "since": 30005,
    "backup_id": "reverent_vaughan0",
    "backup_num": 1,
    "encrypted": false,
    "type": "full"
  }
]
```

If the `--verbose` flag is enabled, the output looks like this:

```json
[
  {
    "path": "/home/user/Dgraph/20.11/backup/manifest.json",
    "since": 30005,
    "backup_id": "reverent_vaughan0",
    "backup_num": 1,
    "encrypted": false,
    "type": "full",
    "groups": {
      "1": [
        "dgraph.graphql.schema_created_at",
        "dgraph.graphql.xid",
        "dgraph.drop.op",
        "dgraph.type",
        "dgraph.cors",
        "dgraph.graphql.schema_history",
        "score",
        "dgraph.graphql.p_query",
        "dgraph.graphql.schema",
        "dgraph.graphql.p_sha256hash",
        "series"
      ]
    }
  }
]
```

### Return values

- `path`: name of the backup.
- `since`: the timestamp at which this backup was taken. It's called "since" because it becomes the timestamp from which to back up in the next incremental backup.
- `groups`: the map of valid groups to predicates at the time the backup was created. This is printed only if the `--verbose` flag is enabled.
- `encrypted`: indicates whether this backup is encrypted or not.
- `type`: indicates whether this backup is a full or an incremental one.
- `drop_operation`: lists the DROP operations that took place since the last backup. These are used during restore to redo those operations before applying the backup. This is printed only if the `--verbose` flag is enabled.
- `backup_num`: a monotonically increasing number assigned to each backup in a series. The full backup has `backup_num` equal to one, and each incremental backup gets assigned the next available number. This can be used to verify the integrity of the data during a restore.
- `backup_id`: a unique ID assigned to all the backups in the same series.

## Examples

### S3

Checking information about backups stored in an AWS S3 bucket:

```sh
dgraph lsbackup -l s3://s3.us-west-2.amazonaws.com/dgraph_backup
```

You might need to set up access and secret key environment variables in the shell (or session) where you run the `lsbackup` command. For example:

```
AWS_SECRET_ACCESS_KEY=<secret_access_key>
AWS_ACCESS_ID=<access_id>
```

### MinIO

Checking information about backups stored in a MinIO bucket:

```sh
dgraph lsbackup -l minio://localhost:9000/dgraph_backup
```

If the MinIO server is started without `tls`, you must specify `secure=false`, as it is set to `true` by default. You also need to set the environment variables for the access key and secret key.

To get `lsbackup` running, follow these steps:

- Set `MINIO_ACCESS_KEY` as an environment variable for the running shell. This can be done with the following command (`minioadmin` is the default access key, unless it is changed by the user):

  ```
  export MINIO_ACCESS_KEY=minioadmin
  ```

- Set `MINIO_SECRET_KEY` as an environment variable for the running shell. This can be done with the following command (`minioadmin` is the default secret key, unless it is changed by the user):

  ```
  export MINIO_SECRET_KEY=minioadmin
  ```

- Add the argument `secure=false` to the `lsbackup` command, so the command looks like this (the double quotes `"` are required):

  ```sh
  dgraph lsbackup -l "minio://localhost:9000/?secure=false"
  ```

### Local

Checking information about backups stored locally (on disk):

```sh
dgraph lsbackup -l ~/dgraph_backup
```

diff --git a/dgraph/reference/enterprise-features/multitenancy.mdx b/dgraph/reference/enterprise-features/multitenancy.mdx
new file mode 100644
index 00000000..0698d32a
--- /dev/null
+++ b/dgraph/reference/enterprise-features/multitenancy.mdx
@@ -0,0 +1,333 @@
---
title: Multi-Tenancy
---

Multi-tenancy is an enterprise-only feature that allows various tenants to co-exist in the same Dgraph cluster using `uint64` namespaces. With multi-tenancy, each tenant can only log into their own namespace and operate in their own namespace.

Multi-tenancy is an enterprise feature and needs [Access Control Lists](./access-control-lists) (ACL) enabled to work.

## Overview

Multi-tenancy is built upon [Access Control Lists](./access-control-lists) (ACL), and enables multiple tenants to share a Dgraph cluster using unique namespaces. The tenants are logically separated, and their data lies in the same `p` directory. Each namespace has a group guardian, which has root access to that namespace.

The default namespace is called a `galaxy`. [Guardians of the Galaxy](#guardians-of-the-galaxy) get special access to create or delete namespaces and change passwords of users of other namespaces.

Dgraph provides a timeout limit per query that's configurable using the `--limit` superflag's `query-limit` option. There's no time limit for queries by default, but you can override it when running Dgraph Alpha. For multi-tenant environments, a suggested `query-limit` value is 500ms.

## FAQ

- How are access controls and policies handled among different tenants?

  In previous versions of Dgraph, the [Access Control Lists](/access-control-lists) (ACL) feature offered a unified control solution across the entire database. With the multi-tenancy feature, ACL policies are scoped down to individual tenants in the database.

  Only super-admins ([Guardians of the Galaxy](#guardians-of-the-galaxy)) have access across tenants. The super admin is used only for database administration operations, such as exporting the data of all tenants. The _Guardians of the Galaxy_ group cannot read across tenants.

- What's the ACL granularity in a multi-tenancy environment? Is it per tenant?

  The access controls are applied per tenant, to either specific predicates or all predicates that exist for the tenant. For example, the user `John Smith`, belonging to the group `Data Approvers` of tenant `Accounting`, may only have read-only access to predicates, while user `Jane Doe`, belonging to the group `Data Editors` within that same tenant, may have access to modify those predicates. All the ACL rules need to be defined for each tenant in your backend. The level of granularity available allows for defining rules over specific predicates or all predicates belonging to that tenant.

- Are tenants a physical separation or a logical one?

  Tenants are a logical separation. For example, data shared by two tenants needs to be written twice, once for each tenant. Each client must authenticate within a tenant, and can only modify data within the tenant as allowed by the configured ACLs.

- Can data be copied from one tenant to the other?

  Yes, but not by breaking any ACL or tenancy constraints. This can be done by exporting data from one tenant and importing it into another.

## Namespace

A multi-tenancy namespace acts as a logical silo, so data stored in one namespace is not accessible from another namespace. Each namespace has a group guardian (with root access to that namespace) and a unique `uint64` identifier. Users are members of a single namespace, and cross-namespace queries are not allowed.

If a user wants to access multiple namespaces, the user needs to be created separately for each namespace.

The default namespace (`0x00`) is called a `galaxy`. A [Guardian of the Galaxy](#guardians-of-the-galaxy) has special access to create or delete namespaces and change passwords of users of other namespaces.
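Each user logs into their own namespace through the `login` mutation on the `/admin` endpoint (shown in full in the Administrative API section later in this document). A minimal sketch, assuming a namespace `2` exists and still uses the default `groot` credentials:

```graphql
mutation {
  login(userId: "groot", password: "password", namespace: 2) {
    response {
      accessJWT
      refreshJWT
    }
  }
}
```

The returned `accessJWT` is then passed in the `X-Dgraph-AccessToken` header on subsequent requests to that namespace.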
## Access Control Lists

Multi-tenancy defines certain ACL roles for the shared cluster:

- [Guardians of the Galaxy](#guardians-of-the-galaxy) (Super Admins)
- Guardians of the namespace, who can perform the following operations:

  - create users and groups within the namespace
  - assign users to groups within the namespace
  - assign predicates to groups within the namespace
  - add users to groups within the namespace
  - export the namespace
  - drop data within the namespace
  - query and mutate within the namespace

  Guardians of the namespace cannot query or mutate across namespaces.

- Normal users, who can perform the following operations:

  - log into a namespace
  - query within the namespace
  - mutate within the namespace

  Normal users cannot query or mutate across namespaces.

### Guardians of the Galaxy

A _Guardian of the Galaxy_ is a Super Admin of the default namespace (`0x00`).

As a super-admin, a _Guardian of the Galaxy_ can:

- [Create](#create-a-namespace) and [delete](#delete-a-namespace) namespaces
- Reset passwords
- Query and mutate the default namespace (`0x00`)
- Trigger cluster-wide [backups](#backups) (there is no namespace-specific backup)
- Trigger cluster-wide or namespace-specific [exports](#exports) (exports contain information about the namespace)

For example, if the user `rocket` is part of the _Guardians of the Galaxy_ group (namespace `0x00`), he can only read/write on namespace `0x00`.

## Create a Namespace

Only members of the [Guardians of the Galaxy](#guardians-of-the-galaxy) group can create a namespace. A namespace can be created by calling `/admin` with the `addNamespace` mutation, which returns the number assigned to the new namespace.

To create a namespace, the _Guardian_ must send the JWT access token in the `X-Dgraph-AccessToken` header.

For example, to create a new namespace:

```graphql
mutation {
  addNamespace(input: { password: "mypass" }) {
    namespaceId
    message
  }
}
```

By sending the mutation above, a namespace is created. A _Guardian group_ is also automatically created for that namespace. A `groot` user with password `mypass` (default is `password`) is created in the guardian group. You can then use these credentials to log into the namespace and perform operations like [`addUser`](./access-control-lists.md#create-a-regular-user).

## List Namespaces

Only members of the [Guardians of the Galaxy](#guardians-of-the-galaxy) group can list active namespaces. You can check available namespaces using the `/state` endpoint.

For example, if you have a multi-tenant cluster with multiple namespaces, as a _Guardian of the Galaxy_ you can query `state` in GraphQL:

```graphql
query {
  state {
    namespaces
  }
}
```

The response lists the namespaces that are available and active:

```json
{
  "data": {
    "state": {
      "namespaces": [2, 1, 0]
    }
  }
}
```

## Delete a Namespace

Only members of the [Guardians of the Galaxy](#guardians-of-the-galaxy) group can delete a namespace. A namespace can be dropped by calling `/admin` with the `deleteNamespace` mutation.

To delete a namespace, the _Guardian_ must send the JWT access token in the `X-Dgraph-AccessToken` header.

For example, to drop the namespace `123`:

```graphql
mutation {
  deleteNamespace(input: { namespaceId: 123 }) {
    namespaceId
    message
  }
}
```

Members of `namespace-guardians` can't delete namespaces; they can only perform queries and mutations.

## Reset passwords

Only members of the _Guardians of the Galaxy_ can reset passwords across namespaces. A password can be reset by calling `/admin` with the `resetPassword` mutation.

For example, to reset the password of user `groot` in namespace `100`:

```graphql
mutation {
  resetPassword(
    input: { userId: "groot", password: "newpassword", namespace: 100 }
  ) {
    userId
    message
  }
}
```

## Drop Operations

The `drop all` operation can be triggered only by a [Guardian of the Galaxy](#guardians-of-the-galaxy). It is executed at the cluster level and deletes data across namespaces. All other `drop` operations run at the namespace level and are namespace-specific. For information about other drop operations, see [Alter the database](./raw-http.md#alter-the-database).

The `drop all` operation is executed at the cluster level and deletes data and schema across namespaces. A Guardian of the namespace can trigger a `drop data` operation within the namespace; the `drop data` operation deletes all the data but retains the schema.

For example:

```
curl 'http://localhost:8080/alter' \
  -H 'X-Dgraph-AccessToken: <ACCESS_TOKEN>' \
  --data-raw '{"drop_op":"DATA"}' \
  --compressed
```

## Backups

Backups are currently cluster-wide only, but [exports](#exports) can be created per namespace. Only a [Guardian of the Galaxy](#guardians-of-the-galaxy) can trigger a backup.

### Data import

The [initial import](./bulk-loader) and [live import](/live-loader) tools support multi-tenancy.

## Exports

Exports can be generated cluster-wide or at the namespace level. These exported sets of `.rdf` or `.json` files and schemas include the multi-tenancy namespace information.

If a _Guardian of the Galaxy_ exports the whole cluster, a single folder is generated, containing the export data of all the namespaces in a single `.rdf` or `.json` file and a single schema.

Guardians of a namespace can trigger an export for their namespace.

A namespace-specific export will contain the namespace value in the generated `.rdf` file:

```rdf
<0x01> "name" "ibrahim" <0x12> .   # this belongs to namespace 0x12
<0x01> "name" "ibrahim" <0x0> .    # this belongs to namespace 0x00
```

For example, the _Guardian of the Galaxy_ user can export the namespace `1234` to a folder in the export directory (by default this directory is `export`):

```graphql
mutation {
  export(input: { format: "rdf", namespace: 1234 }) {
    response {
      message
    }
  }
}
```

When using a _Guardian of the Namespace_, there's no need to specify the namespace in the GraphQL mutation, as they can only export within their own namespace:

```graphql
mutation {
  export(input: { format: "rdf" }) {
    response {
      message
    }
  }
}
```

To export all the namespaces (this is only valid for _Guardians of the Galaxy_):

```graphql
mutation {
  export(input: { format: "rdf", namespace: -1 }) {
    response {
      message
    }
  }
}
```

diff --git a/dgraph/reference/graphql-dql/dql-for-graphql.mdx b/dgraph/reference/graphql-dql/dql-for-graphql.mdx
new file mode 100644
index 00000000..13a41b01
--- /dev/null
+++ b/dgraph/reference/graphql-dql/dql-for-graphql.mdx
@@ -0,0 +1,13 @@
---
title: Use DQL in GraphQL
---

Dgraph Query Language ([DQL](./dql)) can be used to extend GraphQL API capabilities when writing:

- [custom DQL resolvers](./custom-dql)
- [subscriptions on DQL queries](./subscriptions#subscriptions-to-custom-dql)

When writing custom DQL query resolvers, you must understand the [GraphQL - DQL schema mapping](./graphql-dql-schema) to use proper aliases inside DQL queries so they map to the GraphQL response.

diff --git a/dgraph/reference/graphql-dql/graphql-data-loading.mdx b/dgraph/reference/graphql-dql/graphql-data-loading.mdx
new file mode 100644
index 00000000..ca6f9f8c
--- /dev/null
+++ b/dgraph/reference/graphql-dql/graphql-data-loading.mdx
@@ -0,0 +1,22 @@
---
title: Data loading
---

After you have deployed your first GraphQL schema, you get a GraphQL API served on the `/graphql` endpoint and an empty backend. You can populate the graph database using the mutation operations of the GraphQL API.

A more efficient way to populate the database is to use Dgraph's [import tools](./importdata).

The first step is to understand the [schema mapping](./graphql-dql-schema) and to prepare your RDF or JSON files to follow the internal Dgraph predicate names. You also have to make sure that you properly generate data for the `dgraph.type` predicate, so that each node is associated with its type (see the sketch at the end of this section).

If you are using the [initial import](./bulk-loader) tool, you can provide the GraphQL schema along with the data to import when executing the bulk load.

If you are using the [live import](./live-loader) tool, you must first deploy your GraphQL schema and then proceed with the import. Deploying the schema first generates the predicate indexes and reduces the loading time.
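For instance, RDF prepared for the import tools should carry a `dgraph.type` triple for every node. A small sketch, assuming a GraphQL type `Person` with a `name` field, whose generated predicate is `Person.name` per the schema mapping (the blank-node label `_:alice` is arbitrary):

```rdf
_:alice <Person.name> "Alice" .
_:alice <dgraph.type> "Person" .
```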
diff --git a/dgraph/reference/graphql-dql/graphql-data-migration.mdx b/dgraph/reference/graphql-dql/graphql-data-migration.mdx
new file mode 100644
index 00000000..f89496a5
--- /dev/null
+++ b/dgraph/reference/graphql-dql/graphql-data-migration.mdx
@@ -0,0 +1,70 @@
---
title: GraphQL data migration
---

When deploying a new version of your GraphQL schema, Dgraph updates the underlying DQL schema but does not alter the data.

As explained in the [GraphQL and DQL Schemas](./graphql-dql-schema) overview, Dgraph has no constraints at the database level, and any node with predicates is valid.

You may therefore face several discrepancies between the GraphQL API and the data.

### Unused fields

For example, let's assume that you have deployed the following schema:

```graphql
type TestDataMigration {
  id: ID!
  someInfo: String!
  someOtherInfo: String
}
```

Then you create a `TestDataMigration` with a `someOtherInfo` value.

Then you update the schema and remove the field:

```graphql
type TestDataMigration {
  id: ID!
  someInfo: String!
}
```

The data you have previously created is still in the graph database!

Moreover, if you delete the `TestDataMigration` object using its `id`, the GraphQL API delete operation will be successful.

If you followed the [GraphQL - DQL Schema mapping](./graphql-dql-schema), you understand that Dgraph has used the known list of predicates (`id`, `someInfo`) and removed them. In fact, Dgraph also removed the `dgraph.type` predicate, so this `TestDataMigration` node is no longer visible to the GraphQL API.

The point is that a node with this `uid` exists and has a predicate `someOtherInfo`. This is because this data was created initially, and nothing in the process of deploying a new schema version and then using a delete operation by ID instructed Dgraph to delete this predicate.

You end up with a node without a type (i.e., without a `dgraph.type` predicate) and with an old predicate value that is "invisible" to your GraphQL API!

When doing a GraphQL schema deployment, you must take care of data cleaning and data migration. The good news is that DQL offers you the tools to identify (search) potential issues and to correct the data (mutations).

In the previous case, you can alter the database and completely delete the predicate, or you can write an "upsert" DQL query that searches for the nodes of interest and deletes the unused predicate for those nodes.

### New non-nullable field

Another obvious example appears if you deploy a new version containing a new non-nullable field for an existing type. The existing nodes of the same type in the graph do not have this predicate. A GraphQL query reaching those nodes will return a list of errors. You can easily write an "upsert" DQL mutation to find all nodes of this type that are missing the new predicate and update them with a default value; see the sketch below.
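A minimal sketch of such an upsert, reusing the hypothetical `TestDataMigration` type from above and treating `someInfo` as the newly added non-nullable field:

```
upsert {
  query {
    # Find nodes of the type that are missing the new predicate
    nodes as var(func: type(TestDataMigration)) @filter(NOT has(TestDataMigration.someInfo))
  }
  mutation {
    set {
      # Backfill a default value so GraphQL queries stop returning errors
      uid(nodes) <TestDataMigration.someInfo> "default value" .
    }
  }
}
```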
diff --git a/dgraph/reference/graphql-dql/graphql-dgraph.mdx b/dgraph/reference/graphql-dql/graphql-dgraph.mdx
new file mode 100644
index 00000000..a7569fbe
--- /dev/null
+++ b/dgraph/reference/graphql-dql/graphql-dgraph.mdx
@@ -0,0 +1,51 @@
---
title: GraphQL on Existing Dgraph
---

### How to use GraphQL on an existing Dgraph instance

If you have an existing Dgraph instance that was created using a DQL schema (and populated with Dgraph import tools) and you want to expose some or all of the data using a GraphQL API, you can use the [@dgraph directive](./directive-dgraph) to customize how Dgraph maps GraphQL type names and field names to DQL types and predicates.

### Language support in GraphQL

In your GraphQL schema, you need to define a field for each language that you want to use. In addition, you also need to apply the `@dgraph(pred: "...")` directive on that field, with the `pred` argument set to point to the correct DQL predicate with a language tag for the language that you want to use it for. Dgraph automatically adds a `@lang` directive in the DQL schema for the corresponding predicate.

By default, the DQL predicate for a GraphQL field is generated as `Typename.FieldName`.

For example:

```graphql
type Person {
  name: String # Person.name is the auto-generated DQL predicate for this GraphQL field, unless overridden using @dgraph(pred: "...")
  nameHi: String @dgraph(pred: "Person.name@hi") # this field exposes the value of the language tag `@hi` for the DQL predicate `Person.name` to GraphQL
  nameEn: String @dgraph(pred: "Person.name@en")
  nameHi_En: String @dgraph(pred: "Person.name@hi:en") # this field uses multiple language tags: `@hi` and `@en`
  nameHi_En_untag: String @dgraph(pred: "Person.name@hi:en:.") # as this uses `.`, it will give untagged values if there is no value for `@hi` or `@en`
}
```

If a GraphQL field uses more than one language tag, it won't be part of any mutation input. In the above example, the fields `nameHi_En` and `nameHi_En_untag` can't be given as input to any mutation. Only fields that use one or no language can be given in a mutation input, like `name`, `nameHi`, and `nameEn`.

All the fields can be queried, irrespective of whether they use one language or more.

GraphQL won't be able to query `Person.name@*` types of language tags because of the structural requirements of GraphQL.

diff --git a/dgraph/reference/graphql-dql/graphql-dql-schema.mdx b/dgraph/reference/graphql-dql/graphql-dql-schema.mdx
new file mode 100644
index 00000000..581cd91b
--- /dev/null
+++ b/dgraph/reference/graphql-dql/graphql-dql-schema.mdx
@@ -0,0 +1,205 @@
---
title: GraphQL and DQL schemas
---

The first step in mastering DQL in the context of a GraphQL API is probably to understand the fundamental difference between the GraphQL schema and the DQL schema.

### In GraphQL, the schema is a central notion

GraphQL is a strongly typed language. Contrary to REST, which is organized in terms of endpoints, GraphQL APIs are organized in terms of types and fields. The type system is used to define the schema, which is a contract between client and server. GraphQL uses types to ensure apps only ask for what's possible and to provide clear and helpful errors.

In the [GraphQL Quick start](./quick-start), we used a schema to generate a GraphQL API:

```graphql
type Product {
  productID: ID!
  name: String @search(by: [term])
  reviews: [Review] @hasInverse(field: about)
}

type Customer {
  username: String! @id @search(by: [hash, regexp])
  reviews: [Review] @hasInverse(field: by)
}

type Review {
  id: ID!
  about: Product!
  by: Customer!
  comment: String @search(by: [fulltext])
  rating: Int @search
}
```

The API and the engine logic are generated from the schema, which defines the types of objects we are dealing with, the fields, and the relationships in the form of fields referencing other types.

### In DQL, the schema describes the predicates

Dgraph maintains a list of all predicate names with their types and indexes in the [Dgraph types schema](./dql-schema).

### Schema mapping

When deploying a GraphQL schema, Dgraph generates DQL predicates and types for the graph backend. In order to distinguish the field `name` of a type `Person` from the field `name` of a different type (they may have different indexes), Dgraph uses a dotted notation for the DQL schema.

For example, deploying the following GraphQL schema

```graphql
type Person {
  id: ID
  name: String!
  friends: [Person]
}
```

leads to the declaration of 3 predicates in the DQL schema:

- `Person.id default`
- `Person.name string`
- `Person.friends [uid]`

and one DQL type:

```
type Person {
  Person.name
  Person.friends
}
```

Once again, the DQL type is just a declaration of the list of predicates that one can expect to be present in a node having `dgraph.type` equal to `Person`.

The default mapping can be customized using the [@dgraph directive](./directive-dgraph).

#### GraphQL ID type and Dgraph `uid`

`Person.id` is not part of the `Person` DQL type: internally, Dgraph uses the `uid` predicate as the unique identifier of every node in the graph. Dgraph returns the value of `uid` when a GraphQL field of type `ID` is requested.

#### @search directive and predicate indexes

The `@search` directive tells Dgraph what search capabilities to build into your GraphQL API.

```graphql
type Person {
  name: String @search(by: [hash])
  ...
```

is simply translated into a predicate index specification in the Dgraph schema:

```
Person.name: string @index(hash) .
```

#### Constraints

DQL has neither the "non-nullable" constraint `!` nor the "unique" constraint. Constraints on the graph are handled by correctly using the `upsert` operation in DQL.

#### DQL queries

You can use DQL to query the data generated by the GraphQL API operations. For example, the GraphQL query

```graphql
query {
  queryPerson {
    id
    name
    friends {
      id
      name
    }
  }
}
```

can be executed in DQL as:

```graphql
{
  queryPerson(func: type(Person)) {
    id: uid
    name: Person.name
    friends: Person.friends {
      id: uid
      name: Person.name
    }
  }
}
```

Note that in this query, we are using aliases such as `name: Person.name` to name the predicates in the JSON response as they are declared in the GraphQL schema.

#### GraphQL Interface

DQL does not have the concept of interfaces.

Considering the following GraphQL schema:

```graphql
interface Location {
  id: ID!
  name: String!
  geoloc: Point
}

type Property implements Location {
  price: Float
}
```

the predicates and types generated for a `Property` are:

```
Location.geoloc: geo .
Location.name: string .
Property.price: float .
type Property {
  Location.name
  Location.geoloc
  Property.price
}
```

### Consequences

The fact that the GraphQL API backend is a graph in Dgraph implies that you can use DQL on the data that is also served by the GraphQL API operations.

In particular, you can:

- use Dgraph DQL mutations, but also Dgraph's [import tools](/importdata), to populate the graph after you have deployed a GraphQL schema. See [GraphQL data loading](./graphql-data-loading.md).
- use DQL to query the graph in the context of authorization rules and custom resolvers.
- add knowledge to your graph, such as metadata, scores, and annotations, but also relationships or relationship attributes (facets) that could be the result of similarity computation, threat detection, and so on. The added data can be hidden from your GraphQL API clients but be available to logic written with DQL clients.
- break things using DQL: DQL is powerful and bypasses constraints expressed in the GraphQL schema. You can, for example, delete a node predicate that is mandatory in the GraphQL API, as shown in the sketch after this list. Fortunately, there are ways to secure who can read/write/delete predicates; see the [ACL](./access-control-lists) section.
- fix things using DQL: this is especially useful when doing GraphQL schema updates that require some [data migrations](./graphql-data-migration.md).
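As an illustration of the "break things" point above, the following DQL mutation sketch deletes the `Person.name` predicate from a single node, even though the GraphQL schema declares `name` as non-nullable (`0x123` is a made-up UID):

```
{
  delete {
    # Removes the mandatory name value from this node; GraphQL queries
    # reaching it will then report an error for this field.
    <0x123> <Person.name> * .
  }
}
```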
diff --git a/dgraph/reference/graphql-dql/index.mdx b/dgraph/reference/graphql-dql/index.mdx
new file mode 100644
index 00000000..81c3ab9f
--- /dev/null
+++ b/dgraph/reference/graphql-dql/index.mdx
@@ -0,0 +1,19 @@
---
title: GraphQL - DQL interoperability
description: Dgraph Query Language (DQL) is Dgraph's proprietary language to add, modify, delete and fetch data.
---

As a GraphQL developer, you can deploy a GraphQL schema in Dgraph and immediately get a GraphQL API served on the `/graphql` endpoint and a backend; you don't need to concern yourself with the powerful graph database running in the background.

However, by leveraging the graph database and using Dgraph Query Language (DQL), Dgraph's proprietary language, you can address advanced use cases and overcome some limitations of the GraphQL specification.

This section covers how to use DQL in conjunction with the GraphQL API, the best practices, and the points of attention.

### In this section

diff --git a/dgraph/reference/graphql/admin/index.mdx b/dgraph/reference/graphql/admin/index.mdx
new file mode 100644
index 00000000..d420de73
--- /dev/null
+++ b/dgraph/reference/graphql/admin/index.mdx
@@ -0,0 +1,1067 @@
---
title: Administrative API
description: This documentation presents the Admin API and explains how to run a Dgraph database with GraphQL.
---

This article presents the Admin API and explains how to run a Dgraph database with GraphQL.

## Running Dgraph with GraphQL

The simplest way to start with Dgraph GraphQL is to run the all-in-one Docker image.

```
docker run -it -p 8080:8080 dgraph/standalone:%VERSION_HERE
```

That brings up GraphQL at `localhost:8080/graphql` and `localhost:8080/admin`, but it is intended for quickstart and doesn't persist data.

## Advanced options

Once you've tried out Dgraph GraphQL, you'll need to move past `dgraph/standalone` and run and deploy Dgraph instances.

Dgraph is a distributed graph database. It can scale to huge data and shard that data across a cluster of Dgraph instances. GraphQL is built into Dgraph in its Alpha nodes. To learn how to manage and deploy a Dgraph cluster, check our [deployment guide](https://dgraph.io/docs/deploy/).

GraphQL schema introspection is enabled by default, but you can disable it by setting the `--graphql` superflag's `introspection` option to false (`--graphql introspection=false`) when starting the Dgraph Alpha nodes in your cluster.

## Dgraph's schema

Dgraph's GraphQL runs in Dgraph and presents a GraphQL schema where the queries and mutations are executed in the Dgraph cluster. So the GraphQL schema is backed by Dgraph's schema.

This means that if you have a Dgraph instance and change its GraphQL schema, the schema of the underlying Dgraph will also be changed!

## Endpoints

When you start Dgraph with GraphQL, two GraphQL endpoints are served.

### /graphql

At `/graphql` you'll find the GraphQL API for the types you've added. That's what your app would access and is the GraphQL entry point to Dgraph. If you need to know more about this, see the [quick start](https://dgraph.io/docs/graphql/quick-start/) and [schema docs](https://dgraph.io/docs/graphql/schema/).

### /admin

At `/admin` you'll find an admin API for administering your GraphQL instance. The admin API is a GraphQL API that serves POST and GET, as well as compressed data, much like the `/graphql` endpoint.
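For example, the `health` query described below can be sent to `/admin` as an ordinary GraphQL POST; a curl sketch, assuming a local Alpha on port 8080:

```sh
curl -s localhost:8080/admin \
  -H 'Content-Type: application/json' \
  --data '{"query":"{ health { instance address status } }"}'
```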
Here are the important types, queries, and mutations from the `admin` schema.

```graphql
"""
The Int64 scalar type represents a signed 64‐bit numeric non‐fractional value.
Int64 can represent values in range [-(2^63),(2^63 - 1)].
"""
scalar Int64

"""
The UInt64 scalar type represents an unsigned 64‐bit numeric non‐fractional value.
UInt64 can represent values in range [0,(2^64 - 1)].
"""
scalar UInt64

"""
The DateTime scalar type represents date and time as a string in RFC3339 format.
For example: "1985-04-12T23:20:50.52Z" represents 20 minutes and 50.52 seconds after the 23rd hour of April 12th, 1985 in UTC.
"""
scalar DateTime

"""
Data about the GraphQL schema being served by Dgraph.
"""
type GQLSchema @dgraph(type: "dgraph.graphql") {
  id: ID!

  """
  Input schema (GraphQL types) that was used in the latest schema update.
  """
  schema: String! @dgraph(pred: "dgraph.graphql.schema")

  """
  The GraphQL schema that was generated from the 'schema' field.
  This is the schema that is being served by Dgraph at /graphql.
  """
  generatedSchema: String!
}

type Cors @dgraph(type: "dgraph.cors") {
  acceptedOrigins: [String]
}

"""
A NodeState is the state of an individual node in the Dgraph cluster.
"""
type NodeState {
  """
  Node type: either 'alpha' or 'zero'.
  """
  instance: String

  """
  Address of the node.
  """
  address: String

  """
  Node health status: either 'healthy' or 'unhealthy'.
  """
  status: String

  """
  The group this node belongs to in the Dgraph cluster.
  See: https://dgraph.io/docs/deploy/cluster-setup/.
  """
  group: String

  """
  Version of the Dgraph binary.
  """
  version: String

  """
  Time in nanoseconds since the node started.
  """
  uptime: Int64

  """
  Time in Unix epoch time that the node was last contacted by another Zero or Alpha node.
  """
  lastEcho: Int64

  """
  List of ongoing operations in the background.
  """
  ongoing: [String]

  """
  List of predicates for which indexes are built in the background.
  """
  indexing: [String]

  """
  List of Enterprise Features that are enabled.
  """
  ee_features: [String]
}

type MembershipState {
  counter: UInt64
  groups: [ClusterGroup]
  zeros: [Member]
  maxUID: UInt64
  maxNsID: UInt64
  maxTxnTs: UInt64
  maxRaftId: UInt64
  removed: [Member]
  cid: String
  license: License
  """
  Contains list of namespaces. Note that this is not stored in proto's MembershipState and
  computed at the time of query.
  """
  namespaces: [UInt64]
}

type ClusterGroup {
  id: UInt64
  members: [Member]
  tablets: [Tablet]
  snapshotTs: UInt64
  checksum: UInt64
}

type Member {
  id: UInt64
  groupId: UInt64
  addr: String
  leader: Boolean
  amDead: Boolean
  lastUpdate: UInt64
  clusterInfoOnly: Boolean
  forceGroupId: Boolean
}

type Tablet {
  groupId: UInt64
  predicate: String
  force: Boolean
  space: Int
  remove: Boolean
  readOnly: Boolean
  moveTs: UInt64
}

type License {
  user: String
  maxNodes: UInt64
  expiryTs: Int64
  enabled: Boolean
}

directive @dgraph(
  type: String
  pred: String
) on OBJECT | INTERFACE | FIELD_DEFINITION
directive @id on FIELD_DEFINITION
directive @secret(field: String!, pred: String) on OBJECT | INTERFACE

type UpdateGQLSchemaPayload {
  gqlSchema: GQLSchema
}

input UpdateGQLSchemaInput {
  set: GQLSchemaPatch!
}

input GQLSchemaPatch {
  schema: String!
}

input ExportInput {
  """
  Data format for the export, e.g. "rdf" or "json" (default: "rdf").
  """
  format: String

  """
  Namespace for the export in a multi-tenant cluster. Users from the guardians of the galaxy
  can export all namespaces by passing a negative value, or a specific namespaceId to export
  that namespace.
  """
  namespace: Int

  """
  Destination for the export: e.g. Minio or S3 bucket or /absolute/path.
  """
  destination: String

  """
  Access key credential for the destination.
  """
  accessKey: String

  """
  Secret key credential for the destination.
  """
  secretKey: String

  """
  AWS session token, if required.
  """
  sessionToken: String

  """
  Set to true to allow backing up to an S3 or Minio bucket that requires no credentials.
  """
  anonymous: Boolean
}

input TaskInput {
  id: String!
}

type Response {
  code: String
  message: String
}

type ExportPayload {
  response: Response
  exportedFiles: [String]
}

type DrainingPayload {
  response: Response
}

type ShutdownPayload {
  response: Response
}

type TaskPayload {
  kind: TaskKind
  status: TaskStatus
  lastUpdated: DateTime
}

enum TaskStatus {
  Queued
  Running
  Failed
  Success
  Unknown
}

enum TaskKind {
  Backup
  Export
  Unknown
}

input ConfigInput {
  """
  Estimated memory the caches can take. Actual usage by the process would be
  more than specified here. The caches will be updated according to the
  cache_percentage flag.
  """
  cacheMb: Float

  """
  A true value of logRequest enables logging of all the requests coming to Alphas;
  a false value disables it.
  """
  logRequest: Boolean
}

type ConfigPayload {
  response: Response
}

type Config {
  cacheMb: Float
}

input RemoveNodeInput {
  """
  ID of the node to be removed.
  """
  nodeId: UInt64!

  """
  ID of the group from which the node is to be removed.
  """
  groupId: UInt64!
}

type RemoveNodePayload {
  response: Response
}

input MoveTabletInput {
  """
  Namespace in which the predicate exists.
  """
  namespace: UInt64

  """
  Name of the predicate to move.
  """
  tablet: String!

  """
  ID of the destination group where the predicate is to be moved.
  """
  groupId: UInt64!
}

type MoveTabletPayload {
  response: Response
}

enum AssignKind {
  UID
  TIMESTAMP
  NAMESPACE_ID
}

input AssignInput {
  """
  Choose what to assign: UID, TIMESTAMP or NAMESPACE_ID.
  """
  what: AssignKind!

  """
  How many to assign.
  """
  num: UInt64!
}

type AssignedIds {
  """
  The first UID, TIMESTAMP or NAMESPACE_ID assigned.
  """
  startId: UInt64

  """
  The last UID, TIMESTAMP or NAMESPACE_ID assigned.
  """
  endId: UInt64

  """
  TIMESTAMP for read-only transactions.
  """
  readOnly: UInt64
}

type AssignPayload {
  response: AssignedIds
}

input BackupInput {
  """
  Destination for the backup: e.g. Minio or S3 bucket.
  """
  destination: String!

  """
  Access key credential for the destination.
  """
  accessKey: String

  """
  Secret key credential for the destination.
  """
  secretKey: String

  """
  AWS session token, if required.
  """
  sessionToken: String

  """
  Set to true to allow backing up to an S3 or Minio bucket that requires no credentials.
  """
  anonymous: Boolean

  """
  Force a full backup instead of an incremental backup.
  """
  forceFull: Boolean
}

type BackupPayload {
  response: Response
  taskId: String
}

input RestoreInput {
  """
  Destination for the backup: e.g. Minio or S3 bucket.
  """
  location: String!

  """
  Backup ID of the backup series to restore. This ID is included in the manifest.json file.
  If missing, it defaults to the latest series.
  """
  backupId: String

  """
  Number of the backup within the backup series to be restored. Backups with a greater value
  will be ignored. If the value is zero or missing, the entire series will be restored.
  """
  backupNum: Int

  """
  Path to the key file needed to decrypt the backup. This file should be accessible
  by all alphas in the group. The backup will be written using the encryption key
  with which the cluster was started, which might be different than this key.
  """
  encryptionKeyFile: String

  """
  Vault server address where the key is stored. This server must be accessible
  by all alphas in the group. Default "http://localhost:8200".
  """
  vaultAddr: String

  """
  Path to the Vault RoleID file.
  """
  vaultRoleIDFile: String

  """
  Path to the Vault SecretID file.
  """
  vaultSecretIDFile: String

  """
  Vault kv store path where the key lives. Default "secret/data/dgraph".
  """
  vaultPath: String

  """
  Vault kv store field whose value is the key. Default "enc_key".
  """
  vaultField: String

  """
  Vault kv store field's format. Must be "base64" or "raw". Default "base64".
  """
  vaultFormat: String

  """
  Access key credential for the destination.
  """
  accessKey: String

  """
  Secret key credential for the destination.
  """
  secretKey: String

  """
  AWS session token, if required.
  """
  sessionToken: String

  """
  Set to true to allow backing up to an S3 or Minio bucket that requires no credentials.
  """
  anonymous: Boolean
}

type RestorePayload {
  """
  A short string indicating whether the restore operation was successfully scheduled.
  """
  code: String

  """
  Includes the error message if the operation failed.
  """
  message: String
}

input ListBackupsInput {
  """
  Destination for the backup: e.g. Minio or S3 bucket.
  """
  location: String!

  """
  Access key credential for the destination.
  """
  accessKey: String

  """
  Secret key credential for the destination.
  """
  secretKey: String

  """
  AWS session token, if required.
  """
  sessionToken: String

  """
  Whether the destination doesn't require credentials (e.g. S3 public bucket).
  """
  anonymous: Boolean
}

type BackupGroup {
  """
  The ID of the cluster group.
  """
  groupId: UInt64

  """
  List of predicates assigned to the group.
  """
  predicates: [String]
}

type Manifest {
  """
  Unique ID for the backup series.
  """
  backupId: String

  """
  Number of this backup within the backup series. The full backup always has a value of one.
  """
  backupNum: UInt64

  """
  Whether this backup was encrypted.
  """
  encrypted: Boolean

  """
  List of groups and the predicates they store in this backup.
  """
  groups: [BackupGroup]

  """
  Path to the manifest file.
  """
  path: String

  """
  The timestamp at which this backup was taken. The next incremental backup will
  start from this timestamp.
  """
  since: UInt64

  """
  The type of backup, either full or incremental.
  """
  type: String
}

type LoginResponse {
  """
  JWT token that should be used in future requests after this login.
  """
  accessJWT: String

  """
  Refresh token that can be used to re-login after accessJWT expires.
  """
  refreshJWT: String
}

type LoginPayload {
  response: LoginResponse
}

type User
  @dgraph(type: "dgraph.type.User")
  @secret(field: "password", pred: "dgraph.password") {
  """
  Username for the user. Dgraph ensures that usernames are unique.
  """
  name: String! @id @dgraph(pred: "dgraph.xid")
  groups: [Group] @dgraph(pred: "dgraph.user.group")
}

type Group @dgraph(type: "dgraph.type.Group") {
  """
  Name of the group. Dgraph ensures uniqueness of group names.
  """
  name: String! @id @dgraph(pred: "dgraph.xid")
  users: [User] @dgraph(pred: "~dgraph.user.group")
  rules: [Rule] @dgraph(pred: "dgraph.acl.rule")
}

type Rule @dgraph(type: "dgraph.type.Rule") {
  """
  Predicate to which the rule applies.
  """
  predicate: String! @dgraph(pred: "dgraph.rule.predicate")

  """
  Permissions that apply for the rule. Represented following the UNIX file permission
  convention. That is, 4 (binary 100) represents READ, 2 (binary 010) represents WRITE,
  and 1 (binary 001) represents MODIFY (the permission to change a predicate's schema).
  The options are:
  * 1 (binary 001) : MODIFY
  * 2 (010) : WRITE
  * 3 (011) : WRITE+MODIFY
  * 4 (100) : READ
  * 5 (101) : READ+MODIFY
  * 6 (110) : READ+WRITE
  * 7 (111) : READ+WRITE+MODIFY
  Permission 0, which is equal to no permission for a predicate, blocks all read,
  write and modify operations.
  """
  permission: Int! @dgraph(pred: "dgraph.rule.permission")
}

input StringHashFilter {
  eq: String
}

enum UserOrderable {
  name
}

enum GroupOrderable {
  name
}

input AddUserInput {
  name: String!
  password: String!
  groups: [GroupRef]
}

input AddGroupInput {
  name: String!
  rules: [RuleRef]
}

input UserRef {
  name: String!
}

input GroupRef {
  name: String!
}

input RuleRef {
  """
  Predicate to which the rule applies.
  """
  predicate: String!

  """
  Permissions that apply for the rule. Represented following the UNIX file permission
  convention. That is, 4 (binary 100) represents READ, 2 (binary 010) represents WRITE,
  and 1 (binary 001) represents MODIFY (the permission to change a predicate's schema).
  The options are:
  * 1 (binary 001) : MODIFY
  * 2 (010) : WRITE
  * 3 (011) : WRITE+MODIFY
  * 4 (100) : READ
  * 5 (101) : READ+MODIFY
  * 6 (110) : READ+WRITE
  * 7 (111) : READ+WRITE+MODIFY
  Permission 0, which is equal to no permission for a predicate, blocks all read,
  write and modify operations.
  """
  permission: Int!
}

input UserFilter {
  name: StringHashFilter
  and: UserFilter
  or: UserFilter
  not: UserFilter
}

input UserOrder {
  asc: UserOrderable
  desc: UserOrderable
  then: UserOrder
}

input GroupOrder {
  asc: GroupOrderable
  desc: GroupOrderable
  then: GroupOrder
}

input UserPatch {
  password: String
  groups: [GroupRef]
}

input UpdateUserInput {
  filter: UserFilter!
  set: UserPatch
  remove: UserPatch
}

input GroupFilter {
  name: StringHashFilter
  and: UserFilter
  or: UserFilter
  not: UserFilter
}

input SetGroupPatch {
  rules: [RuleRef!]!
}

input RemoveGroupPatch {
  rules: [String!]!
}

input UpdateGroupInput {
  filter: GroupFilter!
  set: SetGroupPatch
  remove: RemoveGroupPatch
}

type AddUserPayload {
  user: [User]
}

type AddGroupPayload {
  group: [Group]
}

type DeleteUserPayload {
  msg: String
  numUids: Int
}

type DeleteGroupPayload {
  msg: String
  numUids: Int
}

input AddNamespaceInput {
  password: String
}

input DeleteNamespaceInput {
  namespaceId: Int!
}

type NamespacePayload {
  namespaceId: UInt64
  message: String
}

input ResetPasswordInput {
  userId: String!
  password: String!
  namespace: Int!
}

type ResetPasswordPayload {
  userId: String
  message: String
  namespace: UInt64
}

input EnterpriseLicenseInput {
  """
  The contents of license file as a String.
  """
  license: String!
}

type EnterpriseLicensePayload {
  response: Response
}

type Query {
  getGQLSchema: GQLSchema
  health: [NodeState]
  state: MembershipState
  config: Config
  task(input: TaskInput!): TaskPayload

  getUser(name: String!): User
  getGroup(name: String!): Group

  """
  Get the currently logged in user.
  """
  getCurrentUser: User

  queryUser(
    filter: UserFilter
    order: UserOrder
    first: Int
    offset: Int
  ): [User]
  queryGroup(
    filter: GroupFilter
    order: GroupOrder
    first: Int
    offset: Int
  ): [Group]

  """
  Get the information about the backups at a given location.
  """
  listBackups(input: ListBackupsInput!): [Manifest]
}

type Mutation {
  """
  Update the Dgraph cluster to serve the input schema. This may change the GraphQL
  schema, the types and predicates in the Dgraph schema, and cause indexes to be recomputed.
  """
  updateGQLSchema(input: UpdateGQLSchemaInput!): UpdateGQLSchemaPayload

  """
  Starts an export of all data in the cluster. Export format should be 'rdf' (the default
  if no format is given), or 'json'.
  See: https://dgraph.io/docs/deploy/dgraph-administration/#export-database
  """
  export(input: ExportInput!): ExportPayload

  """
  Set (or unset) the cluster draining mode. In draining mode no further requests are served.
  """
  draining(enable: Boolean): DrainingPayload

  """
  Shutdown this node.
  """
  shutdown: ShutdownPayload

  """
  Alter the node's config.
  """
  config(input: ConfigInput!): ConfigPayload

  """
  Remove a node from the cluster.
  """
  removeNode(input: RemoveNodeInput!): RemoveNodePayload

  """
  Move a predicate from one group to another.
  """
  moveTablet(input: MoveTabletInput!): MoveTabletPayload

  """
  Lease UIDs, Timestamps or Namespace IDs in advance.
  """
  assign(input: AssignInput!): AssignPayload

  """
  Start a binary backup. See: https://dgraph.io/docs/enterprise-features/binary-backups/#create-a-backup
  """
  backup(input: BackupInput!): BackupPayload

  """
  Start restoring a binary backup. See: https://dgraph.io/docs/enterprise-features/binary-backups/#online-restore
  """
  restore(input: RestoreInput!): RestorePayload

  """
  Login to Dgraph. Successful login results in a JWT that can be used in future requests.
  If login is not successful an error is returned.
  """
  login(
    userId: String
    password: String
    namespace: Int
    refreshToken: String
  ): LoginPayload

  """
  Add a user. When linking to groups: if the group doesn't exist it is created; if the group
  exists, the new user is linked to the existing group. It's possible to both create new
  groups and link to existing groups in the one mutation.
  Dgraph ensures that usernames are unique, hence attempting to add an existing user results
  in an error.
  """
  addUser(input: [AddUserInput!]!): AddUserPayload

  """
  Add a new group and (optionally) set the rules for the group.
  """
  addGroup(input: [AddGroupInput!]!): AddGroupPayload

  """
  Update users, their passwords and groups. As with AddUser, when linking to groups: if the
  group doesn't exist it is created; if the group exists, the new user is linked to the existing
  group.
If the filter doesn't match any users, the mutation has no effect.
+  """
+  updateUser(input: UpdateUserInput!): AddUserPayload
+  """
+  Add or remove rules for groups. If the filter doesn't match any groups,
+  the mutation has no effect.
+  """
+  updateGroup(input: UpdateGroupInput!): AddGroupPayload
+  deleteGroup(filter: GroupFilter!): DeleteGroupPayload
+  deleteUser(filter: UserFilter!): DeleteUserPayload
+  """
+  Add a new namespace.
+  """
+  addNamespace(input: AddNamespaceInput): NamespacePayload
+  """
+  Delete a namespace.
+  """
+  deleteNamespace(input: DeleteNamespaceInput!): NamespacePayload
+  """
+  Reset password can only be used by the Guardians of the galaxy to reset password of
+  any user in any namespace.
+  """
+  resetPassword(input: ResetPasswordInput!): ResetPasswordPayload
+  """
+  Apply enterprise license.
+  """
+  enterpriseLicense(input: EnterpriseLicenseInput!): EnterpriseLicensePayload
+}
+```
+
+You'll notice that the `/admin` schema is very much the same as the schemas
+generated by Dgraph GraphQL.
+
+- The `health` query lets you know if everything is connected and if there's a
+  schema currently being served at `/graphql`.
+- The `state` query returns the current state of the cluster and group
+  membership information. For more information about `state` see
+  [here](./deploy/dgraph-zero.md#more-about-the-state-endpoint).
+- The `config` query returns the configuration options of the cluster set at the
+  time of starting it.
+- The `getGQLSchema` query gets the current GraphQL schema served at `/graphql`,
+  or returns null if there's no such schema.
+- The `updateGQLSchema` mutation allows you to change the schema currently
+  served at `/graphql`.
+
+## Enterprise features
+
+Enterprise features like ACL, backups, and restore are also available using the
+GraphQL API at the `/admin` endpoint.
+
+- [ACL](./enterprise-features/access-control-lists.md#accessing-secured-dgraph)
+- [Backups](./enterprise-features/binary-backups.md#create-a-backup)
+- [Restore](./enterprise-features/binary-backups.md#online-restore)
+
+## First start
+
+On first starting with a blank database:
+
+- There's no schema served at `/graphql`.
+- Querying the `/admin` endpoint for `getGQLSchema` returns
+  `"getGQLSchema": null`.
+- Querying the `/admin` endpoint for `health` lets you know that no schema has
+  been added.
+
+## Validating a schema
+
+You can validate a GraphQL schema before adding it to your database by sending
+your schema definition in an HTTP POST request to the
+`/admin/schema/validate` endpoint, as shown in the following example:
+
+Request header:
+
+```
+path: /admin/schema/validate
+method: POST
+```
+
+Request body:
+
+```graphql
+type Person {
+  name: String
+}
+```
+
+This endpoint returns a JSON response that indicates if the schema is valid or
+not, and provides an error if it isn't valid. In this case, the schema is valid,
+so the JSON response includes the following message: `Schema is valid`.
+
+## Modifying a schema
+
+There are two ways you can modify a GraphQL schema:
+
+- Using `/admin/schema`
+- Using the `updateGQLSchema` mutation on `/admin`
+
+
+  While modifying the GraphQL schema, if you get errors like
+  `errIndexingInProgress`, `another operation is already running` or `server is
+  not ready`, please wait a moment and then retry the schema update.
+
+
+### Using `/admin/schema`
+
+The `/admin/schema` endpoint provides a simplified method to add and update
+schemas.
+
+To create a schema, you only need to call the `/admin/schema` endpoint with the
+required schema definition. For example:
+
+```graphql
+type Person {
+  name: String
+}
+```
+
+If you have the schema definition stored in a `schema.graphql` file, you can use
+`curl` like this:
+
+```
+curl -X POST localhost:8080/admin/schema --data-binary '@schema.graphql'
+```
+
+On successful execution, the `/admin/schema` endpoint will give you a JSON
+response with a success code.
+
+### Using `updateGQLSchema` to add or modify a schema
+
+Another option to add or modify a GraphQL schema is the `updateGQLSchema`
+mutation.
+
+For example, to create a schema using `updateGQLSchema`, run this mutation on
+the `/admin` endpoint:
+
+```graphql
+mutation {
+  updateGQLSchema(input: { set: { schema: "type Person { name: String }" } }) {
+    gqlSchema {
+      schema
+      generatedSchema
+    }
+  }
+}
+```
+
+## Initial schema
+
+Regardless of the method used to upload the GraphQL schema, on a blank database,
+adding this schema
+
+```graphql
+type Person {
+  name: String
+}
+```
+
+would cause the following:
+
+- The `/graphql` endpoint would refresh and serve the GraphQL schema generated
+  from type `type Person { name: String }`.
+- The schema of the underlying Dgraph instance would be altered to allow for the
+  new `Person` type and `name` predicate.
+- The `/admin` endpoint for `health` would return that a schema is being served.
+- The mutation would return `"schema": "type Person { name: String }"` and the
+  generated GraphQL schema for `generatedSchema` (this is the schema served at
+  `/graphql`).
+- Querying the `/admin` endpoint for `getGQLSchema` would return the new schema.
+
+## Migrating a schema
+
+Given an instance serving the GraphQL schema from the previous section, updating
+the schema to the following
+
+```graphql
+type Person {
+  name: String @search(by: [regexp])
+  dob: DateTime
+}
+```
+
+would change the GraphQL definition of `Person` and result in the following:
+
+- The `/graphql` endpoint would refresh and serve the GraphQL schema generated
+  from the new type.
+- The schema of the underlying Dgraph instance would be altered to allow for
+  `dob` (predicate `Person.dob: datetime .` is added, and `Person.name` becomes
+  `Person.name: string @index(regexp) .`) and indexes are rebuilt to allow the
+  regexp search.
+- The `health` result is unchanged.
+- Querying the `/admin` endpoint for `getGQLSchema` would return the updated
+  schema.
+
+## Removing indexes from a schema
+
+Adding a schema through GraphQL doesn't remove existing data (it only removes
+indexes).
+
+For example, starting from the schema in the previous section and modifying it
+with the initial schema
+
+```graphql
+type Person {
+  name: String
+}
+```
+
+would have the following effects:
+
+- The `/graphql` endpoint would refresh to serve the schema built from this
+  type.
+- Thus, field `dob` would no longer be accessible, and there would be no search
+  available on `name`.
+- The search index on `name` in Dgraph would be removed.
+- The predicate `dob` in Dgraph would be left untouched (the predicate remains
+  and no data is deleted).
diff --git a/dgraph/reference/graphql/custom/custom-dql.mdx b/dgraph/reference/graphql/custom/custom-dql.mdx
new file mode 100644
index 00000000..f53666f8
--- /dev/null
+++ b/dgraph/reference/graphql/custom/custom-dql.mdx
@@ -0,0 +1,140 @@
+---
+title: Custom DQL
+description:
+  Dgraph Query Language (DQL) includes support for custom logic.
+  Specify the DQL query you want to execute and the Dgraph GraphQL API will
+  execute it.
+---
+
+Dgraph Query Language ([DQL](./dql)) lets you build custom resolver logic that
+goes beyond what is possible with the current GraphQL CRUD API.
+
+To define a DQL custom query, use the notation:
+
+```graphql
+ @custom(dql: """
+ ...
+ """)
+```
+
+
+  Since v21.03, you can also [subscribe to custom
+  DQL](/graphql/subscriptions/#subscriptions-to-custom-dql) queries.
+
+
+For example, let's say you had the following schema:
+
+```graphql
+type Tweets {
+  id: ID!
+  text: String! @search(by: [fulltext])
+  author: User
+  timestamp: DateTime! @search
+}
+type User {
+  screen_name: String! @id
+  followers: Int @search
+  tweets: [Tweets] @hasInverse(field: author)
+}
+```
+
+and you wanted to query tweets containing some particular text, sorted by the
+number of followers their author has. This is not possible with the
+automatically generated CRUD API. Similarly, let's say you have a table-like
+UI component in your application which displays only a user's name and the
+number of tweets posted by that user. Doing this with the auto-generated CRUD
+API would require you to fetch unnecessary data on the client side, and then
+employ client-side logic to find the count. Instead, all this can be achieved
+by specifying a DQL query for such custom use cases.
+
+So, you would need to modify your schema like this:
+
+```graphql
+type Tweets {
+  id: ID!
+  text: String! @search(by: [fulltext])
+  author: User
+  timestamp: DateTime! @search
+}
+type User {
+  screen_name: String! @id
+  followers: Int @search
+  tweets: [Tweets] @hasInverse(field: author)
+}
+type UserTweetCount @remote {
+  screen_name: String
+  tweetCount: Int
+}
+
+type Query {
+  queryTweetsSortedByAuthorFollowers(search: String!): [Tweets]
+    @custom(
+      dql: """
+      query q($search: string) {
+        var(func: type(Tweets)) @filter(anyoftext(Tweets.text, $search)) {
+          Tweets.author {
+            followers as User.followers
+          }
+          authorFollowerCount as sum(val(followers))
+        }
+        queryTweetsSortedByAuthorFollowers(func: uid(authorFollowerCount), orderdesc: val(authorFollowerCount)) {
+          id: uid
+          text: Tweets.text
+          author: Tweets.author {
+            screen_name: User.screen_name
+            followers: User.followers
+          }
+          timestamp: Tweets.timestamp
+        }
+      }
+      """
+    )
+
+  queryUserTweetCounts: [UserTweetCount]
+    @custom(
+      dql: """
+      query {
+        queryUserTweetCounts(func: type(User)) {
+          screen_name: User.screen_name
+          tweetCount: count(User.tweets)
+        }
+      }
+      """
+    )
+}
+```
+
+Now, if you run the following query, it fetches the tweets containing
+"GraphQL" in their text, sorted by the number of followers their author has:
+
+```graphql
+query {
+  queryTweetsSortedByAuthorFollowers(search: "GraphQL") {
+    text
+  }
+}
+```
+
+Note the following points when specifying the DQL query for such custom
+resolvers:
+
+- The name of the DQL query that you want to map to the GraphQL response should
+  be the same as the name of the GraphQL query.
+- You must use proper aliases inside DQL queries to map them to the GraphQL
+  response.
+- If you are using variables in DQL queries, their names should be the same as
+  the names of the arguments for the GraphQL query.
+- For variables, only scalar GraphQL arguments like `Boolean`, `Int`, `Float`,
+  etc. are allowed. Lists and Object types are not allowed to be used as
+  variables with DQL queries.
+- You can query only as many levels with GraphQL as you have mapped in the DQL
+  query.
+  For instance, in the first custom query above, we haven't mapped an author's
+  tweets to a GraphQL alias, so we won't be able to fetch the author's tweets
+  using that query.
+- If the custom GraphQL query returns an interface, and you want to use
+  `__typename` in the GraphQL query, then you should add `dgraph.type` as a
+  field in the DQL query without any alias. This is not required for types,
+  only for interfaces.
+- To subscribe to a custom DQL query, use the `@withSubscription` directive.
+  See the [Subscriptions article](/graphql/subscriptions/) for more
+  information.
+
+---
diff --git a/dgraph/reference/graphql/custom/custom-overview.mdx b/dgraph/reference/graphql/custom/custom-overview.mdx
new file mode 100644
index 00000000..126bf908
--- /dev/null
+++ b/dgraph/reference/graphql/custom/custom-overview.mdx
@@ -0,0 +1,60 @@
+---
+title: Custom Resolvers Overview
+description:
+  Dgraph creates a GraphQL API from nothing more than GraphQL types. To
+  customize the behavior of your schema, you can implement custom resolvers.
+---
+
+Dgraph creates a GraphQL API from nothing more than GraphQL types. That's great,
+and gets you moving fast from an idea to a running app. However, at some point,
+as your app develops, you might want to customize the behavior of your schema.
+
+In Dgraph, you do that with code (in any language you like) that implements
+custom resolvers.
+
+Dgraph doesn't execute your custom logic itself. It makes external HTTP
+requests. That means you can deploy your custom logic into the same Kubernetes
+cluster as your Dgraph instance, deploy and call, for example, AWS Lambda
+functions, or even make calls to existing HTTP and GraphQL endpoints.
+
+## The `@custom` directive
+
+There are three places you can use the `@custom` directive and thus tell Dgraph
+where to apply custom logic.
+
+1. You can add custom queries to the Query type:
+
+```graphql
+type Query {
+  myCustomQuery(...): QueryResultType @custom(...)
+}
+```
+
+2. You can add custom mutations to the Mutation type:
+
+```graphql
+type Mutation {
+  myCustomMutation(...): MutationResult @custom(...)
+}
+```
+
+3. You can add custom fields to your types:
+
+```graphql
+type MyType {
+  ...
+  customField: FieldType @custom(...)
+  ...
+}
+```
+
+## Learn more
+
+Find out more about the `@custom` directive [here](/graphql/custom/directive),
+or check out:
+
+- [custom query examples](/graphql/custom/query)
+- [custom mutation examples](/graphql/custom/mutation), or
+- [custom field examples](/graphql/custom/field)
+
+---
diff --git a/dgraph/reference/graphql/custom/directive.mdx b/dgraph/reference/graphql/custom/directive.mdx
new file mode 100644
index 00000000..a40cf3eb
--- /dev/null
+++ b/dgraph/reference/graphql/custom/directive.mdx
@@ -0,0 +1,562 @@
+---
+title: The @custom Directive
+description:
+  The @custom directive is used to define custom queries, mutations, and fields.
+  The result types can be local or remote.
+---
+
+The `@custom` directive is used to define custom queries, mutations and fields.
+
+In all cases, the result type (of the query, mutation or field) can be either:
+
+- a type that's stored in Dgraph (that's any type you've defined in your
+  schema), or
+- a type that's not stored in Dgraph and is marked with the `@remote` directive.
+
+Because the result types can be local or remote, you can call other HTTP
+endpoints, call remote GraphQL, or even call back to your Dgraph instance to add
+extra logic on top of Dgraph's graph search or mutations.
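+
+For instance, here's a minimal sketch of a custom query resolved by an external
+REST endpoint (the `https://geo.example.com` URL and the `Country` type are
+hypothetical, chosen only to illustrate the shape of such a definition):
+
+```graphql
+# A remote type: not stored in Dgraph, only returned by the custom call
+type Country @remote {
+  code: String
+  name: String
+}
+
+type Query {
+  # $code in the URL is filled from the query argument
+  countryByCode(code: String!): Country
+    @custom(http: { url: "https://geo.example.com/countries/$code", method: GET })
+}
+```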
+
+Here's the GraphQL definition of the directives:
+
+```graphql
+directive @custom(http: CustomHTTP) on FIELD_DEFINITION
+directive @remote on OBJECT | INTERFACE
+
+input CustomHTTP {
+  url: String!
+  method: HTTPMethod!
+  body: String
+  graphql: String
+  mode: Mode
+  forwardHeaders: [String!]
+  secretHeaders: [String!]
+  introspectionHeaders: [String!]
+  skipIntrospection: Boolean
+}
+
+enum HTTPMethod {
+  GET
+  POST
+  PUT
+  PATCH
+  DELETE
+}
+enum Mode {
+  SINGLE
+  BATCH
+}
+```
+
+Each definition of custom logic must include:
+
+- the `url` where the custom logic is called. This can include a path and
+  parameters that depend on query/mutation arguments or other fields.
+- the HTTP `method` to use in the call. For example, when calling a REST
+  endpoint with `GET`, `POST`, etc.
+
+Optionally, the custom logic definition can also include:
+
+- a `body` definition that can be used to construct an HTTP body from
+  arguments or fields.
+- a list of `forwardHeaders` to take from the incoming request and add to the
+  outgoing HTTP call. Used, for example, if the incoming request contains an
+  auth token that must be passed to the custom logic.
+- a list of `secretHeaders` to take from the `Dgraph.Secret` defined in the
+  schema file and add to the outgoing HTTP call. Used, for example, for a
+  server-side API key or other static value that must be passed to the custom
+  logic.
+- the `graphql` query/mutation to call if the custom logic is a GraphQL server,
+  and whether or not to introspect the remote GraphQL endpoint
+  (`skipIntrospection`).
+- `mode`, which is used for resolving fields by calling an external GraphQL
+  query/mutation. It can either be `BATCH` or `SINGLE`.
+- a list of `introspectionHeaders` to take from the `Dgraph.Secret`
+  [object](#dgraphsecret) defined in the schema file. They're added to the
+  introspection requests sent to the endpoint.
+
+The result type of custom queries and mutations can be any object type in your
+schema, including `@remote` types. For custom fields, the type can be object
+types or scalar types.
+
+The `method` can be any of the HTTP methods: `GET`, `POST`, `PUT`, `PATCH`, or
+`DELETE`, and `forwardHeaders` is a list of headers that should be passed from
+the incoming request to the outgoing HTTP custom request. Let's look at each of
+the other `http` arguments in detail.
+
+## Dgraph.Secret
+
+Sometimes you might want to forward some static headers to your custom API that
+can't be exposed to the client. This could be an API key from a payment
+processor or an auth token for your organization on GitHub. These secrets can be
+specified as comments in the schema file and then can be used in `secretHeaders`
+and `introspectionHeaders` while defining the custom directive for a
+field/query.
+
+```graphql
+type Query {
+  getTopUsers(id: ID!): [User]
+    @custom(
+      http: {
+        url: "http://api.github.com/topUsers"
+        method: "POST"
+        introspectionHeaders: ["Github-Api-Token"]
+        secretHeaders: ["Authorization:Github-Api-Token"]
+        graphql: "..."
+      }
+    )
+}
+
+# Dgraph.Secret Github-Api-Token "long-token"
+```
+
+In the above request, `Github-Api-Token` would be sent as a header with value
+`long-token` for the introspection request. For the actual `/graphql` request,
+the `Authorization` header would be sent with the value `long-token`.
+
+
+  The `Authorization:Github-Api-Token` syntax tells Dgraph to use the value for
+  `Github-Api-Token` from `Dgraph.Secret` and forward it to the custom API with
+  the header key as `Authorization`.
+
+
+## The URL and method
+
+The URL can be as simple as a fixed URL string, or include details drawn from
+the arguments or fields.
+
+A simple string might look like:
+
+```graphql
+type Query {
+  myCustomQuery: MyResult
+    @custom(http: { url: "https://my.api.com/theQuery", method: GET })
+}
+```
+
+In more complex cases, the arguments of the query/mutation can be used as
+a pattern for the URL:
+
+```graphql
+type Query {
+  myGetPerson(id: ID!): Person
+    @custom(http: { url: "https://my.api.com/person/$id", method: GET })
+
+  getPosts(authorID: ID!, numToFetch: Int!): [Post]
+    @custom(
+      http: {
+        url: "https://my.api.com/person/$authorID/posts?limit=$numToFetch"
+        method: GET
+      }
+    )
+}
+```
+
+In this case, a query like
+
+```graphql
+query {
+  getPosts(authorID: "auth123", numToFetch: 10) {
+    title
+  }
+}
+```
+
+gets transformed to an outgoing HTTP GET request to the URL
+`https://my.api.com/person/auth123/posts?limit=10`.
+
+When using custom logic on fields, the URL can draw from other fields in the
+type. For example:
+
+```graphql
+type User {
+  username: String! @id
+  ...
+  posts: [Post] @custom(http: {
+    url: "https://my.api.com/person/$username/posts",
+    method: GET
+  })
+}
+```
+
+Note that:
+
+- Fields or arguments used in the path of a URL, such as `username` or
+  `authorID` in the examples above, must be marked as non-nullable (have `!` in
+  their type); whereas those used in parameters, such as `numToFetch`, can be
+  nullable.
+- Currently, only scalar fields or arguments are allowed to be used in URLs or
+  bodies; though, as the body section below shows, this doesn't restrict the
+  objects you can construct and pass to custom logic functions.
+- Currently, the body can only contain alphanumeric characters in keys; other
+  characters like `_` are not yet supported.
+- Currently, constant values are also not allowed in the body template; support
+  for this is planned.
+
+## The body
+
+Many HTTP requests, such as add and update operations on REST APIs, require a
+JSON formatted body to supply the data. In a similar way to how `url` allows
+specifying a URL pattern to use in resolving the custom request, Dgraph allows a
+`body` pattern that is used to build HTTP request bodies.
+
+For example, this body can be structured JSON that relates a mutation's
+arguments to the JSON structure required by the remote endpoint.
+
+```graphql
+type Mutation {
+  newMovie(title: String!, desc: String, dir: ID, imdb: ID): Movie @custom(http: {
+    url: "http://myapi.com/movies",
+    method: "POST",
+    body: "{ title: $title, imdbID: $imdb, storyLine: $desc, director: { id: $dir }}",
+  })
+}
+```
+
+A request with
+`newMovie(title: "...", desc: "...", dir: "dir123", imdb: "tt0120316")` is
+transformed into a `POST` request to `http://myapi.com/movies` with a JSON body
+of:
+
+```json
+{
+  "title": "...",
+  "imdbID": "tt0120316",
+  "storyLine": "...",
+  "director": {
+    "id": "dir123"
+  }
+}
+```
+
+`url` and `body` templates can be used together in a single custom definition.
+
+For both `url` and `body` templates, any non-null arguments or fields must be
+present to evaluate the custom logic. The following rules apply when
+building the request from the template for nullable arguments or fields.
+
+- If the value of a nullable argument is present, it's used in the template.
+- If a nullable argument is present, but null, then in a body `null` is
+  inserted, while in a URL nothing is added.
+  For example, if the `desc` argument
+  above is null then `{ ..., storyLine: null, ...}` is constructed for the body.
+  Whereas, in a URL pattern like `https://a.b.c/endpoint?arg=$gqlArg`, if
+  `gqlArg` is present, but null, the generated URL is
+  `https://a.b.c/endpoint?arg=`.
+- If a nullable argument is not present, nothing is added to the URL/body. That
+  would mean the constructed body would not contain `storyLine` if the `desc`
+  argument is missing, and in `https://a.b.c/endpoint?arg=$gqlArg` the result
+  would be `https://a.b.c/endpoint` if `gqlArg` were not present in the request
+  arguments.
+
+## Calling GraphQL custom resolvers
+
+Custom queries, mutations and fields can be implemented by custom GraphQL
+resolvers. In this case, use the `graphql` argument to specify which
+query/mutation on the remote server to call. The syntax specifies whether the
+call is a query or mutation, the arguments, and which query/mutation to use on
+the remote endpoint.
+
+For example, you can pass arguments to queries onward as arguments to remote
+GraphQL endpoints:
+
+```graphql
+type Query {
+  getPosts(authorID: ID!, numToFetch: Int!): [Post]
+    @custom(
+      http: {
+        url: "https://my.api.com/graphql"
+        method: POST
+        graphql: "query($authorID: ID!, $numToFetch: Int!) { posts(auth: $authorID, first: $numToFetch) }"
+      }
+    )
+}
+```
+
+You can also define your own inputs and pass those to the remote GraphQL
+endpoint.
+
+```graphql
+input NewMovieInput { ... }
+
+type Mutation {
+  newMovie(input: NewMovieInput!): Movie @custom(http: {
+    url: "http://movies.com/graphql",
+    method: "POST",
+    graphql: "mutation($input: NewMovieInput!) { addMovie(data: $input) }",
+  })
+}
+```
+
+When a schema is uploaded, Dgraph will try to introspect the remote GraphQL
+endpoints for any custom logic that uses the `graphql` argument. From the
+results of introspection, it tries to match up arguments, input and object types
+to ensure that the calls to and expected responses from the remote GraphQL make
+sense.
+
+If that introspection isn't possible, set `skipIntrospection: true` in the
+custom definition and Dgraph won't perform GraphQL schema introspection for this
+custom definition.
+
+## Remote types
+
+Any type annotated with the `@remote` directive is not stored in Dgraph. This
+allows your Dgraph GraphQL instance to serve an API that includes both data
+stored locally and data stored or generated elsewhere. You can also use custom
+fields, for example, to join data from disparate datasets.
+
+Remote types can only be returned by custom resolvers and Dgraph won't generate
+any search or CRUD operations for remote types.
+
+The schema definition used to define your Dgraph GraphQL API must include
+definitions of all the types used. If a custom logic call returns a type not
+stored in Dgraph, then that type must be added to the Dgraph schema with the
+`@remote` directive.
+
+For example, your API might use custom logic to integrate with GitHub, using
+either `https://api.github.com` or the GitHub GraphQL API
+`https://api.github.com/graphql` and calling the `user` query. Either way, your
+GraphQL schema will need to include the type you expect back from that remote
+call. That could be linking a `User` as stored in your Dgraph instance with the
+`Repository` data from GitHub. With `@remote` types, that's as simple as adding
+the type and custom call to your schema.
+
+```graphql
+# GitHub's repository type
+type Repository @remote { ... }
+
+# Dgraph user type
+type User {
+  # local user name = GitHub id
+  username: String! @id
+
+  # ...
+  # other data stored in Dgraph
+  # ...
+
+  # join local data with remote
+  repositories: [Repository] @custom(http: {
+    url: "https://api.github.com/users/$username/repos",
+    method: GET
+  })
+}
+```
+
+Just defining the connection is all it takes, and then you can issue a single
+GraphQL query that performs a local query and joins with (potentially many)
+remote data sources.
+
+### RemoteResponse directive
+
+In combination with the `@remote` directive, you can also use the
+`@remoteResponse` directive in a GraphQL schema. You can define the
+`@remoteResponse` directive on the fields of a `@remote` type in order to map a
+JSON key of a custom query's response to a GraphQL field.
+
+For example, the following GraphQL schema is used with a custom DQL query whose
+JSON response contains the results of the `groupby` clause under the `@groupby`
+key. By using the `@remoteResponse` directive you'll map the `groupby` field in
+the `GroupUserMapQ` type to the `@groupby` key in the JSON response:
+
+```graphql
+type User {
+  screen_name: String! @id
+  followers: Int @search
+  tweets: [Tweets] @hasInverse(field: user)
+}
+type UserTweetCount @remote {
+  screen_name: String
+  tweetCount: Int
+}
+type UserMap @remote {
+  followers: Int
+  count: Int
+}
+type GroupUserMapQ @remote {
+  groupby: [UserMap] @remoteResponse(name: "@groupby")
+}
+```
+
+It's then possible to define the following `@custom` DQL query:
+
+```graphql
+queryUserKeyMap: [GroupUserMapQ] @custom(dql: """
+{
+  queryUserKeyMap(func: type(User)) @groupby(followers: User.followers) {
+    count(uid)
+  }
+}
+""")
+```
+
+## How Dgraph processes custom results
+
+Given types like
+
+```graphql
+type Post @remote {
+  id: ID!
+  title: String!
+  datePublished: DateTime
+  author: Author
+}
+
+type Author { ... }
+```
+
+and a custom query
+
+```graphql
+type Query {
+  getCustomPost(id: ID!): Post
+    @custom(http: { url: "https://my.api.com/post/$id", method: GET })
+
+  getPosts(authorID: ID!, numToFetch: Int!): [Post]
+    @custom(
+      http: {
+        url: "https://my.api.com/person/$authorID/posts?limit=$numToFetch"
+        method: GET
+      }
+    )
+}
+```
+
+Dgraph turns the `getCustomPost` query into an HTTP request to
+`https://my.api.com/post/$id` and expects a single JSON object with fields `id`,
+`title`, `datePublished` and `author` as the result. Any additional fields are
+ignored, while if non-nullable fields (like `id` and `title`) are missing,
+GraphQL error propagation will be triggered.
+
+For `getPosts`, Dgraph expects the HTTP call to
+`https://my.api.com/person/$authorID/posts?limit=$numToFetch` to return a JSON
+array of JSON objects, with each object matching the `Post` type as described
+above.
+
+If the custom resolvers are GraphQL calls, like:
+
+```graphql
+type Query {
+  getCustomPost(id: ID!): Post
+    @custom(
+      http: {
+        url: "https://my.api.com/graphql"
+        method: POST
+        graphql: "query(id: ID) { post(postID: $id) }"
+      }
+    )
+
+  getPosts(authorID: ID!, numToFetch: Int!): [Post]
+    @custom(
+      http: {
+        url: "https://my.api.com/graphql"
+        method: POST
+        graphql: "query(id: ID) { postByAuthor(authorID: $id, first: $numToFetch) }"
+      }
+    )
+}
+```
+
+then Dgraph expects a GraphQL call to `post` to return a valid GraphQL result
+like `{ "data": { "post": {...} } }` and will use the JSON object that is the
+value of `post` as the data resolved by the request.
+
+Similarly, Dgraph expects `postByAuthor` to return data like
+`{ "data": { "postByAuthor": [ {...}, ... ] } }` and will use the array value of
+`postByAuthor` to build its array of posts result.
+
+## How errors from custom endpoints are handled
+
+When a query returns an error while resolving from a custom HTTP endpoint, the
+error is added to the `errors` array and sent back to the user in the JSON
+response.
+
+When a field returns an error while resolving a custom HTTP endpoint, the
+field's value becomes `null` and the error is added to the `errors` JSON array.
+The rest of the fields are still resolved as required by the request.
+
+For example, a query from a custom HTTP endpoint will return an error in the
+following format:
+
+```json
+{
+  "errors": [
+    {
+      "message": "Rest API returns Error for myFavoriteMovies query",
+      "locations": [
+        {
+          "line": 5,
+          "column": 4
+        }
+      ],
+      "path": ["Movies", "name"]
+    }
+  ]
+}
+```
+
+## How custom fields are resolved
+
+When evaluating a request that includes custom fields, Dgraph might run multiple
+resolution stages to resolve all the fields. Dgraph must also ensure it requests
+enough data to fulfill the custom fields. For example, given the `User` type
+defined as:
+
+```graphql
+type User {
+  username: String! @id
+  ...
+  posts: [Post] @custom(http: {
+    url: "https://my.api.com/person/$username/posts",
+    method: GET
+  })
+}
+```
+
+a query such as:
+
+```graphql
+query {
+  queryUser {
+    username
+    posts
+  }
+}
+```
+
+is executed by first querying in Dgraph for `username` and then using the result
+to resolve the custom field `posts` (which relies on `username`). For a request
+like:
+
+```graphql
+query {
+  queryUser {
+    posts
+  }
+}
+```
+
+Dgraph works out that it must first get `username` so it can run the custom
+field `posts`, even though `username` isn't part of the original query. So
+Dgraph retrieves enough data to satisfy the custom request, even if that
+involves data that isn't asked for in the query.
+
+There are currently a few limitations on custom fields:
+
+- each custom call must include either an `ID` or `@id` field
+- arguments are not allowed (soon custom field arguments will be allowed and
+  will be used in the `@custom` directive in the same manner as for custom
+  queries and mutations), and
+- a custom field can't depend on another custom field (longer term, we intend to
+  lift this restriction).
+
+## Restrictions / Roadmap
+
+Our custom logic is still in beta and we are improving it quickly. Here are a
+few points that we plan to work on soon:
+
+- adding arguments to custom fields
+- relaxing the restrictions on custom fields using id values
+- iterative evaluation of `@custom` and `@remote`: in the current version you
+  can't have `@custom` inside an `@remote` type; once we add this, you'll be
+  able to extend remote types with custom fields, and
+- allowing fine-tuning of the generated API, for example removing or
+  customizing the generated CRUD mutations.
+
+---
diff --git a/dgraph/reference/graphql/custom/field.mdx b/dgraph/reference/graphql/custom/field.mdx
new file mode 100644
index 00000000..b11fbd60
--- /dev/null
+++ b/dgraph/reference/graphql/custom/field.mdx
@@ -0,0 +1,93 @@
+---
+title: Custom Fields
+description:
+  Custom fields allow you to extend your types with custom logic as well as make
+  joins between your local data and remote data.
+---
+
+Custom fields allow you to extend your types with custom logic as well as make
+joins between your local data and remote data.
+
+Let's say we are building an app for managing projects.
+Users will log in with their GitHub id, and we want to connect some data about
+their work stored in Dgraph with, say, their GitHub profile, issues, etc.
+
+The first version of our users might start out with just their GitHub username
+and some data about what projects they are working on.
+
+```graphql
+type User {
+  username: String! @id
+  projects: [Project]
+  tickets: [Ticket]
+}
+```
+
+We can then add their GitHub repositories by just extending the definitions with
+the types and custom field needed to make the remote call.
+
+```graphql
+# GitHub's repository type
+type Repository @remote { ... }
+
+# Dgraph user type
+type User {
+  # local user name = GitHub id
+  username: String! @id
+
+  # join local data with remote
+  repositories: [Repository] @custom(http: {
+    url: "https://api.github.com/users/$username/repos",
+    method: GET
+  })
+}
+```
+
+We could similarly join with, say, the GitHub user details or open pull requests
+to further fill out the join between GitHub and our local data. Instead of the
+REST API, let's use the GitHub GraphQL endpoint:
+
+```graphql
+# GitHub's User type
+type GitHubUser @remote { ... }
+
+# Dgraph user type
+type User {
+  # local user name = GitHub id
+  username: String! @id
+
+  # join local data with remote
+  gitDetails: GitHubUser @custom(http: {
+    url: "https://api.github.com/graphql",
+    method: POST,
+    graphql: "query(username: String!) { user(login: $username) }",
+    skipIntrospection: true
+  })
+}
+```
+
+Perhaps our app has some measure of their velocity that's calculated by a custom
+function that looks at both their GitHub commits and some other places where
+work is added. Soon we'll have a schema where we can render a user's home page,
+the projects they work on, their open tickets, their GitHub details, etc. in a
+single request that queries across multiple sources and can mix Dgraph filtering
+with external calls.
+
+```graphql
+query {
+  getUser(id: "aUser") {
+    username
+    projects(order: { asc: lastUpdate }, first: 10) {
+      projectName
+    }
+    tickets {
+      connectedGitIssue { ... }
+    }
+    velocityMeasure
+    gitDetails { ... }
+    repositories { ... }
+  }
+}
+```
+
+---
diff --git a/dgraph/reference/graphql/custom/index.mdx b/dgraph/reference/graphql/custom/index.mdx
new file mode 100644
index 00000000..fa9746ab
--- /dev/null
+++ b/dgraph/reference/graphql/custom/index.mdx
@@ -0,0 +1,3 @@
+---
+title: Custom Resolvers
+---
diff --git a/dgraph/reference/graphql/custom/mutation.mdx b/dgraph/reference/graphql/custom/mutation.mdx
new file mode 100644
index 00000000..c4255401
--- /dev/null
+++ b/dgraph/reference/graphql/custom/mutation.mdx
@@ -0,0 +1,69 @@
+---
+title: Custom Mutations
+description:
+  With custom mutations, you can use custom logic to define values for one or
+  more fields in a mutation
+---
+
+With custom mutations, you can use custom logic to define values for one or more
+fields in a mutation.
+
+Let's say we have an application about authors and posts. Logged-in authors can
+add posts, but we want to do some input validation and add extra value when a
+post is added. The key types might be as follows.
+
+```graphql
+type Author { ... }
+
+type Post {
+  id: ID!
+  title: String
+  text: String
+  datePublished: DateTime
+  author: Author
+  ...
+}
+```
+
+Dgraph generates an `addPost` mutation from those types, but we want to do
+something extra. We don't want the `author` field to come in with the mutation;
+that should get filled in from the JWT of the logged-in user.
+Also, the `datePublished` shouldn't be in the input; it should be set as the
+current time at the point of mutation. Maybe we also have some community
+guidelines about what might constitute an offensive `title` or `text` in a
+post. Maybe users can only post if they have enough community credit.
+
+We'll need custom code to do all that, so we can write a custom function that
+takes in only the title and text of the new post. Internally, it can check that
+the title and text satisfy the guidelines and that this user has enough credit
+to make a post. If those checks pass, it then builds a full post object by
+adding the current time as the `datePublished` and adding the `author` from the
+JWT information it gets from the forward header. It can then call the `addPost`
+mutation constructed by Dgraph to add the post into Dgraph and return the
+resulting post as its GraphQL output.
+
+So, as well as the types above, we need a custom mutation:
+
+```graphql
+type Mutation {
+  newPost(title: String!, text: String): Post
+    @custom(
+      http: {
+        url: "https://my.api.com/addPost"
+        method: "POST"
+        body: "{ postText: $text, postTitle: $title }"
+        forwardHeaders: ["AuthHdr"]
+      }
+    )
+}
+```
+
+## Learn more
+
+Find out more about how to turn off generated mutations and how to protect
+mutations with authorization rules at:
+
+- [Remote Types - Turning off Generated Mutations with `@remote` Directive](./directive/#remote-types)
+- [Securing Mutations with the `@auth` Directive](./graphql/security/mutations.md)
+
+---
diff --git a/dgraph/reference/graphql/custom/query.mdx b/dgraph/reference/graphql/custom/query.mdx
new file mode 100644
index 00000000..485c68b5
--- /dev/null
+++ b/dgraph/reference/graphql/custom/query.mdx
@@ -0,0 +1,90 @@
+---
+title: Custom Queries
+description:
+  A custom query takes any number of scalar arguments and constructs the path,
+  parameters, and body of the request that's sent to the remote endpoint.
+---
+
+Let's say we want to integrate our app with an existing external REST API.
+There are a few things we need to know:
+
+- The URL of the API, the path and any parameters required
+- The shape of the resulting JSON data
+- The method (GET, POST, etc.), and
+- What authorization we need to pass to the external endpoint
+
+The custom query can take any number of scalar arguments and use those to
+construct the path, parameters and body (we'll see an example of that in the
+custom mutation section) of the request that gets sent to the remote endpoint.
+
+In an app, you'd deploy an endpoint that does some custom work and returns data
+that's used in your UI, or you'd wrap some logic or call around an existing
+endpoint. So that we can walk through a whole example, let's use the Twitter
+API.
+
+To integrate a call that returns the data of a Twitter user with our app, all we
+need to do is add the expected result type `TwitterUser` and set up a custom
+query:
+
+```graphql
+type TwitterUser @remote {
+  id: ID!
+  name: String
+  screen_name: String
+  location: String
+  description: String
+  followers_count: Int
+  ...
+}
+
+type Query {
+  getCustomTwitterUser(name: String!): TwitterUser @custom(http: {
+    url: "https://api.twitter.com/1.1/users/show.json?screen_name=$name"
+    method: "GET",
+    forwardHeaders: ["Authorization"]
+  })
+}
+```
+
+Dgraph will then be able to accept a GraphQL query like
+
+```graphql
+query {
+  getCustomTwitterUser(name: "dgraphlabs") {
+    location
+    description
+    followers_count
+  }
+}
+```
+
+construct an HTTP GET request to
+`https://api.twitter.com/1.1/users/show.json?screen_name=dgraphlabs`, attach the
+`Authorization` header from the incoming GraphQL request to the outgoing HTTP
+request, make the call, and return a GraphQL result.
+
+The result JSON of the actual HTTP call will contain the whole object from the
+REST endpoint (you can see how much is in the Twitter user object
+[here](https://developer.twitter.com/en/docs/tweets/data-dictionary/overview/user-object)).
+But the GraphQL query only asked for some of that, so Dgraph filters out any
+returned values that weren't asked for in the GraphQL query and builds a valid
+GraphQL response to the query.
+
+```json
+{
+  "data": {
+    "getCustomTwitterUser": { "location": ..., "description": ..., "followers_count": ... }
+  }
+}
+```
+
+Your version of the remote type doesn't have to match the remote type exactly.
+For example, if you don't want to allow users to query the full Twitter user,
+include in the type definition only the fields that can be queried.
+
+All the usual options for custom queries are allowed; for example, you can have
+multiple queries in a single GraphQL request and a mix of custom and
+Dgraph-generated queries, you can get the result compressed by setting
+`Accept-Encoding` to `gzip`, etc.
+
+---
diff --git a/dgraph/reference/graphql/federation/index.mdx b/dgraph/reference/graphql/federation/index.mdx
new file mode 100644
index 00000000..9b0938a6
--- /dev/null
+++ b/dgraph/reference/graphql/federation/index.mdx
@@ -0,0 +1,218 @@
+---
+title: Apollo Federation
+description:
+  Dgraph now supports Apollo federation so that you can create a gateway GraphQL
+  service that includes the Dgraph GraphQL API and other GraphQL services
+---
+
+Dgraph supports
+[Apollo federation](https://www.apollographql.com/docs/federation/) starting in
+release version 21.03. This lets you create a gateway GraphQL service that
+includes the Dgraph GraphQL API and other GraphQL services.
+
+## Support for Apollo federation directives
+
+The current implementation supports the following five directives: `@key`,
+`@extends`, `@external`, `@provides`, and `@requires`.
+
+### `@key` directive
+
+This directive takes one field argument inside it: the `@key` field. There are
+a few limitations on how to use `@key` directives:
+
+- Users can define the `@key` directive only once for a type.
+- Support for multiple key fields is not currently available.
+- Since the `@key` field acts as a foreign key to resolve entities from the
+  service where it is extended, the field provided as an argument inside the
+  `@key` directive should be of `ID` type or have the `@id` directive on it.
+
+For example:
+
+```graphql
+type User @key(fields: "id") {
+  id: ID!
+  name: String
+}
+```
+
+### `@extends` directive
+
+This directive provides support for extended definitions. For example, if the
+above-defined `User` type is defined in some other service, you can extend it in
+Dgraph's GraphQL service by using the `@extends` directive, as follows:
+
+```graphql
+type User @key(fields: "id") @extends {
+  id: String! @id @external
+  products: [Product]
+}
+```
+
+You can also achieve this with the `extend` keyword; so you have a choice
+between two types of syntax to extend a type into your Dgraph GraphQL service:
+`extend type User ...` or `type User @extends ...`.
+
+### `@external` directive
+
+You use this directive when the given field is not stored in this service. It
+can only be used on extended type definitions. For example, it is used above on
+the `id` field of the `User` type.
+
+### `@provides` directive
+
+You use this directive on a field to tell the gateway to return a specific
+fieldset from the base type while fetching the field.
+
+For example:
+
+```graphql
+type Review @key(fields: "id") {
+  product: Product @provides(fields: "name price")
+}
+
+extend type Product @key(fields: "upc") {
+  upc: String @external
+  name: String @external
+  price: Int @external
+}
+```
+
+When fetching `Review.product` from the `review` service, if `name` or `price`
+is also queried, the gateway fetches these from the `review` service itself. So
+the `review` service also resolves these fields, even though they are
+`@external`.
+
+### `@requires` directive
+
+You use this directive on a field to annotate the fieldset of the base type. You
+can use it to develop a query plan where the required fields may not be needed
+by the client, but the service may need additional information from other
+services.
+
+For example:
+
+```graphql
+extend type User @key(fields: "id") {
+  id: ID! @external
+  email: String @external
+  reviews: [Review] @requires(fields: "email")
+}
+```
+
+When the gateway fetches `user.reviews` from the `review` service, the gateway
+will get `user.email` from the `User` service and provide it as an argument to
+the `_entities` query.
+
+Using `@requires` alone on a field doesn't make much sense. In cases where you
+need to use `@requires`, you should also add some custom logic on that field.
+You can add such logic using the `@lambda` or `@custom(http: {...})` directives.
+
+Here's an example:
+
+1. Schema:
+
+```graphql
+extend type User @key(fields: "id") {
+  id: ID! @external
+  email: String @external
+  reviews: [Review] @requires(fields: "email") @lambda
+}
+```
+
+2. Lambda Script:
+
+```js
+// returns a list of reviews for a user
+async function userReviews({ parent, graphql }) {
+  let reviews = []
+  // find the reviews for a user using the email and return them.
+  // Even though the email has been declared `@external`, it will be available as `parent.email` as it is mentioned in `@requires`.
+  return reviews
+}
+self.addGraphQLResolvers({
+  "User.reviews": userReviews,
+})
+```
+
+## Generated queries and mutations
+
+In this section, you will see which queries and mutations are available to the
+individual service and to the Apollo gateway.
+
+Let's take the schema below as an example:
+
+```graphql
+type Mission @key(fields: "id") {
+  id: ID!
+  crew: [Astronaut]
+  designation: String!
+  startDate: String
+  endDate: String
+}
+
+type Astronaut @key(fields: "id") @extends {
+  id: ID! @external
+  missions: [Mission]
+}
+```
+
+The queries and mutations exposed to the gateway are:
+
+```graphql
+type Query {
+  getMission(id: ID!): Mission
+  queryMission(
+    filter: MissionFilter
+    order: MissionOrder
+    first: Int
+    offset: Int
+  ): [Mission]
+  aggregateMission(filter: MissionFilter): MissionAggregateResult
+}
+
+type Mutation {
+  addMission(input: [AddMissionInput!]!): AddMissionPayload
+  updateMission(input: UpdateMissionInput!): UpdateMissionPayload
+  deleteMission(filter: MissionFilter!): DeleteMissionPayload
+  addAstronaut(input: [AddAstronautInput!]!): AddAstronautPayload
+  updateAstronaut(input: UpdateAstronautInput!): UpdateAstronautPayload
+  deleteAstronaut(filter: AstronautFilter!): DeleteAstronautPayload
+}
+```
+
+The queries for `Astronaut` are not exposed to the gateway because they are
+resolved through the `_entities` resolver. However, these queries are available
+on the Dgraph GraphQL API endpoint.
+
+## Mutation for `extended` types
+
+To add an object of the `Astronaut` type, which is extended in this service, use
+the `addAstronaut` mutation. It takes `AddAstronautInput`, which is generated as
+follows:
+
+```graphql
+input AddAstronautInput {
+  id: ID!
+  missions: [MissionRef]
+}
+```
+
+The `id` field is of `ID` type, which is usually generated internally by Dgraph.
+But in this case, it's provided as an input. The user should provide the same
+`id` value that is present in the GraphQL service where the type `Astronaut` is
+defined.
+
+For example, let's assume that the type `Astronaut` is defined in some other
+service, `AstronautService`, as follows:
+
+```graphql
+type Astronaut @key(fields: "id") {
+  id: ID!
+  name: String!
+}
+```
+
+When adding an object of type `Astronaut`, you should first add it to the
+`AstronautService` service. Then, you can call the `addAstronaut` mutation with
+the value of `id` provided as an argument that must be equal to the value in the
+`AstronautService` service.
diff --git a/dgraph/reference/graphql/graphql-clients/endpoint/graphql-get-request.mdx b/dgraph/reference/graphql/graphql-clients/endpoint/graphql-get-request.mdx
new file mode 100644
index 00000000..3ee3f874
--- /dev/null
+++ b/dgraph/reference/graphql/graphql-clients/endpoint/graphql-get-request.mdx
@@ -0,0 +1,20 @@
+---
+title: GET Request
+description:
+  Get the structure for GraphQL requests and responses, how to enable
+  compression for them, and configuration options for extensions
+---
+
+GraphQL requests may also be sent using an `HTTP GET` operation.
+
+GET requests must be sent in the following format. The query, variables, and
+operation name are sent as URL-encoded query parameters in the URL.
+
+```
+http://localhost:8080/graphql?query={...}&variables={...}&operationName=...
+```
+
+- `query` is mandatory.
+- `variables` is only required if the query contains GraphQL variables.
+- `operationName` is only required if there are multiple operations in the
+  query; in which case, operations must also be named.
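+
+For example, assuming a local Alpha on the default port, the query
+`{ getTask(id: "0x3") { title completed } }` would be URL-encoded and sent as:
+
+```
+http://localhost:8080/graphql?query=%7B%20getTask(id%3A%20%220x3%22)%20%7B%20title%20completed%20%7D%20%7D
+```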
diff --git a/dgraph/reference/graphql/graphql-clients/endpoint/graphql-request.mdx b/dgraph/reference/graphql/graphql-clients/endpoint/graphql-request.mdx
new file mode 100644
index 00000000..0093647b
--- /dev/null
+++ b/dgraph/reference/graphql/graphql-clients/endpoint/graphql-request.mdx
@@ -0,0 +1,386 @@
+---
+title: POST Request
+description:
+  Get the structure for GraphQL requests and responses, how to enable
+  compression for them, and configuration options for extensions
+---
+
+## POST `/graphql`
+
+### Headers
+
+| Header                                  | Optionality                              | Value                                                                                         |
+| :-------------------------------------- | :--------------------------------------- | :-------------------------------------------------------------------------------------------- |
+| Content-Type                            | mandatory                                | `application/graphql` or `application/json`                                                   |
+| Content-Encoding                        | optional                                 | `gzip` to send compressed data                                                                |
+| Accept-Encoding                         | optional                                 | `gzip` to enable data compression on the response                                             |
+| X-Dgraph-AccessToken                    | if `ACL` is enabled                      | pass the access token you got in the login response to access predicates protected by an ACL |
+| X-Auth-Token                            | if `anonymous access` is disabled        | Admin Key or Client key                                                                       |
+| header as set in `Dgraph.Authorization` | if GraphQL `Dgraph.Authorization` is set | valid JWT used by @auth directives                                                            |
+
+
+  Refer to GraphQL [security](./graphql/security) settings for explanations
+  about `anonymous access` and `Dgraph.Authorization`.
+
+
+### Payload format
+
+POST requests sent with the Content-Type header `application/graphql` must have
+a POST body containing a GraphQL query string. For example, the following is a
+valid POST body for a query:
+
+```graphql
+query {
+  getTask(id: "0x3") {
+    id
+    title
+    completed
+    user {
+      username
+      name
+    }
+  }
+}
+```
+
+POST requests sent with the Content-Type header `application/json` must have a
+POST body in the following JSON format:
+
+```json
+{
+  "query": "...",
+  "operationName": "...",
+  "variables": { "var": "val", ... }
+}
+```
+
+GraphQL requests can contain one or more operations. Operations include `query`,
+`mutation`, or `subscription`. If a request only has one operation, then it can
+be unnamed like the following:
+
+## Single Operation
+
+The most basic request contains a single anonymous (unnamed) operation. Each
+operation can have one or more queries within it. For example, the following
+query has a `query` operation running the queries "getTask" and "getUser":
+
+```graphql
+query {
+  getTask(id: "0x3") {
+    id
+    title
+    completed
+  }
+  getUser(username: "dgraphlabs") {
+    username
+  }
+}
+```
+
+Response:
+
+```json
+{
+  "data": {
+    "getTask": {
+      "id": "0x3",
+      "title": "GraphQL docs example",
+      "completed": true
+    },
+    "getUser": {
+      "username": "dgraphlabs"
+    }
+  }
+}
+```
+
+You can optionally name the operation as well, though it's not required if the
+request only has one operation, as it's clear what needs to be executed.
+
+### Query Shorthand
+
+If a request only has a single query operation, then you can use the short-hand
+form of omitting the "query" keyword:
+
+```graphql
+{
+  getTask(id: "0x3") {
+    id
+    title
+    completed
+  }
+  getUser(username: "dgraphlabs") {
+    username
+  }
+}
+```
+
+This simplifies queries when a query doesn't require an operation name or
+[variables](/graphql/api/variables).
+
+## Multiple Operations
+
+If a request has two or more operations, then each operation must have a name.
+A request can only execute one operation, so you must also include the
+operation name to execute in the request (see the "operations" field for
+[requests](/graphql/api/requests)). Every operation name in a request must be
+unique.
+
+For example, the following request has the operation names "getTaskAndUser"
+and "completedTasks".
+
+```graphql
+query getTaskAndUser {
+  getTask(id: "0x3") {
+    id
+    title
+    completed
+  }
+  queryUser(filter: { username: { eq: "dgraphlabs" } }) {
+    username
+    name
+  }
+}
+
+query completedTasks {
+  queryTask(filter: { completed: true }) {
+    title
+    completed
+  }
+}
+```
+
+When executing the following request (as an HTTP POST request in JSON format),
+specifying the "getTaskAndUser" operation executes the first query:
+
+```json
+{
+  "query": "query getTaskAndUser { getTask(id: \"0x3\") { id title completed } queryUser(filter: {username: {eq: \"dgraphlabs\"}}) { username name }\n}\n\nquery completedTasks { queryTask(filter: {completed: true}) { title completed }}",
+  "operationName": "getTaskAndUser"
+}
+```
+
+```json
+{
+  "data": {
+    "getTask": {
+      "id": "0x3",
+      "title": "GraphQL docs example",
+      "completed": true
+    },
+    "queryUser": [
+      {
+        "username": "dgraphlabs",
+        "name": "Dgraph Labs"
+      }
+    ]
+  }
+}
+```
+
+And specifying the "completedTasks" operation executes the second query:
+
+```json
+{
+  "query": "query getTaskAndUser { getTask(id: \"0x3\") { id title completed } queryUser(filter: {username: {eq: \"dgraphlabs\"}}) { username name }\n}\n\nquery completedTasks { queryTask(filter: {completed: true}) { title completed }}",
+  "operationName": "completedTasks"
+}
+```
+
+```json
+{
+  "data": {
+    "queryTask": [
+      {
+        "title": "GraphQL docs example",
+        "completed": true
+      },
+      {
+        "title": "Show second operation",
+        "completed": true
+      }
+    ]
+  }
+}
```

+### Multiple queries execution
+
+When an operation contains multiple queries, they are run concurrently and
+independently in a Dgraph readonly transaction per query.
+
+When an operation contains multiple mutations, they are run serially, in the
+order listed in the request, and in a transaction per mutation. If a mutation
+fails, the following mutations are not executed, and previous mutations are not
+rolled back.
+
+### Variables
+
+Variables simplify GraphQL queries and mutations by letting you pass data
+separately. A GraphQL request can be split into two sections: one for the query
+or mutation, and another for variables.
+
+Variables can be declared after the `query` or `mutation` keyword and are
+passed like arguments to a function; they begin with `$`.
+
+#### Query Example
+
+```graphql
+query post($filter: PostFilter) {
+  queryPost(filter: $filter) {
+    title
+    text
+    author {
+      name
+    }
+  }
+}
+```
+
+**Variables**
+
+```json
+{
+  "filter": {
+    "title": {
+      "eq": "First Post"
+    }
+  }
+}
+```
+
+#### Mutation Example
+
+```graphql
+mutation addAuthor($author: AddAuthorInput!) {
+  addAuthor(input: [$author]) {
+    author {
+      name
+      posts {
+        title
+        text
+      }
+    }
+  }
+}
+```
+
+**Variables**
+
+```json
+{
+  "author": {
+    "name": "A.N. Author",
+    "dob": "2000-01-01",
+    "posts": [{
+      "title": "First Post",
+      "text": "Hello world!"
+    }]
+  }
+}
+```
+
+### Fragments
+
+A GraphQL fragment is associated with a type and is a reusable subset of the
+fields from this type.
+Here, we declare a `postData` fragment that can be used
+with any `Post` object:
+
+```graphql
+fragment postData on Post {
+  id
+  title
+  text
+  author {
+    username
+    displayName
+  }
+}
+query allPosts {
+  queryPost(order: { desc: title }) {
+    ...postData
+  }
+}
+mutation addPost($post: AddPostInput!) {
+  addPost(input: [$post]) {
+    post {
+      ...postData
+    }
+  }
+}
```

+### Using fragments with interfaces
+
+It is possible to define fragments on interfaces. Here's an example of a query
+that includes in-line fragments:
+
+**Schema**
+
+```graphql
+interface Employee {
+  ename: String!
+}
+interface Character {
+  id: ID!
+  name: String! @search(by: [exact])
+}
+type Human implements Character & Employee {
+  totalCredits: Float
+}
+type Droid implements Character {
+  primaryFunction: String
+}
+```
+
+**Query**
+
+```graphql
+query allCharacters {
+  queryCharacter {
+    name
+    __typename
+    ... on Human {
+      totalCredits
+    }
+    ... on Droid {
+      primaryFunction
+    }
+  }
+}
+```
+
+The `allCharacters` query returns a list of `Character` objects. Since `Human`
+and `Droid` implement the `Character` interface, the fields in the result are
+returned according to the type of each object.
+
+**Result**
+
+```json
+{
+  "data": {
+    "queryCharacter": [
+      {
+        "name": "Human1",
+        "__typename": "Human",
+        "totalCredits": 200.23
+      },
+      {
+        "name": "Human2",
+        "__typename": "Human",
+        "totalCredits": 2.23
+      },
+      {
+        "name": "Droid1",
+        "__typename": "Droid",
+        "primaryFunction": "Code"
+      },
+      {
+        "name": "Droid2",
+        "__typename": "Droid",
+        "primaryFunction": "Automate"
+      }
+    ]
+  }
+}
```
diff --git a/dgraph/reference/graphql/graphql-clients/endpoint/graphql-response.mdx b/dgraph/reference/graphql/graphql-clients/endpoint/graphql-response.mdx
new file mode 100644
index 00000000..2e7b521f
--- /dev/null
+++ b/dgraph/reference/graphql/graphql-clients/endpoint/graphql-response.mdx
@@ -0,0 +1,211 @@
+---
+title: HTTP Response
+description:
+  Get the structure for GraphQL requests and responses, how to enable
+  compression for them, and configuration options for extensions
+---
+
+### Responses
+
+All responses, including errors, always return HTTP 200 OK status codes.
+
+The response is a JSON map including the fields `"data"`, `"errors"`, or
+`"extensions"`, following the GraphQL specification. They use the following
+formats.
+
+Successful queries are in the following format:
+
+```json
+{
+  "data": { ... },
+  "extensions": { ... }
+}
+```
+
+Queries that have errors are in the following format.
+
+```json
+{
+  "errors": [ ... ],
+}
+```
+
+#### "data" field
+
+The "data" field contains the result of your GraphQL request. The response has
+exactly the same shape as the query. For example, notice that for the following
+query, the response includes the data in the exact shape as the query.
+
+Query:
+
+```graphql
+query {
+  getTask(id: "0x3") {
+    id
+    title
+    completed
+    user {
+      username
+      name
+    }
+  }
+}
+```
+
+Response:
+
+```json
+{
+  "data": {
+    "getTask": {
+      "id": "0x3",
+      "title": "GraphQL docs example",
+      "completed": true,
+      "user": {
+        "username": "dgraphlabs",
+        "name": "Dgraph Labs"
+      }
+    }
+  }
+}
+```
+
+#### "errors" field
+
+The "errors" field is a JSON list where each entry has a `"message"` field that
+describes the error and optionally has a `"locations"` array to list the
+specific line and column number of the request that points to the error
+described.
For example, here's a possible error for the following query, where +`getTask` needs to have an `id` specified as input: + +Query: + +```graphql +query { + getTask() { + id + } +} +``` + +Response: + +```json +{ + "errors": [ + { + "message": "Field \"getTask\" argument \"id\" of type \"ID!\" is required but not provided.", + "locations": [ + { + "line": 2, + "column": 3 + } + ] + } + ] +} +``` + +#### Error propagation + +Before returning query and mutation results, Dgraph uses the types in the schema +to apply GraphQL +[value completion](https://graphql.github.io/graphql-spec/June2018/#sec-Value-Completion) +and +[error handling](https://graphql.github.io/graphql-spec/June2018/#sec-Errors-and-Non-Nullability). +That is, `null` values for non-nullable fields, e.g. `String!`, cause error +propagation to parent fields. + +In short, the GraphQL value completion and error propagation mean the following. + +- Fields marked as nullable (i.e. without `!`) can return `null` in the json + response. +- For fields marked as non-nullable (i.e. with `!`) Dgraph never returns null + for that field. +- If an instance of type has a non-nullable field that has evaluated to null, + the whole instance results in null. +- Reducing an object to null might cause further error propagation. For example, + querying for a post that has an author with a null name results in null: the + null name (`name: String!`) causes the author to result in null, and a null + author causes the post (`author: Author!`) to result in null. +- Error propagation for lists with nullable elements, e.g. `friends [Author]`, + can result in nulls inside the result list. +- Error propagation for lists with non-nullable elements results in null for + `friends [Author!]` and would cause further error propagation for + `friends [Author!]!`. + +Note that, a query that results in no values for a list will always return the +empty list `[]`, not `null`, regardless of the nullability. For example, given a +schema for an author with `posts: [Post!]!`, if an author has not posted +anything and we queried for that author, the result for the posts field would be +`posts: []`. + +A list can, however, result in null due to GraphQL error propagation. For +example, if the definition is `posts: [Post!]`, and we queried for an author who +has a list of posts. If one of those posts happened to have a null title (title +is non-nullable `title: String!`), then that post would evaluate to null, the +`posts` list can't contain nulls and so the list reduces to null. + +#### "extensions" field + +The "extensions" field contains extra metadata for the request with metrics and +trace information for the request. + +- `"touched_uids"`: The number of nodes that were touched to satisfy the + request. This is a good metric to gauge the complexity of the query. +- `"tracing"`: Displays performance tracing data in [Apollo + Tracing][apollo-tracing] format. This includes the duration of the whole query + and the duration of each operation. 
+
+[apollo-tracing]: https://github.com/apollographql/apollo-tracing
+
+Here's an example of a query response with the extensions field:
+
+```json
+{
+  "data": {
+    "getTask": {
+      "id": "0x3",
+      "title": "GraphQL docs example",
+      "completed": true,
+      "user": {
+        "username": "dgraphlabs",
+        "name": "Dgraph Labs"
+      }
+    }
+  },
+  "extensions": {
+    "touched_uids": 9,
+    "tracing": {
+      "version": 1,
+      "startTime": "2020-07-29T05:54:27.784837196Z",
+      "endTime": "2020-07-29T05:54:27.787239465Z",
+      "duration": 2402299,
+      "execution": {
+        "resolvers": [
+          {
+            "path": ["getTask"],
+            "parentType": "Query",
+            "fieldName": "getTask",
+            "returnType": "Task",
+            "startOffset": 122073,
+            "duration": 2255955,
+            "dgraph": [
+              {
+                "label": "query",
+                "startOffset": 171684,
+                "duration": 2154290
+              }
+            ]
+          }
+        ]
+      }
+    }
+  }
+}
+```
+
+**Turn off extensions**
+
+To turn off extensions, set the `--graphql` superflag's `extensions` option to
+false (`--graphql extensions=false`) when running Dgraph Alpha.
diff --git a/dgraph/reference/graphql/graphql-clients/endpoint/index.mdx b/dgraph/reference/graphql/graphql-clients/endpoint/index.mdx
new file mode 100644
index 00000000..3c9b4095
--- /dev/null
+++ b/dgraph/reference/graphql/graphql-clients/endpoint/index.mdx
@@ -0,0 +1,45 @@
+---
+title: /graphql endpoint
+description:
+  Get the structure for GraphQL requests and responses, how to enable
+  compression for them, and configuration options for extensions
+---
+
+When you deploy a GraphQL schema, Dgraph serves the corresponding
+[spec-compliant GraphQL](https://graphql.github.io/graphql-spec/June2018/) API
+at the HTTP endpoint `/graphql`. GraphQL requests can be sent via HTTP POST or
+HTTP GET requests.
+
+### Getting your GraphQL Endpoint
+
+
+
+- Access the [Overview](https://cloud.dgraph.io/_/dashboard) panel of the
+  Dgraph Cloud dashboard.
+- The `GraphQL Endpoint` is displayed at the bottom.
+- Click the link button to copy it.
+
+
+
+`/graphql` is served by the Alpha nodes of the Dgraph cluster on the
+HTTP-external-public port. Refer to
+[ports usage](/graphql/graphql-clients/ports-usage).
+
+For a local install, the GraphQL endpoint would be:
+
+```
+http://localhost:8080/graphql
+```
+
+The URL depends on your configuration, specifically:
+
+- the port offset defined by the `--port_offset` option of the `dgraph alpha`
+  command.
+- the configuration of TLS for HTTPS.
+- the usage of a load balancer.
+
+
+
+### In this section
diff --git a/dgraph/reference/graphql/graphql-clients/graphql-ide.mdx b/dgraph/reference/graphql/graphql-clients/graphql-ide.mdx
new file mode 100644
index 00000000..ce002341
--- /dev/null
+++ b/dgraph/reference/graphql/graphql-clients/graphql-ide.mdx
@@ -0,0 +1,21 @@
+---
+title: GraphQL IDEs
+---
+
+As Dgraph serves a
+[spec-compliant GraphQL](https://graphql.github.io/graphql-spec/June2018/) API,
+you can use your favorite GraphQL IDE.
+
+- Postman
+- Insomnia
+- GraphiQL
+- VSCode with GraphQL extensions
+
+### General IDE setup
+
+- Copy the Dgraph GraphQL endpoint.
+- Set the security header as required.
+- Use the IDE introspection capability.
+
+You are then ready to write GraphQL queries and mutations and to run them
+against your Dgraph cluster.
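+
+As a quick sanity check of the connection, you can run a small introspection
+query; any spec-compliant GraphQL endpoint answers it with the name of its root
+query type:
+
+```graphql
+query {
+  __schema {
+    queryType {
+      name
+    }
+  }
+}
+```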
diff --git a/dgraph/reference/graphql/graphql-clients/graphql-ui.mdx b/dgraph/reference/graphql/graphql-clients/graphql-ui.mdx
new file mode 100644
index 00000000..1ea59173
--- /dev/null
+++ b/dgraph/reference/graphql/graphql-clients/graphql-ui.mdx
@@ -0,0 +1,18 @@
+---
+title: Client libraries
+---
+
+When building an application in React, Vue, Svelte, or any of your favorite
+frameworks, using a GraphQL client library is a must.
+
+As Dgraph serves a
+[spec-compliant GraphQL](https://graphql.github.io/graphql-spec/June2018/) API
+from your schema, and supports introspection and GraphQL subscriptions, the
+integration with GraphQL UI client libraries is seamless.
+
+Here is a non-exhaustive list of popular GraphQL UI clients that you can use
+with Dgraph to build applications:
+
+- [graphql-request](https://github.com/jasonkuhrt/graphql-request)
+- [URQL](https://github.com/urql-graphql/urql)
+- [Apollo client](https://github.com/apollographql/apollo-client)
diff --git a/dgraph/reference/graphql/graphql-clients/index.mdx b/dgraph/reference/graphql/graphql-clients/index.mdx
new file mode 100644
index 00000000..fce4f042
--- /dev/null
+++ b/dgraph/reference/graphql/graphql-clients/index.mdx
@@ -0,0 +1,5 @@
+---
+title: GraphQL Client
+---
+
+### In this section
diff --git a/dgraph/reference/graphql/index.mdx b/dgraph/reference/graphql/index.mdx
new file mode 100644
index 00000000..fa5a90b1
--- /dev/null
+++ b/dgraph/reference/graphql/index.mdx
@@ -0,0 +1,29 @@
+---
+title: GraphQL API
+description:
+  Generate a GraphQL API and a graph backend from a single GraphQL schema.
+---
+
+Dgraph lets you generate a GraphQL API and a graph backend from a single
+[GraphQL schema](./schema/_index.md); no resolvers or custom queries are needed.
+Dgraph automatically generates the GraphQL operations for
+[queries](./queries/_index.md) and [mutations](/mutations/_index.md).
+
+GraphQL developers can [get started](./quick-start/index.md) in minutes, and
+need not concern themselves with the powerful graph database running in the
+background.
+
+Dgraph extends the [GraphQL specifications](https://spec.graphql.org/) with
+[directives](./schema/directives/_index.md) and allows you to customize the
+behavior of GraphQL operations using [custom resolvers](./custom-overview.md) or
+to write your own resolver logic with [Lambda resolvers](./lambda-overview.md).
+
+Dgraph also supports:
+
+- [GraphQL subscriptions](./subscriptions/index.md) with the `@withSubscription`
+  directive: a client application can execute a subscription query and receive
+  real-time updates when the subscription query result is updated.
+- [Apollo federation](./federation/index): you can create a gateway GraphQL
+  service that includes the Dgraph GraphQL API and other GraphQL services.
+
+Refer to the following pages for more details:
diff --git a/dgraph/reference/graphql/lambda/field.mdx b/dgraph/reference/graphql/lambda/field.mdx
new file mode 100644
index 00000000..234b18fc
--- /dev/null
+++ b/dgraph/reference/graphql/lambda/field.mdx
@@ -0,0 +1,221 @@
+---
+title: Lambda Fields
+description:
+  Start with lambda resolvers by defining them in your GraphQL schema. Then
+  define your JavaScript mutation function and add it as a resolver in your JS
+  source code.
+---
+
+### Schema
+
+To set up a lambda function, first you need to define it on your GraphQL schema
+by using the `@lambda` directive.
+
+For example, to define a lambda function for the `bio`, `rank`, and `isMe`
+fields in `Author`:
+
+```graphql
+type Author {
+  id: ID!
+  name: String!
@search(by: [hash, trigram]) + dob: DateTime @search + reputation: Float @search + bio: String @lambda + rank: Int @lambda + isMe: Boolean @lambda +} +``` + +You can also define `@lambda` fields on interfaces, as follows: + +```graphql +interface Character { + id: ID! + name: String! @search(by: [exact]) + bio: String @lambda +} + +type Human implements Character { + totalCredits: Float +} + +type Droid implements Character { + primaryFunction: String +} +``` + +### Resolvers + +After the schema is ready, you can define your JavaScript mutation function and +add it as a resolver in your JS source code. To add the resolver you can use +either the `addGraphQLResolvers` or `addMultiParentGraphQLResolvers` methods. + + + A Lambda Field resolver can use a combination of `parents`, `parent`, `dql`, + or `graphql` inside the function. + + + + This example uses `parent` for the resolver function. You can find additional + resolver examples using `dql` in the [Lambda queries article](./query), and + using `graphql` in the [Lambda mutations article](./mutation). + + +For example, to define JavaScript lambda functions for... + +- `Author`, +- `Character`, +- `Human`, and +- `Droid` + +...and add them as resolvers, do the following: + +```javascript +const authorBio = ({ parent: { name, dob } }) => + `My name is ${name} and I was born on ${dob}.` +const characterBio = ({ parent: { name } }) => `My name is ${name}.` +const humanBio = ({ parent: { name, totalCredits } }) => + `My name is ${name}. I have ${totalCredits} credits.` +const droidBio = ({ parent: { name, primaryFunction } }) => + `My name is ${name}. My primary function is ${primaryFunction}.` + +self.addGraphQLResolvers({ + "Author.bio": authorBio, + "Character.bio": characterBio, + "Human.bio": humanBio, + "Droid.bio": droidBio, +}) +``` + +For example, you can add a resolver for `rank` using a `graphql` call, as +follows: + +```javascript +async function rank({ parents }) { + const idRepList = parents.map(function (parent) { + return { id: parent.id, rep: parent.reputation } + }) + const idRepMap = {} + idRepList + .sort((a, b) => (a.rep > b.rep ? -1 : 1)) + .forEach((a, i) => (idRepMap[a.id] = i + 1)) + return parents.map((p) => idRepMap[p.id]) +} + +self.addMultiParentGraphQLResolvers({ + "Author.rank": rank, +}) +``` + +The following example demonstrates using the client-provided JWT to return +`true` if the custom claim for `USER` from the JWT matches the `id` of the +`Author`. 
+
+```javascript
+async function isMe({ parent, authHeader }) {
+  if (!authHeader) return false
+  if (!authHeader.value) return false
+  const headerValue = authHeader.value
+  if (headerValue === "") return false
+  const base64Url = headerValue.split(".")[1]
+  const base64 = base64Url.replace(/-/g, "+").replace(/_/g, "/")
+  const allClaims = JSON.parse(atob(base64))
+  if (!allClaims["https://my.app.io/jwt/claims"]) return false
+  const customClaims = allClaims["https://my.app.io/jwt/claims"]
+  return customClaims.USER === parent.id
+}
+
+self.addGraphQLResolvers({
+  "Author.isMe": isMe,
+})
+```
+
+### Example
+
+For example, if you execute the following GraphQL query:
+
+```graphql
+query {
+  queryAuthor {
+    name
+    bio
+    rank
+    isMe
+  }
+}
+```
+
+...you should see a response such as the following:
+
+```json
+{
+  "queryAuthor": [
+    {
+      "name": "Ann Author",
+      "bio": "My name is Ann Author and I was born on 2000-01-01T00:00:00Z.",
+      "rank": 3,
+      "isMe": false
+    }
+  ]
+}
+```
+
+In the same way, if you execute the following GraphQL query on the `Character`
+interface:
+
+```graphql
+query {
+  queryCharacter {
+    name
+    bio
+  }
+}
+```
+
+...you should see a response such as the following:
+
+```json
+{
+  "queryCharacter": [
+    {
+      "name": "Han",
+      "bio": "My name is Han."
+    },
+    {
+      "name": "R2-D2",
+      "bio": "My name is R2-D2."
+    }
+  ]
+}
+```
+
+
+  The `Human` and `Droid` types will inherit the `bio` lambda field from the
+  `Character` interface.
+
+
+For example, if you execute a `queryHuman` query with a selection set containing
+`bio`, then the lambda function registered for `Human.bio` is executed, as
+follows:
+
+```graphql
+query {
+  queryHuman {
+    name
+    bio
+  }
+}
+```
+
+This query generates the following response:
+
+```json
+{
+  "queryHuman": [
+    {
+      "name": "Han",
+      "bio": "My name is Han. I have 10 credits."
+    }
+  ]
+}
+```
diff --git a/dgraph/reference/graphql/lambda/index.mdx b/dgraph/reference/graphql/lambda/index.mdx
new file mode 100644
index 00000000..d28af4a7
--- /dev/null
+++ b/dgraph/reference/graphql/lambda/index.mdx
@@ -0,0 +1,3 @@
+---
+title: Lambda Resolvers
+---
diff --git a/dgraph/reference/graphql/lambda/lambda-overview.mdx b/dgraph/reference/graphql/lambda/lambda-overview.mdx
new file mode 100644
index 00000000..d0c674f8
--- /dev/null
+++ b/dgraph/reference/graphql/lambda/lambda-overview.mdx
@@ -0,0 +1,350 @@
+---
+title: Dgraph Lambda Overview
+description:
+  Lambda provides a way to write custom logic in JavaScript, integrate it with
+  your GraphQL schema, and execute it using the GraphQL API in a few easy steps.
+---
+
+Lambda provides a way to write your custom logic in JavaScript, integrate it
+with your GraphQL schema, and execute it using the GraphQL API in a few easy
+steps:
+
+1. Set up a Dgraph cluster with a working lambda server (not required for
+   [Dgraph Cloud](https://dgraph.io/cloud) users)
+2. Declare lambda queries, mutations, and fields in your GraphQL schema as
+   needed
+3. Define lambda resolvers for them in a JavaScript file
+
+This also simplifies the job of developers: they can build a complex backend
+rich with business logic without setting up multiple different services, and
+because the backend is written in JavaScript, they can build both the frontend
+and the backend using the same language.
+
+Dgraph doesn't execute your custom logic itself. It makes external HTTP requests
+to a user-defined lambda server. [Dgraph Cloud](https://dgraph.io/cloud) does
+all of this for you.
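+
+For a first impression of what that JavaScript looks like, here is a minimal
+sketch of a lambda script; the type and field names are illustrative only, and
+the `self.addGraphQLResolvers` registration API is covered in detail below:
+
+```javascript
+// script.js: a minimal lambda resolver file (illustrative names).
+// Assumes a schema declaring, for example:
+//   type MyType { name: String!  greeting: String @lambda }
+self.addGraphQLResolvers({
+  "MyType.greeting": ({ parent }) => `Hello, ${parent.name}!`,
+})
+```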
+
+  If you want to deploy your own lambda server, you can find the implementation
+  of Dgraph Lambda in our [open-source
+  repository](https://github.com/dgraph-io/dgraph-lambda). Please refer to the
+  documentation on [setting up a lambda server](/graphql/lambda/server) for more
+  details.
+
+
+  If you're using [Dgraph Cloud](https://dgraph.io/cloud), the final compiled
+  script file must be under 500KB.
+
+
+## Declaring lambda in a GraphQL schema
+
+There are three places where you can use the `@lambda` directive and thus tell
+Dgraph where to apply custom JavaScript logic.
+
+- You can add lambda fields to your types and interfaces, as follows:
+
+```graphql
+type MyType {
+  ...
+  customField: String @lambda
+}
+```
+
+- You can add lambda queries to the Query type, as follows:
+
+```graphql
+type Query {
+  myCustomQuery(...): QueryResultType @lambda
+}
+```
+
+- You can add lambda mutations to the Mutation type, as follows:
+
+```graphql
+type Mutation {
+  myCustomMutation(...): MutationResult @lambda
+}
+```
+
+## Defining lambda resolvers in JavaScript
+
+A lambda resolver is a user-defined JavaScript function that performs custom
+actions over the GraphQL types, interfaces, queries, and mutations. There are
+two methods to register JavaScript resolvers:
+
+- `self.addGraphQLResolvers`
+- `self.addMultiParentGraphQLResolvers`
+
+
+  Functions `self.addGraphQLResolvers` and `self.addMultiParentGraphQLResolvers`
+  can be called multiple times in your resolver code.
+
+
+### addGraphQLResolvers
+
+The `self.addGraphQLResolvers` method takes an object as an argument, which maps
+a resolver name to the resolver function that implements it. The resolver
+functions registered using `self.addGraphQLResolvers` receive
+`{ parent, args, graphql, dql }` as argument:
+
+- `parent`, the parent object for which to resolve the current lambda field
+  registered using `addGraphQLResolvers`. The `parent` receives all immediate
+  fields of that object, whether or not they were actually queried. Available
+  only for types and interfaces (`null` for queries and mutations)
+- `args`, the set of arguments for lambda queries and mutations
+- `graphql`, a function to execute auto-generated GraphQL API calls from the
+  lambda server. The user's auth header is passed back to the `graphql`
+  function, so this can be used securely
+- `dql`, provides an API to execute DQL from the lambda server
+- `authHeader`, provides the JWT key and value of the auth header passed from
+  the client
+
+The `addGraphQLResolvers` can be represented with the following TypeScript
+types:
+
+```TypeScript
+type GraphQLResponse = {
+  data?: Record<string, any>
+  errors?: { message: string }[]
+}
+
+type AuthHeader = {
+  key: string
+  value: string
+}
+
+type GraphQLEventWithParent = {
+  parent: Record<string, any> | null
+  args: Record<string, any>
+  graphql: (query: string, vars?: Record<string, any>, authHeader?: AuthHeader) => Promise<GraphQLResponse>
+  dql: {
+    query: (dql: string, vars?: Record<string, any>) => Promise<GraphQLResponse>
+    mutate: (dql: string) => Promise<GraphQLResponse>
+  }
+  authHeader: AuthHeader
+}
+
+function addGraphQLResolvers(resolvers: {
+  [key: string]: (e: GraphQLEventWithParent) => any;
+}): void
+```
+
+
+  `self.addGraphQLResolvers` is the default choice for registering resolvers
+  when the result of the lambda for each parent is independent of other parents.
+
+
+Each resolver function should return data in the exact format as the return type
+of the GraphQL field, query, or mutation for which it is being registered.
+
+In the following example, the resolver function `myTypeResolver` registered for
+the `customField` field in `MyType` returns a string because the return type of
+that field in the GraphQL schema is `String`:
+
+```javascript
+const myTypeResolver = ({ parent: { customField } }) =>
+  `My value is ${customField}.`
+
+self.addGraphQLResolvers({
+  "MyType.customField": myTypeResolver,
+})
+```
+
+Another resolver example using a `graphql` call:
+
+```javascript
+async function todoTitles({ graphql }) {
+  const results = await graphql("{ queryTodo { title } }")
+  return results.data.queryTodo.map((t) => t.title)
+}
+
+self.addGraphQLResolvers({
+  "Query.todoTitles": todoTitles,
+})
+```
+
+### addMultiParentGraphQLResolvers
+
+The `self.addMultiParentGraphQLResolvers` method is useful in scenarios where
+you want to perform computations involving all the parents returned from Dgraph
+for a lambda field. This is useful in two scenarios:
+
+- When you want to perform a computation between parents
+- When you want to execute a complex query, and want to optimize it by firing a
+  single query for all the parents
+
+This method takes an object as an argument, which maps a resolver name to the
+resolver function that implements it. The resolver functions registered using
+this method receive `{ parents, args, graphql, dql }` as argument:
+
+- `parents`, a list of parent objects for which to resolve the current lambda
+  field registered using `addMultiParentGraphQLResolvers`. Available only for
+  types and interfaces (`null` for queries and mutations)
+- `args`, the set of arguments for lambda queries and mutations (`null` for
+  types and interfaces)
+- `graphql`, a function to execute auto-generated GraphQL API calls from the
+  lambda server
+- `dql`, provides an API to execute DQL from the lambda server
+- `authHeader`, provides the JWT key and value of the auth header passed from
+  the client
+
+The `addMultiParentGraphQLResolvers` can be represented with the following
+TypeScript types:
+
+```TypeScript
+type GraphQLResponse = {
+  data?: Record<string, any>
+  errors?: { message: string }[]
+}
+
+type AuthHeader = {
+  key: string
+  value: string
+}
+
+type GraphQLEventWithParents = {
+  parents: Record<string, any>[] | null
+  args: Record<string, any>
+  graphql: (query: string, vars?: Record<string, any>, authHeader?: AuthHeader) => Promise<GraphQLResponse>
+  dql: {
+    query: (dql: string, vars?: Record<string, any>) => Promise<GraphQLResponse>
+    mutate: (dql: string) => Promise<GraphQLResponse>
+  }
+  authHeader: AuthHeader
+}
+
+function addMultiParentGraphQLResolvers(resolvers: {
+  [key: string]: (e: GraphQLEventWithParents) => any;
+}): void
+```
+
+
+  This method should not be used for lambda queries or lambda mutations.
+
+
+Each resolver function should return data as a list of the return type of the
+GraphQL field for which it is being registered.
+
+In the following example, the resolver function `rank()` registered for the
+`rank` field in `Author` returns a list of integers because the return type of
+that field in the GraphQL schema is `Int`:
+
+```graphql
+type Author {
+  id: ID!
+  name: String! @search(by: [hash, trigram])
+  reputation: Float @search
+  rank: Int @lambda
+}
+```
+
+```javascript
+import { sortBy } from "lodash"
+
+/*
+This function computes the rank of each author based on the reputation of the author relative to other authors.
+*/
+async function rank({ parents }) {
+  const idRepMap = {}
+  sortBy(parents, "reputation").forEach(
+    (parent, i) => (idRepMap[parent.id] = parents.length - i),
+  )
+  return parents.map((p) => idRepMap[p.id])
+}
+
+self.addMultiParentGraphQLResolvers({
+  "Author.rank": rank,
+})
+```
+
+
+  Scripts that import packages (such as the example above) require compilation
+  using Webpack.
+
+
+The following example resolver uses a `dql` call:
+
+```javascript
+async function reallyComplexDql({ parents, dql }) {
+  const ids = parents.map((p) => p.id)
+  const someComplexResults = await dql.query(
+    `really-complex-query-here with ${ids}`,
+  )
+  return parents.map((parent) => someComplexResults[parent.id])
+}
+
+self.addMultiParentGraphQLResolvers({
+  "MyType.reallyComplexProperty": reallyComplexDql,
+})
+```
+
+The following resolver example uses a `graphql` call and manually overrides the
+`authHeader` provided by the client:
+
+```javascript
+async function secretGraphQL({ parents, graphql }) {
+  const ids = parents.map((p) => p.id);
+  const secretResults = await graphql(
+    `query myQueryName ($ids: [ID!]) {
+      queryMyType(filter: { id: $ids }) {
+        id
+        controlledEdge {
+          myField
+        }
+      }
+    }`,
+    { ids },
+    {
+      key: 'X-My-App-Auth',
+      value: 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJodHRwczovL215LmFwcC5pby9qd3QvY2xhaW1zIjp7IlVTRVIiOiJmb28ifSwiZXhwIjoxODAwMDAwMDAwLCJzdWIiOiJ0ZXN0IiwibmFtZSI6IkpvaG4gRG9lIDIiLCJpYXQiOjE1MTYyMzkwMjJ9.wI3857KzwjtZAtOjng6MnzKVhFSqS1vt1SjxUMZF4jc'
+    }
+  );
+  return parents.map((parent) => {
+    const secretRes = secretResults.data.queryMyType.find(res => res.id === parent.id)
+    parent.secretProperty = null
+    if (secretRes) {
+      if (secretRes.controlledEdge) {
+        parent.secretProperty = secretRes.controlledEdge.myField
+      }
+    }
+    return parent
+  });
+}
+self.addMultiParentGraphQLResolvers({
+  "MyType.secretProperty": secretGraphQL,
+});
+```
+
+## Example
+
+For example, if you execute the following lambda query:
+
+```graphql
+query {
+  queryMyType {
+    customField
+  }
+}
+```
+
+...you should see a response such as the following:
+
+```json
+{
+  "queryMyType": [
+    {
+      "customField": "My value is Lambda Example"
+    }
+  ]
+}
+```
+
+## Learn more
+
+To learn more about the `@lambda` directive, see:
+
+- [Lambda fields](/graphql/lambda/field)
+- [Lambda queries](/graphql/lambda/query)
+- [Lambda mutations](/graphql/lambda/mutation)
+- [Lambda server setup](/graphql/lambda/server)
diff --git a/dgraph/reference/graphql/lambda/mutation.mdx b/dgraph/reference/graphql/lambda/mutation.mdx
new file mode 100644
index 00000000..4292fe50
--- /dev/null
+++ b/dgraph/reference/graphql/lambda/mutation.mdx
@@ -0,0 +1,114 @@
+---
+title: Lambda Mutations
+description:
+  Ready to use lambdas for mutations? This documentation takes you through the
+  schemas, resolvers, and examples.
+---
+
+### Schema
+
+To set up a lambda mutation, first you need to define it on your GraphQL schema
+by using the `@lambda` directive.
+
+
+  `add`, `update`, and `delete` are reserved prefixes and they can't be used to
+  define Lambda mutations.
+
+
+For example, to define a lambda mutation for `Author` that creates a new author
+with a default `reputation` of `3.0` given just the `name`:
+
+```graphql
+type Author {
+  id: ID!
+  name: String! @search(by: [hash, trigram])
+  dob: DateTime
+  reputation: Float
+}
+
+type Mutation {
+  newAuthor(name: String!): ID! @lambda
+}
+```
+
+### Resolver
+
+Once the schema is ready, you can define your JavaScript mutation function and
+add it as a resolver in your JS source code.
To add the resolver you can use
+either the `addGraphQLResolvers` or `addMultiParentGraphQLResolvers` methods.
+
+
+  A Lambda Mutation resolver can use a combination of `parents`, `args`, `dql`,
+  or `graphql` inside the function.
+
+
+
+  This example uses `graphql` for the resolver function. You can find additional
+  resolver examples using `dql` in the [Lambda queries article](./query), and
+  using `parent` in the [Lambda fields article](./field).
+
+
+For example, to define the JavaScript `newAuthor()` lambda function and add it
+as a resolver:
+
+```javascript
+async function newAuthor({ args, graphql }) {
+  // let's give every new author a reputation of 3 by default
+  const results = await graphql(
+    `
+      mutation ($name: String!) {
+        addAuthor(input: [{ name: $name, reputation: 3.0 }]) {
+          author {
+            id
+            reputation
+          }
+        }
+      }
+    `,
+    { name: args.name },
+  )
+  return results.data.addAuthor.author[0].id
+}
+
+self.addGraphQLResolvers({
+  "Mutation.newAuthor": newAuthor,
+})
+```
+
+Alternatively, you can use `dql.mutate` to achieve the same results:
+
+```javascript
+async function newAuthor({ args, dql, graphql }) {
+  // let's give every new author a reputation of 3 by default
+  const res = await dql.mutate(`{
+    set {
+      _:newAuth <Author.name> "${args.name}" .
+      _:newAuth <Author.reputation> "3.0" .
+      _:newAuth <dgraph.type> "Author" .
+    }
+  }`)
+  return res.data.uids.newAuth
+}
+```
+
+### Example
+
+Finally, if you execute this lambda mutation, a new author `Ken Addams` with
+`reputation=3.0` should be added to the database:
+
+```graphql
+mutation {
+  newAuthor(name: "Ken Addams")
+}
+```
+
+Afterwards, if you query the GraphQL database for `Ken Addams`, you would see:
+
+```json
+{
+  "getAuthor": {
+    "name": "Ken Addams",
+    "reputation": 3.0
+  }
+}
+```
diff --git a/dgraph/reference/graphql/lambda/query.mdx b/dgraph/reference/graphql/lambda/query.mdx
new file mode 100644
index 00000000..e8673ce4
--- /dev/null
+++ b/dgraph/reference/graphql/lambda/query.mdx
@@ -0,0 +1,100 @@
+---
+title: Lambda Queries
+description:
+  Get started with the @lambda directive for queries. This documentation takes
+  you through the schemas, resolvers, and examples.
+---
+
+### Schema
+
+To set up a lambda query, first you need to define it on your GraphQL schema by
+using the `@lambda` directive.
+
+
+  `get`, `query`, and `aggregate` are reserved prefixes and they can't be used
+  to define Lambda queries.
+
+
+For example, to define a lambda query for `Author` that finds authors given an
+author's `name`:
+
+```graphql
+type Author {
+  id: ID!
+  name: String! @search(by: [hash, trigram])
+  dob: DateTime
+  reputation: Float
+}
+
+type Query {
+  authorsByName(name: String!): [Author] @lambda
+}
+```
+
+### Resolver
+
+Once the schema is ready, you can define your JavaScript query function and add
+it as a resolver in your JS source code. To add the resolver you can use either
+the `addGraphQLResolvers` or `addMultiParentGraphQLResolvers` methods.
+
+
+  A Lambda Query resolver can use a combination of `parents`, `args`, `dql`, or
+  `graphql` inside the function.
+
+
+
+  This example uses `dql` for the resolver function. You can find additional
+  resolver examples using `parent` in the [Lambda fields article](./field), and
+  using `graphql` in the [Lambda mutations article](./mutation).
+
+For example, to define the JavaScript `authorsByName()` lambda function and add
+it as a resolver:
+
+```javascript
+async function authorsByName({ args, dql }) {
+  const results = await dql.query(
+    `query queryAuthor($name: string) {
+      queryAuthor(func: type(Author)) @filter(eq(Author.name, $name)) {
+        name: Author.name
+        dob: Author.dob
+        reputation: Author.reputation
+      }
+    }`,
+    { $name: args.name },
+  )
+  return results.data.queryAuthor
+}
+
+self.addGraphQLResolvers({
+  "Query.authorsByName": authorsByName,
+})
+```
+
+### Example
+
+Finally, if you execute this lambda query:
+
+```graphql
+query {
+  authorsByName(name: "Ann Author") {
+    name
+    dob
+    reputation
+  }
+}
+```
+
+You should see a response such as:
+
+```json
+{
+  "authorsByName": [
+    {
+      "name": "Ann Author",
+      "dob": "2000-01-01T00:00:00Z",
+      "reputation": 6.6
+    }
+  ]
+}
+```
diff --git a/dgraph/reference/graphql/lambda/webhook.mdx b/dgraph/reference/graphql/lambda/webhook.mdx
new file mode 100644
index 00000000..00c33cf6
--- /dev/null
+++ b/dgraph/reference/graphql/lambda/webhook.mdx
@@ -0,0 +1,110 @@
+---
+title: Lambda Webhooks
+description:
+  Ready to use lambdas for webhooks? This documentation takes you through the
+  schemas, resolvers, and examples.
+---
+
+### Schema
+
+To set up a lambda webhook, you need to define it in your GraphQL schema by
+using the `@lambdaOnMutate` directive along with the mutation events
+(`add`/`update`/`delete`) you want to listen on.
+
+
+  Lambda webhooks only listen for events from the root mutation. You can create
+  a schema that is capable of creating deeply nested objects, but only the
+  parent-level webhooks are invoked for the mutation.
+
+
+For example, to define a lambda webhook for all mutation events
+(`add`/`update`/`delete`) on any `Author` object:
+
+```graphql
+type Author @lambdaOnMutate(add: true, update: true, delete: true) {
+  id: ID!
+  name: String! @search(by: [hash, trigram])
+  dob: DateTime
+  reputation: Float
+}
+```
+
+### Resolver
+
+Once the schema is ready, you can define your JavaScript functions and add those
+as resolvers in your JS source code. To add the resolvers you should use the
+`addWebHookResolvers` method.
+
+
+  A Lambda Webhook resolver can use a combination of `event`, `dql`, `graphql`,
+  or `authHeader` inside the function.
+
+
+#### Event object
+
+You also have access to the `event` object within the resolver. Depending on the
+value of the `operation` field, only one of the fields (`add`/`update`/`delete`)
+will be part of the `event` object.
The definition of `event` is as follows (the angle-bracket values are
+placeholders for the actual data of the mutation):
+
+```
+"event": {
+  "__typename": "<type name>",
+  "operation": "<add|update|delete>",
+  "commitTs": <transaction commit timestamp>,
+  "add": {
+    "rootUIDs": [<UIDs of the mutated nodes>],
+    "input": [<input objects of the add mutation>]
+  },
+  "update": {
+    "rootUIDs": [<UIDs of the mutated nodes>],
+    "setPatch": <set patch of the update mutation>,
+    "removePatch": <remove patch of the update mutation>
+  },
+  "delete": {
+    "rootUIDs": [<UIDs of the deleted nodes>]
+  }
+}
+```
+
+#### Resolver examples
+
+For example, to define JavaScript lambda functions for each mutation event for
+which `@lambdaOnMutate` is enabled and add those as resolvers:
+
+```javascript
+async function addAuthorWebhook({ event, dql, graphql, authHeader }) {
+  // execute what you want on addition of an author
+  // maybe send a welcome mail to the author
+}
+
+async function updateAuthorWebhook({ event, dql, graphql, authHeader }) {
+  // execute what you want on update of an author
+  // maybe send a mail to the author informing them that a few details have been updated
+}
+
+async function deleteAuthorWebhook({ event, dql, graphql, authHeader }) {
+  // execute what you want on deletion of an author
+  // maybe mail the author saying they have been removed from the platform
+}
+
+self.addWebHookResolvers({
+  "Author.add": addAuthorWebhook,
+  "Author.update": updateAuthorWebhook,
+  "Author.delete": deleteAuthorWebhook,
+})
+```
+
+### Example
+
+Finally, if you execute an `addAuthor` mutation, the `add` operation mapped to
+the `addAuthorWebhook` resolver will be triggered:
+
+```graphql
+mutation {
+  addAuthor(input: [{ name: "Ken Addams" }]) {
+    author {
+      id
+      name
+    }
+  }
+}
+```
diff --git a/dgraph/reference/graphql/mutations/add.mdx b/dgraph/reference/graphql/mutations/add.mdx
new file mode 100644
index 00000000..64a53f88
--- /dev/null
+++ b/dgraph/reference/graphql/mutations/add.mdx
@@ -0,0 +1,91 @@
+---
+title: Add Mutations
+description:
+  Add mutations allow you to add new objects of a particular type. Dgraph
+  automatically generates input and return types in the schema for the add
+  mutation
+---
+
+Add mutations allow you to add new objects of a particular type.
+
+We use the following schema to demonstrate some examples.
+
+**Schema**:
+
+```graphql
+type Author {
+  id: ID!
+  name: String! @search(by: [hash])
+  dob: DateTime
+  posts: [Post]
+}
+
+type Post {
+  postID: ID!
+  title: String! @search(by: [term, fulltext])
+  text: String @search(by: [fulltext, term])
+  datePublished: DateTime
+}
+```
+
+Dgraph automatically generates input and return types in the schema for the
+`add` mutation, as shown below:
+
+```graphql
+addPost(input: [AddPostInput!]!): AddPostPayload
+
+input AddPostInput {
+  title: String!
+  text: String
+  datePublished: DateTime
+}
+
+type AddPostPayload {
+  post(filter: PostFilter, order: PostOrder, first: Int, offset: Int): [Post]
+  numUids: Int
+}
+```
+
+**Example**: Add mutation on single type with embedded value
+
+```graphql
+mutation {
+  addAuthor(input: [{ name: "A.N. Author", posts: [] }]) {
+    author {
+      id
+      name
+    }
+  }
+}
+```
+
+**Example**: Add mutation on single type using variables
+
+```graphql
+mutation addAuthor($author: [AddAuthorInput!]!) {
+  addAuthor(input: $author) {
+    author {
+      id
+      name
+    }
+  }
+}
+```
+
+Variables:
+
+```json
+{ "author": { "name": "A.N. Author", "dob": "2000-01-01", "posts": [] } }
+```
+
+
+  You can convert an `add` mutation to an `upsert` mutation by setting the value
+  of the input variable `upsert` to `true`. For more information, see [Upsert
+  Mutations](/graphql/mutations/upsert).
+
+
+## Examples
+
+You can refer to the following
+[link](https://github.com/dgraph-io/dgraph/blob/main/graphql/resolve/add_mutation_test.yaml)
+for more examples.
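+
+If you are calling the API from application code rather than an IDE, an add
+mutation is posted to `/graphql` like any other GraphQL request. Here is a
+minimal sketch using JavaScript `fetch`; the local endpoint URL is an
+assumption, and yours depends on your deployment:
+
+```javascript
+// Post the addAuthor mutation with variables to a local /graphql endpoint.
+// http://localhost:8080/graphql is an assumed default local-install URL.
+async function addAuthorRequest() {
+  const res = await fetch("http://localhost:8080/graphql", {
+    method: "POST",
+    headers: { "Content-Type": "application/json" },
+    body: JSON.stringify({
+      query: `mutation addAuthor($author: [AddAuthorInput!]!) {
+        addAuthor(input: $author) { author { id name } }
+      }`,
+      variables: { author: [{ name: "A.N. Author", posts: [] }] },
+    }),
+  })
+  return (await res.json()).data.addAuthor.author
+}
+```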
diff --git a/dgraph/reference/graphql/mutations/deep.mdx b/dgraph/reference/graphql/mutations/deep.mdx new file mode 100644 index 00000000..6beed20b --- /dev/null +++ b/dgraph/reference/graphql/mutations/deep.mdx @@ -0,0 +1,113 @@ +--- +title: Deep Mutations +description: + You can perform deep mutations at multiple levels. Deep mutations do not alter + linked objects, but they can add deeply-nested new objects or link to existing + objects +--- + +You can perform deep mutations at multiple levels. Deep mutations do not alter +linked objects, but they can add deeply-nested new objects or link to existing +objects. To update an existing nested object, use the update mutation for its +type. + +We use the following schema to demonstrate some examples. + +## **Schema**: + +```graphql +type Author { + id: ID! + name: String! @search(by: [hash]) + dob: DateTime + posts: [Post] +} + +type Post { + postID: ID! + title: String! @search(by: [term, fulltext]) + text: String @search(by: [fulltext, term]) + datePublished: DateTime +} +``` + +### **Example**: Adding deeply nested post with new author mutation using variables + +```graphql +mutation addAuthorWithPost($author: addAuthorInput!) { + addAuthor(input: [$author]) { + author { + id + name + posts { + title + text + } + } + } +} +``` + +Variables: + +```json +{ + "author": { + "name": "A.N. Author", + "dob": "2000-01-01", + "posts": [ + { + "title": "New post", + "text": "A really new post" + } + ] + } +} +``` + +### **Example**: Update mutation on deeply nested post and link to an existing author using variables + +The following example assumes that the post with the postID of `0x456` already +exists, and is not currently nested under the author having the id of `0x123`. + + + This syntax does not remove any other existing posts, it just adds the + existing post to any that may already be nested. + + +```graphql +mutation updateAuthorWithExistingPost($patch: UpdateAuthorInput!) { + updateAuthor(input: $patch) { + author { + id + posts { + title + text + } + } + } +} +``` + +Variables: + +```json +{ + "patch": { + "filter": { + "id": ["0x123"] + }, + "set": { + "posts": [ + { + "postID": "0x456" + } + ] + } + } +} +``` + +The example query above can't modify the existing post's title or text. To +modify the post's title or text, use the `updatePost` mutation either alongside +the mutation above, or as a separate transaction. diff --git a/dgraph/reference/graphql/mutations/delete.mdx b/dgraph/reference/graphql/mutations/delete.mdx new file mode 100644 index 00000000..92d6e353 --- /dev/null +++ b/dgraph/reference/graphql/mutations/delete.mdx @@ -0,0 +1,65 @@ +--- +title: Delete Mutations +--- + +Delete Mutations allow you to delete objects of a particular type. + +We use the following schema to demonstrate some examples. + +**Schema**: + +```graphql +type Author { + id: ID! + name: String! @search(by: [hash]) + dob: DateTime + posts: [Post] +} + +type Post { + postID: ID! + title: String! @search(by: [term, fulltext]) + text: String @search(by: [fulltext, term]) + datePublished: DateTime +} +``` + +Dgraph automatically generates input and return types in the schema for the +`delete` mutation. Delete mutations take `filter` as an input to select specific +objects and returns the state of the objects before deletion. 
+ +```graphql +deleteAuthor(filter: AuthorFilter!): DeleteAuthorPayload + +type DeleteAuthorPayload { + author(filter: AuthorFilter, order: AuthorOrder, first: Int, offset: Int): [Author] + msg: String + numUids: Int +} +``` + +**Example**: Delete mutation using variables + +```graphql +mutation deleteAuthor($filter: AuthorFilter!) { + deleteAuthor(filter: $filter) { + msg + author { + name + dob + } + } +} +``` + +Variables: + +```json +{ "filter": { "name": { "eq": "A.N. Author" } } } +``` + +## Examples + +You can refer to the following +[link](https://github.com/dgraph-io/dgraph/blob/main/graphql/resolve/delete_mutation_test.yaml) +for more examples. diff --git a/dgraph/reference/graphql/mutations/index.mdx b/dgraph/reference/graphql/mutations/index.mdx new file mode 100644 index 00000000..31561579 --- /dev/null +++ b/dgraph/reference/graphql/mutations/index.mdx @@ -0,0 +1,3 @@ +--- +title: Mutations +--- diff --git a/dgraph/reference/graphql/mutations/mutations-overview.mdx b/dgraph/reference/graphql/mutations/mutations-overview.mdx new file mode 100644 index 00000000..11f0cea5 --- /dev/null +++ b/dgraph/reference/graphql/mutations/mutations-overview.mdx @@ -0,0 +1,302 @@ +--- +title: Mutations Overview +description: + Mutations can be used to insert, update, or delete data. Dgraph automatically + generates GraphQL mutation for each type that you define in your schema +--- + +Mutations allow you to modify server-side data, and it also returns an object +based on the operation performed. It can be used to insert, update, or delete +data. Dgraph automatically generates GraphQL mutations for each type that you +define in your schema. The mutation field returns an object type that allows you +to query for nested fields. This can be useful for fetching an object's new +state after an add/update, or to get the old state of an object before a delete. + +**Example** + +```graphql +type Author { + id: ID! + name: String! @search(by: [hash]) + dob: DateTime + posts: [Post] +} + +type Post { + postID: ID! + title: String! @search(by: [term, fulltext]) + text: String @search(by: [fulltext, term]) + datePublished: DateTime +} +``` + +The following mutations would be generated from the above schema. 
+
+```graphql
+type Mutation {
+  addAuthor(input: [AddAuthorInput!]!): AddAuthorPayload
+  updateAuthor(input: UpdateAuthorInput!): UpdateAuthorPayload
+  deleteAuthor(filter: AuthorFilter!): DeleteAuthorPayload
+  addPost(input: [AddPostInput!]!): AddPostPayload
+  updatePost(input: UpdatePostInput!): UpdatePostPayload
+  deletePost(filter: PostFilter!): DeletePostPayload
+}
+
+type AddAuthorPayload {
+  author(
+    filter: AuthorFilter
+    order: AuthorOrder
+    first: Int
+    offset: Int
+  ): [Author]
+  numUids: Int
+}
+
+type AddPostPayload {
+  post(filter: PostFilter, order: PostOrder, first: Int, offset: Int): [Post]
+  numUids: Int
+}
+
+type DeleteAuthorPayload {
+  author(
+    filter: AuthorFilter
+    order: AuthorOrder
+    first: Int
+    offset: Int
+  ): [Author]
+  msg: String
+  numUids: Int
+}
+
+type DeletePostPayload {
+  post(filter: PostFilter, order: PostOrder, first: Int, offset: Int): [Post]
+  msg: String
+  numUids: Int
+}
+
+type UpdateAuthorPayload {
+  author(
+    filter: AuthorFilter
+    order: AuthorOrder
+    first: Int
+    offset: Int
+  ): [Author]
+  numUids: Int
+}
+
+type UpdatePostPayload {
+  post(filter: PostFilter, order: PostOrder, first: Int, offset: Int): [Post]
+  numUids: Int
+}
+```
+
+## Input objects
+
+Mutations require input data, such as the data for a new object or the ID of an
+object to delete. Dgraph auto-generates the input object type for every type in
+the schema.
+
+```graphql
+input AddAuthorInput {
+  name: String!
+  dob: DateTime
+  posts: [PostRef]
+}
+
+mutation {
+  addAuthor(
+    input: {
+      name: "A.N. Author",
+      dob: "2000-01-01",
+    }
+  )
+  {
+    ...
+  }
+}
+```
+
+## Return fields
+
+Each mutation provides a set of fields that can be returned in the response.
+Dgraph auto-generates the return payload object type for every type in the
+schema.
+
+```graphql
+type AddAuthorPayload {
+  author(
+    filter: AuthorFilter
+    order: AuthorOrder
+    first: Int
+    offset: Int
+  ): [Author]
+  numUids: Int
+}
+```
+
+## Multiple fields in mutations
+
+A mutation can contain multiple fields, just like a query. While query fields
+are executed in parallel, mutation fields run in series, one after the other.
+This means that if we send two `updateAuthor` mutations in one request, the
+first is guaranteed to finish before the second begins. This ensures that we
+don't end up with a race condition with ourselves. If one of the mutations is
+aborted due to an error, such as a transaction conflict, the following mutations
+are still performed.
+
+**Example**: Mutation on multiple types
+
+```graphql
+mutation ($post: AddPostInput!, $author: AddAuthorInput!) {
+  addAuthor(input: [$author]) {
+    author {
+      name
+    }
+  }
+  addPost(input: [$post]) {
+    post {
+      postID
+      title
+      text
+    }
+  }
+}
+```
+
+Variables:
+
+```json
+{
+  "author": {
+    "name": "A.N. Author",
+    "dob": "2000-01-01",
+    "posts": []
+  },
+  "post": {
+    "title": "Exciting post",
+    "text": "A really good post",
+    "author": {
+      "name": "A.N. Author"
+    }
+  }
+}
+```
+
+## Union mutations
+
+Mutations can be used to add a node to a `union` field in a type.
+
+For the following schema,
+
+```graphql
+enum Category {
+  Fish
+  Amphibian
+  Reptile
+  Bird
+  Mammal
+  InVertebrate
+}
+
+interface Animal {
+  id: ID!
+  category: Category @search
+}
+
+type Dog implements Animal {
+  breed: String @search
+}
+
+type Parrot implements Animal {
+  repeatsWords: [String]
+}
+
+type Human {
+  name: String!
+  pets: [Animal!]!
+}
+
+union HomeMember = Dog | Parrot | Human
+
+type Home {
+  id: ID!
+  address: String
+  members: [HomeMember]
+}
+```
+
+This is the mutation for adding `members` to the `Home` type:
+
+```graphql
+mutation {
+  addHome(input: [
+    {
+      address: "United Street",
+      members: [
+        { dogRef: { category: Mammal, breed: "German Shepherd" } },
+        { parrotRef: { category: Bird, repeatsWords: ["squawk"] } },
+        { humanRef: { name: "Han Solo" } }
+      ]
+    }
+  ]) {
+    home {
+      address
+      members {
+        ... on Dog {
+          breed
+        }
+        ... on Parrot {
+          repeatsWords
+        }
+        ... on Human {
+          name
+        }
+      }
+    }
+  }
+}
+```
+
+## Vector Embedding mutations
+
+For types with vector embeddings, Dgraph automatically generates the add
+mutation. This example of an add mutation uses the following schema:
+
+```graphql
+type User {
+  userID: ID!
+  name: String!
+  name_v: [Float!]
+    @embedding
+    @search(by: ["hnsw(metric: euclidean, exponent: 4)"])
+}
+```
+
+The add mutation then looks as follows:
+
+```graphql
+mutation {
+  addUser(
+    input: [
+      {
+        name: "iCreate with a Mini iPad"
+        name_v: [0.12, 0.53, 0.9, 0.11, 0.32]
+      }
+      { name: "Resistive Touchscreen", name_v: [0.72, 0.89, 0.54, 0.15, 0.26] }
+      { name: "Fitness Band", name_v: [0.56, 0.91, 0.93, 0.71, 0.24] }
+      { name: "Smart Ring", name_v: [0.38, 0.62, 0.99, 0.44, 0.25] }
+    ]
+  ) {
+    user {
+      userID
+      name
+      name_v
+    }
+  }
+}
+```
+
+Note: The embeddings are generated outside of Dgraph using any suitable machine
+learning model.
+
+## Examples
+
+You can refer to the following
+[link](https://github.com/dgraph-io/dgraph/tree/main/graphql/schema/testdata/schemagen)
+for more examples.
diff --git a/dgraph/reference/graphql/mutations/update.mdx b/dgraph/reference/graphql/mutations/update.mdx
new file mode 100644
index 00000000..426d16fb
--- /dev/null
+++ b/dgraph/reference/graphql/mutations/update.mdx
@@ -0,0 +1,124 @@
+---
+title: Update Mutations
+description:
+  Update mutations let you update existing objects of a particular type. With
+  update mutations, you can filter nodes and set or remove any field belonging
+  to a type.
+---
+
+Update mutations let you update existing objects of a particular type. With
+update mutations, you can filter nodes and set or remove any field belonging to
+a type.
+
+We use the following schema to demonstrate some examples.
+
+**Schema**:
+
+```graphql
+type Author {
+  id: ID!
+  name: String! @search(by: [hash])
+  dob: DateTime
+  posts: [Post]
+}
+
+type Post {
+  postID: ID!
+  title: String! @search(by: [term, fulltext])
+  text: String @search(by: [fulltext, term])
+  datePublished: DateTime
+}
+```
+
+Dgraph automatically generates input and return types in the schema for the
+`update` mutation. Update mutations take `filter` as an input to select specific
+objects. You can specify `set` and `remove` operations on fields belonging to
+the filtered objects. It returns the state of the objects after updating.
+
+
+  Executing an empty `remove {}` or an empty `set {}` doesn't have any effect on
+  the update mutation.
+
+
+```graphql
+updatePost(input: UpdatePostInput!): UpdatePostPayload
+
+input UpdatePostInput {
+  filter: PostFilter!
+  set: PostPatch
+  remove: PostPatch
+}
+
+type UpdatePostPayload {
+  post(filter: PostFilter, order: PostOrder, first: Int, offset: Int): [Post]
+  numUids: Int
+}
+```
+
+### Set
+
+For example, an update `set` mutation using variables:
+
+```graphql
+mutation updatePost($patch: UpdatePostInput!)
{ + updatePost(input: $patch) { + post { + postID + title + text + } + } +} +``` + +Variables: + +```json +{ + "patch": { + "filter": { + "postID": ["0x123", "0x124"] + }, + "set": { + "text": "updated text" + } + } +} +``` + +### Remove + +For example an update `remove` mutation using variables: + +```graphql +mutation updatePost($patch: UpdatePostInput!) { + updatePost(input: $patch) { + post { + postID + title + text + } + } +} +``` + +Variables: + +```json +{ + "patch": { + "filter": { + "postID": ["0x123", "0x124"] + }, + "remove": { + "text": "delete this text" + } + } +} +``` + +### Examples + +You can refer to the following +[link](https://github.com/dgraph-io/dgraph/blob/main/graphql/resolve/update_mutation_test.yaml) +for more examples. diff --git a/dgraph/reference/graphql/mutations/upsert.mdx b/dgraph/reference/graphql/mutations/upsert.mdx new file mode 100644 index 00000000..ff8198a8 --- /dev/null +++ b/dgraph/reference/graphql/mutations/upsert.mdx @@ -0,0 +1,129 @@ +--- +title: Upsert Mutations +description: + Upsert mutations allow you to perform `add` or `update` operations based on + whether a particular ID exists in the database +--- + +Upsert mutations allow you to perform `add` or `update` operations based on +whether a particular `ID` exists in the database. The IDs must be external IDs, +defined using the `@id` directive in the schema. + +For example, to demonstrate how upserts work in GraphQL, take the following +schema: + +**Schema** + +```graphql +type Author { + id: String! @id + name: String! @search(by: [hash]) + posts: [Post] @hasInverse(field: author) +} + +type Post { + postID: String! @id + title: String! @search(by: [term, fulltext]) + text: String @search(by: [fulltext, term]) + author: Author! +} +``` + +Dgraph automatically generates input and return types in the schema for the +`add` mutation, as shown below: + +```graphql +addPost(input: [AddPostInput!]!, upsert: Boolean): AddPostPayload + +input AddPostInput { + postID: String! + title: String! + text: String + author: AuthorRef! +} +``` + +Suppose you want to update the `text` field of a post with the ID `mm2`. But you +also want to create a new post with that ID in case it doesn't already exist. To +do this, you use the `addPost` mutation, but with an additional input variable +`upsert`. + +This is a `Boolean` variable. Setting it to `true` will result in an upsert +operation. + +It will perform an `update` mutation and carry out the changes you specify in +your request if the particular ID exists. Otherwise, it will fall back to a +default `add` operation and create a new `Post` with that ID and the details you +provide. + +Setting `upsert` to `false` is the same as using a plain `add` operation—it'll +either fail or succeed, depending on whether the ID exists or not. + +**Example**: add mutation with `upsert` true: + +```graphql +mutation ($post: [AddPostInput!]!) { + addPost(input: $post, upsert: true) { + post { + postID + title + text + author { + id + } + } + } +} +``` + +With variables: + +```json +{ + "post": { + "postID": "mm2", + "title": "Second Post", + "text": "This is my second post, and updated with some new information.", + "author": { + "id": "micky" + } + } +} +``` + +If a post with the ID `mm2` exists, it will update the post with the new +details. Otherwise, it'll create a new `Post` with that ID and the values you +provided. 
In either case, you'll get the following response back:
+
+```json
+{
+  "data": {
+    "addPost": {
+      "post": [
+        {
+          "postID": "mm2",
+          "title": "Second Post",
+          "text": "This is my second post, and updated with some new information.",
+          "author": {
+            "id": "micky"
+          }
+        }
+      ]
+    }
+  }
+}
+```
+
+
+  The default value of `upsert` is `false`, for backward compatibility.
+
+
+
+  The current behavior of `add` and `update` mutations is such that they do not
+  update deeply-nested nodes, so `add` mutations with `upsert` set to `true`
+  only update values at the root level.
+
+
+## Examples
+
+You can refer to the following
+[link](https://github.com/dgraph-io/dgraph/blob/main/graphql/resolve/add_mutation_test.yaml)
+for more examples.
diff --git a/dgraph/reference/graphql/queries/aggregate.mdx b/dgraph/reference/graphql/queries/aggregate.mdx
new file mode 100644
index 00000000..e3dce9e1
--- /dev/null
+++ b/dgraph/reference/graphql/queries/aggregate.mdx
@@ -0,0 +1,191 @@
+---
+title: Aggregate Queries
+description:
+  Dgraph automatically generates aggregate queries for GraphQL schemas. These
+  are compatible with the @auth directive
+---
+
+Dgraph automatically generates aggregate queries for GraphQL schemas. Aggregate
+queries fetch aggregate data, including the following:
+
+- _Count queries_ that let you count fields satisfying certain criteria
+  specified using a filter.
+- _Advanced aggregate queries_ that let you calculate the maximum, minimum, sum
+  and average of specified fields.
+
+Aggregate queries are compatible with the `@auth` directive and follow the same
+authorization rules as the `query` keyword. You can also use filters with
+aggregate queries, as shown in some of the examples provided below.
+
+## Count queries at root
+
+For every `type` defined in a GraphQL schema, Dgraph generates an aggregate
+query `aggregate<Type>`. This query includes a `count` field, as well as
+[advanced aggregate query fields](#advanced-aggregate-queries-at-root).
+
+### Examples
+
+Example: Fetch the total number of `posts`.
+
+```graphql
+query {
+  aggregatePost {
+    count
+  }
+}
+```
+
+Example: Fetch the number of `posts` whose titles contain `GraphQL`.
+
+```graphql
+query {
+  aggregatePost(filter: { title: { anyofterms: "GraphQL" } }) {
+    count
+  }
+}
+```
+
+## Count queries for child nodes
+
+Dgraph also defines `<field>Aggregate` fields for every field which is of
+type `List[Type/Interface]` inside `query<Type>` queries, allowing you to
+do a `count` on fields, or to use the
+[advanced aggregate queries](#advanced-aggregate-queries-for-child-nodes).
+
+### Examples
+
+Example: Fetch the number of `posts` for all authors along with their `name`.
+
+```graphql
+query {
+  queryAuthor {
+    name
+    postsAggregate {
+      count
+    }
+  }
+}
+```
+
+Example: Fetch the number of `posts` with a `score` greater than `10` for all
+authors, along with their `name`
+
+```graphql
+query {
+  queryAuthor {
+    name
+    postsAggregate(filter: { score: { gt: 10 } }) {
+      count
+    }
+  }
+}
+```
+
+## Advanced aggregate queries at root
+
+For every `type` defined in the GraphQL schema, Dgraph generates an aggregate
+query `aggregate<Type>` that includes advanced aggregate query fields, and
+also includes a `count` field (see
+[Count queries at root](#count-queries-at-root)). Dgraph generates one or more
+advanced aggregate query fields (`<field>Min`, `<field>Max`,
+`<field>Sum` and `<field>Avg`) for fields in the schema that are typed
+as `Int`, `Float`, `String` and `Datetime`.
+
+  Advanced aggregate query fields are generated according to
+a field's type. Fields typed as `Int` and `Float` get the following query
+fields: `<field>Max`, `<field>Min`, `<field>Sum` and
+`<field>Avg`. Fields typed as `String` and `Datetime` only get the
+`<field>Max` and `<field>Min` query fields.
+
+### Examples
+
+Example: Fetch the average number of `posts` written by authors:
+
+```graphql
+query {
+  aggregateAuthor {
+    numPostsAvg
+  }
+}
+```
+
+Example: Fetch the total number of `posts` by all authors, and the maximum
+number of `posts` by any single `Author`:
+
+```graphql
+query {
+  aggregateAuthor {
+    numPostsSum
+    numPostsMax
+  }
+}
+```
+
+Example: Fetch the average number of `posts` for authors with more than 20
+`friends`:
+
+```graphql
+query {
+  aggregateAuthor(filter: { friends: { gt: 20 } }) {
+    numPostsAvg
+  }
+}
+```
+
+## Advanced aggregate queries for child nodes
+
+Dgraph also defines `<field>Aggregate` fields for child nodes
+within `query<Type>` queries. This is done for each field that is of type
+`List[Type/Interface]` inside `query<Type>` queries, letting you fetch
+minimums, maximums, averages and sums for those fields.
+
+  Aggregate query fields are generated according to a
+field's type. Fields typed as `Int` and `Float` get the following query
+fields: `<field>Max`, `<field>Min`, `<field>Sum` and
+`<field>Avg`. Fields typed as `String` and `Datetime` only get the
+`<field>Max` and `<field>Min` query fields.
+
+### Examples
+
+Example: Fetch the minimum, maximum and average `score` of the `posts` for each
+`Author`, along with each author's `name`.
+
+```graphql
+query {
+  queryAuthor {
+    name
+    postsAggregate {
+      scoreMin
+      scoreMax
+      scoreAvg
+    }
+  }
+}
+```
+
+Example: Fetch the date of the most recent post with a `score` greater than `10`
+for all authors, along with the author's `name`.
+
+```graphql
+query {
+  queryAuthor {
+    name
+    postsAggregate(filter: { score: { gt: 10 } }) {
+      datePublishedMax
+    }
+  }
+}
+```
+
+## Aggregate queries on null data
+
+Aggregate queries against empty data return `null`. This is true for both the
+`<field>Aggregate` fields and the `aggregate<Type>` queries generated by
+Dgraph.
+
+So, in the examples above, the following is true:
+
+- If there are no nodes of type `Author`, the `aggregateAuthor` query will
+  return null.
+- If an `Author` has not written any posts, the field `postsAggregate` will be
+  null for that `Author`.
diff --git a/dgraph/reference/graphql/queries/and-or-not.mdx b/dgraph/reference/graphql/queries/and-or-not.mdx
new file mode 100644
index 00000000..caf4555b
--- /dev/null
+++ b/dgraph/reference/graphql/queries/and-or-not.mdx
@@ -0,0 +1,102 @@
+---
+title: And, Or, and Not Operators in GraphQL
+description: Every GraphQL search filter can use AND, OR, and NOT operators.
+---
+
+Every GraphQL search filter can use `and`, `or`, and `not` operators.
+
+GraphQL syntax uses infix notation, so: "a and b" is `a, and: { b }`, "a or b or
+c" is `a, or: { b, or: c }`, and "not" is a prefix (`not:`).
+
+The following example queries demonstrate the use of `and`, `or`, and `not`
+operators:
+
+Example: _"Posts that do not have "GraphQL" in the title"_
+
+```graphql
+queryPost(filter: { not: { title: { allofterms: "GraphQL"} } } ) { ... }
+```
+
+Example: _"Posts that have "GraphQL" or "Dgraph" in the title"_
+
+```graphql
+queryPost(filter: {
+  title: { allofterms: "GraphQL"},
+  or: { title: { allofterms: "Dgraph" } }
+} ) { ... }
+```
+
+Example: _"Posts that have "GraphQL" and "Dgraph" in the title"_
+
+```graphql
+queryPost(filter: {
+  title: { allofterms: "GraphQL"},
+  and: { title: { allofterms: "Dgraph" } }
+} ) { ...
diff --git a/dgraph/reference/graphql/queries/and-or-not.mdx b/dgraph/reference/graphql/queries/and-or-not.mdx
new file mode 100644
index 00000000..caf4555b
--- /dev/null
+++ b/dgraph/reference/graphql/queries/and-or-not.mdx
@@ -0,0 +1,102 @@
---
title: And, Or, and Not Operators in GraphQL
description: Every GraphQL search filter can use AND, OR, and NOT operators.
---

Every GraphQL search filter can use `and`, `or`, and `not` operators.

GraphQL syntax uses infix notation, so: "a and b" is `a, and: { b }`, "a or b or
c" is `a, or: { b, or: c }`, and "not" is a prefix (`not:`).

The following example queries demonstrate the use of `and`, `or`, and `not`
operators:

Example: _"Posts that do not have "GraphQL" in the title"_

```graphql
queryPost(filter: { not: { title: { allofterms: "GraphQL"} } } ) { ... }
```

Example: _"Posts that have "GraphQL" or "Dgraph" in the title"_

```graphql
queryPost(filter: {
  title: { allofterms: "GraphQL"},
  or: { title: { allofterms: "Dgraph" } }
} ) { ... }
```

Example: _"Posts that have "GraphQL" and "Dgraph" in the title"_

```graphql
queryPost(filter: {
  title: { allofterms: "GraphQL"},
  and: { title: { allofterms: "Dgraph" } }
} ) { ... }
```

The `and` operator is implicit for a single filter object, if the fields don't
overlap. For example, above the `and` is required because `title` appears in
both filters; whereas below, `and` is not required.

```graphql
queryPost(filter: {
  title: { allofterms: "GraphQL" },
  datePublished: { ge: "2020-06-15" }
} ) { ... }
```

Example: _"Posts that have "GraphQL" in the title, or have the tag "GraphQL" and
mention "Dgraph" in the title"_

```graphql
queryPost(filter: {
  title: { allofterms: "GraphQL"},
  or: { title: { allofterms: "Dgraph" }, tags: { eq: "GraphQL" } }
} ) { ... }
```

The `and` and `or` filters both accept a list of filters. Per the GraphQL
specification, non-list filters are coerced into a list. This provides
backwards-compatibility while allowing for more complex filters.

Example: _"Query for posts that have `GraphQL` in the title but that lack the
`GraphQL` tag, or that have `Dgraph` in the title but lack the `Dgraph` tag"_

```graphql
queryPost(filter: {
  or: [
    { and: [{ title: { allofterms: "GraphQL" } }, { not: { tags: { eq: "GraphQL" } } }] }
    { and: [{ title: { allofterms: "Dgraph" } }, { not: { tags: { eq: "Dgraph" } } }] }
  ]
} ) { ... }
```

### Nesting

Nested logic with the same `and`/`or` conjunction can be simplified into a
single list.

For example, the following complex query:

```graphql
queryPost(filter: {
  or: [
    { or: [ { foo: { eq: "A" } }, { bar: { eq: "B" } } ] },
    { or: [ { baz: { eq: "C" } }, { quz: { eq: "D" } } ] }
  ]
} ) { ... }
```

...can be simplified into the following:

```graphql
queryPost(filter: {
  or: [
    { foo: { eq: "A" } },
    { bar: { eq: "B" } },
    { baz: { eq: "C" } },
    { quz: { eq: "D" } }
  ]
} ) { ... }
```
diff --git a/dgraph/reference/graphql/queries/cached-results.mdx b/dgraph/reference/graphql/queries/cached-results.mdx
new file mode 100644
index 00000000..1415a860
--- /dev/null
+++ b/dgraph/reference/graphql/queries/cached-results.mdx
@@ -0,0 +1,49 @@
---
title: Cached Results
description:
  Cached results can serve read-heavy workloads with complex queries to improve
  performance. This refers to external caching at the browser/CDN level
---

Cached results can be used to serve read-heavy workloads with complex queries to
improve performance. When cached results are enabled for a query, the stored
results are served if queried within the defined time-to-live (TTL) of the
cached query.

When using cached results, Dgraph will add the appropriate HTTP headers so the
caching can be done at the browser or content delivery network (CDN) level.

Caching refers to external caching at the browser/CDN level. Internal caching
at the database layer is not currently supported.

### Enabling cached results

To enable the external result cache, you need to add the
`@cacheControl(maxAge: int)` directive at the top of your query. This directive
adds the appropriate `Cache-Control` HTTP headers to the response, so that
browsers and CDNs can cache the results.

For example, the following query defines a cache with a TTL of 15 seconds.
```graphql
query @cacheControl(maxAge: 15) {
  queryReview(filter: { comment: { alloftext: "Fantastic" } }) {
    comment
    by {
      username
    }
    about {
      name
    }
  }
}
```

Dgraph's returned HTTP headers:

```
Cache-Control: public,max-age=15
Vary: Accept-Encoding
```
diff --git a/dgraph/reference/graphql/queries/cascade.mdx b/dgraph/reference/graphql/queries/cascade.mdx
new file mode 100644
index 00000000..8935dc25
--- /dev/null
+++ b/dgraph/reference/graphql/queries/cascade.mdx
@@ -0,0 +1,141 @@
---
title: "@cascade Directive"
description:
  "The @cascade directive can be applied to fields. With the @cascade directive,
  nodes that don’t have all fields specified in the query are removed"
---

The `@cascade` directive can be applied to fields. With the `@cascade`
directive, nodes that don’t have all fields specified in the query are removed.
This can be useful in cases where some filter was applied and some nodes might
not have all the listed fields.

For example, the query below only returns the authors which have both
`reputation` and `posts`, where posts have `text`. Note that `@cascade` trickles
down, so if it's applied at the `queryAuthor` level, it will automatically be
applied at the `posts` level too.

```graphql
{
  queryAuthor @cascade {
    reputation
    posts {
      text
    }
  }
}
```

### Pagination

Starting from v21.03, the `@cascade` directive supports pagination of query
results.

For example, to get the next 5 results after skipping the first 2, with
all the fields non-null:

```graphql
query {
  queryTask(first: 5, offset: 2) @cascade {
    name
    completed
  }
}
```

### Nested `@cascade`

`@cascade` can also be used at nested levels, so the query below would return
all authors but only those posts which have both `text` and `id`.

```graphql
{
  queryAuthor {
    reputation
    posts @cascade {
      id
      text
    }
  }
}
```

### Parameterized `@cascade`

The `@cascade` directive can optionally take a list of fields as an argument.
This changes the default behavior, considering only the supplied fields as
mandatory instead of all the fields for a type. Listed fields are automatically
cascaded as a required argument to nested selection sets.

In the example below, `name` is supplied in the `fields` argument. For an author
to be in the query response, it must have a `name`, and if it has a `country`
subfield, then that subfield must also have `name`.

```graphql
{
  queryAuthor @cascade(fields: ["name"]) {
    reputation
    name
    country {
      Id
      name
    }
  }
}
```

The query below only returns those `posts` which have a non-null `text` field.

```graphql
{
  queryAuthor {
    reputation
    name
    posts @cascade(fields: ["text"]) {
      title
      text
    }
  }
}
```

#### Nesting

The cascading nature of field selection is overridden by a nested `@cascade`.

For example, the query below ensures that an author has the `reputation` and
`name` fields, and, if it has a `posts` subfield, then that subfield must have a
`text` field.
```graphql
{
  queryAuthor @cascade(fields: ["reputation", "name"]) {
    reputation
    name
    dob
    posts @cascade(fields: ["text"]) {
      title
      text
    }
  }
}
```

#### Filtering

Filters can be used with the `@cascade` directive if they are placed before it:

```graphql
{
  queryAuthor(filter: { name: { anyofterms: "Alice Bob" } })
    @cascade(fields: ["reputation", "name"]) {
    reputation
    name
    dob
    posts @cascade(fields: ["text"]) {
      title
      text
    }
  }
}
```
diff --git a/dgraph/reference/graphql/queries/index.mdx b/dgraph/reference/graphql/queries/index.mdx
new file mode 100644
index 00000000..c5560213
--- /dev/null
+++ b/dgraph/reference/graphql/queries/index.mdx
@@ -0,0 +1,3 @@
---
title: Queries
---
diff --git a/dgraph/reference/graphql/queries/order-page.mdx b/dgraph/reference/graphql/queries/order-page.mdx
new file mode 100644
index 00000000..e25a7c71
--- /dev/null
+++ b/dgraph/reference/graphql/queries/order-page.mdx
@@ -0,0 +1,32 @@
---
title: Order and Pagination
description:
  Every type with fields whose types can be ordered gets ordering built into the
  query and any list fields of that type.
---

Every type with fields whose types can be ordered (`Int`, `Float`, `String`,
`DateTime`) gets ordering built into the query and any list fields of that type.
Every query and list field gets pagination with `first` and `offset`, and
ordering with the `order` parameter.

The `order` parameter is not required for pagination.

For example, find the most recent 5 posts:

```graphql
queryPost(order: { desc: datePublished }, first: 5) { ... }
```

Skip the first five recent posts and then get the next 10:

```graphql
queryPost(order: { desc: datePublished }, offset: 5, first: 10) { ... }
```

It's also possible to give multiple orders. For example, sort by date and,
within each date, order the posts by number of likes:

```graphql
queryPost(order: { desc: datePublished, then: { desc: numLikes } }, first: 5) { ... }
```
diff --git a/dgraph/reference/graphql/queries/persistent-queries.mdx b/dgraph/reference/graphql/queries/persistent-queries.mdx
new file mode 100644
index 00000000..d6d4a2da
--- /dev/null
+++ b/dgraph/reference/graphql/queries/persistent-queries.mdx
@@ -0,0 +1,80 @@
---
title: Persistent Queries
description:
  Persistent queries significantly improve the performance of an application as
  the smaller hash signature reduces bandwidth utilization.
---

Dgraph supports Persistent Queries. When a client uses persistent queries, the
client only sends the hash of a query to the server. The server has a list of
known hashes and uses the associated query accordingly.

Persistent queries significantly improve the performance and the security of an
application, since the smaller hash signature reduces bandwidth utilization and
speeds up client loading times.
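For example, instead of sending full query text like the following (a
hypothetical query used only for illustration), the client sends just the
query's SHA-256 hash, and the server looks up the stored query text:

```graphql
query {
  queryPost {
    id
    title
  }
}
```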
### Persisted Query logic

The execution of Persistent Queries follows this logic:

- If the `extensions` key is not provided in the `GET` request, Dgraph will
  process the request as usual
- If a `persistedQuery` exists under the `extensions` key, Dgraph will try to
  process a Persisted Query:
  - if no `sha256` hash is provided, process the query without persisting
  - if the `sha256` hash is provided, try to retrieve the persisted query

Example:

```json
{
  "persistedQuery": {
    "sha256Hash": "b952c19b894e1aa89dc05b7d53e15ab34ee0b3a3f11cdf3486acef4f0fe85c52"
  }
}
```

### Create

To create a Persistent Query, both `query` and `sha256` must be provided.

Dgraph will verify the hash and perform a lookup. If the query doesn't exist,
Dgraph will store the query, provided that the `sha256` of the query is correct.
Finally, Dgraph will process the query and return the results.

Example:

```sh
curl -g 'http://localhost:8080/graphql/?query={sample_query}&extensions={"persistedQuery":{"sha256Hash":"b952c19b894e1aa89dc05b7d53e15ab34ee0b3a3f11cdf3486acef4f0fe85c52"}}'
```

### Lookup

If only a `sha256` is provided, Dgraph will do a lookup, and process the query
if found. Otherwise, you'll get a `PersistedQueryNotFound` error.

Example:

```sh
curl -g 'http://localhost:8080/graphql/?extensions={"persistedQuery":{"sha256Hash":"b952c19b894e1aa89dc05b7d53e15ab34ee0b3a3f11cdf3486acef4f0fe85c52"}}'
```

### Usage with Apollo

You can create an [Apollo GraphQL](https://www.apollographql.com/) client with
persisted queries enabled. In the background, Apollo will send the same requests
as the ones shown previously.

For example:

```javascript
import { createPersistedQueryLink } from "apollo-link-persisted-queries";
import { createHttpLink } from "apollo-link-http";
import { InMemoryCache } from "apollo-cache-inmemory";
import ApolloClient from "apollo-client";

const link = createPersistedQueryLink().concat(createHttpLink({ uri: "/graphql" }));
const client = new ApolloClient({
  cache: new InMemoryCache(),
  link: link,
});
```
diff --git a/dgraph/reference/graphql/queries/queries-overview.mdx b/dgraph/reference/graphql/queries/queries-overview.mdx
new file mode 100644
index 00000000..d630ea77
--- /dev/null
+++ b/dgraph/reference/graphql/queries/queries-overview.mdx
@@ -0,0 +1,52 @@
---
title: Overview
description:
  Dgraph automatically generates GraphQL queries for each type that you define
  in your schema. There are three types of queries generated for each type.
---

How to use queries to fetch data from Dgraph.

Dgraph automatically generates GraphQL queries for each type that you define in
your schema. There are three types of queries generated for each type.

Example:

```graphql
type Post {
  id: ID!
  title: String! @search
  text: String
  score: Float @search
  completed: Boolean @search
  datePublished: DateTime @search(by: [year])
  author: Author!
}

type Author {
  id: ID!
  name: String! @search
  posts: [Post!]
  friends: [Author]
}
```

With the above schema, there would be three queries generated for Post and three
for Author. Here are the queries that are generated for the Post type:

```graphql
getPost(postID: ID!): Post
queryPost(filter: PostFilter, order: PostOrder, first: Int, offset: Int): [Post]
aggregatePost(filter: PostFilter): PostAggregateResult
```

The first query allows you to fetch a post and its related fields given an ID.
The second query allows you to fetch a list of posts based on some filters,
sorting and pagination parameters. The third query allows you to fetch aggregate
parameters like the count of nodes based on filters.
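The same pattern applies to `Author`. As a sketch, following the generated-API
naming convention shown above for `Post`, the three queries generated for
`Author` would be:

```graphql
getAuthor(id: ID!): Author
queryAuthor(filter: AuthorFilter, order: AuthorOrder, first: Int, offset: Int): [Author]
aggregateAuthor(filter: AuthorFilter): AuthorAggregateResult
```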
Additionally, a `check<Type>Password` query (such as `checkUserPassword` for a
`User` type) is generated for types that have been specified with a `@secret`
directive.

You can look at all the queries that are generated by using any GraphQL client
such as Insomnia or GraphQL Playground.
diff --git a/dgraph/reference/graphql/queries/search-filtering.mdx b/dgraph/reference/graphql/queries/search-filtering.mdx
new file mode 100644
index 00000000..4389a239
--- /dev/null
+++ b/dgraph/reference/graphql/queries/search-filtering.mdx
@@ -0,0 +1,321 @@
---
title: Search and Filtering
description:
  Queries generated for a GraphQL type allow you to fetch a single object or a
  list of objects for that type.
---

Queries generated for a GraphQL type allow you to fetch a single object or a
list of objects for that type.

### Get a single object

Fetch the `title`, `text` and `datePublished` for a post with id `0x1`.

```graphql
query {
  getPost(id: "0x1") {
    title
    text
    datePublished
  }
}
```

Fetching nested linked objects while using `get` queries is also easy. For
example, this is how you would fetch the authors for a post and their friends.

```graphql
query {
  getPost(id: "0x1") {
    id
    title
    text
    datePublished
    author {
      name
      friends {
        name
      }
    }
  }
}
```

While fetching nested linked objects, you can also apply a filter on them.

For example, the following query fetches the author with the `id` 0x1 and their
posts about `GraphQL`.

```graphql
query {
  getAuthor(id: "0x1") {
    name
    posts(filter: { title: { allofterms: "GraphQL" } }) {
      title
      text
      datePublished
    }
  }
}
```

If your type has a field with the `@id` directive applied to it, you can also
fetch objects using that.

For example, given the following schema, the query below fetches a user's `name`
and `age` by `userID` (which has the `@id` directive):

**Schema**:

```graphql
type User {
  userID: String! @id
  name: String!
  age: String
}
```

**Query**:

```graphql
query {
  getUser(userID: "0x2") {
    name
    age
  }
}
```

The `get` API on interfaces containing fields with the `@id` directive is
being deprecated and will be removed in v21.11. Users are advised to use the
`query` API instead.

### Query a list of objects

You can query a list of objects using GraphQL. For example, the following query
fetches the `title`, `text` and `datePublished` for all posts:

```graphql
query {
  queryPost {
    id
    title
    text
    datePublished
  }
}
```

The following example query fetches a list of posts by their post `id`:

```graphql
query {
  queryPost(filter: { id: ["0x1", "0x2", "0x3", "0x4"] }) {
    id
    title
    text
    datePublished
  }
}
```

### Query that filters objects by predicate

Before filtering an object by a predicate, you need to add a `@search` directive
to the field that will be used to filter the results.

For example, if you wanted to query events between two dates, or events that
fall within a certain radius of a point, you could have an `Event` schema, as
follows:

```graphql
type Event {
  id: ID!
  date: DateTime! @search
  location: Point @search
}
```
The search directive would let you filter events that fall within a date range,
as follows:

```graphql
query {
  queryEvent(filter: { date: { between: { min: "2020-01-01", max: "2020-02-01" } } }) {
    id
  }
}
```

You can also filter events that have a location near a certain point with the
following query:

```graphql
query {
  queryEvent(filter: { location: { near: { coordinate: { latitude: 37.771935, longitude: -122.469829 }, distance: 1000 } } }) {
    id
  }
}
```

You can also use connectors such as the `and` keyword to show results with
multiple filters applied. In the query below, we fetch posts that have `GraphQL`
in their title and have a `score > 100`.

This example assumes that the `Post` type has a `@search` directive applied to
the `title` field and the `score` field.

```graphql
query {
  queryPost(
    filter: { title: { anyofterms: "GraphQL" }, and: { score: { gt: 100 } } }
  ) {
    id
    title
    text
    datePublished
  }
}
```

### Filter a query for a list of objects

You can also filter nested objects while querying for a list of objects.

For example, the following query fetches all of the authors whose name contains
`Lee` and with their `completed` posts that have a score greater than `10`:

```graphql
query {
  queryAuthor(filter: { name: { anyofterms: "Lee" } }) {
    name
    posts(filter: { score: { gt: 10 }, and: { completed: true } }) {
      title
      text
      datePublished
    }
  }
}
```

### Filter a query for a range of objects with `between`

You can filter query results within an inclusive range of indexed and typed
scalar values using the `between` keyword.

This keyword is also supported for DQL; to learn more, see [DQL Functions:
`between`](/query-language/functions/#between).

For example, you might start with the following example schema used to track
students at a school:

**Schema**:

```graphql
type Student {
  age: Int @search
  name: String @search(by: [exact])
}
```

Using the `between` filter, you could fetch records for students who are between
10 and 20 years of age:

**Query**:

```graphql
queryStudent(filter: { age: { between: { min: 10, max: 20 } } }) {
  age
  name
}
```

You could also use this filter to fetch records for students whose names fall
alphabetically between `ba` and `hz`:

**Query**:

```graphql
queryStudent(filter: { name: { between: { min: "ba", max: "hz" } } }) {
  age
  name
}
```

### Filter to match specified field values with `in`

You can filter query results to find objects with one or more specified values
using the `in` keyword. This keyword can find matches for fields with the `@id`
directive applied. The `in` filter is supported for all data types such as
`string`, `enum`, `Int`, `Int64`, `Float`, and `DateTime`.

For example, let's say that your schema defines a `State` type that has the
`@id` directive applied to the `code` field:

```graphql
type State {
  code: String! @id
  name: String!
  capital: String
}
```

Using the `in` keyword, you can query for a list of states that have the postal
code **WA** or **VA** using the following query:

```graphql
query {
  queryState(filter: { code: { in: ["WA", "VA"] } }) {
    code
    name
  }
}
```

### Filter for objects with specified non-null fields using `has`

You can filter queries to find objects with a non-null value in a specified
field using the `has` keyword.
The `has` keyword can only check whether a field
has a non-null value; it can't check for specific field values.

For example, your schema might define a `Student` type that has basic
information about each student, such as their ID number, age, name, and email
address:

```graphql
type Student {
  tid: ID!
  age: Int!
  name: String
  email: String
}
```

To find those students who have a non-null `name`, run the following query:

```graphql
queryStudent(filter: { has: name }) {
  tid
  age
  name
}
```

You can also specify a list of fields, like the following:

```graphql
queryStudent(filter: { has: [name, email] }) {
  tid
  age
  name
  email
}
```

This would return `Student` objects where both `name` and `email` fields are
non-null.
diff --git a/dgraph/reference/graphql/queries/skip-include.mdx b/dgraph/reference/graphql/queries/skip-include.mdx
new file mode 100644
index 00000000..cb623e90
--- /dev/null
+++ b/dgraph/reference/graphql/queries/skip-include.mdx
@@ -0,0 +1,65 @@
---
title: "@skip and @include Directives"
description:
  "@skip and @include directives can be applied to query fields. They let you
  skip or include a field based on the value of the if argument."
---

`@skip` and `@include` directives can be applied to query fields. They allow you
to skip or include a field based on the value of the `if` argument that is
passed to the directive.

## @skip

In the query below, we fetch posts and decide whether to fetch the title for
them or not based on the `skipTitle` GraphQL variable.

GraphQL query

```graphql
query ($skipTitle: Boolean!) {
  queryPost {
    id
    title @skip(if: $skipTitle)
    text
  }
}
```

GraphQL variables

```json
{
  "skipTitle": true
}
```

## @include

Similarly, the `@include` directive can be used to include a field based on the
value of the `if` argument. The query below would only include the authors for a
post if the `includeAuthor` GraphQL variable is `true`.

GraphQL Query

```graphql
query ($includeAuthor: Boolean!) {
  queryPost {
    id
    title
    text
    author @include(if: $includeAuthor) {
      id
      name
    }
  }
}
```

GraphQL variables

```json
{
  "includeAuthor": false
}
```
diff --git a/dgraph/reference/graphql/queries/vector-similarity.mdx b/dgraph/reference/graphql/queries/vector-similarity.mdx
new file mode 100644
index 00000000..60c41fbe
--- /dev/null
+++ b/dgraph/reference/graphql/queries/vector-similarity.mdx
@@ -0,0 +1,76 @@
---
title: Similarity Search
description:
  Dgraph automatically generates GraphQL queries for each vector index that you
  define in your schema. There are two types of queries generated for each
  index.
---

Dgraph automatically generates two GraphQL similarity queries for each type that
has at least one [vector predicate](/graphql/schema/types/#vectors) with the
`@search` directive.

For example:

```graphql
type User {
  id: ID!
  name: String!
  name_v: [Float!]
    @embedding
    @search(by: ["hnsw(metric: euclidean, exponent: 4)"])
}
```

With the above schema, the auto-generated `querySimilar<Type>ByEmbedding`
query allows us to run similarity search using the vector index specified in our
schema.

```graphql
querySimilar<Type>ByEmbedding(
  by: vector_predicate,
  topK: n,
  vector: searchVector): [User]
```

For example, in order to find the top 3 users with names similar to a given user
name embedding, the following query can be used.
```graphql
querySimilarUserByEmbedding(by: name_v, topK: 3, vector: [0.1, 0.2, 0.3, 0.4, 0.5]) {
  id
  name
  vector_distance
}
```

The results obtained for this query include the 3 closest users ordered by
`vector_distance`. The `vector_distance` is the Euclidean distance between the
`name_v` embedding vector and the input vector used in our query.

Note: you can omit the `vector_distance` predicate in the query; the result will
still be ordered by `vector_distance`.

The distance metric used is specified in the index creation.

Similarly, the auto-generated `querySimilar<Type>ById` query allows us to
search for objects similar to an existing object, given its ID.

```graphql
querySimilar<Type>ById(
  by: vector_predicate,
  topK: n,
  id: userID): [User]
```

For example, the following query searches for the top 3 users whose names are
most similar to the name of the user with id "0xef7".

```graphql
querySimilarUserById(by: name_v, topK: 3, id: "0xef7") {
  id
  name
  vector_distance
}
```
diff --git a/dgraph/reference/graphql/quick-start/index.mdx b/dgraph/reference/graphql/quick-start/index.mdx
new file mode 100644
index 00000000..65f68e4b
--- /dev/null
+++ b/dgraph/reference/graphql/quick-start/index.mdx
@@ -0,0 +1,302 @@
---
title: Quick Start
description:
  Go from an empty Dgraph database to a running GraphQL API in just one step;
  just define the schema of your graph and how you’d like to search it; Dgraph
  does the rest.
---

When you write an application that implements GraphQL over a REST endpoint or
maybe over a relational database, you know that GraphQL issues many queries to
translate the REST/relational data into something that looks like a graph. You
also have to be familiar with the GraphQL types, fields, and resolvers. However,
with Dgraph you can generate a running GraphQL API with the associated graph
backend just by deploying the GraphQL schema of your API; Dgraph does the rest.

## Step 1: Run Dgraph

The easiest way to get Dgraph up and running is using
[Dgraph Cloud](https://cloud.dgraph.io).
You can log in to Dgraph Cloud using **Sign in with Google**, **Sign in with
GitHub**, or any other email account that you prefer to use.

1. In the Dgraph Cloud console, click **Launch new backend**.
1. Select a plan, cloud provider, and region that meets your requirements.
1. Type a name for your Dgraph Cloud instance.
1. Click **Launch**.
1. A new Dgraph Cloud backend is created for you. The URL listed in **GraphQL
   Endpoint** is the URL at which Dgraph Cloud serves data to your app. You can
   copy it at any time to use in a GraphQL client application.

## Step 2: Deploy a GraphQL Schema

1. In the [Schema](https://cloud.dgraph.io/_/schema) tab of the Dgraph Cloud
   console, paste the following schema:

```graphql
type Product {
  productID: ID!
  name: String @search(by: [term])
  reviews: [Review] @hasInverse(field: about)
}

type Customer {
  username: String! @id @search(by: [hash, regexp])
  reviews: [Review] @hasInverse(field: by)
}

type Review {
  id: ID!
  about: Product!
  by: Customer!
  comment: String @search(by: [fulltext])
  rating: Int @search
}
```

2. Click **Deploy**.

You now have a GraphQL API up and running and a graph database as a backend.
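From this schema alone, Dgraph generates a full set of operations. As a sketch,
following the generated-API naming convention described in the queries
overview, the API now includes, among others:

```graphql
getProduct(productID: ID!): Product
queryProduct(filter: ProductFilter, order: ProductOrder, first: Int, offset: Int): [Product]
aggregateProduct(filter: ProductFilter): ProductAggregateResult
addProduct(input: [AddProductInput!]!): AddProductPayload
```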
## Step 3: Test your GraphQL API

You can use the [API Explorer](https://cloud.dgraph.io/_/explorer) tab of your
Dgraph Cloud console, or you can access the `GraphQL endpoint` with any GraphQL
client such as
[GraphQL Playground](https://github.com/prisma-labs/graphql-playground),
[Insomnia](https://insomnia.rest/),
[GraphiQL](https://github.com/graphql/graphiql),
[Altair](https://github.com/imolorhe/altair) or Postman.

If you want to use those clients, copy the `GraphQL endpoint` from the
[Cloud dashboard](https://cloud.dgraph.io/_/dashboard).

You may want to use the introspection capability of the client to explore the
schema, queries, and mutations that were generated by Dgraph.

### A first GraphQL mutation

To populate the database:

1. Open the [API Explorer](https://cloud.dgraph.io/_/explorer) tab.
2. Paste the following code into the text area:

```graphql
mutation {
  addProduct(
    input: [
      { name: "GraphQL on Dgraph" }
      { name: "Dgraph: The GraphQL Database" }
    ]
  ) {
    product {
      productID
      name
    }
  }
  addCustomer(input: [{ username: "Michael" }]) {
    customer {
      username
    }
  }
}
```

3. Click **Execute Query**.

The GraphQL server returns a JSON response similar to this:

```json
{
  "data": {
    "addProduct": {
      "product": [
        {
          "productID": "0x2",
          "name": "GraphQL on Dgraph"
        },
        {
          "productID": "0x3",
          "name": "Dgraph: The GraphQL Database"
        }
      ]
    },
    "addCustomer": {
      "customer": [
        {
          "username": "Michael"
        }
      ]
    }
  },
  "extensions": {
    "requestID": "b155867e-4241-4cfb-a564-802f2d3808a6"
  }
}
```

### A second GraphQL mutation

Because the schema defined `Customer` with the field `username: String! @id`, the
`username` field acts like an ID, so we can identify customers just by their
names.

Products, on the other hand, have `productID: ID!`, so they get an
auto-generated ID, which is returned by the mutation.

1. Paste the following mutation in the text area of the
   [API Explorer](https://cloud.dgraph.io/_/explorer) tab.
2. Your ID for the product might be different from `0x2`. Make sure to replace
   the product ID with the ID from the response to the previous mutation.
3. Execute the mutation.

```graphql
mutation {
  addReview(
    input: [
      {
        by: { username: "Michael" }
        about: { productID: "0x2" }
        comment: "Fantastic, easy to install, worked great. Best GraphQL server available"
        rating: 10
      }
    ]
  ) {
    review {
      comment
      rating
      by {
        username
      }
      about {
        name
      }
    }
  }
}
```

This time, the mutation result queries for the author making the review and the
product being reviewed, so it's gone deeper into the graph to get the result
than just the mutation data.

```json
{
  "data": {
    "addReview": {
      "review": [
        {
          "comment": "Fantastic, easy to install, worked great. Best GraphQL server available",
          "rating": 10,
          "by": {
            "username": "Michael"
          },
          "about": {
            "name": "GraphQL on Dgraph"
          }
        }
      ]
    }
  },
  "extensions": {
    "requestID": "11bc2841-8c19-45a6-bb31-7c37c9b027c9"
  }
}
```

### GraphQL Queries

With Dgraph, you get powerful graph search built into your GraphQL API. The
schema for search is generated from the schema document that we started with and
automatically added to the GraphQL API for you.

Remember the definition of a review:

```graphql
type Review {
  ...
  comment: String @search(by: [fulltext])
  ...
}
```

The directive `@search(by: [fulltext])` tells Dgraph we want to be able to
search for comments with full-text search.

Dgraph took that directive and the other information in the schema, and built
queries and search into the API.

Let's find all the products that were easy to install.

1. Paste the following query in the text area of the
   [API Explorer](https://cloud.dgraph.io/_/explorer) tab.
1. Execute the query.

```graphql
query {
  queryReview(filter: { comment: { alloftext: "easy to install" } }) {
    comment
    by {
      username
    }
    about {
      name
    }
  }
}
```

What reviews did you get back? It'll depend on the data you added, but you'll at
least get the initial review we added.

Maybe you want to find reviews that describe the best GraphQL products and give
a high rating.

```graphql
query {
  queryReview(
    filter: { comment: { alloftext: "best GraphQL" }, rating: { ge: 10 } }
  ) {
    comment
    by {
      username
    }
    about {
      name
    }
  }
}
```

How about we find the customers with names starting with "Mich" and the five
products that each of those liked the most.

```graphql
query {
  queryCustomer(filter: { username: { regexp: "/Mich.*/" } }) {
    username
    reviews(order: { desc: rating }, first: 5) {
      comment
      rating
      about {
        name
      }
    }
  }
}
```

## Conclusion

Dgraph allows you to have a fully functional GraphQL API in minutes with a
highly performant graph backend to serve complex nested queries. Moreover, you
can update or change your schema freely and just re-deploy new versions. For
GraphQL in Dgraph, you just concentrate on defining the schema of your graph and
how you'd like to search that graph; Dgraph does the rest.

## What's Next

- Learn more about [GraphQL schema](./graphql/schema/_index) and Dgraph
  directives.
- Follow our [GraphQL tutorials](https://dgraph.io/learn/) to experience rapid
  application development by building more advanced apps.
diff --git a/dgraph/reference/graphql/schema/dgraph-schema.mdx b/dgraph/reference/graphql/schema/dgraph-schema.mdx
new file mode 100644
index 00000000..50c69ce4
--- /dev/null
+++ b/dgraph/reference/graphql/schema/dgraph-schema.mdx
@@ -0,0 +1,284 @@
---
title: Dgraph Schema Fragment
description:
  While editing your schema, this GraphQL schema fragment can be useful. It sets
  up the definitions of the directives that you’ll use in your schema.
---

While editing your schema, you might find it useful to include this GraphQL
schema fragment. It sets up the definitions of the directives, etc. (like
`@search`) that you'll use in your schema. If your editor is GraphQL aware, it
may give you errors if you don't have this available and context-sensitive help
if you do.

Don't include it in your input schema to Dgraph - use your editing environment
to set it up as an import. The details will depend on your setup.

```graphql
"""
The Int64 scalar type represents a signed 64‐bit numeric non‐fractional value.
Int64 can represent values in range [-(2^63),(2^63 - 1)].
"""
scalar Int64

"""
The DateTime scalar type represents date and time as a string in RFC3339 format.
For example: "1985-04-12T23:20:50.52Z" represents 20 minutes and 50.52 seconds after the 23rd hour of April 12th, 1985 in UTC.
"""
scalar DateTime

input IntRange {
  min: Int!
  max: Int!
}

input FloatRange {
  min: Float!
  max: Float!
}

input Int64Range {
  min: Int64!
  max: Int64!
}

input DateTimeRange {
  min: DateTime!
  max: DateTime!
}

input StringRange {
  min: String!
  max: String!
}

enum DgraphIndex {
  int
  int64
  float
  bool
  hash
  exact
  term
  fulltext
  trigram
  regexp
  year
  month
  day
  hour
  geo
}

input AuthRule {
  and: [AuthRule]
  or: [AuthRule]
  not: AuthRule
  rule: String
}

enum HTTPMethod {
  GET
  POST
  PUT
  PATCH
  DELETE
}

enum Mode {
  BATCH
  SINGLE
}

input CustomHTTP {
  url: String!
  method: HTTPMethod!
  body: String
  graphql: String
  mode: Mode
  forwardHeaders: [String!]
  secretHeaders: [String!]
  introspectionHeaders: [String!]
  skipIntrospection: Boolean
}

type Point {
  longitude: Float!
  latitude: Float!
}

input PointRef {
  longitude: Float!
  latitude: Float!
}

input NearFilter {
  distance: Float!
  coordinate: PointRef!
}

input PointGeoFilter {
  near: NearFilter
  within: WithinFilter
}

type PointList {
  points: [Point!]!
}

input PointListRef {
  points: [PointRef!]!
}

type Polygon {
  coordinates: [PointList!]!
}

input PolygonRef {
  coordinates: [PointListRef!]!
}

type MultiPolygon {
  polygons: [Polygon!]!
}

input MultiPolygonRef {
  polygons: [PolygonRef!]!
}

input WithinFilter {
  polygon: PolygonRef!
}

input ContainsFilter {
  point: PointRef
  polygon: PolygonRef
}

input IntersectsFilter {
  polygon: PolygonRef
  multiPolygon: MultiPolygonRef
}

input PolygonGeoFilter {
  near: NearFilter
  within: WithinFilter
  contains: ContainsFilter
  intersects: IntersectsFilter
}

input GenerateQueryParams {
  get: Boolean
  query: Boolean
  password: Boolean
  aggregate: Boolean
}

input GenerateMutationParams {
  add: Boolean
  update: Boolean
  delete: Boolean
}

directive @hasInverse(field: String!) on FIELD_DEFINITION
directive @search(by: [DgraphIndex!]) on FIELD_DEFINITION
directive @dgraph(
  type: String
  pred: String
) on OBJECT | INTERFACE | FIELD_DEFINITION
directive @id(interface: Boolean) on FIELD_DEFINITION
directive @withSubscription on OBJECT | INTERFACE | FIELD_DEFINITION
directive @secret(field: String!, pred: String) on OBJECT | INTERFACE
directive @auth(
  password: AuthRule
  query: AuthRule
  add: AuthRule
  update: AuthRule
  delete: AuthRule
) on OBJECT | INTERFACE
directive @custom(http: CustomHTTP, dql: String) on FIELD_DEFINITION
directive @remote on OBJECT | INTERFACE | UNION | INPUT_OBJECT | ENUM
directive @remoteResponse(name: String) on FIELD_DEFINITION
directive @cascade(fields: [String]) on FIELD
directive @lambda on FIELD_DEFINITION
directive @lambdaOnMutate(
  add: Boolean
  update: Boolean
  delete: Boolean
) on OBJECT | INTERFACE
directive @cacheControl(maxAge: Int!) on QUERY
directive @generate(
  query: GenerateQueryParams
  mutation: GenerateMutationParams
  subscription: Boolean
) on OBJECT | INTERFACE

input IntFilter {
  eq: Int
  in: [Int]
  le: Int
  lt: Int
  ge: Int
  gt: Int
  between: IntRange
}

input Int64Filter {
  eq: Int64
  in: [Int64]
  le: Int64
  lt: Int64
  ge: Int64
  gt: Int64
  between: Int64Range
}

input FloatFilter {
  eq: Float
  in: [Float]
  le: Float
  lt: Float
  ge: Float
  gt: Float
  between: FloatRange
}

input DateTimeFilter {
  eq: DateTime
  in: [DateTime]
  le: DateTime
  lt: DateTime
  ge: DateTime
  gt: DateTime
  between: DateTimeRange
}

input StringTermFilter {
  allofterms: String
  anyofterms: String
}

input StringRegExpFilter {
  regexp: String
}

input StringFullTextFilter {
  alloftext: String
  anyoftext: String
}

input StringExactFilter {
  eq: String
  in: [String]
  le: String
  lt: String
  ge: String
  gt: String
  between: StringRange
}

input StringHashFilter {
  eq: String
  in: [String]
}
```
diff --git a/dgraph/reference/graphql/schema/directives/auth.mdx b/dgraph/reference/graphql/schema/directives/auth.mdx
new file mode 100644
index 00000000..31fe178e
--- /dev/null
+++ b/dgraph/reference/graphql/schema/directives/auth.mdx
@@ -0,0 +1,12 @@
---
title: "@auth"
---

`@auth` allows you to define how to apply authorization rules on the
queries/mutations for a type.

Refer to [graphql endpoint security](./graphql/security/_index.md),
[RBAC rules](./RBAC-rules.md) and
[Graph traversal rules](./graphtraversal-rules.md) for details.

The `@auth` directive is not supported on `union` and `@remote` types.
diff --git a/dgraph/reference/graphql/schema/directives/deprecated.mdx b/dgraph/reference/graphql/schema/directives/deprecated.mdx
new file mode 100644
index 00000000..50adcbe0
--- /dev/null
+++ b/dgraph/reference/graphql/schema/directives/deprecated.mdx
@@ -0,0 +1,25 @@
---
title: "@deprecated"
---

The `@deprecated` directive allows you to tag the schema definition of a field
or enum value as deprecated, with an optional reason.

When you use the `@deprecated` directive, GraphQL users are notified that the
field or `enum` value is deprecated. Most GraphQL tools and clients will
pick up this notification and give you a warning if you try to use a deprecated
field.

### Example

For example, to mark `oldField` in the schema as deprecated:

```graphql
type MyType {
  id: ID!
  oldField: String
    @deprecated(reason: "oldField is deprecated. Use newField instead.")
  newField: String
  deprecatedField: String @deprecated
}
```
diff --git a/dgraph/reference/graphql/schema/directives/directive-dgraph.mdx b/dgraph/reference/graphql/schema/directives/directive-dgraph.mdx
new file mode 100644
index 00000000..7e2368c5
--- /dev/null
+++ b/dgraph/reference/graphql/schema/directives/directive-dgraph.mdx
@@ -0,0 +1,63 @@
---
title: "@dgraph"
---

The `@dgraph` directive customizes the names of the types and predicates
generated in Dgraph when deploying a GraphQL schema.

- `type @dgraph(type: "TypeNameToUseInDgraph")` controls what Dgraph type
  is used for a GraphQL type.
- `field: SomeType @dgraph(pred: "DgraphPredicate")` controls what Dgraph
  predicate is mapped to a GraphQL field.

For example, if you have existing types that don't match GraphQL requirements,
you can create a schema like the following.
```graphql
type Person @dgraph(type: "Human-Person") {
  name: String @search(by: [hash]) @dgraph(pred: "name")
  age: Int
}

type Movie @dgraph(type: "film") {
  name: String @search(by: [term]) @dgraph(pred: "film.name")
}
```

Which maps to the Dgraph schema:

```
type Human-Person {
  name
  Person.age
}

type film {
  film.name
}

name: string @index(hash) .
Person.age: int .
film.name: string @index(term) .
```

You might also have the situation where you have used `name` for both movie
names and people's names. In this case you can map fields in two different
GraphQL types to the one Dgraph predicate.

```graphql
type Person {
  name: String @dgraph(pred: "name")
  ...
}

type Movie {
  name: String @dgraph(pred: "name")
  ...
}
```

In Dgraph's current GraphQL implementation, if two fields are mapped to the
same Dgraph predicate, both should have the same `@search` directive.
diff --git a/dgraph/reference/graphql/schema/directives/directive-withsubscription.mdx b/dgraph/reference/graphql/schema/directives/directive-withsubscription.mdx
new file mode 100644
index 00000000..9164e2d0
--- /dev/null
+++ b/dgraph/reference/graphql/schema/directives/directive-withsubscription.mdx
@@ -0,0 +1,25 @@
---
title: "@withSubscription"
---

The `@withSubscription` directive enables the **subscription** operation on a
GraphQL type.

A subscription notifies your client of changes to back-end data using the
WebSocket protocol. Subscriptions are useful for getting low-latency, real-time
updates.

To enable subscriptions on any type, add the `@withSubscription` directive to the
schema as part of the type definition, as in the following example:

```graphql
type Todo @withSubscription {
  id: ID!
  title: String!
  description: String!
  completed: Boolean!
}
```
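With that schema deployed, a client can then subscribe to changes on `Todo`
nodes. A minimal sketch of such a subscription, assuming the generated
`queryTodo` field is exposed for subscriptions as it is for queries:

```graphql
subscription {
  queryTodo {
    id
    title
    completed
  }
}
```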
Refer to [GraphQL Subscriptions](./graphql/subscriptions) to learn how to use
subscriptions in your client application.
diff --git a/dgraph/reference/graphql/schema/directives/embedding.mdx b/dgraph/reference/graphql/schema/directives/embedding.mdx
new file mode 100644
index 00000000..9694b6f7
--- /dev/null
+++ b/dgraph/reference/graphql/schema/directives/embedding.mdx
@@ -0,0 +1,14 @@
---
title: "@embedding"
---

A `Float` array can be used as a vector using the `@embedding` directive. It
denotes a vector of floating point numbers, i.e., an ordered array of float32.

The embeddings can be defined on one or more predicates of a type, and they are
generated using suitable machine learning models.

This directive is used in conjunction with the `@search` directive to declare
the HNSW index. For more information see the
[@search](/graphql/schema/directives/search/#vector-embedding) directive for
vector embeddings.
diff --git a/dgraph/reference/graphql/schema/directives/generate.mdx b/dgraph/reference/graphql/schema/directives/generate.mdx
new file mode 100644
index 00000000..603c10c0
--- /dev/null
+++ b/dgraph/reference/graphql/schema/directives/generate.mdx
@@ -0,0 +1,61 @@
---
title: "@generate"
description:
  "The @generate directive specifies which GraphQL APIs are generated for a
  given type. Without it, all queries & mutations are generated except
  subscription."
---

The `@generate` directive is used to specify which GraphQL APIs are generated
for a given type.

Here's the GraphQL definition of the directive:

```graphql
input GenerateQueryParams {
  get: Boolean
  query: Boolean
  password: Boolean
  aggregate: Boolean
}

input GenerateMutationParams {
  add: Boolean
  update: Boolean
  delete: Boolean
}

directive @generate(
  query: GenerateQueryParams
  mutation: GenerateMutationParams
  subscription: Boolean
) on OBJECT | INTERFACE
```

The corresponding APIs are generated by setting the `Boolean` variables inside
the `@generate` directive to `true`. Passing `false` forbids the generation of
the corresponding APIs.

The default value of the `subscription` variable is `false`, while the default
value of all other variables is `true`. Therefore, if no `@generate` directive
is specified for a type, all queries and mutations except `subscription` are
generated.

## Example of @generate directive

```graphql
type Person
  @generate(
    query: { get: false, query: true, aggregate: false }
    mutation: { add: true, delete: false }
    subscription: false
  ) {
  id: ID!
  name: String!
}
```

The GraphQL schema above will generate a `queryPerson` query and `addPerson`
and `updatePerson` mutations. It won't generate `getPerson` or `aggregatePerson`
queries, or a `deletePerson` mutation, as these have been marked as `false` using
the `@generate` directive. Note that the `updatePerson` mutation is generated
because the default value of the `update` variable is `true`.
diff --git a/dgraph/reference/graphql/schema/directives/ids.mdx b/dgraph/reference/graphql/schema/directives/ids.mdx
new file mode 100644
index 00000000..164d7c2e
--- /dev/null
+++ b/dgraph/reference/graphql/schema/directives/ids.mdx
@@ -0,0 +1,141 @@
---
title: "@id"
description:
  "Dgraph database provides two types of identifiers: the ID scalar type and the
  @id directive."
---

Dgraph provides two types of built-in identifiers: the `ID` scalar type and the
`@id` directive.

- The `ID` scalar type is used when you don't need to set an identifier outside
  of Dgraph.
- The `@id` directive is used for external identifiers, such as email addresses.

## The `@id` directive

For some types, you'll need a unique identifier set from outside Dgraph. A
common example is a username.

The `@id` directive tells Dgraph to keep that field's values unique and use them
as identifiers.

For example, you might set the following type in a schema:

```graphql
type User {
  username: String! @id
  ...
}
```

Dgraph requires a unique username when creating a new user. It generates the
input type for `addUser` with `username: String!`, so you can't make an add
mutation without setting a username; and when processing the mutation, Dgraph
will ensure that the username isn't already set for another node of the `User`
type.

In a single-page app, you could render the page for `http://.../user/Erik` when
a user clicks to view the author bio page for that user. Your app can then use a
`getUser(username: "Erik") { ... }` GraphQL query to fetch the data and generate
the page.

Identities created with `@id` are reusable. If you delete an existing user, you
can reuse the username.

Fields with the `@id` directive must have the type `String!`.

As with `ID` types, Dgraph generates queries and mutations so you can query,
update, and delete data in nodes, using the fields with the `@id` directive as
references.

It's possible to use the `@id` directive on more than one field in a type.
For +example, you can define a type like the following: + +```graphql +type Book { + name: String! @id + isbn: String! @id + genre: String! + ... +} +``` + +You can then use multiple `@id` fields in arguments to `get` queries, and while +searching, these fields will be combined with the `AND` operator, resulting in a +Boolean `AND` operation. For example, for the above schema, you can send a +`getBook` query like the following: + +```graphql +query { + getBook(name: "The Metamorphosis", isbn: "9871165072") { + name + genre + ... + } +} +``` + +This will yield a positive response if both the `name` **and** `isbn` match any +data in the database. + +### `@id` and interfaces + +By default, if used in an interface, the `@id` directive will ensure field +uniqueness for each implementing type separately. In this case, the `@id` field +in the interface won't be unique for the interface but for each of its +implementing types. This allows two different types implementing the same +interface to have the same value for the inherited `@id` field. + +There are scenarios where this behavior might not be desired, and you may want +to constrain the `@id` field to be unique across all the implementing types. In +that case, you can set the `interface` argument of the `@id` directive to +`true`, and Dgraph will ensure that the field has unique values across all the +implementing types of an interface. + +For example: + +```graphql +interface Item { + refID: Int! @id(interface: true) # if there is a Book with refID = 1, then there can't be a chair with that refID. + itemID: Int! @id # If there is a Book with itemID = 1, there can still be a Chair with the same itemID. +} + +type Book implements Item { ... } +type Chair implements Item { ... } +``` + +In the above example, `itemID` won't be present as an argument to the `getItem` +query as it might return more than one `Item`. + + + `get` queries generated for an interface will have only the `@id(interface: + true)` fields as arguments. + + +## Combining `ID` and `@id` + +You can use both the `ID` type and the `@id` directive on another field +definition to have both a unique identifier and a generated identifier. + +For example, you might define the following type in a schema: + +```graphql +type User { + id: ID! + username: String! @id + ... +} +``` + +With this schema, Dgraph requires a unique `username` when creating a new user. +This schema provides the benefits of both of the previous examples above. Your +app can then use the `getUser(...) { ... }` query to provide either the +Dgraph-generated `id` or the externally-generated `username`. + + + If in a type there are multiple `@id` fields, then in a `get` query these + arguments will be optional. If in a type there's only one field defined with + either `@id` or `ID`, then that will be a required field in the `get` query's + arguments. + diff --git a/dgraph/reference/graphql/schema/directives/index.mdx b/dgraph/reference/graphql/schema/directives/index.mdx new file mode 100644 index 00000000..64aa58ae --- /dev/null +++ b/dgraph/reference/graphql/schema/directives/index.mdx @@ -0,0 +1,131 @@ +--- +title: Directives +--- + +The list of all directives supported by Dgraph. + +### @auth + +`@auth` allows you to define how to apply authorization rules on the +queries/mutation for a type. + +Reference: [Auth directive](./auth) + +### @cascade + +`@cascade` allows you to filter out certain nodes within a query. 
Reference: [Cascade](/graphql/queries/cascade)

### @custom

The `@custom` directive is used to define custom queries, mutations and fields.

Reference: [Custom directive](./graphql/custom/directive)

### @deprecated

The `@deprecated` directive lets you mark the schema definition of a field or
`enum` value as deprecated, and also lets you provide an optional reason for the
deprecation.

Reference: [Deprecation](./deprecated)

### @dgraph

The `@dgraph` directive tells us how to map fields within a type to existing
predicates inside Dgraph.

Reference: [@dgraph directive](./directive-dgraph)

### @embedding

The `@embedding` directive designates one or more fields as vector embeddings.

Reference: [@embedding directive](./embedding)

### @generate

The `@generate` directive is used to specify which GraphQL APIs are generated
for a type.

Reference: [Generate directive](./generate)

### @hasInverse

`@hasInverse` is used to set up two-way edges such that adding an edge in one
direction automatically adds the one in the inverse direction.

Reference: [Linking nodes in the graph](./graph-links)

### @id

The `@id` directive is used to annotate a field which represents a unique
identifier coming from outside of Dgraph.

Reference: [Identity](./ids)

### @include

The `@include` directive can be used to include a field based on the value of an
`if` argument.

Reference: [Include directive](./skip-include)

### @lambda

The `@lambda` directive allows you to call custom JavaScript resolvers. The
`@lambda` queries, mutations, and fields are resolved through the lambda
functions implemented on a given lambda server.

Reference: [Lambda directive](./lambda-overview)

### @remote

The `@remote` directive is used to annotate types for which data is not stored
in Dgraph. These types are typically used with custom queries and mutations.

Reference: [Remote directive](./directive.md#remote-types)

### @remoteResponse

The `@remoteResponse` directive allows you to annotate the fields of a `@remote`
type in order to map a custom query's JSON key response to a GraphQL field.

Reference: [Remote directive](./directive.md#remote-response)

### @search

`@search` allows you to perform filtering on a field while querying for nodes.

Reference: [Search](./search)

### @secret

The `@secret` directive is used to store secret information; it gets encrypted
and then stored in Dgraph.

Reference: [Password Type](./types.md#password-type)

### @skip

The `@skip` directive can be used to fetch a field based on the value of a
user-defined GraphQL variable.

Reference: [Skip directive](./skip-include)

### @withSubscription

The `@withSubscription` directive, when applied on a type, generates
subscription queries for it.

Reference: [Subscriptions](./subscriptions)

### @lambdaOnMutate

The `@lambdaOnMutate` directive allows you to listen to mutation
events (`add`/`update`/`delete`). Depending on the defined events and the
occurrence of a mutation event, `@lambdaOnMutate` triggers the appropriate
lambda function implemented on a given lambda server.
Reference: [LambdaOnMutate directive](./webhook)
diff --git a/dgraph/reference/graphql/schema/directives/search.mdx b/dgraph/reference/graphql/schema/directives/search.mdx
new file mode 100644
index 00000000..aeee4d7a
--- /dev/null
+++ b/dgraph/reference/graphql/schema/directives/search.mdx
@@ -0,0 +1,697 @@
---
title: Search and Filtering
description:
  What search can you build into your GraphQL API? Dgraph builds search into the
  fields of each type, so searching is available at deep levels in a query
---

The `@search` directive tells Dgraph what search to build into your GraphQL API.

When a type contains an `@search` directive, Dgraph constructs a search input
type and a query in the GraphQL `Query` type. For example, if the schema
contains

```graphql
type Post {
  ...
}
```

then Dgraph constructs a `queryPost` GraphQL query for querying posts. The
`@search` directives in the `Post` type control how Dgraph builds indexes and
what kinds of search it builds into `queryPost`. If the type contains

```graphql
type Post {
  ...
  datePublished: DateTime @search
}
```

then it's possible to filter posts with a date-time search like:

```graphql
query {
  queryPost(filter: { datePublished: { ge: "2020-06-15" }}) {
    ...
  }
}
```

If the type tells Dgraph to build search capability based on a term (word) index
for the `title` field

```graphql
type Post {
  ...
  title: String @search(by: [term])
}
```

then the generated GraphQL API will allow search by terms in the title.

```graphql
query {
  queryPost(filter: { title: { anyofterms: "GraphQL" }}) {
    ...
  }
}
```

Dgraph also builds search into the fields of each type, so searching is
available at deep levels in a query. For example, if the schema contained these
types

```graphql
type Post {
  ...
  title: String @search(by: [term])
}

type Author {
  name: String @search(by: [hash])
  posts: [Post]
}
```

then Dgraph builds GraphQL search such that a query can, for example, find an
author by name (from the hash search on `name`) and return only their posts that
contain the term "GraphQL".

```graphql
queryAuthor(filter: { name: { eq: "Diggy" } } ) {
  posts(filter: { title: { anyofterms: "GraphQL" }}) {
    title
  }
}
```

Dgraph can build search types with the ability to search between a range. For
example, with the above `Post` type and its `datePublished` field, a query can
find publish dates within a range:

```graphql
query {
  queryPost(filter: { datePublished: { between: { min: "2020-06-15", max: "2020-06-16" }}}) {
    ...
  }
}
```

Dgraph can also build into the GraphQL search the ability to match a value from
a list. For example, with the above `Author` type and its `name` field, a query
can return the authors that match a list:

```graphql
queryAuthor(filter: { name: { in: ["Diggy", "Jarvis"] } } ) {
  ...
}
```

Different searches are possible for each type, as explained below.

### Int, Float and DateTime

| argument | constructed filter                                |
| -------- | ------------------------------------------------- |
| none     | `lt`, `le`, `eq`, `in`, `between`, `ge`, and `gt` |

Search for fields of types `Int`, `Float` and `DateTime` is enabled by adding
`@search` to the field with no arguments. For example, if a schema contains:

```graphql
type Post {
  ...
  numLikes: Int @search
}
```

Dgraph generates search into the API for `numLikes` in two ways: a query for
posts and field search on any post list.
A field `queryPost` is added to the `Query` type of the schema.

```graphql
type Query {
  ...
  queryPost(filter: PostFilter, order: PostOrder, first: Int, offset: Int): [Post]
}
```

`PostFilter` will contain less than `lt`, less than or equal to `le`, equal
`eq`, in list `in`, between range `between`, greater than or equal to `ge`, and
greater than `gt` search on `numLikes`. Allowing, for example:

```graphql
query {
  queryPost(filter: { numLikes: { gt: 50 }}) {
    ...
  }
}
```

Also, any field with a type of list of posts has search options added to it. For
example, if the input schema also contained:

```graphql
type Author {
  ...
  posts: [Post]
}
```

Dgraph would insert search into `posts`, with

```graphql
type Author {
  ...
  posts(filter: PostFilter, order: PostOrder, first: Int, offset: Int): [Post]
}
```

That allows search within the GraphQL query. For example, to find Diggy's posts
with more than 50 likes:

```graphql
queryAuthor(filter: { name: { eq: "Diggy" } } ) {
  ...
  posts(filter: { numLikes: { gt: 50 }}) {
    title
    text
  }
}
```

### DateTime

| argument                          | constructed filters                               |
| --------------------------------- | ------------------------------------------------- |
| `year`, `month`, `day`, or `hour` | `lt`, `le`, `eq`, `in`, `between`, `ge`, and `gt` |

As well as `@search` with no arguments, `DateTime` also allows specifying how
the search index should be built: by year, month, day or hour. `@search`
defaults to year, but once you understand your data and query patterns, you
might want to change that, for example to `@search(by: [day])`.

### Boolean

| argument | constructed filter |
| -------- | ------------------ |
| none     | `true` and `false` |

Booleans can only be tested for true or false. If `isPublished: Boolean @search`
is in the schema, then the search allows

```graphql
filter: { isPublished: true }
```

and

```graphql
filter: { isPublished: false }
```
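Put together, a complete query using this Boolean filter (a minimal sketch
assuming the `Post` type above also has the `isPublished` field) would look
like:

```graphql
query {
  queryPost(filter: { isPublished: true }) {
    title
  }
}
```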
}
+}
+```
+
+will match all posts with both "GraphQL" and "tutorial" in the title, while
+`anyofterms: "GraphQL tutorial"` would match posts with either "GraphQL" or
+"tutorial".
+
+`fulltext` search is Google-style text search with stop words, stemming, etc. So
+`alloftext: "run woman"` would match "run" as well as "running", etc. For
+example, to find posts whose text talks about fantastic GraphQL tutorials:
+
+```graphql
+query {
+  queryPost(filter: { text: { alloftext: "fantastic GraphQL tutorials" } } ) { ... }
+}
+```
+
+#### Strings with multiple searches
+
+It's possible to add multiple string indexes to a field. For example, to search
+for authors by `eq` and regular expressions, add both options to the type
+definition, as follows.
+
+```graphql
+type Author {
+  ...
+  name: String! @search(by: [hash, regexp])
+}
+```
+
+### Enums
+
+| argument | constructed searches                                                   |
+| -------- | ---------------------------------------------------------------------- |
+| none     | `eq` and `in`                                                           |
+| `hash`   | `eq` and `in`                                                           |
+| `exact`  | `lt`, `le`, `eq`, `in`, `between`, `ge`, and `gt` (lexicographically)   |
+| `regexp` | `regexp` (regular expressions)                                          |
+
+Enums are serialized in Dgraph as strings. `@search` with no arguments is the
+same as `@search(by: [hash])` and provides `eq` and `in` searches. Also
+available for enums are `exact` and `regexp`. For hash and exact search on
+enums, the literal enum value, without quotes `"..."`, is used; for regexp,
+strings are required. For example:
+
+```graphql
+enum Tag {
+  GraphQL
+  Database
+  Question
+  ...
+}
+
+type Post {
+  ...
+  tags: [Tag!]! @search
+}
+```
+
+would allow
+
+```graphql
+query {
+  queryPost(filter: { tags: { eq: GraphQL } } ) { ... }
+}
+```
+
+which would find any post with the `GraphQL` tag.
+
+`@search(by: [exact, regexp])` would also admit `lt` etc. and
+
+```graphql
+query {
+  queryPost(filter: { tags: { regexp: "/.*aph.*/" } } ) { ... }
+}
+```
+
+which is helpful, for example, if the enums are something like product codes
+where regular expressions can match a number of values.
+
+### Geolocation
+
+There are 3 Geolocation types: `Point`, `Polygon` and `MultiPolygon`. All of
+them are searchable.
+
+The following table lists the generated filters for each type when you include
+`@search` on the corresponding field:
+
+| type           | constructed searches                       |
+| -------------- | ------------------------------------------ |
+| `Point`        | `near`, `within`                           |
+| `Polygon`      | `near`, `within`, `contains`, `intersects` |
+| `MultiPolygon` | `near`, `within`, `contains`, `intersects` |
+
+#### Example
+
+Take for example a `Hotel` type that has a `location` and an `area`:
+
+```graphql
+type Hotel {
+  id: ID!
+  name: String!
+  location: Point @search
+  area: Polygon @search
+}
+```
+
+#### near
+
+The `near` filter matches all entities where the location given by a field is
+within `distance` meters of a given `coordinate`.
+
+```graphql
+queryHotel(filter: {
+  location: {
+    near: {
+      coordinate: {
+        latitude: 37.771935,
+        longitude: -122.469829
+      },
+      distance: 1000
+    }
+  }
+}) {
+  name
+}
+```
+
+#### within
+
+The `within` filter matches all entities where the location given by a field is
+within a defined `polygon`.
+
+```graphql
+queryHotel(filter: {
+  location: {
+    within: {
+      polygon: {
+        coordinates: [{
+          points: [{
+            latitude: 11.11,
+            longitude: 22.22
+          }, {
+            latitude: 15.15,
+            longitude: 16.16
+          }, {
+            latitude: 20.20,
+            longitude: 21.21
+          }, {
+            latitude: 11.11,
+            longitude: 22.22
+          }]
+        }],
+      }
+    }
+  }
+}) {
+  name
+}
+```
+
+#### contains
+
+The `contains` filter matches all entities where the `Polygon` or `MultiPolygon`
+field contains another given `point` or `polygon`.
+
+
+  Only one `point` or `polygon` can be taken inside the `ContainsFilter` at a
+  time.
+
+
+A `contains` example using `point`:
+
+```graphql
+queryHotel(filter: {
+  area: {
+    contains: {
+      point: {
+        latitude: 0.5,
+        longitude: 2.5
+      }
+    }
+  }
+}) {
+  name
+}
+```
+
+A `contains` example using `polygon`:
+
+```graphql
+ queryHotel(filter: {
+  area: {
+    contains: {
+      polygon: {
+        coordinates: [{
+          points:[{
+            latitude: 37.771935,
+            longitude: -122.469829
+          }]
+        }],
+      }
+    }
+  }
+}) {
+  name
+}
```

+
+#### intersects
+
+The `intersects` filter matches all entities where the `Polygon` or
+`MultiPolygon` field intersects another given `polygon` or `multiPolygon`.
+
+
+  Only one `polygon` or `multiPolygon` can be given inside the
+  `IntersectsFilter` at a time.
+
+
+```graphql
+ queryHotel(filter: {
+  area: {
+    intersects: {
+      multiPolygon: {
+        polygons: [{
+          coordinates: [{
+            points: [{
+              latitude: 11.11,
+              longitude: 22.22
+            }, {
+              latitude: 15.15,
+              longitude: 16.16
+            }, {
+              latitude: 20.20,
+              longitude: 21.21
+            }, {
+              latitude: 11.11,
+              longitude: 22.22
+            }]
+          }, {
+            points: [{
+              latitude: 11.18,
+              longitude: 22.28
+            }, {
+              latitude: 15.18,
+              longitude: 16.18
+            }, {
+              latitude: 20.28,
+              longitude: 21.28
+            }, {
+              latitude: 11.18,
+              longitude: 22.28
+            }]
+          }]
+        }, {
+          coordinates: [{
+            points: [{
+              latitude: 91.11,
+              longitude: 92.22
+            }, {
+              latitude: 15.15,
+              longitude: 16.16
+            }, {
+              latitude: 20.20,
+              longitude: 21.21
+            }, {
+              latitude: 91.11,
+              longitude: 92.22
+            }]
+          }, {
+            points: [{
+              latitude: 11.18,
+              longitude: 22.28
+            }, {
+              latitude: 15.18,
+              longitude: 16.18
+            }, {
+              latitude: 20.28,
+              longitude: 21.28
+            }, {
+              latitude: 11.18,
+              longitude: 22.28
+            }]
+          }]
+        }]
+      }
+    }
+  }
+  }) {
+    name
+  }
```

+
+### Union
+
+Unions can be queried only as a field of a type. Union queries can't be ordered,
+but you can filter and paginate them.
+
+
+  Union queries do not support the `order` argument. The results will be ordered
+  by the `uid` of each node in ascending order.
+
+
+For example, the following schema will enable you to query the `members` union
+field in the `Home` type with filters and pagination.
+
+```graphql
+union HomeMember = Dog | Parrot | Human
+
+type Home {
+  id: ID!
+  address: String
+
+  members(filter: HomeMemberFilter, first: Int, offset: Int): [HomeMember]
+}
+
+# Not specifying a field in the filter input will be considered a null value for that field.
+input HomeMemberFilter {
+  # `homeMemberTypes` is used to specify which types to report back.
+  homeMemberTypes: [HomeMemberType]
+
+  # specifying a null value for this field means query all dogs
+  dogFilter: DogFilter
+
+  # specifying a null value for this field means query all parrots
+  parrotFilter: ParrotFilter
+  # note that there is no HumanFilter because the Human type wasn't filterable
+}
+
+enum HomeMemberType {
+  dog
+  parrot
+  human
+}
+
+input DogFilter {
+  id: [ID!]
+  category: Category_hash
+  breed: StringTermFilter
+  and: DogFilter
+  or: DogFilter
+  not: DogFilter
+}
+
+input ParrotFilter {
+  id: [ID!]
+  category: Category_hash
+  and: ParrotFilter
+  or: ParrotFilter
+  not: ParrotFilter
+}
+```
+
+
+  Not specifying any filter at all or specifying any of the `null` values for a
+  filter will query all members.
+
+
+The same example, but this time with filter and pagination arguments:
+
+```graphql
+query {
+  queryHome {
+    address
+    members(
+      filter: {
+        homeMemberTypes: [dog, parrot] # means we don't want to query humans
+        dogFilter: {
+          # means in Dogs, we only want to query "German Shepherd" breed
+          breed: { allofterms: "German Shepherd" }
+        }
+        # not specifying any filter for parrots means we want to query all parrots
+      }
+      first: 5
+      offset: 10
+    ) {
+      ... on Animal {
+        category
+      }
+      ... on Dog {
+        breed
+      }
+      ... on Parrot {
+        repeatsWords
+      }
+      ... on HomeMember {
+        name
+      }
+    }
+  }
+}
```

+
+### Vector embedding
+
+The `@search` directive is used in conjunction with the `@embedding` directive
+to define the HNSW index on vector embeddings. These vector embeddings are
+obtained from external Machine Learning models.
+
+```graphql
+type User {
+  userID: ID!
+  name: String!
+  name_v: [Float!]
+    @embedding
+    @search(by: ["hnsw(metric: euclidean, exponent: 4)"])
+}
```

+
+In this schema, the field `name_v` is an embedding on which the HNSW algorithm
+is used to create a vector search index.
+
+The metric used to compute the distance between vectors (in this example) is
+Euclidean distance. Other possible metrics are `cosine` and `dotproduct`.
+
+The `@embedding` directive designates one or more fields as vector embeddings.
+
+The `exponent` value is used to set reasonable defaults for HNSW internal tuning
+parameters. It is an integer representing an approximate number for the vectors
+expected in the index, in terms of power of 10. The default is `4` (10^4
+vectors).
diff --git a/dgraph/reference/graphql/schema/documentation.mdx b/dgraph/reference/graphql/schema/documentation.mdx
new file mode 100644
index 00000000..11491268
--- /dev/null
+++ b/dgraph/reference/graphql/schema/documentation.mdx
@@ -0,0 +1,75 @@
+---
+title: Documentation and Comments
+description:
+  Dgraph accepts GraphQL documentation comments, which get passed through to the
+  generated API and shown as documentation in GraphQL tools.
+---
+
+## Schema Documentation Processed by Generated API
+
+Dgraph accepts GraphQL documentation comments (e.g.
+`""" This is a graphql comment """`), which get passed through to the generated
+API and thus shown as documentation in GraphQL tools like GraphiQL, GraphQL
+Playground, Insomnia, etc.
+
+## Schema Documentation Ignored by Generated API
+
+You can also add `# ...` comments wherever you like. These comments are not
+passed via the generated API and are not visible in the API docs.
+
+## Reserved Namespace in Dgraph
+
+Any comment starting with `# Dgraph.` is **reserved** and **should not be used**
+to document your input schema.
+
+## An Example
+
+The following example adds comments to a type as well as to fields within the
+type.
+
+```graphql
+"""
+Author of questions and answers in a website
+"""
+type Author {
+  # ... username is the author name; this is an example of a dropped comment
+  username: String! @id
+  """
+  The questions submitted by this author
+  """
+  questions: [Question] @hasInverse(field: author)
+  """
+  The answers submitted by this author
+  """
+  answers: [Answer] @hasInverse(field: author)
+}
```

+
+It is also possible to add comments for queries or mutations that have been
+added via the custom directive.
+
+```graphql
+type Query {
+  """
+  This query involves a custom directive, and gets top authors.
+  """
+  getTopAuthors(id: ID!): [Author]
+    @custom(
+      http: {
+        url: "http://api.github.com/topAuthors"
+        method: "POST"
+        introspectionHeaders: ["Github-Api-Token"]
+        secretHeaders: ["Authorization:Github-Api-Token"]
+      }
+    )
+}
```

+
+The screenshots below show how the documentation appears in a GraphQL API
+explorer.
+
+![Schema Documentation On Types](/images/graphql/authors1.png)
+
+Schema Documentation on Types
+
+![Schema Documentation On Custom Directive](/images/graphql/CustomDirectiveDocumentation.png)
diff --git a/dgraph/reference/graphql/schema/graph-links.mdx b/dgraph/reference/graphql/schema/graph-links.mdx
new file mode 100644
index 00000000..a6720531
--- /dev/null
+++ b/dgraph/reference/graphql/schema/graph-links.mdx
@@ -0,0 +1,131 @@
+---
+title: Relationships
+description:
+  All the data in your app form a GraphQL data graph. That graph has nodes of
+  particular types and relationships between the nodes to form the data graph.
+---
+
+All the data in your app form a GraphQL data graph. That graph has nodes of
+particular types and relationships between the nodes to form the data graph.
+
+Dgraph uses the types and fields in the schema to work out how to link that
+graph, what to accept for mutations and what shape responses should take.
+
+Relationships in that graph are directed: either pointing in one direction or
+two. You use the `@hasInverse` directive to tell Dgraph how to handle a two-way
+relationship.
+
+### One-way relationship
+
+If you only ever need to traverse the graph between nodes in a particular
+direction, then your schema can simply contain the types and the relationship.
+
+In this schema, posts have an author - each post in the graph is linked to its
+author - but that relationship is one-way.
+
+```graphql
+type Author {
+  ...
+}
+
+type Post {
+  ...
+  author: Author
+}
```

+
+You'll be able to traverse the graph from a Post to its author, but not able to
+traverse from an author to all their posts. Sometimes that's the right choice,
+but mostly, you'll want two-way relationships.
+
+Note: Dgraph won't store the reverse direction, so if you change your schema to
+include a `@hasInverse`, you'll need to migrate the data to add the reverse
+edges.
+
+### Two-way relationship
+
+In Dgraph, the directive `@hasInverse` is used to create a two-way relationship.
+
+```graphql
+type Author {
+  ...
+  posts: [Post] @hasInverse(field: author)
+}
+
+type Post {
+  ...
+  author: Author
+}
```

+
+With that, `posts` and `author` are just two directions of the same link in the
+graph. For example, adding a new post with
+
+```graphql
+mutation {
+  addPost(input: [
+    { ..., author: { username: "diggy" }}
+  ]) {
+    ...
+  }
+}
```

+
+will automatically add it to Diggy's list of `posts`. Deleting the post will
+remove it from Diggy's `posts`. Similarly, using an update mutation on an author
+to insert a new post will automatically add Diggy as the author:
+
+```graphql
+mutation {
+  updateAuthor(input: {
+    filter: { username: { eq: "diggy "}},
+    set: { posts: [ {... new post ...}]}
+  }) {
+    ...
+  }
+}
```

+
+### Many edges
+
+It's not really possible to auto-detect what a schema designer meant for two-way
+edges. There may even be more than one possible relationship between two types.
+Consider, for example, if an app recorded the posts an `Author` had recently
+liked (so it can suggest interesting material) and just a tally of all likes on
+a post.
+
+```graphql
+type Author {
+  ...
+  posts: [Post]
+  recentlyLiked: [Post]
+}
+
+type Post {
+  ...
+  author: Author
+  numLikes: Int
+}
```

+
+It's not possible to detect which of these is meant as a one-way edge, or which
+edges are linked as a two-way connection. That's why `@hasInverse` is needed -
+so you can enforce the semantics your app needs.
+
+```graphql
+type Author {
+  ...
+  posts: [Post] @hasInverse(field: author)
+  recentlyLiked: [Post]
+}
+
+type Post {
+  ...
+  author: Author
+  numLikes: Int
+}
```

+
+Now, Dgraph will manage the connection between posts and authors, and you can
+get on with concentrating on what your app needs to do - suggesting interesting
+content to your users.
diff --git a/dgraph/reference/graphql/schema/index.mdx b/dgraph/reference/graphql/schema/index.mdx
new file mode 100644
index 00000000..8ff74348
--- /dev/null
+++ b/dgraph/reference/graphql/schema/index.mdx
@@ -0,0 +1,17 @@
+---
+title: Schema
+---
+
+This section describes all the things you can put in your input GraphQL schema,
+and what gets generated from that.
+
+The process for serving GraphQL with Dgraph is to add a set of GraphQL type
+definitions using the `/admin` endpoint. Dgraph takes those definitions,
+generates queries and mutations, and serves the generated GraphQL schema.
+
+The input schema may contain interfaces, types and enums that follow the usual
+GraphQL syntax and validation rules.
+
+If you want to make your schema editing experience nicer, you should use an
+editor that does syntax highlighting for GraphQL. With that, you may also want
+to include the definitions [here](/graphql/schema/dgraph-schema) as an import.
diff --git a/dgraph/reference/graphql/schema/migration.mdx b/dgraph/reference/graphql/schema/migration.mdx
new file mode 100644
index 00000000..f29360bf
--- /dev/null
+++ b/dgraph/reference/graphql/schema/migration.mdx
@@ -0,0 +1,230 @@
+---
+title: Schema Migration
+description:
+  This document describes all the things that you need to take care of while
+  doing a schema update or migration.
+---
+
+In every app's development lifecycle, there's a point where the underlying
+schema doesn't fit the requirements and must be changed for good. That requires
+a migration for both schema and the underlying data. This article will guide you
+through common migration scenarios you can encounter with Dgraph and help you
+avoid any pitfalls around them.
+
+These are the most common scenarios that can occur:
+
+- Renaming a type
+- Renaming a field
+- Changing a field's type
+- Adding `@id` to an existing field
+
+
+  Avoid migration as long as you can, because there can be scenarios where you
+  also need to update downstream clients, which can be hard. It's always best
+  to try things out first and push them to production only once you are
+  confident enough.
+
+
+### Renaming a type
+
+Let's say you had the following schema:
+
+```graphql
+type User {
+  id: ID!
+  name: String
+}
```

+
+and you had your application working fine with it. Now, you feel that the name
+`AppUser` would be more sensible than the name `User` because `User` seems a bit
+generic to you. Then you are in a situation where you need a migration.
+
+This can be handled in a couple of ways:
+
+1. Migrate all the data for type `User` to use the new name `AppUser`. OR,
+2. Just use the [`@dgraph(type: ...)`](/graphql/dgraph) directive to maintain
+   backward compatibility with the existing data.
+
+Depending on your use-case, you might find option 1 or 2 better for you.
For
+example, if you have accumulated very little data for the `User` type so far,
+you might want to go with option #1. But if you have an active application with
+a very large dataset, then updating each user's node may not be something you
+want to commit to, as that can require some maintenance downtime. So, option #2
+could be a better choice in such conditions.
+
+Option #2 makes your new schema compatible with your existing data. Here's an
+example:
+
+```graphql
+type AppUser @dgraph(type: "User") {
+  id: ID!
+  name: String
+}
```

+
+So, no downtime required. Migration is done by just updating your schema. Fast,
+easy, and simple.
+
+Note that, irrespective of what option you choose for migration on the Dgraph
+side, you will still need to migrate your GraphQL clients to use the new name in
+queries/mutations. For example, the query `getUser` would now be renamed to
+`getAppUser`. So, your downstream clients need to update that bit in the code.
+
+### Renaming a field
+
+Just like renaming a type, let's say you had the following working schema:
+
+```graphql
+type User {
+  id: ID!
+  name: String
+  phone: String
+}
```

+
+and now you have figured that it would be better to rename `phone` to `tel`. You
+need a migration.
+
+You have the same two choices as before:
+
+1. Migrate all the data for the field `phone` to use the new name `tel`. OR,
+2. Just use the [`@dgraph(pred: ...)`](/graphql/dgraph) directive to maintain
+   backward compatibility with the existing data.
+
+Here's an example if you want to go with option #2:
+
+```graphql
+type User {
+  id: ID!
+  name: String
+  tel: String @dgraph(pred: "User.phone")
+}
```

+
+Again, note that, irrespective of what option you choose for migration on the
+Dgraph side, you will still need to migrate your GraphQL clients to use the new
+name in queries/mutations. For example, the following query:
+
+```graphql
+query {
+  getUser(id: "0x05") {
+    name
+    phone
+  }
+}
```

+
+would now have to be changed to:
+
+```graphql
+query {
+  getUser(id: "0x05") {
+    name
+    tel
+  }
+}
```

+
+So, your downstream clients need to update that bit in the code.
+
+### Changing a field's type
+
+There can be multiple scenarios in this category:
+
+- List -> Single item
+- `String` -> `Int`
+- Any other combination you can imagine
+
+It is strongly advisable that you figure out a solid schema before going into
+production, so that you don't have to deal with such cases later. Nevertheless,
+if you end up in such a situation, you have to migrate your data to fit the new
+schema. There is no easy way around it.
+
+For example, say you initially had this schema:
+
+```graphql
+type Todo {
+  id: ID!
+  task: String
+  owner: Owner
+}
+
+type Owner {
+  name: String! @id
+  todo: [Todo] @hasInverse(field: "owner")
+}
```

+
+and later you decided that you want an owner to have only one todo at a time.
+So, you want to make your schema look like this:
+
+```graphql
+type Todo {
+  id: ID!
+  task: String
+  owner: Owner
+}
+
+type Owner {
+  name: String! @id
+  todo: Todo @hasInverse(field: "owner")
+}
```

+
+If you try updating your schema, you may end up getting an error like this:
+
+```txt
+resolving updateGQLSchema failed because succeeded in saving GraphQL schema but failed to alter Dgraph schema - GraphQL layer may exhibit unexpected behavior, reapplying the old GraphQL schema may prevent any issues: Schema change not allowed from [uid] => uid without deleting pred: owner.todo
```

+
+That is a red flag.
As the error message says, you should revert to the old
+schema to make your clients work correctly. In such cases, you should have
+migrated your data to fit the new schema _before_ applying the new schema. The
+steps for such a data migration vary from case to case, and so can't all be
+listed here; what you need to keep in mind while making such changes is that
+you must migrate your data first.
+
+### Adding `@id` to an existing field
+
+Let's say you had the following schema:
+
+```graphql
+type User {
+  id: ID!
+  username: String
+}
```

+
+and now you think that `username` must be unique for every user. So, you change
+the schema to this:
+
+```graphql
+type User {
+  id: ID!
+  username: String! @id
+}
```

+
+Now, here's the catch: with the old schema, it was possible that multiple users
+existed with the username `Alice`. If that is true, then queries would break in
+such cases. For example, if you run this query after the schema change:
+
+```graphql
+query {
+  getUser(username: "Alice") {
+    id
+  }
+}
```

+
+Then it might error out saying:
+
+```txt
+A list was returned, but GraphQL was expecting just one item. This indicates an internal error - probably a mismatch between the GraphQL and Dgraph/remote schemas. The value was resolved as null (which may trigger GraphQL error propagation) and as much other data as possible returned.
```

+
+So, while making such a schema change, you need to make sure that the underlying
+data really honors the uniqueness constraint on the username field. If not, you
+need to do a data migration to honor such constraints.
diff --git a/dgraph/reference/graphql/schema/reserved.mdx b/dgraph/reference/graphql/schema/reserved.mdx
new file mode 100644
index 00000000..3ab3e995
--- /dev/null
+++ b/dgraph/reference/graphql/schema/reserved.mdx
@@ -0,0 +1,57 @@
+---
+title: Reserved Names
+description:
+  This document provides the full list of names that are reserved and can’t be
+  used to define any other identifiers.
+---
+
+The following names are reserved and can't be used to define any other
+identifiers:
+
+- `Int`
+- `Float`
+- `Boolean`
+- `String`
+- `DateTime`
+- `ID`
+- `uid`
+- `Subscription`
+- `as` (case-insensitive)
+- `Query`
+- `Mutation`
+- `Point`
+- `PointList`
+- `Polygon`
+- `MultiPolygon`
+- `Aggregate` (as a suffix of any identifier name)
+
+For each type, Dgraph generates a number of GraphQL types needed to operate the
+GraphQL API; these generated type names also can't be present in the input
+schema. For example, for a type `Author`, Dgraph generates:
+
+- `AuthorFilter`
+- `AuthorOrderable`
+- `AuthorOrder`
+- `AuthorRef`
+- `AddAuthorInput`
+- `UpdateAuthorInput`
+- `AuthorPatch`
+- `AddAuthorPayload`
+- `DeleteAuthorPayload`
+- `UpdateAuthorPayload`
+- `AuthorAggregateResult`
+
+**Mutations**
+
+- `addAuthor`
+- `updateAuthor`
+- `deleteAuthor`
+
+**Queries**
+
+- `getAuthor`
+- `queryAuthor`
+- `aggregateAuthor`
+
+Thus if `Author` is present in the input schema, all of those become reserved
+type names.
diff --git a/dgraph/reference/graphql/schema/types.mdx b/dgraph/reference/graphql/schema/types.mdx
new file mode 100644
index 00000000..fb6bb311
--- /dev/null
+++ b/dgraph/reference/graphql/schema/types.mdx
@@ -0,0 +1,528 @@
+---
+title: Types
+description:
+  How to use GraphQL types to set a GraphQL schema for the Dgraph database.
+  Includes scalars, enums, types, interfaces, union, password, & geolocation
+  types.
+--- + +This page describes how to use GraphQL types to set the a GraphQL schema for +Dgraph database. + +### Scalars + +Dgraph's GraphQL implementation comes with the standard GraphQL scalar types: +`Int`, `Float`, `String`, `Boolean` and `ID`. There's also an `Int64` scalar, +and a `DateTime` scalar type that is represented as a string in RFC3339 format. + +Scalar types, including `Int`, `Int64`, `Float`, `String` and `DateTime`; can be +used in lists. Lists behave like an unordered set in Dgraph. For example: +`["e1", "e1", "e2"]` may get stored as `["e2", "e1"]`, so duplicate values will +not be stored and order might not be preserved. All scalars may be nullable or +non-nullable. + + + The `Int64` type introduced in release v20.11 represents a signed integer + ranging between `-(2^63)` and `(2^63 -1)`. Signed `Int64` values in this range + will be parsed correctly by Dgraph as long as the client can serialize the + number correctly in JSON. For example, a JavaScript client might need to use a + serialization library such as + [`json-bigint`](https://www.npmjs.com/package/json-bigint) to correctly write + an `Int64` value in JSON. + + +The `ID` type is special. IDs are auto-generated, immutable, and can be treated +as strings. Fields of type `ID` can be listed as nullable in a schema, but +Dgraph will never return null. + +- _Schema rule_: `ID` lists aren't allowed - e.g. `tags: [String]` is valid, but + `ids: [ID]` is not. +- _Schema rule_: Each type you define can have at most one field with type `ID`. + That includes IDs implemented through interfaces. + +It's not possible to define further scalars - you'll receive an error if the +input schema contains the definition of a new scalar. + +For example, the following GraphQL type uses all of the available scalars. + +```graphql +type User { + userID: ID! + name: String! + lastSignIn: DateTime + recentScores: [Float] + reputation: Int + active: Boolean +} +``` + +Scalar lists in Dgraph act more like sets, so `tags: [String]` would always +contain unique tags. Similarly, `recentScores: [Float]` could never contain +duplicate scores. + +### Vectors + +A Float array can be used as a vector using `@embedding` directive. It denotes a +vector of floating point numbers, i.e an ordered array of float32. A type can +contain more than one vector predicate. + +Vectors are normaly used to store embeddings obtained from an ML model. + +When a Float vector is indexed, the GraphQL `querySimilarByEmbedding` +and `querySimilarById` functions can be used for +[similarity search](./vector-similarity.md). + +A simple example of adding a vector embedding on `name` to `User` type is shown +below. + +```graphql +type User { + userID: ID! + name: String! + name_v: [Float!] + @embedding + @search(by: ["hnsw(metric: euclidean, exponent: 4)"]) +} +``` + +In this schema, the field `name_v` is an embedding on which the +[@search ](/graphql/schema/directives/search/#vector-embedding) directive for +vector embeddings is used. + +### The `ID` type + +In Dgraph, every node has a unique 64-bit identifier that you can expose in +GraphQL using the `ID` type. An `ID` is auto-generated, immutable and never +reused. Each type can have at most one `ID` field. + +The `ID` type works great when you need to use an identifier on nodes and don't +need to set that identifier externally (for example, posts and comments). + +For example, you might set the following type in a schema: + +```graphql +type Post { + id: ID! + ... 
+} +``` + +In a single-page app, you could generate the page for `http://.../posts/0x123` +when a user clicks to view the post with `ID` 0x123. Your app can then use a +`getPost(id: "0x123") { ... }` GraphQL query to fetch the data used to generate +the page. + +For input and output, `ID`s are treated as strings. + +You can also update and delete posts by `ID`. + +### Enums + +You can define enums in your input schema. For example: + +```graphql +enum Tag { + GraphQL + Database + Question + ... +} + +type Post { + ... + tags: [Tag!]! +} +``` + +### Types + +From the built-in scalars and the enums you add, you can generate types in the +usual way for GraphQL. For example: + +```graphql +enum Tag { + GraphQL + Database + Dgraph +} + +type Post { + id: ID! + title: String! + text: String + datePublished: DateTime + tags: [Tag!]! + author: Author! +} + +type Author { + id: ID! + name: String! + posts: [Post!] + friends: [Author] +} +``` + +- _Schema rule_: Lists of lists aren't accepted. For example: + `multiTags: [[Tag!]]` isn't valid. +- _Schema rule_: Fields with arguments are not accepted in the input schema + unless the field is implemented using the `@custom` directive. + +### Interfaces + +GraphQL interfaces allow you to define a generic pattern that multiple types +follow. When a type implements an interface, that means it has all fields of the +interface and some extras. + +According to GraphQL specifications, you can have the same fields in +implementing types as the interface. In such cases, the GraphQL layer will +generate the correct Dgraph schema without duplicate fields. + +If you repeat a field name in a type, it must be of the same type (including +list or scalar types), and it must have the same nullable condition as the +interface's field. Note that if the interface's field has a directive like +`@search` then it will be inherited by the implementing type's field. + +For example: + +```graphql +interface Fruit { + id: ID! + price: Int! +} + +type Apple implements Fruit { + id: ID! + price: Int! + color: String! +} + +type Banana implements Fruit { + id: ID! + price: Int! +} +``` + + + GraphQL will generate the correct Dgraph schema where fields occur only once. + + +The following example defines the schema for posts with comment threads. As +mentioned, Dgraph will fill in the `Question` and `Comment` types to make the +full GraphQL types. + +```graphql +interface Post { + id: ID! + text: String + datePublished: DateTime +} + +type Question implements Post { + title: String! +} +type Comment implements Post { + commentsOn: Post! +} +``` + +The generated schema will contain the full types, for example, `Question` and +`Comment` get expanded as: + +```graphql +type Question implements Post { + id: ID! + text: String + datePublished: DateTime + title: String! +} + +type Comment implements Post { + id: ID! + text: String + datePublished: DateTime + commentsOn: Post! +} +``` + + + If you have a type that implements two interfaces, Dgraph won't allow a field + of the same name in both interfaces, except for the `ID` field. + + +Dgraph currently allows this behavior for `ID` type fields since the `ID` type +field is not a predicate. Note that in both interfaces and the implementing +type, the nullable condition and type (list or scalar) for the `ID` field should +be the same. For example: + +```graphql +interface Shape { + id: ID! + shape: String! +} + +interface Color { + id: ID! + color: String! +} + +type Figure implements Shape & Color { + id: ID! + shape: String! 
+  color: String!
+  size: Int!
+}
```

+
+### Union type
+
+GraphQL Unions represent an object that could be one of a list of GraphQL Object
+types, but provide for no guaranteed fields between those types. So no fields
+may be queried on this type without the use of type-refining fragments or inline
+fragments.
+
+Union types have the potential to be invalid if incorrectly defined:
+
+- A `Union` type must include one or more unique member types.
+- The member types of a `Union` type must all be Object base types;
+  [Scalar](#scalars), [Interface](#interfaces) and `Union` types must not be
+  member types of a Union. Similarly, wrapping types must not be member types of
+  a Union.
+
+For example, the following defines the `HomeMember` union type:
+
+```graphql
+enum Category {
+  Fish
+  Amphibian
+  Reptile
+  Bird
+  Mammal
+  InVertebrate
+}
+
+interface Animal {
+  id: ID!
+  category: Category @search
+}
+
+type Dog implements Animal {
+  breed: String @search
+}
+
+type Parrot implements Animal {
+  repeatsWords: [String]
+}
+
+type Cheetah implements Animal {
+  speed: Float
+}
+
+type Human {
+  name: String!
+  pets: [Animal!]!
+}
+
+union HomeMember = Dog | Parrot | Human
+
+type Zoo {
+  id: ID!
+  animals: [Animal]
+  city: String
+}
+
+type Home {
+  id: ID!
+  address: String
+  members: [HomeMember]
+}
```

+
+So, when you want to query members in a `Home`, you will be able to do a GraphQL
+query like this:
+
+```graphql
+query {
+  queryHome {
+    address
+    members {
+      ... on Animal {
+        category
+      }
+      ... on Dog {
+        breed
+      }
+      ... on Parrot {
+        repeatsWords
+      }
+      ... on Human {
+        name
+      }
+    }
+  }
+}
```

+
+And the results of the GraphQL query will look like the following:
+
+```json
+{
+  "data": {
+    "queryHome": {
+      "address": "Earth",
+      "members": [
+        {
+          "category": "Mammal",
+          "breed": "German Shepherd"
+        },
+        {
+          "category": "Bird",
+          "repeatsWords": ["Good Morning!", "I am a GraphQL parrot"]
+        },
+        {
+          "name": "Alice"
+        }
+      ]
+    }
+  }
+}
```

+
+### Password type
+
+A password for an entity is set by setting the schema for the node type with the
+`@secret` directive. Passwords cannot be queried directly, only checked for a
+match using the `checkTypePassword` function, where `Type` is the node type. The
+passwords are encrypted using [Bcrypt](https://en.wikipedia.org/wiki/Bcrypt).
+
+
+  For security reasons, Dgraph enforces a minimum password length of 6
+  characters on `@secret` fields.
+
+
+For example, to set a password, first set the schema:
+
+1. Cut-and-paste the following schema into a file called `schema.graphql`
+
+   ```graphql
+   type Author @secret(field: "pwd") {
+     name: String! @id
+   }
+   ```
+
+2. Run the following curl request:
+
+   ```bash
+   curl -X POST localhost:8080/admin/schema --data-binary '@schema.graphql'
+   ```
+
+3.
Set the password by pointing to the `graphql` endpoint
+   (http://localhost:8080/graphql):
+   ```graphql
+   mutation {
+     addAuthor(input: [{ name: "myname", pwd: "mypassword" }]) {
+       author {
+         name
+       }
+     }
+   }
+   ```
+
+The output should look like:
+
+```json
+{
+  "data": {
+    "addAuthor": {
+      "author": [
+        {
+          "name": "myname"
+        }
+      ]
+    }
+  }
+}
```

+
+You can check a password:
+
+```graphql
+query {
+  checkAuthorPassword(name: "myname", pwd: "mypassword") {
+    name
+  }
+}
```

+
+output:
+
+```json
+{
+  "data": {
+    "checkAuthorPassword": {
+      "name": "myname"
+    }
+  }
+}
```

+
+If the password is wrong you will get the following response:
+
+```json
+{
+  "data": {
+    "checkAuthorPassword": null
+  }
+}
```

+
+### Geolocation types
+
+Dgraph GraphQL comes with built-in types to store Geolocation data. Currently,
+it supports `Point`, `Polygon` and `MultiPolygon`. These types are useful in
+scenarios like storing a location's GPS coordinates, representing a city on the
+map, etc.
+
+For example:
+
+```graphql
+type Hotel {
+  id: ID!
+  name: String!
+  location: Point
+  area: Polygon
+}
```

+
+#### Point
+
+```graphql
+type Point {
+  longitude: Float!
+  latitude: Float!
+}
```

+
+#### PointList
+
+```graphql
+type PointList {
+  points: [Point!]!
+}
```

+
+#### Polygon
+
+```graphql
+type Polygon {
+  coordinates: [PointList!]!
+}
```

+
+#### MultiPolygon
+
+```graphql
+type MultiPolygon {
+  polygons: [Polygon!]!
+}
```
diff --git a/dgraph/reference/graphql/security/RBAC-rules.mdx b/dgraph/reference/graphql/security/RBAC-rules.mdx
new file mode 100644
index 00000000..80d36832
--- /dev/null
+++ b/dgraph/reference/graphql/security/RBAC-rules.mdx
@@ -0,0 +1,146 @@
+---
+title: RBAC rules
+description:
+  Dgraph supports Role Based Access Control (RBAC) on GraphQL API operations.
+---
+
+Dgraph supports Role Based Access Control (RBAC) on GraphQL API operations: you
+can specify who can invoke query, add, update and delete operations on each type
+of your GraphQL schema based on JWT claims, using the `@auth` directive.
+
+To implement Role Based Access Control on GraphQL API operations:
+
+1. Ensure you have configured the GraphQL schema to
+   [Handle JWT tokens](./jwt.md) using `# Dgraph.Authorization`. This step is
+   important to be able to use the
+   [JWT claims](./graphql/security/_index.md#jwt-claims)
+2. Annotate the Types in the GraphQL schema with the `@auth` directive and
+   specify conditions to be met for `query`, `add`, `update` or `delete`
+   operations.
+3. Deploy the GraphQL schema either with a
+   [schema update](./graphql/admin.md#using-updategqlschema-to-add-or-modify-a-schema)
+   or via the Cloud console's [Schema](https://cloud.dgraph.io/_/schema) page.
+
+The generic format of an RBAC rule is as follows:
+
+```graphql
+type User @auth(
+  query: { rule: "{$: { eq: \"\" } }" },
+  add: { rule: "{$: { in: [\"\",...] } }" },
+  update: ...
+  delete: ...
+)
```

+
+An RBAC rule supports the `eq` or `in` functions to test the value of a
+[JWT claim](./graphql/security/_index.md#jwt-claims) from the JWT token payload.
+
+The claim value may be a string or array of strings.
+
+For example, the following schema has an `@auth` directive specifying that a
+delete operation on a `User` object can only be done if the connected user has a
+`ROLE` claim in the JWT token with the value "admin":
+
+```graphql
+type User @auth(delete: { rule: "{$ROLE: { eq: \"admin\" } }" }) {
+  username: String!
@id
+  todos: [Todo]
+}
```

+
+The following JWT token payload will pass the test (provided that
+Dgraph.Authorization is configured correctly with the right namespace)
+
+```json
+{
+  "aud": "dgraph",
+  "exp": 1695359621,
+  "https://dgraph.io/jwt/claims": {
+    "ROLE": "admin",
+    "USERID": "testuser@dgraph.io"
+  },
+  "iat": 1695359591,
+  ...
+}
```

+
+The rule also works with an array of roles in the JWT token:
+
+```json
+{
+  "aud": "dgraph",
+  "exp": 1695359621,
+  "https://dgraph.io/jwt/claims": {
+    "ROLE": ["admin","user"],
+    "USERID": "testuser@dgraph.io"
+  },
+  "iat": 1695359591,
+  ...
+}
```

+
+In the case of an array used with the `in` function, the rule is valid if at
+least one of the claim values is "in" the provided list.
+
+For example, with the following rule, the previous token will be valid because
+one of the `ROLE` values is in the authorized roles.
+
+```graphql
+type User
+  @auth(delete: { rule: "{$ROLE: { in: [\"admin\",\"superadmin\"] } }" }) {
+  username: String! @id
+  todos: [Todo]
+}
```

+
+## Rules combination
+
+Rules can be combined with the logical connectives `and`, `or` and `not`. A
+permission can be a mixture of graph traversals and role based rules.
+
+In the todo app, you can express, for example, that you can delete a `Todo` if
+you are the author, or are the site admin.
+
+```graphql
+type Todo
+  @auth(
+    delete: {
+      or: [
+        { rule: "query ($USER: String!) { ... }" } # you are the author graph query
+        { rule: "{$ROLE: { eq: \"ADMIN\" } }" }
+      ]
+    }
+  )
```

+
+## Claims
+
+Rules may use claims from the namespace specified by the
+[# Dgraph.Authorization](./jwt.md) or claims present at the root level of the
+JWT payload.
+
+For example, given the following JWT payload
+
+```json
+{
+  "https://xyz.io/jwt/claims": {
+    "ROLE": "ADMIN"
+  },
+  "email": "random@example.com"
+}
```

+
+If `https://xyz.io/jwt/claims` is declared as the namespace to use, the
+authorization rules can use `$ROLE` but also `$email`.
+
+In cases where the same claim is present in the namespace and at the root level,
+the claim value in the namespace takes precedence.
+
+## `@auth` on Interfaces
+
+The rules provided inside the `@auth` directive on an interface will be applied
+as an `AND` rule to those on the implementing types.
+
+A type inherits the `@auth` rules of all the implemented interfaces. The final
+authorization rule is an `AND` of the type's `@auth` rule and of all the
+implemented interfaces.
diff --git a/dgraph/reference/graphql/security/anonymous-access.mdx b/dgraph/reference/graphql/security/anonymous-access.mdx
new file mode 100644
index 00000000..fcba034c
--- /dev/null
+++ b/dgraph/reference/graphql/security/anonymous-access.mdx
@@ -0,0 +1,83 @@
+---
+title: Anonymous Access
+description:
+  Controlling **anonymous access** of the GraphQL endpoint is only available in
+  Dgraph Cloud.
+---
+
+
+  Controlling **anonymous access** of the GraphQL endpoint is only available in
+  Dgraph Cloud.
+
+
+## Turn Anonymous Access On and Off
+
+To turn `/graphql` endpoint anonymous access off:
+
+1. Go to the [Schema](https://cloud.dgraph.io/_/schema) section of the Dgraph
+   Cloud console.
+2. Open the [Access](https://cloud.dgraph.io/_/schema?tab=anon-access) tab.
+3. Set the `Anonymous Access` toggle to `On` or `Off`.
+
+With **Anonymous Access** turned `off`, any client accessing the `/graphql`
+endpoint must pass a valid client or admin
+[API Key](./cloud/admin/authentication) in the `DG-Auth` or `X-Auth-Token`
+header.
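+
+For example, with anonymous access off, a client request could look like the
+following sketch; the endpoint URL, API key value, and `queryPost` operation
+are placeholders for your own backend's details:
+
+```bash
+# Hypothetical request: the backend URL and <your-client-api-key> are
+# placeholders; the API key is sent in the DG-Auth header.
+curl https://your-backend.cloud.dgraph.io/graphql \
+  -X POST \
+  -H "Content-Type: application/json" \
+  -H "DG-Auth: <your-client-api-key>" \
+  -d '{"query": "{ queryPost { title } }"}'
```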
+
+With **Anonymous Access** turned `on` (the default configuration), you need to
+further define the permissions per `type` defined in your GraphQL Schema.
+
+## Edit GraphQL type operations access permissions
+
+When **Anonymous Access** is `on`, any newly deployed type will have `read` and
+`write` permissions for anonymous users.
+
+To control the anonymous access to operations:
+
+1. Open the [Access](https://cloud.dgraph.io/_/schema?tab=anon-access) tab in
+   the [Schema](https://cloud.dgraph.io/_/schema) section.
+2. Click on `Edit Permission`
+3. For every type defined in your GraphQL schema, Edit Permissions will show
+   check boxes to enable Anonymous Access to Read and Write.
+   - Check `Read` to allow anonymous clients to access the `get` and
+     `query` operations.
+   - Check `Write` to allow anonymous clients to access the `add`, `update`,
+     and `delete` operations.
+
+
+  Anonymous Access works as an access control layer one level above [RBAC
+  (Role Based Access Control)](./auth).
+
+
+Permission settings only apply to the parent type operations: it is still
+possible to read/write data of a type that has been set with no read/write
+permissions if a `parent` type is granted read/write access to anonymous
+clients.
+
+Consider the following Schema:
+
+```graphql
+type User {
+  id: ID
+  name: String!
+  posts: [Post] @hasInverse(field: "author")
+}
+type Post {
+  id: ID
+  title: String!
+  author: User
+}
```

+
+If Anonymous Access was granted Read and Write for `Post` but not granted Read
+and Write for `User`, it would still be possible to perform the following
+operation, which creates a new `User`.
+
+```graphql
+mutation addPost {
+  addPost(input: [{
+    title: "New Post Title"
+    author: { name: "New User Name" } # creates a new User node.
+  }]) {
+    numUids
+  }
+}
```
diff --git a/dgraph/reference/graphql/security/auth-tips.mdx b/dgraph/reference/graphql/security/auth-tips.mdx
new file mode 100644
index 00000000..c8775280
--- /dev/null
+++ b/dgraph/reference/graphql/security/auth-tips.mdx
@@ -0,0 +1,91 @@
+---
+title: Authorization tips
+description:
+  Given an authentication mechanism and a signed JSON Web Token (JWT), the
+  `@auth` directive tells Dgraph how to apply authorization.
+---
+
+## Public Data
+
+Many apps have data that can be accessed by anyone, logged in or not. That also
+works nicely with Dgraph auth rules.
+
+For example, in Twitter, StackOverflow, etc. you can see authors and posts
+without being signed in - but you'd need to be signed in to add a post. With
+Dgraph auth rules, if a type doesn't have, for example, a `query` auth rule or
+the auth rule doesn't depend on a JWT value, then the data can be accessed
+without a signed JWT.
+
+For example, the todo app might allow anyone, logged in or not, to view any
+author, but not make any mutations unless logged in as the author or an admin.
+That would be achieved by rules like the following.
+
+```graphql
+type User @auth(
+  # no query rule
+  add: { rule: "{$ROLE: { eq: \"ADMIN\" } }" },
+  update: ...
+  delete: ...
+) {
+  username: String! @id
+  todos: [Todo]
+}
```

+
+Maybe some todos can be marked as public and users who aren't logged in can see
+those.
+
+```graphql
+type Todo @auth(
+  query: { or: [
+    # you are the author
+    { rule: ... },
+    # or, the todo is marked as public
+    { rule: """query {
+      queryTodo(filter: { isPublic: { eq: true } } ) {
+        id
+      }
+    }"""}
+  ]}
+) {
+  ...
+  isPublic: Boolean
+}
+
```

+
+Because the rule doesn't depend on a JWT value, it can be successfully evaluated
+for users who aren't logged in.
+
+Ensuring that requests carry an authenticated JWT, with no further
+restrictions, can be done by arranging for the JWT to contain a value like
+`"isAuthenticated": "true"`. For example,
+
+```graphql
+type User @auth(query: { rule: "{$isAuthenticated: { eq: \"true\" } }" }) {
+  username: String! @id
+  todos: [Todo]
+}
```

+
+specifies that only authenticated users can query other users.
+
+### Blocking an operation for everyone
+
+If the `ROLE` claim isn't present in a JWT, any rule that relies on `ROLE`
+simply evaluates to false.
+
+You can also simply disallow some queries and mutations by using a condition on
+a non-existing claim:
+
+If you know that your JWTs never contain the claim `DENIED`, then a rule such as
+
+```graphql
+type User @auth(
+  delete: { rule: "{$DENIED: { eq: \"DENIED\" } }"}
+) {
+  ...
+}
```

+
+will block the delete operation for everyone.
diff --git a/dgraph/reference/graphql/security/cors.mdx b/dgraph/reference/graphql/security/cors.mdx
new file mode 100644
index 00000000..5b887a90
--- /dev/null
+++ b/dgraph/reference/graphql/security/cors.mdx
@@ -0,0 +1,28 @@
+---
+title: Restrict origins
+---
+
+To restrict origins of HTTP requests:
+
+1. Add lines starting with `# Dgraph.Allow-Origin` at the end of your GraphQL
+   schema specifying the origins allowed.
+2. Deploy the GraphQL schema either with a
+   [schema update](./graphql/admin.md#using-updategqlschema-to-add-or-modify-a-schema)
+   or via the Cloud console's [Schema](https://cloud.dgraph.io/_/schema) page.
+
+For example, the following will restrict all origins except the ones specified.
+
```
+# Dgraph.Allow-Origin "https://example.com"
+# Dgraph.Allow-Origin "https://www.example.com"
```

+
+`https://cloud.dgraph.io` is always allowed so that the `API explorer` in the
+Dgraph Cloud console continues to work.
+
+CORS restrictions only apply to browsers.
+
+
+  By default, the `/graphql` endpoint does not limit the request origin
+  (`Access-Control-Allow-Origin: *`).
+
diff --git a/dgraph/reference/graphql/security/graphtraversal-rules.mdx b/dgraph/reference/graphql/security/graphtraversal-rules.mdx
new file mode 100644
index 00000000..5aa5295d
--- /dev/null
+++ b/dgraph/reference/graphql/security/graphtraversal-rules.mdx
@@ -0,0 +1,191 @@
+---
+title: ABAC rules
+description:
+  "Dgraph supports Attribute Based Access Control (ABAC) on GraphQL API
+  operations: you can specify which data a user can query, add, update or delete
+  for each type of your GraphQL schema based on JWT claims, using the `@auth`
+  directive and graph traversal queries"
+---
+
+Dgraph supports Attribute Based Access Control (ABAC) on GraphQL API operations:
+you can specify which data a user can query, add, update or delete for each type
+of your GraphQL schema based on JWT claims, using the `@auth` directive and
+graph traversal queries.
+
+To implement graph traversal rules on GraphQL API operations:
+
+1. Ensure you have configured the GraphQL schema to
+   [Handle JWT tokens](./jwt.md) using `# Dgraph.Authorization`. This step is
+   important to be able to use the
+   [JWT claims](./graphql/security/_index.md#jwt-claims)
+2. Annotate the Types in the GraphQL schema with the `@auth` directive and
+   specify conditions to be met for `query`, `add`, `update` or `delete`
+   operations.
+3.
Deploy the GraphQL schema either with a
+   [schema update](./graphql/admin.md#using-updategqlschema-to-add-or-modify-a-schema)
+   or via the Cloud console's [Schema](https://cloud.dgraph.io/_/schema) page.
+
+A graph traversal rule is expressed as a GraphQL query on the type on which the
+`@auth` directive applies.
+
+For example, a rule on the `Contact` type can only use a `queryContact` query:
+
+```graphql
+type Contact @auth(
+  query: { rule: "query { queryContact(filter: { isPublic: true }) { id } }" },
+  add: ...
+  update: ...
+  delete: ...
+) {
+
+  ...
+}
```

+
+You can use triple quotation marks. In that case the query can be defined on
+multiple lines.
+
+The following schema is also valid:
+
+```graphql
+type Contact @auth(
+  query: { rule: """query {
+      queryContact(filter: { isPublic: true }) {
+        id
+      }
+    } """
+}) {
+
+  ...
+}
```

+
+The rules are expressed as GraphQL queries, so they can also have a name and
+parameters:
+
+```graphql
+type Todo
+  @auth(
+    query: {
+      rule: """
+      query ($USER: String!) {
+        queryTodo(filter: { owner: { eq: $USER } } ) {
+          id
+        }
+      }
+      """
+    }
+  ) {
+  id: ID!
+  text: String! @search(by: [term])
+  owner: String! @search(by: [hash])
+}
```

+
+The parameters are replaced at runtime by the corresponding `claims` found in
+the JWT token. In the previous case, the query will be executed with the value
+of the `USER` claim.
+
+When a user sends a request on the `/graphql` endpoint for a `get` or
+`query` operation, Dgraph executes the query specified in the `@auth`
+directive of the `Type` to build a list of "authorized" UIDs. Dgraph returns
+only the data matching both the requested data and the "authorized" list. That
+means that the client can apply any filter condition, the result will be the
+intersection of the data matching the filter and the "authorized" data.
+
+The same logic applies for `update` and `delete`: only the data
+matching the `@auth` query is affected.
+
+```graphql
+type Todo
+  @auth(
+    delete: {
+      or: [
+        {
+          rule: """
+          query ($USER: String!) {
+            queryTodo(filter: { owner: { eq: $USER } } ) {
+              __typename
+            }
+          }
+          """
+        } # you are the author graph query
+        { rule: "{$ROLE: { eq: \"ADMIN\" } }" }
+      ]
+    }
+  )
```

+
+In the context of the `@auth` directive, Dgraph executes the `@auth` query
+differently than a normal query: if the query has nested blocks, all levels must
+match existing data. Dgraph internally applies a `@cascade` directive, making
+the directive more like a **pattern matching** condition.
+
+For example, in the case of `Todo`, the access will depend not on a value in
+the todo, but on checking which owner it's linked to. This means our auth rule
+must take a step further into the graph to check who the owner is:
+
+```graphql
+type User {
+  username: String! @id
+  todos: [Todo]
+}
+
+type Todo
+  @auth(
+    query: {
+      rule: """
+      query ($USER: String!) {
+        queryTodo {
+          owner(filter: { username: { eq: $USER } } ) {
+            __typename
+          }
+        }
+      }
+      """
+    }
+  ) {
+  id: ID!
+  text: String!
+  owner: User
+}
```

+
+The `@auth` query rule will only return `Todos` having an owner matching the
+condition: the owner's `username` must be equal to the JWT claim `USER`.
+
+All blocks must return some data for the query to succeed. You may want to use
+the field `__typename` in the innermost block to ensure a data match at this
+level.
+
+### Rules combination
+
+Rules can be combined with the logical connectives `and`, `or` and `not`. A
+permission can be a mixture of graph traversals and role based rules.
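+
+For example, a sketch of an `update` permission on the `Todo` type from above
+that combines the owner graph traversal with a negated RBAC rule; the
+`READONLY` role is an illustrative assumption, not a built-in value:
+
+```graphql
+type Todo
+  @auth(
+    update: {
+      and: [
+        # graph traversal: the connected user owns the todo
+        {
+          rule: """
+          query ($USER: String!) {
+            queryTodo(filter: { owner: { eq: $USER } } ) {
+              __typename
+            }
+          }
+          """
+        }
+        # and the JWT must not carry the (illustrative) READONLY role
+        { not: { rule: "{$ROLE: { eq: \"READONLY\" } }" } }
+      ]
+    }
+  ) {
+  id: ID!
+  text: String!
+  owner: String! @search(by: [hash])
+}
```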
+
+### `@auth` on Interfaces
+
+The rules provided inside the `@auth` directive on an interface will be applied
+as an `AND` rule to those on the implementing types.
+
+A type inherits the `@auth` rules of all the implemented interfaces. The final
+authorization rule is an `AND` of the type's `@auth` rule and of all the
+implemented interfaces.
+
+### Claims
+
+Rules may use claims from the namespace specified by the
+[# Dgraph.Authorization](./jwt.md) or claims present at the root level of the
+JWT payload.
+
+### Error handling
+
+When deploying the schema, Dgraph tests if you are using valid queries in your
+`@auth` directive.
+
+For example, using `queryFilm` for a rule on a type `Actor` will lead to an
+error:
+
```
+resolving updateGQLSchema failed because Type Actor: @auth: expected only queryActor rules,but found queryFilm
```
diff --git a/dgraph/reference/graphql/security/index.mdx b/dgraph/reference/graphql/security/index.mdx
new file mode 100644
index 00000000..854e4738
--- /dev/null
+++ b/dgraph/reference/graphql/security/index.mdx
@@ -0,0 +1,109 @@
+---
+title: Security
+description:
+  Dgraph's GraphQL implementation comes with built-in authorization, and
+  supports various authentication methods, so you can annotate your schema with
+  rules that determine who can access or mutate the data
+---
+
+When you deploy a GraphQL schema, Dgraph automatically generates the query and
+mutation operations for each type and exposes them as a GraphQL API on the
+`/graphql` endpoint.
+
+Dgraph's GraphQL authorization features let you specify:
+
+- whether a client requires an API key, or **anonymous access** is allowed to
+  invoke a specific operation of the API.
+- if a client must present an identity in the form of a **JWT token** to use the
+  API.
+- **RBAC rules** (Role Based Access Control) at operation level based on the
+  claims included in the client JWT token.
+- **ABAC rules** (Attribute Based Access Control) at data level using graph
+  traversal queries.
+
+
+  By default, all operations are accessible to anonymous clients, no JWT token
+  is required and no authorization rules are applied. It is your responsibility
+  to correctly configure the authorization for the `/graphql` endpoint.
+
+
+Refer to the following documentation to set your `/graphql` endpoint security:
+
+- [Configure anonymous access](./anonymous-access)
+
+- [Handle JWT token](./jwt)
+
+- [RBAC rules](./RBAC-rules.md)
+
+- [ABAC rules](./graphtraversal-rules.md)
+
+### `/graphql` security flow
+
+In summary, the Dgraph security flow on the `/graphql` endpoint is as follows:
+
+![graphql endpoint security](/images/graphql/RBAC.jpeg)
+
+### CORS
+
+Additionally, you can [restrict the origins](./cors.md) that the `/graphql`
+endpoint responds to.
+
+This is a best practice to prevent XSS exploits.
+
+## Authentication
+
+Dgraph's GraphQL authorization relies on the presence of a valid JWT token in
+the request.
+
+Dgraph supports both symmetric (HS256) and asymmetric (RS256) encryption and
+accepts a JSON Web Key (JWK) URL or a signed JSON Web Token (JWT).
+
+You can use any authentication method that is capable of generating such a JWT
+token (Auth0, Cognito, Firebase, etc.), including Dgraph's login mechanism.
+
+### ACL
+
+Note that another token may be needed to access the system if ACL security is
+also enabled. See the [ACLs](./access-control-lists.md) section for details. The
+ACLs are a separate security mechanism.
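+
+For example, when ACLs and GraphQL authorization are both enabled, a request
+may need to carry two tokens. The sketch below assumes the authorization header
+was configured as `X-My-App-Auth` and uses an illustrative `queryTodo`
+operation; the token values are placeholders:
+
+```bash
+# Hypothetical request carrying both the ACL access token (obtained from an
+# ACL login) and the application JWT evaluated by the @auth rules.
+curl http://localhost:8080/graphql \
+  -X POST \
+  -H "Content-Type: application/json" \
+  -H "X-Dgraph-AccessToken: <jwt-from-acl-login>" \
+  -H "X-My-App-Auth: <application-jwt>" \
+  -d '{"query": "{ queryTodo { id text } }"}'
```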
+
+### JWT Claims
+
+In [JSON Web Tokens (JWTs)](https://www.rfc-editor.org/rfc/rfc7519), a claim
+appears as a name/value pair.
+
+When we talk about a claim in the context of a JWT, we are referring to the name
+(or key). For example, the following JSON object contains three claims `sub`,
+`name` and `admin`:
+
+```json
+{
+  "sub": "1234567890",
+  "name": "John Doe",
+  "admin": true
+}
```

+
+So that different organizations can specify different claims without
+conflicting, claims typically have a namespace, and it's a good practice to put
+your specific claims in a nested structure called a namespace.
+
```
+{
+  "https://mycompany.org/jwt/claims": {
+    "username": "auth0|63fe77f32cef38f4fa3dab34",
+    "role": "Admin"
+  },
+  "name": "raph@dgraph.io",
+  "email": "raph@dgraph.io",
+  "email_verified": false,
+  "iss": "https://dev-5q3n8cc7nckhu5w8.us.auth0.com/",
+  "aud": "aqk1CSVtliyoXUfLaaLKSKUtkaIel6Vd",
+  "iat": 1677705681,
+  "exp": 1677741681
+}
```

+
+This JSON is a JWT token payload containing the namespace
+`https://mycompany.org/jwt/claims`, which holds a `username` claim and a `role`
+claim.
diff --git a/dgraph/reference/graphql/security/jwt.mdx b/dgraph/reference/graphql/security/jwt.mdx
new file mode 100644
index 00000000..1dc1ec06
--- /dev/null
+++ b/dgraph/reference/graphql/security/jwt.mdx
@@ -0,0 +1,215 @@
+---
+title: Handle JWT Token
+---
+
+When deploying a GraphQL schema, the admin user can set a
+`# Dgraph.Authorization` line at the bottom of the schema to specify how JWT
+tokens present in HTTP request headers are extracted, validated and used.
+
+This line must start with the exact string `# Dgraph.Authorization` and be at
+the bottom of the schema file.
+
+## Configure JWT token handling
+
+To configure how Dgraph should handle JWT tokens for the `/graphql` endpoint:
+
+1. Add a line starting with `# Dgraph.Authorization` and with the following
+   parameters at the very end of your GraphQL schema.
+   The `Dgraph.Authorization` object uses the following syntax:
+
```
+# Dgraph.Authorization {"VerificationKey":"","Header":"X-My-App-Auth","Namespace":"https://my.app.io/jwt/claims","Algo":"HS256","Audience":["aud1"],"ClosedByDefault":true}
```

+
+The `Dgraph.Authorization` object contains the following parameters:
+
+- `Header` is the name of the header field used by the client to send the token.
+
+
+  Do not use the `Dg-Auth`, `X-Auth-Token` or `Authorization` headers, which are
+  used by Dgraph for other purposes.
+
+
+- `Namespace` is the key inside the JWT that contains the claims relevant to
+  Dgraph authorization.
+- `Algo` is the JWT verification algorithm which can be either `HS256` or
+  `RS256`.
+- `VerificationKey` is the string value of the key, with newlines replaced with
+  `\n` and the key string wrapped in `""`:
+  - **For asymmetric encryption**: `VerificationKey` contains the public key
+    string.
+  - **For symmetric (secret-based) encryption**: `VerificationKey` is the secret
+    key.
+- `JWKURL`/`JWKURLs` is the URL for the JSON Web Key sets. If you want to pass
+  multiple URLs, use `JWKURLs` as an array of multiple JWK URLs for the JSON Web
+  Key sets. You can only use one authentication connection method, either JWT
+  (`Header`), a single JWK URL, or multiple JWK URLs.
+- `Audience` is used to verify the `aud` field of a JWT, which is used by
+  certain providers to indicate the intended audience for the JWT. When doing
+  authentication with `JWKURL`, this field is mandatory.
+- `ClosedByDefault`, if set to `true`, requires authorization for all requests
+  even if the GraphQL type does not specify rules. If omitted, the default
+  setting is `false`.
+
+2. Deploy the GraphQL schema either with a
+   [schema update](./graphql/admin.md#using-updategqlschema-to-add-or-modify-a-schema)
+   or via the Cloud console's [Schema](https://cloud.dgraph.io/_/schema) page.
+
+When the `# Dgraph.Authorization` line is present in the GraphQL schema, Dgraph
+uses the settings in that line to:
+
+- read the specified header in each HTTP request sent on the `/graphql` endpoint,
+- decode that header as a JWT token using the specified algorithm (`Algo`),
+- validate the token signature and the audience,
+- extract the JWT claims present in the specified namespace and at the root
+  level.
+
+These claims are then accessible to any `@auth` schema directives (a GraphQL
+schema directive specific to Dgraph) that are associated with GraphQL types in
+the schema file.
+
+See the [RBAC rules](./RBAC-rules.md) and [Graph traversal
+rules](./graphtraversal-rules.md) for details on how to restrict data access
+using the `@auth` directive on a per-type basis.
+
+### Require JWT token
+
+To not only accept but require the JWT token, regardless of `@auth` directives
+in your GraphQL schema, set the option `ClosedByDefault` to `true` in the
+`# Dgraph.Authorization` line.
+
+## Working with Authentication providers
+
+`Dgraph.Authorization` is fully configurable to work with various authentication
+providers. Authentication providers have options to configure how to generate
+JWT tokens.
+
+Here are some configuration examples.
+
+### Clerk.com
+
+In your Clerk dashboard, access `JWT Templates` and create a template for
+Dgraph.
+
+Your template must have an `aud` (audience) claim; this is mandatory for Dgraph
+when the token is verified using a JWK URL.
+
+Decide on a claim namespace and add the information you want to use in your RBAC
+rules.
+
+We use the `https://dgraph.io/jwt/claims` namespace in this example and retrieve
+the user's current organization, role (Clerk currently has two roles, `admin`
+and `basic_member`), and email.
+
+This is our JWT Template in Clerk:
+
+```json
+{
+  "aud": "dgraph",
+  "https://dgraph.io/jwt/claims": {
+    "org": "{{org.name}}",
+    "role": "{{org.role}}",
+    "userid": "{{user.primary_email_address}}"
+  }
+}
+```
+
+In the same configuration panel:
+
+- set the **token lifetime**
+- copy the **JWKS Endpoint**
+
+Configure your Dgraph GraphQL schema with the following authorization:
+
+```
+# Dgraph.Authorization {"header":"X-Dgraph-AuthToken","namespace":"https://dgraph.io/jwt/claims","jwkurl":"https://<>.clerk.accounts.dev/.well-known/jwks.json","audience":["dgraph"],"closedbydefault":true}
+```
+
+Note that:
+
+- **namespace** matches the namespace used in the JWT Template
+- **audience** is an array and contains the **aud** used in the JWT token
+- **jwkurl** is the **JWKS Endpoint** from Clerk
+
+You can select the header used to receive the JWT token from your client app;
+`X-Dgraph-AuthToken` is a header authorized by default by the Dgraph GraphQL API
+to pass CORS requirements.
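+
+For reference, the decoded payload of a token minted from this template is
+shaped as follows. The values here are illustrative only; Clerk also adds
+standard claims such as `iss`, `iat`, and `exp`:
+
+```json
+{
+  "aud": "dgraph",
+  "https://dgraph.io/jwt/claims": {
+    "org": "Acme",
+    "role": "admin",
+    "userid": "jane@acme.dev"
+  },
+  "iss": "https://<>.clerk.accounts.dev",
+  "iat": 1677705681,
+  "exp": 1677741681
+}
+```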
+
+## Other Dgraph.Authorization Examples
+
+To use a single JWK URL:
+
+```
+# Dgraph.Authorization {"VerificationKey":"","Header":"X-My-App-Auth", "jwkurl":"https://www.googleapis.com/service_accounts/v1/jwk/securetoken@system.gserviceaccount.com", "Namespace":"https://xyz.io/jwt/claims","Algo":"","Audience":["fir-project1-259e7", "HhaXkQVRBn5e0K3DmMp2zbjI8i1wcv2e"]}
+```
+
+To use multiple JWK URLs:
+
+```
+# Dgraph.Authorization {"VerificationKey":"","Header":"X-My-App-Auth","jwkurls":["https://www.googleapis.com/service_accounts/v1/jwk/securetoken@system.gserviceaccount.com","https://dev-hr2kugfp.us.auth0.com/.well-known/jwks.json"], "Namespace":"https://xyz.io/jwt/claims","Algo":"","Audience":["fir-project1-259e7", "HhaXkQVRBn5e0K3DmMp2zbjI8i1wcv2e"]}
+```
+
+Using an HMAC-SHA256 (HS256) token in the `X-My-App-Auth` header and
+authorization claims in the `https://my.app.io/jwt/claims` namespace:
+
+```
+# Dgraph.Authorization {"VerificationKey":"secretkey","Header":"X-My-App-Auth","Namespace":"https://my.app.io/jwt/claims","Algo":"HS256"}
+```
+
+Using an RSA-SHA256 (RS256) token in the `X-My-App-Auth` header and
+authorization claims in the `https://my.app.io/jwt/claims` namespace:
+
+```
+# Dgraph.Authorization {"VerificationKey":"-----BEGIN PUBLIC KEY-----\n...\n-----END PUBLIC KEY-----","Header":"X-My-App-Auth","Namespace":"https://my.app.io/jwt/claims","Algo":"RS256"}
+```
+
+### JWT format
+
+The value of the JWT `header` is expected to be in one of the following forms:
+
+- A bare token.
+  For example:
+
+  ```
+  eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6IkpvaG4gRG9lIiwiaWF0IjoxNTE2MjM5MDIyLCJodHRwczovL215LmFwcC5pby9qd3QvY2xhaW1zIjp7fX0.Pjlxpf-3FhH61EtHBRo2g1amQPRi0pNwoLUooGbxIho
+  ```
+
+- A Bearer token, e.g., a JWT prepended with the `Bearer ` prefix (including the
+  space).
+  For example:
+  ```
+  Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6IkpvaG4gRG9lIiwiaWF0IjoxNTE2MjM5MDIyLCJodHRwczovL215LmFwcC5pby9qd3QvY2xhaW1zIjp7fX0.Pjlxpf-3FhH61EtHBRo2g1amQPRi0pNwoLUooGbxIho
+  ```
+
+### Error handling
+
+If `ClosedByDefault` is set to `true` and the JWT is not present, or if the JWT
+token does not include the proper audience information, is not properly
+encoded, or is expired, Dgraph replies to requests on the `/graphql` endpoint
+with an error message rejecting the operation, similar to:
+
+```
+{
+  "errors": [
+    {
+      "message": "couldn't rewrite query queryContact because a valid JWT is required but was not provided",
+      "path": [
+        "queryContact"
+      ]
+    }
+  ],
+  "data": {
+    "queryContact": []
+  },...
+```
+
+**Error messages**
+
+- "couldn't rewrite query queryContact because a valid JWT is required but was
+  not provided"
+- "couldn't rewrite query queryMessage because unable to parse jwt token:token
+  is expired by 5h49m46.236018623s"
+- "couldn't rewrite query queryMessage because JWT `aud` value doesn't match
+  with the audience"
+- "couldn't rewrite query queryMessage because unable to parse jwt token:token
+  signature is invalid"
diff --git a/dgraph/reference/graphql/security/mutations.mdx b/dgraph/reference/graphql/security/mutations.mdx
new file mode 100644
index 00000000..6e200a91
--- /dev/null
+++ b/dgraph/reference/graphql/security/mutations.mdx
@@ -0,0 +1,143 @@
+---
+title: Mutations and GraphQL Authorization
+description:
+  Mutations with authorization work like queries. But because mutations involve
+  a state change in the database, it's important to understand when the
+  authorization rules are applied and what they mean.
+---
+
+Mutations with authorization work like queries. But because mutations involve a
+state change in the database, it's important to understand when the
+authorization rules are applied and what they mean.
+
+## Add
+
+Rules for `add` authorization state that the rule must hold for nodes created by
+the mutation data once committed to the database.
+
+For example, a rule such as the following:
+
+```graphql
+type Todo
+  @auth(
+    add: {
+      rule: """
+      query ($USER: String!) {
+        queryTodo {
+          owner(filter: { username: { eq: $USER } } ) {
+            username
+          }
+        }
+      }
+      """
+    }
+  ) {
+  id: ID!
+  text: String!
+  owner: User
+}
+type User {
+  username: String! @id
+  todos: [Todo]
+}
+```
+
+... states that if you add a new to-do list item, then that new to-do must
+satisfy the `add` rule, in this case saying that you can only add to-do list
+items with yourself as the author.
+
+## Delete
+
+Delete rules filter the nodes that can be deleted. A user can only ever delete a
+subset of the nodes that the `delete` rules allow.
+
+For example, the following rule states that a user can delete a to-do list item
+if they own it, or they have the `ADMIN` role:
+
+```graphql
+type Todo
+  @auth(
+    delete: {
+      or: [
+        {
+          rule: """
+          query ($USER: String!) {
+            queryTodo {
+              owner(filter: { username: { eq: $USER } } ) {
+                username
+              }
+            }
+          }
+          """
+        }
+        { rule: "{$ROLE: { eq: \"ADMIN\" } }" }
+      ]
+    }
+  ) {
+  id: ID!
+  text: String! @search(by: [term])
+  owner: User
+}
+
+type User {
+  username: String! @id
+  todos: [Todo]
+}
+```
+
+When using these types of rules, a mutation such as the one shown below behaves
+differently, depending on which user runs it:
+
+- For most users, the following mutation deletes the posts that contain the term
+  "graphql" and are owned by the user who runs the mutation, but doesn't affect
+  any other user's to-do list items
+- For an admin user, the following mutation deletes any posts that contain the
+  term "graphql", regardless of which user owns these posts
+
+```graphql
+mutation {
+  deleteTodo(filter: { text: { anyofterms: "graphql" } }) {
+    numUids
+  }
+}
+```
+
+When adding data, what matters is the resulting state of the database; when
+deleting, what matters is the state before the delete occurs.
+
+## Update
+
+Updates have both a before and an after state that can be important for
+authorization.
+
+For example, consider a rule stating that you can only update your own to-do
+list items. If evaluated in the database before the mutation (like the delete
+rules), it would prevent you from updating anyone else's to-do list items, but
+it would not stop you from updating your own to-do items to have a different
+`owner`. If evaluated in the database after the mutation occurs, like for add
+rules, it would prevent setting the `owner` to another user, but would not
+prevent editing other users' posts.
+
+Currently, Dgraph evaluates `update` rules _before_ the mutation.
+
+## Update and add mutations
+
+Update mutations can also insert new data. For example, you might allow a
+mutation that runs an update mutation to add a new to-do list item:
+
+```graphql
+mutation {
+  updateUser(input: {
+    filter: { username: { eq: "aUser" }},
+    set: { todos: [ { text: "do this new todo"} ] }
+  }) {
+    ...
+  }
+}
+```
+
+Because a mutation updates a user's to-do list by inserting a new to-do list
+item, it must satisfy both the rules to update the user _and_ the rules to
+add a to-do list item. If either fails, the mutation has no effect.
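+
+For completeness, here is a sketch of an `update` rule written in the same
+style as the `add` and `delete` examples above (illustrative, not an
+additional requirement). Because Dgraph evaluates `update` rules _before_ the
+mutation, this rule constrains the pre-update state: only items currently
+owned by the logged-in user can be updated.
+
+```graphql
+type Todo
+  @auth(
+    update: {
+      rule: """
+      query ($USER: String!) {
+        queryTodo {
+          owner(filter: { username: { eq: $USER } } ) {
+            username
+          }
+        }
+      }
+      """
+    }
+  ) {
+  id: ID!
+  text: String!
+  owner: User
+}
+```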
+
+---
diff --git a/dgraph/reference/graphql/subscriptions/index.mdx b/dgraph/reference/graphql/subscriptions/index.mdx
new file mode 100644
index 00000000..55541904
--- /dev/null
+++ b/dgraph/reference/graphql/subscriptions/index.mdx
@@ -0,0 +1,229 @@
+---
+title: GraphQL Subscriptions
+description:
+  Subscriptions allow clients to listen to real-time messages from the server.
+  In GraphQL, it's straightforward to enable subscriptions on any type.
+---
+
+Subscriptions allow clients to listen to real-time messages from the server. The
+client connects to the server with a bi-directional communication channel using
+the WebSocket protocol and sends a subscription query that specifies which event
+it is interested in. When an event is triggered, the server executes the stored
+GraphQL query, and the result is sent back to the client using the same
+communication channel.
+
+The client can unsubscribe by sending a message to the server. The server can
+also unsubscribe at any time due to errors or timeouts. A significant difference
+between queries or mutations and subscriptions is that subscriptions are
+stateful and require maintaining the GraphQL document, variables, and context
+over the lifetime of the subscription.
+
+![Subscription](/images/graphql/subscription_flow.png "Subscription in GraphQL")
+
+## Enable subscriptions in GraphQL
+
+In GraphQL, it's straightforward to enable subscriptions on any type. You can
+add the `@withSubscription` directive to the schema as part of the type
+definition, as in the following example:
+
+```graphql
+type Todo @withSubscription {
+  id: ID!
+  title: String!
+  description: String!
+  completed: Boolean!
+}
+```
+
+## @withSubscription with @auth
+
+You can use [@auth](./graphql/schema/directives/auth) access control rules in
+conjunction with `@withSubscription`.
+
+Consider the following schema, which has both the `@withSubscription` and
+`@auth` directives defined on type `Todo`.
+
+```graphql
+type Todo
+  @withSubscription
+  @auth(
+    query: {
+      rule: """
+      query ($USER: String!) {
+        queryTodo(filter: { owner: { eq: $USER } } ) {
+          __typename
+        }
+      }
+      """
+    }
+  ) {
+  id: ID!
+  text: String! @search(by: [term])
+  owner: String! @search(by: [hash])
+}
+# Dgraph.Authorization {"Header":"X-Dgraph-AuthToken","Namespace":"https://dgraph.io/jwt/claims","jwkurl":"https://xyz.clerk.accounts.dev/.well-known/jwks.json","audience":["dgraph"],"ClosedByDefault":true}
+```
+
+The generated GraphQL API expects a JWT token in the `X-Dgraph-AuthToken` header
+and uses the `USER` claim to apply an access control rule: the
+authorization rule enforces that only to-do tasks owned by `$USER` are returned.
+
+## WebSocket client
+
+Dgraph uses the WebSocket subprotocol implemented by `subscriptions-transport-ws`.
+
+Clients must be instantiated using the WebSocket URL of the GraphQL API, which is
+your [Dgraph GraphQL endpoint](./graphql/graphql-clients/endpoint/_index.md)
+with `https` replaced by `wss`.
+
+If your Dgraph endpoint is
+`https://blue-surf-0033.us-east-1.aws.cloud.dgraph.io/graphql`, the WebSocket URL
+is `wss://blue-surf-0033.us-east-1.aws.cloud.dgraph.io/graphql`.
+
+If your GraphQL API is configured to expect a JWT token in a header, you must
+configure the WebSocket client to pass the token. Additionally, the subscription
+terminates when the JWT expires.
+
+The sections below show some example frontend client setups. The subscription
+document the client sends is plain GraphQL, as sketched next.
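+
+For the `Todo` type above, the client might send a document like the
+following; the operation name `watchTodos` is arbitrary. The server re-runs
+the stored query and pushes a fresh result over the WebSocket whenever
+matching data changes:
+
+```graphql
+subscription watchTodos {
+  queryTodo {
+    id
+    text
+    owner
+  }
+}
+```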
+
+### URQL client setup in a React application
+
+In this scenario, we are using the
+[urql client](https://formidable.com/open-source/urql/) and
+`subscriptions-transport-ws` modules.
+
+In order to use a GraphQL subscription query in a component, you need to:
+
+- instantiate a subscriptionClient
+- instantiate a URQL client with a 'subscriptionExchange' using the
+  subscriptionClient
+
+```js
+import {
+  Client,
+  Provider,
+  cacheExchange,
+  fetchExchange,
+  subscriptionExchange,
+} from "urql"
+import { SubscriptionClient } from "subscriptions-transport-ws"
+
+const subscriptionClient = new SubscriptionClient(
+  process.env.REACT_APP_DGRAPH_WSS,
+  { reconnect: true, connectionParams: { "X-Dgraph-AuthToken": props.token } },
+)
+
+const client = new Client({
+  url: process.env.REACT_APP_DGRAPH_ENDPOINT,
+  fetchOptions: { headers: { "X-Dgraph-AuthToken": `Bearer ${props.token}` } },
+  exchanges: [
+    cacheExchange,
+    fetchExchange,
+    subscriptionExchange({
+      forwardSubscription: (request) => subscriptionClient.request(request),
+    }),
+  ],
+})
+```
+
+In this example,
+
+- **process.env.REACT_APP_DGRAPH_ENDPOINT** is your
+  [Dgraph GraphQL endpoint](./graphql/graphql-clients/endpoint/_index.md)
+- **process.env.REACT_APP_DGRAPH_WSS** is the WebSocket URL
+- **props.token** is the JWT token of the logged-in user.
+
+Note that we are passing the JWT token in the GraphQL client using
+'fetchOptions' and in the WebSocket client using 'connectionParams'.
+
+Assuming we are using graphql-codegen, we can define a subscription query:
+
+```js
+import { graphql } from "../gql"
+
+export const TodoFragment = graphql(`
+  fragment TodoItem on Todo {
+    id
+    text
+  }
+`)
+
+export const TodoSubscription = graphql(`
+  subscription myTodo {
+    queryTodo(first: 100) {
+      ...TodoItem
+    }
+  }
+`)
+```
+
+and use it in a React component:
+
+```js
+import { useQuery, useSubscription } from "urql";
+...
+const [todos] = useSubscription({ query: TodoSubscription });
+
+```
+
+That's it: the React component is able to use `todos.data.queryTodo` to
+display the updated list of Todos.
+
+### Apollo client setup
+
+To learn about using subscriptions with Apollo client, see a blog post on
+[GraphQL Subscriptions with Apollo client](https://dgraph.io/blog/post/how-does-graphql-subscription/).
+
+To pass the user JWT token in the Apollo client, use `connectionParams`, as
+follows (the header name and token are placeholders):
+
+```javascript
+const wsLink = new WebSocketLink({
+  uri: `wss://${ENDPOINT}`,
+  options: {
+    reconnect: true,
+    connectionParams: { "<header-name>": "<JWT-token>" },
+  },
+});
+```
+
+Use the header expected by the Dgraph.Authorization configuration of your
+GraphQL schema.
+
+## Subscriptions to custom DQL
+
+You can also apply the `@withSubscription` directive to custom DQL queries by
+specifying `@withSubscription` on individual DQL queries in `type Query`, and
+those queries will be added to `type Subscription`.
+
+For example, see the custom DQL query `queryUserTweetCounts` below:
+
+```graphql
+type Query {
+  queryUserTweetCounts: [UserTweetCount]
+    @withSubscription
+    @custom(
+      dql: """
+      query {
+        queryUserTweetCounts(func: type(User)) {
+          screen_name: User.screen_name
+          tweetCount: count(User.tweets)
+        }
+      }
+      """
+    )
+}
+```
+
+`queryUserTweetCounts` is added to the `Subscription` type, allowing users to
+subscribe to this query.
+
+  Currently, Dgraph only supports subscriptions on custom **DQL queries**. You
+  can't subscribe to custom **HTTP queries**.
+
+  Starting in release v21.03, Dgraph supports compression for subscriptions.
+  Dgraph uses `permessage-deflate` compression if the GraphQL client's
+  `Sec-Websocket-Extensions` request header includes `permessage-deflate`, as
+  follows: `Sec-WebSocket-Extensions: permessage-deflate`.
+
diff --git a/dgraph/reference/howto/commandline/about-cli.mdx b/dgraph/reference/howto/commandline/about-cli.mdx
new file mode 100644
index 00000000..db0b5189
--- /dev/null
+++ b/dgraph/reference/howto/commandline/about-cli.mdx
@@ -0,0 +1,28 @@
+---
+title: Command-line completion
+---
+
+Command-line completion is a feature in shells such as `bash` or `zsh` that
+saves you extra typing and helps out when you cannot remember a command’s
+syntax. This functionality automatically fills in partially typed commands when
+you press the tab key.
+
+Some of the advantages of command-line completion are:
+
+- saves you from typing text when it can be auto-completed
+- helps you know the available continuations for commands
+- prevents errors and improves the experience by hiding or showing options based
+  on what you have already typed
+
+## Completion script
+
+The command-line interpreter requires a completion script to define which
+completion suggestions can be displayed for a given executable.
+
+Using the `dgraph completion` command you can generate a file that can be added
+to your shell configuration. After you add the file you can auto-complete any
+`dgraph` command.
+
+  Dgraph command completion currently supports `bash` and `zsh` shells.
+
diff --git a/dgraph/reference/howto/commandline/create-cli.mdx b/dgraph/reference/howto/commandline/create-cli.mdx
new file mode 100644
index 00000000..48b9e2db
--- /dev/null
+++ b/dgraph/reference/howto/commandline/create-cli.mdx
@@ -0,0 +1,89 @@
+---
+title: Create a completion script
+---
+
+The completion script is code that uses the built-in bash command `complete` to
+define which completion suggestions can be displayed for a given executable. The
+nature of the completion options varies from simple static options to highly
+sophisticated ones.
+
+## Before you begin
+
+- [Install Dgraph](./download#build-and-install).
+- Determine the shell you are running:
+  ```bash
+  echo $0
+  ```
+  An output similar to the following appears:
+  ```bash
+  user@workstation:~/dgraph$ echo $0
+  bash
+  ```
+
+### Creating a completion script for Bash shell
+
+1. To generate a `dgraph-completion.sh` configuration file for your `bash`
+   shell, run the `completion` command:
+
+   ```bash
+   dgraph completion > ~/dgraph-completion.sh
+   ```
+
+   The contents of the file are similar to:
+
+   ```bash
+   [Decoder]: Using assembly version of decoder
+   Page Size: 4096
+   # bash completion for dgraph -*- shell-script -*-
+
+   __dgraph_debug()
+   {
+       if [[ -n ${BASH_COMP_DEBUG_FILE} ]]; then
+           echo "$*" >> "${BASH_COMP_DEBUG_FILE}"
+       fi
+   }
+   ...
+   ..
+   .
+   ```
+
+   The generated file has 2 lines at the beginning that need to be removed for
+   the script to run properly.
+
+1. You can comment out the 2 lines with a `#`, or remove them with the following
+   command:
+
+   ```bash
+   sed -i.bak '1d;2d' ~/dgraph-completion.sh
+   ```
+
+1. Make the file executable by running the following command. You may require
+   root user `sudo` privileges to run it:
+
+   ```bash
+   chmod +x ~/dgraph-completion.sh
+   ```
+
+1. Open the `.bashrc` file with any text editor. You might need `sudo`
+   privileges to apply changes. For example:
+   ```bash
+   nano ~/.bashrc
+   ```
+1. Add the path to `dgraph-completion.sh` using the following syntax and save
+   the file:
+   ```bash
+   . path/to/dgraph-completion.sh
+   ```
+1. Reload the `bashrc` settings with the following command:
+   ```bash
+   source ~/.bashrc
+   ```
+   Now you can start typing `dgraph` and press tab to get
+   auto-completion and suggestions:
+
+```bash
+user@workstation:~/dgraph$ dgraph
+acl cert debug increment migrate tool zero
+alpha completion debuginfo live raftmigrate upgrade
+bulk conv export_backup lsbackup restore version
+```
diff --git a/dgraph/reference/howto/commandline/index.mdx b/dgraph/reference/howto/commandline/index.mdx
new file mode 100644
index 00000000..4caa1490
--- /dev/null
+++ b/dgraph/reference/howto/commandline/index.mdx
@@ -0,0 +1,3 @@
+---
+title: Command line
+---
diff --git a/dgraph/reference/howto/completion.mdx b/dgraph/reference/howto/completion.mdx
new file mode 100644
index 00000000..943514d2
--- /dev/null
+++ b/dgraph/reference/howto/completion.mdx
@@ -0,0 +1,111 @@
+---
+title: Shell Completion
+description:
+  Dgraph supports command-line completion, a common feature provided by shells
+  like bash or zsh that helps you to type commands in a fast and easy way
+---
+
+Command-line completion is a common feature provided by shells like `bash` or
+`zsh` that lets you type commands in a fast and easy way. This functionality
+automatically fills in partially typed commands when the user presses the
+tab key.
+
+## Completion script
+
+The command-line interpreter requires a completion script to define which
+completion suggestions can be displayed for a given executable.
+
+Using the `dgraph completion` command you can generate a file that can be added
+to your shell configuration. Once added, you will be able to auto-complete any
+`dgraph` command.
+
+  Dgraph command completion currently supports `bash` and `zsh` shells.
+
+First, you need to know which shell you are running.
+If you don't know, you can execute the following command:
+
+```sh
+echo $0
+```
+
+and the output should look like:
+
+```sh
+user@workstation:~/dgraph$ echo $0
+bash
+```
+
+## Bash shell
+
+To generate a `dgraph-completion.sh` configuration file for your `bash` shell,
+run the `completion` command as follows:
+
+```sh
+dgraph completion bash > ~/dgraph-completion.sh
+```
+
+The file content should look like:
+
+```bash
+[Decoder]: Using assembly version of decoder
+Page Size: 4096
+# bash completion for dgraph -*- shell-script -*-
+
+__dgraph_debug()
+{
+    if [[ -n ${BASH_COMP_DEBUG_FILE} ]]; then
+        echo "$*" >> "${BASH_COMP_DEBUG_FILE}"
+    fi
+}
+...
+..
+.
+```
+
+Currently, the generated file has 2 lines at the beginning that need to be
+removed, or else the script won't run properly. You can comment them out with a
+`#`, or you can easily remove them with the following command:
+
+```sh
+sed -i.bak '1d;2d' ~/dgraph-completion.sh
+```
+
+Next, you have to make that file executable by running the following command
+(your system might require `sudo` to run it):
+
+```sh
+chmod +x ~/dgraph-completion.sh
+```
+
+Now open the `.bashrc` file with any text editor (you might need `sudo` to apply
+changes). For example:
+
+```sh
+nano ~/.bashrc
+```
+
+Once opened, add the path to `dgraph-completion.sh` using the following syntax
+and save:
+
+```sh
+. path/to/dgraph-completion.sh
+```
+
+Finally, reload the `bashrc` settings with the following command:
+
+```sh
+source ~/.bashrc
+```
+
+Now you can start typing `dgraph` and press tab to get
+auto-completion and suggestions:
+
+```txt
+user@workstation:~/dgraph$ dgraph
+acl cert debug increment migrate tool zero
+alpha completion debuginfo live raftmigrate upgrade
+bulk conv export_backup lsbackup restore version
+```
diff --git a/dgraph/reference/howto/concurrent-modification-java-multithreaded.mdx b/dgraph/reference/howto/concurrent-modification-java-multithreaded.mdx
new file mode 100644
index 00000000..171c59ea
--- /dev/null
+++ b/dgraph/reference/howto/concurrent-modification-java-multithreaded.mdx
@@ -0,0 +1,93 @@
+---
+title: Concurrent mutations and conflicts
+---
+
+This how-to guide provides an example of how to handle concurrent modifications
+using a multi-threaded Java program. The example demonstrates
+[transaction](./clients#transactions) conflicts in Dgraph.
+
+The steps to run this example are as follows.
+
+Step 1: start a new terminal and launch Dgraph with the following command line.
+
+```sh
+docker run -it -p 8080:8080 -p 9080:9080 dgraph/standalone:%VERSION_HERE
+```
+
+Step 2: check out the source code from the 'samples' directory in the
+[dgraph4j repository](https://github.com/dgraph-io/dgraph4j). This particular
+example can be found at the path `samples/concurrent-modification`. In order to
+run this example, execute the following maven command from the
+'concurrent-modification' folder.
+
+```sh
+mvn clean install exec:java
+```
+
+Step 3: on running the example, the program initializes Dgraph with the
+following schema.
+
+```sh
+<clickCount>: int @index(int) .
+<name>: string @index(exact) .
+```
+
+Step 4: the program also initializes user "Alice" with a 'clickCount' of value
+'1', and then proceeds to increment 'clickCount' concurrently in two threads.
+Dgraph throws an exception if a transaction is updating a given predicate that
+is being concurrently modified. As part of the exception handling logic, the
+program sleeps for 1 second on receiving a concurrent modification exception
+(“TxnConflictException”), and then retries.
+
+The logs below show that two threads are increasing clickCount for the same user
+named Alice (note the same uid). Thread #1 succeeds immediately, and Dgraph
+throws a concurrent modification conflict on Thread #2. Thread #2 sleeps for 1
+second and retries, and this time succeeds.
+
+```sh
+1599628015260 Thread #2 increasing clickCount for uid 0xe, Name: Alice
+1599628015260 Thread #1 increasing clickCount for uid 0xe, Name: Alice
+1599628015291 Thread #1 succeeded after 0 retries
+1599628015297 Thread #2 found a concurrent modification conflict, sleeping for 1 second...
+1599628016297 Thread #2 resuming
+1599628016310 Thread #2 increasing clickCount for uid 0xe, Name: Alice
+1599628016333 Thread #2 succeeded after 1 retries
+```
+
+Step 5: please note that the final value of clickCount is 3 (the initial value
+was 1), which is correct. Query:
+
+```json
+{
+  Alice(func: has(clickCount)) @filter(eq(name, "Alice")) {
+    uid
+    name
+    clickCount
+  }
+}
+```
+
+Response:
+
+```json
+{
+  "data": {
+    "Alice": [
+      {
+        "uid": "0xe",
+        "name": "Alice",
+        "clickCount": 3
+      }
+    ]
+  }
+}
+```
+
+**Summary**
+
+Concurrent modifications to the same predicate cause a "TxnConflictException".
+When several transactions hit the same node's predicate at the same time, the
+first one succeeds, while the others get the “TxnConflictException”. Upon
+retrying, the transactions begin to succeed one after another and, given enough
+retries, each correctly completes its work.
diff --git a/dgraph/reference/howto/dgraph-sentry-integration.mdx b/dgraph/reference/howto/dgraph-sentry-integration.mdx
new file mode 100644
index 00000000..942f5e1a
--- /dev/null
+++ b/dgraph/reference/howto/dgraph-sentry-integration.mdx
@@ -0,0 +1,86 @@
+---
+title: Using the Dgraph Sentry Integration
+---
+
+Sentry is a powerful service that allows applications to send arbitrary events,
+messages, exceptions, and bread-crumbs (logs) to your Sentry account. In the
+simplest terms, it is a dial-home service, but it also has a rich feature set
+including event filtering, data scrubbing, several SDKs, custom and release
+tagging, as well as integration with third-party tools such as Slack and GitHub.
+
+Although Sentry reporting is on by default, starting from v20.03.1 and v20.07.0,
+there is a configuration flag `enable-sentry` which can be used to completely
+turn off Sentry events reporting.
+
+## Basic Integration
+
+**Panics (runtime and manual)**
+
+- As of now, at Dgraph, we use Sentry reporting for capturing panics only. For
+  manual panics anywhere in the code, the `sentry.CaptureException()` API is
+  called.
+
+- For runtime panics, Sentry does not have any native method. After further
+  research, we chose the approach of a wrapper process to capture these panics.
+  The basic idea is that whenever a Dgraph instance is started, a second
+  monitoring process is started whose only job is to monitor the stderr of the
+  monitored process for panics. When a panic is seen, it is reported back to
+  Sentry via the `CaptureException` API.
+
+**Reporting**
+
+Each event is tagged with the release version, environment, timestamp, tags and
+the panic backtrace, as explained below.
+
+**Release:**
+
+- This is the release version string of the Dgraph instance.
+
+**Environments:**
+
+We have defined 4 environments:
+
+**dev-oss / dev-enterprise**: These are events seen on non-released / local
+developer builds.
+
+**prod-oss/prod-enterprise**: These are events on released versions.
+Events in this category are also sent on a Slack channel private to Dgraph.
+
+**Tags:**
+
+Tags are key-value pairs that provide additional context for an event. We have
+defined the following tags:
+
+`dgraph`: This tag can have the values “zero” or “alpha”, depending on which
+sub-command saw the panic/exception.
+
+## Data Handling
+
+We strive to handle your data with care in a variety of ways when sending events
+to Sentry:
+
+1. **Event Selection:** As of now, only panic events are sent to Sentry from
+   Dgraph.
+2. **Data in Transit:** Events sent from the SDK to the Sentry server are
+   encrypted on the wire with the industry-standard TLS protocol with a 256-bit
+   AES cipher.
+3. **Data at rest:** Events on the Sentry server are also encrypted with a
+   256-bit AES cipher. Sentry is hosted on GCP and as such physical access is
+   tightly controlled. Logical access is only available to Sentry-approved
+   officials.
+4. **Data Retention:** Sentry stores events only for 90 days, after which they
+   are removed permanently.
+5. **Data Scrubbing**: The Data Scrubber option (default: on) in Sentry’s
+   settings ensures PII doesn’t get sent to or stored on Sentry’s servers,
+   automatically removing any values that look like they contain sensitive
+   information. The strings we currently monitor and scrub are:
+
+- `password`
+- `secret`
+- `passwd`
+- `api_key`
+- `apikey`
+- `access_token`
+- `auth_token`
+- `credentials`
+- `mysql_pwd`
+- `stripetoken`
+- `card[number]`
+- `ip addresses`
diff --git a/dgraph/reference/howto/dql-schema-request.mdx b/dgraph/reference/howto/dql-schema-request.mdx
new file mode 100644
index 00000000..0a429048
--- /dev/null
+++ b/dgraph/reference/howto/dql-schema-request.mdx
@@ -0,0 +1,70 @@
+---
+title: Query Dgraph types
+---
+
+You can retrieve the Dgraph schema containing the list of predicate types and
+node types by:
+
+- issuing a query on the `/query` endpoint using the
+  [HTTP Client](/raw-http#query-current-dql-schema)
+- issuing a query using any [DQL client library](./dql/clients)
+- using [Ratel UI](./ratel/schema)
+- using the Cloud console through the
+  [DQL Schema](https://cloud.dgraph.io/_/schema?tab=dqlschema) tab of the Schema
+  section.
+
+When using a query, the request body is:
+
+```
+schema {}
+```
+
+  Unlike regular queries, the schema query is not surrounded by curly braces.
+  Also, schema queries and regular queries cannot be combined.
+
+You can query for particular schema fields in the query body.
+
+```
+schema {
+  type
+  index
+  reverse
+  tokenizer
+  list
+  count
+  upsert
+  lang
+}
+```
+
+You can also query for particular predicates:
+
+```
+schema(pred: [name, friend]) {
+  type
+  index
+  reverse
+  tokenizer
+  list
+  count
+  upsert
+  lang
+}
+```
+
+  If ACL is enabled, then the schema query returns only the predicates for which
+  the logged-in ACL user has read access.
+
+Types can also be queried. Below are some example queries.
+
+```
+schema(type: Movie) {}
+schema(type: [Person, Animal]) {}
+```
+
+Note that type queries do not contain anything between the curly braces. The
+output will be the entire definition of the requested types.
diff --git a/dgraph/reference/howto/drop-data.mdx b/dgraph/reference/howto/drop-data.mdx
new file mode 100644
index 00000000..8166fe5c
--- /dev/null
+++ b/dgraph/reference/howto/drop-data.mdx
@@ -0,0 +1,84 @@
+---
+title: Drop all data
+---
+
+It is possible to drop all data from your Dgraph Cloud backend, and start afresh
+while retaining the same endpoint.
+
+Be careful, as this operation is not reversible, and all data will be lost. It
+is highly recommended that you [export](/admin/import-export) your data before
+you drop it.
+
+### Dropping data from the Cloud UI
+
+In order to drop all data while retaining the schema:
+
+- access the [Schema](https://cloud.dgraph.io/_/schema) panel
+- click the Drop Data button at the bottom of the schema
+- select the options and confirm
+
+_![Drop Data](/images/drop-data.png)_
+
+### Dropping Data Programmatically
+
+You can drop data by invoking the `dropData` mutation on the `/admin/slash`
+endpoint.
+
+As an example, if your GraphQL endpoint is
+`https://frozen-mango.us-west-2.aws.cloud.dgraph.io/graphql`, then the admin
+endpoint for schema will be at
+`https://frozen-mango.us-west-2.aws.cloud.dgraph.io/admin/slash`.
+
+This endpoint requires [Authentication](./cloud/admin/authentication).
+
+Here is a curl example (`<your-backend>` and `<your-token>` are placeholders for
+your backend host and API token):
+
+```
+curl 'https://<your-backend>/admin/slash' \
+  -H 'X-Auth-Token: <your-token>' \
+  -H 'Content-Type: application/graphql' \
+  --data-binary 'mutation { dropData(allData: true) { response { code message } } }'
+```
+
+If you would like to drop the schema along with the data, then you can set the
+`allDataAndSchema` flag.
+
+```
+curl 'https://<your-backend>/admin/slash' \
+  -H 'X-Auth-Token: <your-token>' \
+  -H 'Content-Type: application/graphql' \
+  --data-binary 'mutation { dropData(allDataAndSchema: true) { response { code message } } }'
+```
+
+## On-Premise
+
+### Drop data and schema
+
+The `/alter` endpoint is used to drop data.
+
+To drop all data and schema:
+
+```sh
+$ curl -X POST localhost:8080/alter -d '{"drop_all": true}'
+```
+
+To drop all data only (keep schema):
+
+```sh
+$ curl -X POST localhost:8080/alter -d '{"drop_op": "DATA"}'
+```
+
+The `/alter` endpoint can also be used to drop a specific property or all nodes
+of a specific type.
+
+To drop the property `name`:
+
+```sh
+$ curl -X POST localhost:8080/alter -d '{"drop_attr": "name"}'
+```
+
+To drop the type `Film`:
+
+```sh
+$ curl -X POST localhost:8080/alter -d '{"drop_op": "TYPE", "drop_value": "Film"}'
+```
diff --git a/dgraph/reference/howto/exportdata/about-export.mdx b/dgraph/reference/howto/exportdata/about-export.mdx
new file mode 100644
index 00000000..100054a2
--- /dev/null
+++ b/dgraph/reference/howto/exportdata/about-export.mdx
@@ -0,0 +1,21 @@
+---
+title: Export data
+---
+
+## Export
+
+As an `Administrator` you can export data from
+[Dgraph Cloud](./howto/exportdata/export-data-cloud), using the Cloud console, a
+GraphQL client, or the Cloud API. You can also export data from your self-hosted
+[Dgraph instance](./howto/exportdata/export-data) to NFS, a file path, or an
+object store.
+
+When you export data, typically three files are generated:
+
+- `g01.gql_schema.gz`: The GraphQL schema file. This file can be imported using
+  the Schema APIs
+- `g01.json.gz` or `g01.rdf.gz`: the data from your instance in JSON format or
+  RDF format. By default, Dgraph exports data in RDF format.
+- `g01.schema.gz`: This file is the internal Dgraph schema. If you have set up
+  the Dgraph Cloud instance with a GraphQL schema, then you can ignore this
+  file.
diff --git a/dgraph/reference/howto/exportdata/export-data-cloud.mdx b/dgraph/reference/howto/exportdata/export-data-cloud.mdx
new file mode 100644
index 00000000..efee3bc1
--- /dev/null
+++ b/dgraph/reference/howto/exportdata/export-data-cloud.mdx
@@ -0,0 +1,92 @@
+---
+title: Export data from Dgraph Cloud
+---
+
+As an `Administrator` you can export data from a Dgraph Cloud shared instance or
+dedicated instance. On a dedicated instance with the multi-tenancy feature
+enabled, you can export data across the cluster or from a specific namespace,
+depending on the type of administrative privileges you have.
+
+## Exporting data from Dgraph Cloud using the console
+
+1. In the `Admin` section of the Dgraph Cloud console, go to `Settings`.
+1. In the `Exports` tab, click `Create Export`.
+1. In the `New export` dialog, select the format you want to export.
+1. Click `Create`.
+
+Depending on the format that you chose to create an export, three files are
+generated.
+
+  Ensure that you download these files as soon as possible because the links to
+  download these files expire after 48 hours from the time they were generated.
+
+### Exporting data from Dgraph Cloud using a GraphQL client
+
+1. Generate an API key for authentication.
+1. Make a note of the GraphQL endpoint for the instance from `Overview` in the
+   Dgraph Cloud console. Replace `/graphql` with `/admin/slash` in the GraphQL
+   endpoint to get the `<admin-endpoint>`.
+1. Authenticate the `admin` API requests by adding the `<api-key>` as the
+   `Dg-Auth` header to every HTTP request.
+1. To export data you need to send an authenticated request to the
+   `<admin-endpoint>`.
+1. Export data in JSON or RDF format using this mutation:
+
+   ```graphql
+   mutation {
+     export(format: "<format>") {
+       response {
+         message
+         code
+       }
+       exportId
+       taskId
+     }
+   }
+   ```
+
+   A response similar to this appears:
+
+   ```json
+   {
+     "data": {
+       "export": {
+         "response": {
+           "code": "Success",
+           "message": "Export queued with ID 0x9d2e13e8a"
+         },
+         "exportId": "exports/2011-12-08/0x18986fd-558223708",
+         "taskId": "0x9d2e13e8a"
+       }
+     }
+   }
+   ```
+
+1. Make a note of the `exportId` and the `taskId`.
+
+1. To get the status of the export and the signed URLs to download the exported
+   files, use this query:
+
+   ```graphql
+   query {
+     exportStatus(exportId: "<exportId>", taskId: "<taskId>") {
+       kind
+       lastUpdated
+       signedUrls
+       status
+     }
+   }
+   ```
+
+   Depending on the format that you chose to create an export, three files are
+   generated.
+
+   Ensure that you download these files as soon as possible
+because the signed URLs to download these files expire after 48 hours from the
+time they were generated. You can use `curl -O <signed-URL>` to download the
+files to the current directory.
+
+### Exporting data from Dgraph Cloud programmatically
+
+You can also export data from Dgraph Cloud programmatically using the Dgraph
+Cloud API. For more information, see the
+[Cloud API documentation](https://dgraph.io/docs/cloud/cloud-api/backup/#export-data).
diff --git a/dgraph/reference/howto/exportdata/export-data.mdx b/dgraph/reference/howto/exportdata/export-data.mdx
new file mode 100644
index 00000000..1ec0ed0b
--- /dev/null
+++ b/dgraph/reference/howto/exportdata/export-data.mdx
@@ -0,0 +1,353 @@
+---
+title: Export data from Dgraph
+---
+
+As an `Administrator` you can export data on all nodes, configure the Alpha
+server, specify the export format, export to an object store, disable HTTPS for
+exports, and encrypt exports.
+
+## Export data using the GraphQL admin endpoint
+
+You can export all the data by executing a GraphQL mutation on the `/admin`
+endpoint of any Alpha node.
+
+**Before you begin**:
+
+- Ensure that there is sufficient space on disk to store the export. Each Dgraph
+  Alpha leader for a group writes output as a gzipped file to the export
+  directory specified through the `--export` flag (defaults to an **export**
+  directory). If any of the groups fail because of insufficient space on the
+  disk, the entire export process is considered failed and an error is returned.
+
+- Make a note of the export directories of the Alpha server nodes. For more
+  information about configuring the Dgraph Alpha server, see [Config](./config).
+
+This mutation triggers the export from the Alpha leader of each group.
+Depending on the Dgraph configuration, several files are exported. It is
+recommended that you copy the files from the Alpha server nodes to a safe place
+when the export is complete.
+
+```graphql
+mutation {
+  export(input: {}) {
+    response {
+      message
+      code
+    }
+  }
+}
+```
+
+The export data of the group that the Alpha instance receiving the request
+belongs to is stored on that Alpha; the export data of every other group is
+stored on the Alpha leader of that group.
+
+You need to retrieve the right export files from the Alpha instances in the
+cluster. Dgraph does not copy all files to the Alpha that initiated the export.
+
+When the export is complete, a response similar to this appears:
+
+```
+{"data":{
+  "export":{
+    "response":{
+      "message":"Export completed.",
+      "code":"Success"
+    }
+  }
+ },
+ "extensions":{
+  "tracing":{
+    "version":1,
+    "startTime":"2022-12-14T07:39:51.061712416Z","endTime":"2022-12-14T07:39:51.129431494Z",
+    "duration":67719080
+  }
+ }
+}
+```
+
+## Export data format
+
+By default, Dgraph exports data in RDF format. Replace `<format>` with `json` or
+`rdf` in this GraphQL mutation:
+
+```graphql
+mutation {
+  export(input: { format: "<format>" }) {
+    response {
+      message
+      code
+    }
+  }
+}
+```
+
+## Export to NFS or a file path
+
+You can override the default folder path by setting the `destination` input
+field to the directory where you want to export data. Replace `<path>` in this
+GraphQL mutation with the absolute path of the directory to export data to.
+
+```graphql
+mutation {
+  export(input: { format: "<format>", destination: "<path>" }) {
+    response {
+      message
+      code
+    }
+  }
+}
+```
+
+## Export to an object store
+
+You can export to AWS S3, Azure Blob Storage, or Google Cloud Storage.
+
+### Example mutation to export to AWS S3
+
+```graphql
+mutation {
+  export(
+    input: {
+      destination: "s3://s3.<region>.amazonaws.com/<bucket-name>"
+      accessKey: "<aws-access-key-id>"
+      secretKey: "<aws-secret-access-key>"
+    }
+  ) {
+    response {
+      message
+      code
+    }
+  }
+}
+```
+
+The Dgraph URL used for S3 is different from the one used by the AWS CLI tools
+with the `aws s3` command, which uses a shortened format:
+`s3://<bucket-name>`.
+
+### Example mutation to export to MinIO
+
+```graphql
+mutation {
+  export(
+    input: {
+      destination: "minio://<host>:9000/<bucket-name>"
+      accessKey: "<minio-access-key>"
+      secretKey: "<minio-secret-key>"
+    }
+  ) {
+    response {
+      message
+      code
+    }
+  }
+}
+```
+
+## Export to a MinIO gateway
+
+You can use MinIO as a gateway to other object stores, such as
+[Azure Blob Storage](https://azure.microsoft.com/services/storage/blobs/) or
+[Google Cloud Storage](https://cloud.google.com/storage).
+
+### Azure Blob Storage
+
+You can use
+[Azure Blob Storage](https://azure.microsoft.com/services/storage/blobs/)
+through the
+[MinIO Azure Gateway](https://docs.min.io/docs/minio-gateway-for-azure.html).
+
+**Before you begin**:
+
+- Configure a
+  [storage account](https://docs.microsoft.com/azure/storage/common/storage-account-overview)
+  and a Blob
+  [container](https://docs.microsoft.com/azure/storage/blobs/storage-blobs-introduction#containers)
+  to organize the blobs.
+- Make a note of the name of the blob container. It is used as the bucket name
+  when specifying the `destination` in the GraphQL mutation.
+- [Retrieve storage accounts keys](https://docs.microsoft.com/azure/storage/common/storage-account-keys-manage)
+  to configure MinIO. The
+  [MinIO Azure Gateway](https://docs.min.io/docs/minio-gateway-for-azure.html)
+  uses `MINIO_ACCESS_KEY` and `MINIO_SECRET_KEY` to correspond to the Azure
+  Storage Account `AccountName` and `AccountKey`.
+
+You can access Azure Blob Storage locally using one of these methods:
+
+- Using
+  [MinIO Azure Gateway](https://docs.min.io/docs/minio-gateway-for-azure.html)
+  with the MinIO Binary
+  ```bash
+  export MINIO_ACCESS_KEY="<AccountName>"
+  export MINIO_SECRET_KEY="<AccountKey>"
+  minio gateway azure
+  ```
+- Using
+  [MinIO Azure Gateway](https://docs.min.io/docs/minio-gateway-for-azure.html)
+  with Docker
+  ```bash
+  docker run --detach --rm --name gateway \
+    --publish 9000:9000 \
+    --env MINIO_ACCESS_KEY="<AccountName>" \
+    --env MINIO_SECRET_KEY="<AccountKey>" \
+    minio/minio gateway azure
+  ```
+- Using
+  [MinIO Azure Gateway](https://docs.min.io/docs/minio-gateway-for-azure.html)
+  with the [MinIO Helm chart](https://github.com/minio/charts) for Kubernetes:
+  ```bash
+  helm repo add minio https://helm.min.io/
+  helm install my-gateway minio/minio \
+    --set accessKey="<AccountName>",secretKey="<AccountKey>" \
+    --set azuregateway.enabled=true
+  ```
+  You can use the
+  [MinIO GraphQL mutation](./howto/exportdata/export-data.md#example-mutation-to-export-to-minio)
+  with MinIO configured as a gateway.
+
+### Google Cloud Storage
+
+You can use [Google Cloud Storage](https://cloud.google.com/storage) through the
+[MinIO GCS Gateway](https://docs.min.io/docs/minio-gateway-for-gcs.html).
+
+**Before you begin**:
+
+- Create
+  [storage buckets](https://cloud.google.com/storage/docs/creating-buckets)
+- Create a Service Account key for GCS and get a credentials file. For more
+  information, see
+  [Create a Service Account key](https://github.com/minio/minio/blob/master/docs/gateway/gcs.md#11-create-a-service-account-key-for-gcs-and-get-the-credentials-file).
+
+When you have a `credentials.json`, you can access GCS locally using one of
+these methods:
+
+- Using [MinIO GCS Gateway](https://docs.min.io/docs/minio-gateway-for-gcs.html)
+  with the MinIO Binary
+  ```bash
+  export GOOGLE_APPLICATION_CREDENTIALS="/path/to/credentials.json"
+  export MINIO_ACCESS_KEY="<minio-access-key>"
+  export MINIO_SECRET_KEY="<minio-secret-key>"
+  minio gateway gcs "<project-id>"
+  ```
+- Using [MinIO GCS Gateway](https://docs.min.io/docs/minio-gateway-for-gcs.html)
+  with Docker
+  ```bash
+  docker run --detach --rm --name gateway \
+    --publish 9000:9000 \
+    --volume "<path-to-credentials-json>":/credentials.json \
+    --env GOOGLE_APPLICATION_CREDENTIALS=/credentials.json \
+    --env MINIO_ACCESS_KEY="<minio-access-key>" \
+    --env MINIO_SECRET_KEY="<minio-secret-key>" \
+    minio/minio gateway gcs "<project-id>"
+  ```
- Using [MinIO GCS Gateway](https://docs.min.io/docs/minio-gateway-for-gcs.html)
+  with the [MinIO Helm chart](https://github.com/minio/charts) for Kubernetes:
+
+  ```bash
+  ## create MinIO Helm config
+  cat <<-EOF > myvalues.yaml
+  accessKey: <minio-access-key>
+  secretKey: <minio-secret-key>
+
+  gcsgateway:
+    enabled: true
+    projectId: <project-id>
+    gcsKeyJson: |
+      $(IFS='\n'; while read -r LINE; do printf ' %s\n' "$LINE"; done < "<path-to-credentials-json>")
+  EOF
+
+  ## deploy MinIO GCS Gateway
+  helm repo add minio https://helm.min.io/
+  helm install my-gateway minio/minio \
+    --values myvalues.yaml
+  ```
+
+  You can use the
+  [MinIO GraphQL mutation](./howto/exportdata/export-data.md#example-mutation-to-export-to-minio)
+  with MinIO configured as a gateway.
+
+## Disable HTTPS for exports to S3 and MinIO
+
+By default, Dgraph assumes the destination bucket is using HTTPS. If that is not
+the case, the export fails. To export to a bucket using HTTP (insecure), set the
+query parameter `secure=false` with the destination endpoint in the
+`destination` field:
+
+```graphql
+mutation {
+  export(
+    input: {
+      destination: "minio://<host>:9000/<bucket-name>?secure=false"
+      accessKey: "<minio-access-key>"
+      secretKey: "<minio-secret-key>"
+    }
+  ) {
+    response {
+      message
+      code
+    }
+  }
+}
+```
+
+## Use anonymous credentials
+
+When exporting to S3 or MinIO where credentials are not required, you can set
+`anonymous` to true.
+
+```graphql
+mutation {
+  export(
+    input: {
+      destination: "s3://s3.<region>.amazonaws.com/<bucket-name>"
+      anonymous: true
+    }
+  ) {
+    response {
+      message
+      code
+    }
+  }
+}
+```
+
+## Encrypt exports
+
+Export is available wherever an Alpha is running. To encrypt an export, the
+Alpha must be configured with the `--encryption key-file=value` option.
+
+  The `--encryption key-file` option was used for [Encryption at
+  Rest](./enterprise-features/encryption-at-rest) and will now also be used for
+  encrypted exports.
+
+## Use `curl` to trigger an export
+
+This is an example of how you can use `curl` to trigger an export.
+
+1. Create a GraphQL file for the desired mutation:
+   ```bash
+   cat <<-EOF > export.graphql
+   mutation {
+     export(input: {
+       destination: "s3://s3.<region>.amazonaws.com/<bucket-name>"
+       accessKey: "<aws-access-key-id>"
+       secretKey: "<aws-secret-access-key>"
+     }) {
+       response {
+         message
+         code
+       }
+     }
+   }
+   EOF
+   ```
2. Trigger an export with `curl`:
+   ```bash
+   curl http://localhost:8080/admin --silent --request POST \
+     --header "Content-Type: application/graphql" \
+     --upload-file export.graphql
+   ```
diff --git a/dgraph/reference/howto/exportdata/index.mdx b/dgraph/reference/howto/exportdata/index.mdx
new file mode 100644
index 00000000..dc2a54ee
--- /dev/null
+++ b/dgraph/reference/howto/exportdata/index.mdx
@@ -0,0 +1,3 @@
+---
+title: Export data
+---
diff --git a/dgraph/reference/howto/importdata/about_import.mdx b/dgraph/reference/howto/importdata/about_import.mdx
new file mode 100644
index 00000000..85ea4bc3
--- /dev/null
+++ b/dgraph/reference/howto/importdata/about_import.mdx
@@ -0,0 +1,21 @@
+---
+title: Import data
+---
+
+As an `Administrator` you can initialize a new Dgraph cluster by doing an
+[Initial import](./bulk-loader) and you can import data into a running instance
+by performing a [Live import](./live-loader).
+
+Initial import is **considerably faster** than the live import but can only be
+used to load data into a new cluster (without prior data) and is executed before
+starting the Alpha nodes.
+
+  Contact us if you need to do an initial import on a Dgraph Cloud instance.
+
+  Both options accept [RDF N-Quad/Triple data](https://www.w3.org/TR/n-quads/)
+  or JSON format. Refer to [data migration](./about-data-migration) to see how
+  to convert other data formats.
+
diff --git a/dgraph/reference/howto/importdata/bulk-loader.mdx b/dgraph/reference/howto/importdata/bulk-loader.mdx
new file mode 100644
index 00000000..3c146a82
--- /dev/null
+++ b/dgraph/reference/howto/importdata/bulk-loader.mdx
@@ -0,0 +1,443 @@
+---
+title: Initial import (Bulk Loader)
+---
+
+Dgraph Bulk Loader serves a similar purpose to the Dgraph Live Loader, but can
+only be used to load data into a new cluster. It cannot be run on an existing
+Dgraph cluster. Dgraph Bulk Loader is **considerably faster** than the Dgraph
+Live Loader and is the recommended way to perform the initial import of large
+datasets into Dgraph.
+
+During bulk loading, only the Dgraph Zeros (one or more) should be running;
+Dgraph Alphas are started later.
+
+You can [read some technical details](https://dgraph.io/blog/post/bulkloader/)
+about the bulk loader on the blog.
+
+  Don't use the Bulk loader once the Dgraph cluster is up and running. Use it to
+  import your existing data to a new cluster.
+
+  It's crucial to tune the bulk loader's flags to get good performance. See the
+  next section for details.
+
+## Settings
+
+  Bulk Loader only accepts [RDF N-Quad/Triple
+  data](https://www.w3.org/TR/n-quads/) or JSON in plain or gzipped format. Data
+  in other formats must be converted.
+
+```sh
+$ dgraph bulk --help # To see the available flags.
+
+# Read RDFs or JSON from the passed file.
+$ dgraph bulk -f <path-to-gzipped-RDF-or-JSON-file> ...
+
+# Read multiple RDFs or JSON from the passed path.
+$ dgraph bulk -f <./path-to-gzipped-RDF-or-JSON-files> ...
+
+# Read multiple files strictly by name.
+$ dgraph bulk -f <file1.rdf, file2.rdf> ...
+
+```
+
+- **Reduce shards**: Before running the bulk load, you need to decide how many
+  Alpha groups will be running when the cluster starts. The number of Alpha
+  groups will be the same as the number of reduce shards you set with the
+  `--reduce_shards` flag. For example, if your cluster will run 3 Alphas with 3
+  replicas per group, then there is 1 group and `--reduce_shards` should be set
+  to 1. If your cluster will run 6 Alphas with 3 replicas per group, then there
+  are 2 groups and `--reduce_shards` should be set to 2.
+
+- **Map shards**: The `--map_shards` option must be set to at least what's set
+  for `--reduce_shards`. A higher number helps the bulk loader evenly distribute
+  predicates between the reduce shards.
+
+For example:
+
+```sh
+$ dgraph bulk -f goldendata.rdf.gz -s goldendata.schema --map_shards=4 --reduce_shards=2 --http localhost:8000 --zero=localhost:5080
+```
+
+```
+{
+  "DataFiles": "goldendata.rdf.gz",
+  "DataFormat": "",
+  "SchemaFile": "goldendata.schema",
+  "DgraphsDir": "out",
+  "TmpDir": "tmp",
+  "NumGoroutines": 4,
+  "MapBufSize": 67108864,
+  "ExpandEdges": true,
+  "SkipMapPhase": false,
+  "CleanupTmp": true,
+  "NumShufflers": 1,
+  "Version": false,
+  "StoreXids": false,
+  "ZeroAddr": "localhost:5080",
+  "HttpAddr": "localhost:8000",
+  "IgnoreErrors": false,
+  "MapShards": 4,
+  "ReduceShards": 2
+}
+The bulk loader needs to open many files at once. This number depends on the size of the data set loaded, the map file output size, and the level of indexing. 100,000 is adequate for most data set sizes. See `man ulimit` for details of how to change the limit.
+Current max open files limit: 1024 +MAP 01s rdf_count:176.0 rdf_speed:174.4/sec edge_count:564.0 edge_speed:558.8/sec +MAP 02s rdf_count:399.0 rdf_speed:198.5/sec edge_count:1.291k edge_speed:642.4/sec +MAP 03s rdf_count:666.0 rdf_speed:221.3/sec edge_count:2.164k edge_speed:718.9/sec +MAP 04s rdf_count:952.0 rdf_speed:237.4/sec edge_count:3.014k edge_speed:751.5/sec +MAP 05s rdf_count:1.327k rdf_speed:264.8/sec edge_count:4.243k edge_speed:846.7/sec +MAP 06s rdf_count:1.774k rdf_speed:295.1/sec edge_count:5.720k edge_speed:951.5/sec +MAP 07s rdf_count:2.375k rdf_speed:338.7/sec edge_count:7.607k edge_speed:1.085k/sec +MAP 08s rdf_count:3.697k rdf_speed:461.4/sec edge_count:11.89k edge_speed:1.484k/sec +MAP 09s rdf_count:71.98k rdf_speed:7.987k/sec edge_count:225.4k edge_speed:25.01k/sec +MAP 10s rdf_count:354.8k rdf_speed:35.44k/sec edge_count:1.132M edge_speed:113.1k/sec +MAP 11s rdf_count:610.5k rdf_speed:55.39k/sec edge_count:1.985M edge_speed:180.1k/sec +MAP 12s rdf_count:883.9k rdf_speed:73.52k/sec edge_count:2.907M edge_speed:241.8k/sec +MAP 13s rdf_count:1.108M rdf_speed:85.10k/sec edge_count:3.653M edge_speed:280.5k/sec +MAP 14s rdf_count:1.121M rdf_speed:79.93k/sec edge_count:3.695M edge_speed:263.5k/sec +MAP 15s rdf_count:1.121M rdf_speed:74.61k/sec edge_count:3.695M edge_speed:246.0k/sec +REDUCE 16s [1.69%] edge_count:62.61k edge_speed:62.61k/sec plist_count:29.98k plist_speed:29.98k/sec +REDUCE 17s [18.43%] edge_count:681.2k edge_speed:651.7k/sec plist_count:328.1k plist_speed:313.9k/sec +REDUCE 18s [33.28%] edge_count:1.230M edge_speed:601.1k/sec plist_count:678.9k plist_speed:331.8k/sec +REDUCE 19s [45.70%] edge_count:1.689M edge_speed:554.4k/sec plist_count:905.9k plist_speed:297.4k/sec +REDUCE 20s [60.94%] edge_count:2.252M edge_speed:556.5k/sec plist_count:1.278M plist_speed:315.9k/sec +REDUCE 21s [93.21%] edge_count:3.444M edge_speed:681.5k/sec plist_count:1.555M plist_speed:307.7k/sec +REDUCE 22s [100.00%] edge_count:3.695M edge_speed:610.4k/sec plist_count:1.778M plist_speed:293.8k/sec +REDUCE 22s [100.00%] edge_count:3.695M edge_speed:584.4k/sec plist_count:1.778M plist_speed:281.3k/sec +Total: 22s +``` + +The output will be generated in the `out` directory by default. Here's the bulk +load output from the example above: + +```sh +$ tree ./out +``` + +```txt +./out +├── 0 +│   └── p +│   ├── 000000.vlog +│   ├── 000002.sst +│   └── MANIFEST +└── 1 + └── p + ├── 000000.vlog + ├── 000002.sst + └── MANIFEST + +4 directories, 6 files +``` + +Because `--reduce_shards` was set to `2`, two sets of `p` directories are +generated: + +- the `./out/0` folder +- the `./out/1` folder + +Once the output is created, the files must be copied to all the servers that +will run Dgraph Alphas: + +- Each replica of the first group (`Alpha1`, `Alpha2`, `Alpha3`) should have a + copy of `./out/0/p` +- Each replica of the second group (`Alpha4`, `Alpha5`, `Alpha6`) should have a + copy of `./out/1/p`, and so on. + + + Each Dgraph Alpha must have a copy of the group's `p` directory output. + + +![Bulk Loader diagram](/images/deploy/bulk-loader.png) + +### Other Bulk Loader options + +You can further configure Bulk Loader using the following options: + +- `--schema`, `-s`: set the location of the schema file. + +- `--graphql_schema`, `-g` (optional): set the location of the GraphQL schema + file. + +- `--badger` superflag's `compression` option: Configure the compression of data + on disk. By default, the Snappy compression format is used, but you can also + use Zstandard compression. 
Or, you can choose no compression to minimize CPU usage. To learn more, see
+  [Data Compression on Disk](/data-compression).
+
+- `--new_uids` (default: false): Assign new UIDs instead of using the existing
+  UIDs in data files. This is useful to avoid overriding the data in a DB
+  already in operation.
+
+- `-f`, `--files`: Location of `*.rdf(.gz)` or `*.json(.gz)` file(s) to load. It
+  can load multiple files in a given path. If the path is a directory, then all
+  files ending in `.rdf`, `.rdf.gz`, `.json`, and `.json.gz` will be loaded.
+
+- `--format` (optional): Specify the file format (`rdf` or `json`) instead of
+  getting it from filenames. This is useful if you need to define a strict
+  format manually.
+
+- `--store_xids`: Generate an `xid` edge for each node. It stores the XIDs (the
+  identifiers / blank nodes) in an attribute named `xid` in the entity itself.
+
+- `--xidmap` (default: `disabled`. Needs a path): Store the xid-to-uid mapping
+  in a directory. Dgraph saves all identifiers used in the load for later use in
+  other data ingest operations. The mapping is saved in the path you provide,
+  and you must indicate that same path in the next load. It is recommended to
+  use this flag if you have full control over your identifiers (blank nodes),
+  because each identifier will be mapped to a specific UID.
+
+- `--vault` superflag (and its options): specify the Vault server address, role
+  id, secret id, and field that contains the encryption key required to decrypt
+  the encrypted export.
+
+## Load from S3
+
+To bulk load from Amazon S3, you must have either [IAM](#iam-setup) or the
+following AWS credentials set via environment variables:
+
+| Environment Variable                        | Description                                                          |
+| ------------------------------------------- | -------------------------------------------------------------------- |
+| `AWS_ACCESS_KEY_ID` or `AWS_ACCESS_KEY`     | AWS access key with permissions to write to the destination bucket. |
+| `AWS_SECRET_ACCESS_KEY` or `AWS_SECRET_KEY` | AWS secret key with permissions to write to the destination bucket. |
+
+### IAM setup
+
+In AWS, you can accomplish this by doing the following:
+
+1. Create an
+   [IAM Role](https://docs.aws.amazon.com/IAM/latest/UserGuide/id_roles_create.html)
+   with an IAM Policy that grants access to the S3 bucket.
+2. Depending on whether you want to grant access to an EC2 instance, or to a pod
+   running on [EKS](https://aws.amazon.com/eks/), you can do one of these
+   options:
+   - [Instance Profile](https://docs.aws.amazon.com/IAM/latest/UserGuide/id_roles_use_switch-role-ec2_instance-profiles.html)
+     can pass the IAM Role to an EC2 Instance
+   - [IAM Roles for Amazon EC2](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/iam-roles-for-amazon-ec2.html)
+     to attach the IAM Role to a running EC2 Instance
+   - [IAM roles for service accounts](https://docs.aws.amazon.com/eks/latest/userguide/iam-roles-for-service-accounts.html)
+     to associate the IAM Role to a
+     [Kubernetes Service Account](https://kubernetes.io/docs/tasks/configure-pod-container/configure-service-account/).
+
+Once your setup is ready, you can execute the bulk load from S3:
+
+```sh
+dgraph bulk -f s3:///bucket-name/directory-with-rdf -s s3:///bucket-name/directory-with-rdf/schema.txt
+```
+
+## Load from MinIO
+
+To bulk load from MinIO, you must have the following MinIO credentials set via
+environment variables:
+
+| Environment Variable | Description                                                             |
+| -------------------- | ----------------------------------------------------------------------- |
+| `MINIO_ACCESS_KEY`   | MinIO access key with permissions to write to the destination bucket.   |
+| `MINIO_SECRET_KEY`   | MinIO secret key with permissions to write to the destination bucket.   |
+
+Once your setup is ready, you can execute the bulk load from MinIO:
+
+```sh
+dgraph bulk -f minio://minio-server:port/bucket-name/directory-with-rdf -s minio://minio-server:port/bucket-name/directory-with-rdf/schema.txt
+```
+
+## How to properly bulk load
+
+Starting with Dgraph v20.03.7, v20.07.3 and v20.11.0, depending on your dataset
+size, you can follow one of the following approaches to use Bulk Loader and
+initialize your new cluster.
+
+_The following procedure is particularly relevant for clusters that have the
+`--replicas` flag set greater than 1._
+
+### For small datasets
+
+If your dataset is small (a few GBs), it's convenient to start by initializing
+just one Alpha node and then letting the snapshot be streamed to the other
+Alpha replicas. You can follow these steps:
+
+1. Run bulk loader only on one server
+2. Once the `p` directory has been created by the bulk loader, start **only**
+   the first Alpha replica
+3. Wait for 1 minute to ensure that a snapshot has been taken by the first Alpha
+   node replica. You can confirm that a snapshot has been taken by looking for
+   the following message:
+
+```txt
+I1227 13:12:24.202196 14691 draft.go:571] Creating snapshot at index: 30. ReadTs: 4.
+```
+
+4. After confirming that the snapshot has been taken, you can start the other
+   Alpha node replicas (the number of Alpha nodes must be equal to the
+   `--replicas` flag value set in the Zero nodes). The Alpha node started in
+   step 2 will print messages similar to:
+
+```txt
+I1227 13:18:16.154674 16779 snapshot.go:246] Streaming done. Sent 1093470 entries. Waiting for ACK...
+I1227 13:18:17.126494 16779 snapshot.go:251] Received ACK with done: true
+I1227 13:18:17.126514 16779 snapshot.go:292] Stream snapshot: OK
+```
+
+These messages indicate that all replica nodes are now using the same snapshot.
+Thus, all your data is correctly in sync across the cluster. Also, the other
+Alpha nodes will log something similar to:
+
+```txt
+I1227 13:18:17.126621 1720 draft.go:567] Skipping snapshot at 28, because found one at 28
+```
+
+### For bigger datasets
+
+When your dataset is big (e.g. larger than 10GB), it's faster to copy the `p`
+directory generated by the bulk loader to all the Alpha nodes. You can follow
+these steps:
+
+1. Run bulk loader only on one server
+2. Copy (or use `rsync`) the `p` directory to the other servers (the servers you
+   will be using to start the other Alpha nodes)
+3. Now, start all Alpha nodes at the same time
+
+If the process went well, **all** Alpha nodes will take a snapshot after 1
+minute. You will see something similar to this in the Alpha logs:
+
+```txt
+I1227 13:27:53.959671 29781 draft.go:571] Creating snapshot at index: 34. ReadTs: 6.
+```
+
+Note that the `snapshot at index` value must be the same within the same Alpha
+group, and `ReadTs` must be the same value within and among all the Alpha
+groups.
+
+## Enterprise Features
+
+### Multi-tenancy (Enterprise Feature)
+
+By default, Bulk Loader preserves the namespace in the data and schema files. If
+there's no namespace information available, it loads the data into the default
+namespace.
+
+Using the `--force-namespace` flag, you can load all the data into a specific
+namespace. In that case, the namespace information from the data and schema
+files will be ignored.
+
+For example, to force the bulk data loading into namespace `123`:
+
+```sh
+dgraph bulk -s /tmp/data/1million.schema -f /tmp/data/1million.rdf.gz --force-namespace 123
+```
+
+### Encryption at rest (Enterprise Feature)
+
+You can load data using Bulk Loader with the encryption feature turned on even
+before the Dgraph cluster starts. Later, you can point the generated `p`
+directory to a new Alpha server.
+
+Here's an example to run Bulk Loader with a key used to write encrypted data:
+
+```bash
+dgraph bulk --encryption key-file=./enc_key_file -f data.json.gz -s data.schema --map_shards=1 --reduce_shards=1 --http localhost:8000 --zero=localhost:5080
+```
+
+Alternatively, starting with v20.07.0, the `vault_*` options can be used to
+decrypt the encrypted export.
+
+### Encrypting imports (Enterprise Feature)
+
+The Bulk Loader's `--encryption key-file=value` option was previously used to
+encrypt the output `p` directory. This same option will also be used to decrypt
+the encrypted export data and schema files.
+
+Another option, `--encrypted`, indicates whether the input `rdf`/`json` data and
+schema files are encrypted or not. With this switch, we support the use case of
+migrating data from unencrypted exports to encrypted imports.
+
+So, with the above two options there are four cases:
+
+1. `--encrypted=true` and no `encryption key-file=value`.
+
+Error: If the input is encrypted, a key file must be provided.
+
+2. `--encrypted=true` and `encryption key-file=path-to-key`.
+
+Input is encrypted and the output `p` dir is encrypted as well.
+
+3. `--encrypted=false` and no `encryption key-file=value`.
+
+Input is not encrypted and the output `p` dir is also not encrypted.
+
+4. `--encrypted=false` and `encryption key-file=path-to-key`.
+
+Input is not encrypted but the output is encrypted. (This is the migration use
+case mentioned above.)
+
+Alternatively, starting with v20.07.0, the `vault_*` options can be used instead
+of the `--encryption key-file=value` option above to achieve the same effect,
+except that the keys are stored in a Vault server.
+
+You can also use Bulk Loader to turn off encryption. This will generate a new
+unencrypted `p` directory that will be used by the Alpha process. In this case,
+you need to pass the `--encryption key-file`, `--encrypted`, and
+`--encrypted_out` flags.
+
+```bash
+# Encryption Key from the file path
+dgraph bulk --files "" --schema "" --zero "" \
+  --encrypted="true" --encrypted_out="false" \
+  --encryption key-file=""
+
+# Encryption Key from HashiCorp Vault
+dgraph bulk --files "" --schema "" --zero "" \
+  --encrypted="true" --encrypted_out="false" \
+  --vault addr="http://localhost:8200";enc-field="enc_key";enc-format="raw";path="secret/data/dgraph/alpha";role-id-file="./role_id";secret-id-file="./secret_id"
+
+```
+
+In this case, we pass the flag `--encrypted=true` because the exported data was
+taken from an encrypted Dgraph cluster, and the flag `--encrypted_out=false` to
+specify that we want the `p` directory (_generated by the bulk loader process_)
+to be unencrypted.
+
+## Tuning & monitoring
+
+### Performance Tuning
+
+
+  We highly recommend [disabling swap
+  space](https://askubuntu.com/questions/214805/how-do-i-disable-swap) when
+  running Bulk Loader. It is better to fix the parameters to decrease memory
+  usage than to have swapping grind the loader to a halt.
+
+
+Flags can be used to control the behavior and performance characteristics of
+Bulk Loader. You can see the full list by running `dgraph bulk --help`. In
+particular, **you should tune the flags so that Bulk Loader doesn't use more
+memory than is available as RAM**. If it starts swapping, it will become
+incredibly slow.
+
+**In the map phase**, tweaking the following flags can reduce memory usage:
+
+- The `--num_go_routines` flag controls the number of worker threads. Lowering
+  it reduces memory consumption.
+
+- The `--mapoutput_mb` flag controls the size of the map output files. Lowering
+  it reduces memory consumption.
+
+For bigger datasets and machines with many cores, gzip decoding can be a
+bottleneck during the map phase. Performance improvements can be obtained by
+first splitting the RDFs up into many `.rdf.gz` files (e.g. 256MB each). This
+has a negligible impact on memory usage.
+
+**The reduce phase** is less memory-heavy than the map phase, although it can
+still use a lot. Some flags may be increased to improve performance, _but only
+if you have large amounts of RAM_:
+
+- The `--reduce_shards` flag controls the number of resultant Dgraph Alpha
+  instances. Increasing this increases memory consumption, but in exchange
+  allows for higher CPU utilization.
+
+- The `--map_shards` flag controls the number of separate map output shards.
+  Increasing this increases memory consumption but balances the resultant
+  Dgraph Alpha instances more evenly.
diff --git a/dgraph/reference/howto/importdata/index.mdx b/dgraph/reference/howto/importdata/index.mdx
new file mode 100644
index 00000000..54718756
--- /dev/null
+++ b/dgraph/reference/howto/importdata/index.mdx
@@ -0,0 +1,3 @@
+---
+title: Import data
+---
diff --git a/dgraph/reference/howto/importdata/live-loader.mdx b/dgraph/reference/howto/importdata/live-loader.mdx
new file mode 100644
index 00000000..a566c0e2
--- /dev/null
+++ b/dgraph/reference/howto/importdata/live-loader.mdx
@@ -0,0 +1,340 @@
+---
+title: Live import
+---
+
+You can import data into a running Dgraph instance (which may have prior data)
+using the Dgraph CLI command [dgraph live](./cli-command-reference.md#dgraph-live),
+referred to as **Live Loader**. Live Loader sends mutations to a Dgraph cluster
+and has options to handle the assignment of unique IDs and to update existing
+data.
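+
+For example, a minimal invocation is sketched below (the file names are
+placeholders; `localhost:9080` is the default Alpha address):
+
+```sh
+# Sketch: stream an RDF file and its schema into a locally running Alpha.
+dgraph live -f data.rdf.gz -s data.schema --alpha localhost:9080
+```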
+
+
+  Live Loader accepts [RDF N-Quad/Triple data](https://www.w3.org/TR/n-quads/)
+  or JSON in plain or gzipped format. Refer to [data
+  migration](./about-data-migration) to see how to convert other data formats.
+
+
+## Before you begin
+
+Verify that you have a local folder `` containing
+
+- at least one **data file** in RDF or JSON in plain or gzip format with the
+  data to import
+- an optional **schema file**.
+
+Those files have been generated by an [export](./about-export) or by a
+[data migration](./about-data-migration) tool.
+
+## Importing data on Dgraph Cloud
+
+1. Obtain the dgraph binary or the latest Docker image by following the
+   [installation](./download) instructions. This is required to run the Dgraph
+   CLI command `dgraph live`.
+2. Obtain the `GRPC endpoint` of your Dgraph Cloud backend and a valid
+   `Client API key`.
+
+   An administrator gets this information with the following steps:
+
+   1. Log into the Dgraph Cloud account, select the backend
+   2. In the `Admin` section of the Dgraph Cloud console, go to `Settings` and
+      copy the value of the `gRPC Endpoint` from the `General` tab.
+   3. Access the `API Keys` tab to generate a `Client API Key`.
+
+   
+   The gRPC endpoint is different from the `GraphQL endpoint` that you can find
+   in the `Overview` section. The gRPC endpoint looks like
+   `frozen-mango.grpc.us-west-1.aws.cloud.dgraph.io:443`
+   
+
+3. Run the live loader as follows:
+
+   
+   
+
+    ```
+    docker run -it --rm -v :/tmp dgraph/dgraph:latest \
+      dgraph live --slash_grpc_endpoint  -f /tmp/ -s /tmp/ -t 
+    ```
+
+    Load multiple data files by using
+
+    ```
+    docker run -it --rm -v :/tmp dgraph/dgraph:latest \
+      dgraph live --slash_grpc_endpoint  -f /tmp -s /tmp/ -t 
+    ```
+
+    When the path provided with the `-f, --files` option is a directory, then all
+    files ending in .rdf, .rdf.gz, .json, and .json.gz will be loaded. Be sure
+    that your schema file has another extension (.txt or .schema for example).
+
+   
+   
+
+    ```
+    dgraph live --slash_grpc_endpoint  -f / -s / -t 
+    ```
+
+    Load multiple data files by using
+
+    ```
+    dgraph live --slash_grpc_endpoint  -f /tmp -s /tmp/ -t 
+    ```
+
+    When the path provided with the `-f, --files` option is a directory, then all
+    files ending in .rdf, .rdf.gz, .json, and .json.gz will be loaded. Be sure
+    that your schema file has another extension (.txt or .schema for example).
+
+   
+   
+
+## Batch upserts
+
+You can use Live Loader to update existing data, either to modify existing
+predicates or to add new predicates to existing nodes.
+
+To do so, use the `-U, --upsertPredicate` flag or the `-x, --xidmap` flag.
+
+### upsertPredicate flag
+
+Use the `-U, --upsertPredicate` flag to specify the predicate name in your data
+that will serve as the unique identifier.
+
+For example:
+
+```sh
+dgraph live --files  --schema  --upsertPredicate xid
+```
+
+The upsert predicate must be present in the Dgraph instance or in the schema
+file, and must be indexed.
+For each node, Live Loader will use the node name provided in the data file as
+the upsert predicate value.
+For example, if your data file contains
+
+```
+<_:my.org/customer/1> <firstName> "John" .
+```
+
+The previous command creates or updates the node with the predicate `xid` equal
+to `my.org/customer/1` and sets its `firstName` predicate to the value `John`.
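+
+Re-running Live Loader with another data file that reuses the same node name
+updates the same node rather than creating a duplicate. For example, a sketch
+with a hypothetical follow-up file (the `lastName` predicate is illustrative):
+
+```
+<_:my.org/customer/1> <lastName> "Doe" .
+```
+
+Loaded with the same `--upsertPredicate xid` flag, this adds `lastName` to the
+existing node whose `xid` is `my.org/customer/1`.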
+
+### xidmap flag
+
+```sh
+dgraph live --files  --schema  --xidmap 
+```
+
+Live Loader uses the `-x, --xidmap` directory to look up the `uid` value for
+each node name used in the data file, or to store the mapping between node
+names and the generated `uid` for every new node.
+
+## Import data on Dgraph self-hosted
+
+Run the live loader using the `-a, --alpha` flag as follows:
+
+
+
+```
+docker run -it --rm -v :/tmp dgraph/dgraph:latest \
+  dgraph live --alpha  -f /tmp/ -s /tmp/
+```
+
+Load multiple data files by using
+
+```
+docker run -it --rm -v :/tmp dgraph/dgraph:latest \
+  dgraph live --alpha  -f /tmp -s /tmp/
+```
+
+The `--alpha` default value is `localhost:9080`. You can specify a
+comma-separated list of Alpha addresses in the same cluster to distribute the
+load.
+
+When the path provided with the `-f, --files` option is a directory, then all
+files ending in .rdf, .rdf.gz, .json, and .json.gz will be loaded. Be sure that
+your schema file has another extension (.txt or .schema for example).
+
+
+
+```
+ dgraph live --alpha  -f / -s /
+```
+
+The `--alpha` default value is `localhost:9080`. You can specify a
+comma-separated list of Alpha addresses in the same cluster to distribute the
+load.
+
+
+
+### Load from S3
+
+To live load from
+[Amazon S3 (Simple Storage Service)](https://aws.amazon.com/s3/), you must have
+either permissions to access the S3 bucket from the system performing the live
+load (see [IAM setup](#iam-setup) below) or explicitly add the following AWS
+credentials set via environment variables:
+
+| Environment Variable                        | Description                                                          |
+| ------------------------------------------- | -------------------------------------------------------------------- |
+| `AWS_ACCESS_KEY_ID` or `AWS_ACCESS_KEY`     | AWS access key with permissions to write to the destination bucket.  |
+| `AWS_SECRET_ACCESS_KEY` or `AWS_SECRET_KEY` | AWS secret key with permissions to write to the destination bucket.  |
+
+#### IAM setup
+
+In AWS, you can accomplish this by doing the following:
+
+1. Create an
+   [IAM Role](https://docs.aws.amazon.com/IAM/latest/UserGuide/id_roles_create.html)
+   with an IAM Policy that grants access to the S3 bucket.
+2. Depending on whether you want to grant access to an EC2 instance, or to a pod
+   running on [EKS](https://aws.amazon.com/eks/), you can do one of these
+   options:
+   - [Instance Profile](https://docs.aws.amazon.com/IAM/latest/UserGuide/id_roles_use_switch-role-ec2_instance-profiles.html)
+     can pass the IAM Role to an EC2 Instance
+   - [IAM Roles for Amazon EC2](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/iam-roles-for-amazon-ec2.html)
+     to attach the IAM Role to a running EC2 Instance
+   - [IAM roles for service accounts](https://docs.aws.amazon.com/eks/latest/userguide/iam-roles-for-service-accounts.html)
+     to associate the IAM Role to a
+     [Kubernetes Service Account](https://kubernetes.io/docs/tasks/configure-pod-container/configure-service-account/).
+
+Once your setup is ready, you can execute the live load from S3. As examples:
+
+```sh
+## short form of S3 URL
+dgraph live \
+  --files s3://// \
+  --schema s3://///schema.txt
+
+## long form of S3 URL
+dgraph live \
+  --files s3://s3..amazonaws.com// \
+  --schema s3://s3..amazonaws.com///schema.txt
+```
+
+
+  The short form of the S3 URL requires that the S3 URL is prefixed with
+  `s3:///` (notice the triple slash `///`). The long form for S3 buckets
+  requires a double slash, e.g. `s3://`.
+
+
+### Load from MinIO
+
+To live load from MinIO, you must have the following MinIO credentials set via
+environment variables:
+
+| Environment Variable | Description                                                             |
+| -------------------- | ----------------------------------------------------------------------- |
+| `MINIO_ACCESS_KEY`   | MinIO access key with permissions to write to the destination bucket.   |
+| `MINIO_SECRET_KEY`   | MinIO secret key with permissions to write to the destination bucket.   |
+
+Once your setup is ready, you can execute the live load from MinIO:
+
+```sh
+dgraph live \
+  --files minio://minio-server:port// \
+  --schema minio://minio-server:port///schema.txt
+```
+
+## Enterprise Features
+
+### Multi-tenancy (Enterprise Feature)
+
+Since [multi-tenancy](./multitenancy) requires ACL, when using Live Loader you
+must provide the login credentials using the `--creds` flag. By default,
+Live Loader loads the data into the user's namespace.
+
+[Guardians of the Galaxy](./multitenancy.md#guardians-of-the-galaxy) can load
+the data into multiple namespaces. Using `--force-namespace`, a _Guardian_ can
+load the data into the namespace specified in the data and schema files.
+
+
+  Live Loader requires that the `namespace` from the data and schema files
+  exist before loading the data.
+
+
+For example, to preserve the namespace while loading data, first create the
+namespace(s) and then run the live loader command:
+
+```sh
+dgraph live \
+  --schema /tmp/data/1million.schema \
+  --files /tmp/data/1million.rdf.gz --creds="user=groot;password=password;namespace=0" \
+  --force-namespace -1
+```
+
+A _Guardian of the Galaxy_ can also load data into a specific namespace. For
+example, to force the data loading into namespace `123`:
+
+```sh
+dgraph live \
+  --schema /tmp/data/1million.schema \
+  --files /tmp/data/1million.rdf.gz \
+  --creds="user=groot;password=password;namespace=0" \
+  --force-namespace 123
+```
+
+### Encrypted imports (Enterprise Feature)
+
+A new flag, `--encryption key-file=value`, is added to Live Loader. This option
+is required to decrypt the encrypted export data and schema files. Once the
+export files are decrypted, Live Loader streams the data to a live Alpha
+instance. Alternatively, starting with v20.07.0, the `vault_*` options can be
+used to decrypt the encrypted export and schema files.
+
+
+  If the live Alpha instance has encryption turned on, the `p` directory will be
+  encrypted. Otherwise, the `p` directory is unencrypted.
+
+
+For example, to load an encrypted RDF/JSON file and schema via Live Loader:
+
+```sh
+dgraph live \
+  --files  \
+  --schema  \
+  --encryption key-file=
+```
+
+You can import your encrypted data into a new Dgraph Alpha node without
+encryption enabled.
+
+```bash
+# Encryption Key from the file path
+dgraph live --files "" --schema "" \
+  --alpha "" --zero "" \
+  --encryption key-file=""
+
+# Encryption Key from HashiCorp Vault
+dgraph live --files "" --schema "" \
+  --alpha "" --zero "" \
+  --vault addr="http://localhost:8200";enc-field="enc_key";enc-format="raw";path="secret/data/dgraph/alpha";role-id-file="./role_id";secret-id-file="./secret_id"
+
+```
+
+## Other Live Loader options
+
+`--new_uids` (default: `false`): Assign new UIDs instead of using the existing
+UIDs in data files. This is useful to avoid overriding the data in a DB already
+in operation.
+
+`--format`: Specify file format (`rdf` or `json`) instead of getting it from
+filenames. This is useful if you need to define a strict format manually.
+
+`-b, --batch` (default: `1000`): Number of N-Quads to send as part of a
+mutation.
+
+`-c, --conc` (default: `10`): Number of concurrent requests to make to Dgraph.
+Do not confuse with `-C`.
+
+`-C, --use_compression` (default: `false`): Enable compression for connections
+to and from the Alpha server.
+
+The `--vault` [superflag's](./deploy/cli-command-reference) options specify the
+Vault server address, role id, secret id, and field that contains the encryption
+key required to decrypt the encrypted export.
diff --git a/dgraph/reference/howto/index.mdx b/dgraph/reference/howto/index.mdx
new file mode 100644
index 00000000..f048c7d0
--- /dev/null
+++ b/dgraph/reference/howto/index.mdx
@@ -0,0 +1,3 @@
+---
+title: How To Guides
+---
diff --git a/dgraph/reference/howto/jepsen-tests.mdx b/dgraph/reference/howto/jepsen-tests.mdx
new file mode 100644
index 00000000..81a72fac
--- /dev/null
+++ b/dgraph/reference/howto/jepsen-tests.mdx
@@ -0,0 +1,33 @@
+---
+title: Run Jepsen Tests
+---
+
+1. Clone the Jepsen repo at
+   [https://github.com/jepsen-io/jepsen](https://github.com/jepsen-io/jepsen).
+
+```sh
+git clone git@github.com:jepsen-io/jepsen.git
+```
+
+2. Run the following command to set up the instances from the repo.
+
+```sh
+cd docker && ./up.sh
+```
+
+This should start 5 Jepsen nodes in Docker containers.
+
+3. Now SSH into the `jepsen-control` container and run the tests.
+
+```sh
+docker exec -it jepsen-control bash
+```
+
+```sh
+root@control:/jepsen# cd dgraph
+root@control:/jepsen/dgraph# lein run test -w upsert
+
+# Specify a --package-url
+
+root@control:/jepsen/dgraph# lein run test --force-download --package-url https://github.com/dgraph-io/dgraph/releases/download/nightly/dgraph-linux-amd64.tar.gz -w upsert
+```
diff --git a/dgraph/reference/howto/load-balancing-nginx.mdx b/dgraph/reference/howto/load-balancing-nginx.mdx
new file mode 100644
index 00000000..b1f16af7
--- /dev/null
+++ b/dgraph/reference/howto/load-balancing-nginx.mdx
@@ -0,0 +1,184 @@
+---
+title: Load Balancing Queries with NGINX
+---
+
+There might be times when you'll want to set up a load balancer to accomplish
+goals such as increasing the utilization of your database by sending queries
+from the app to multiple database server replicas. You can follow these steps
+to get started.
+
+## Setting up NGINX load balancer using Docker Compose
+
+### Download ZIP
+
+Download the contents of this gist's ZIP file and extract it to a directory
+called `dgraph-nginx`, as follows:
+
+```sh
+mkdir dgraph-nginx
+cd dgraph-nginx
+wget -O dgraph-nginx.zip https://gist.github.com/danielmai/0cf7647b27c7626ad8944c4245a9981e/archive/5a2f1a49ca2f77bc39981749e4783e3443eb3ad9.zip
+unzip -j dgraph-nginx.zip
+```
+
+Two files will be created: `docker-compose.yml` and `nginx.conf`.
+
+### Start Dgraph cluster
+
+Start a 6-node Dgraph cluster (3 Dgraph Zero, 3 Dgraph Alpha, replication
+setting 3) by starting the Docker Compose config:
+
+```sh
+docker-compose up
+```
+
+## Setting up NGINX load balancer with Dgraph running directly on the host machine
+
+You can start your Dgraph cluster directly on the host machine (for example,
+with systemd) as follows:
+
+### Install NGINX
+
+After you have set up your Dgraph cluster, install the latest stable NGINX.
+On Debian and Ubuntu systems, use the following command:
+
+```sh
+apt-get install nginx
+```
+
+### Configure NGINX as a load balancer
+
+Make sure that your Dgraph cluster is up and running (in this case we will
+refer to a 6-node cluster). After installing NGINX, you can configure it for
+load balancing. You do this by specifying which types of connections to listen
+to, and where to redirect them. Create a new configuration file called
+`load-balancer.conf`:
+
+```sh
+sudo vim /etc/nginx/conf.d/load-balancer.conf
+```
+
+and edit it to read as follows:
+
+```sh
+upstream alpha_grpc {
+  server alpha1:9080;
+  server alpha2:9080;
+  server alpha3:9080;
+}
+
+upstream alpha_http {
+  server alpha1:8080;
+  server alpha2:8080;
+  server alpha3:8080;
+}
+
+# $upstream_addr is the ip:port of the Dgraph Alpha defined in the upstream
+# Example: 172.25.0.2, 172.25.0.7, 172.25.0.5 are the IP addresses of alpha1, alpha2, and alpha3
+# /var/log/nginx/access.log will contain these logs showing "localhost to "
+# for the different backends. By default, NGINX load balancing is round robin.
+
+log_format upstreamlog '[$time_local] $remote_addr - $remote_user - $server_name $host to: $upstream_addr: $request $status upstream_response_time $upstream_response_time msec $msec request_time $request_time';
+
+server {
+  listen 9080 http2;
+  access_log /var/log/nginx/access.log upstreamlog;
+  location / {
+    grpc_pass grpc://alpha_grpc;
+  }
+}
+
+server {
+  listen 8080;
+  access_log /var/log/nginx/access.log upstreamlog;
+  location / {
+    proxy_pass http://alpha_http;
+  }
+}
+```
+
+Next, disable the default server configuration; on Debian and Ubuntu systems
+you'll need to remove the default symbolic link from the **sites-enabled**
+folder.
+
+```sh
+rm /etc/nginx/sites-enabled/default
+```
+
+Now you can restart `nginx`:
+
+```sh
+systemctl restart nginx
+```
+
+## Use the increment tool to test the gRPC load balancer
+
+In a different shell, run the `dgraph increment`
+([docs](./howto/using-increment-tool)) tool against the NGINX gRPC load balancer
+(`nginx:9080`):
+
+```sh
+docker-compose exec alpha1 dgraph increment --alpha nginx:9080 --num=10
+```
+
+If you have Dgraph installed on your host machine, then you can also run this
+from the host:
+
+```sh
+dgraph increment --alpha localhost:9080 --num=10
+```
+
+The increment tool uses the Dgraph Go client to establish a gRPC connection
+against the `--alpha` flag and transactionally increments a counter predicate
+`--num` times.
+
+## Check logs
+
+Check the NGINX access logs in the `docker-compose up` shell window; if you are
+not using Docker Compose, you can tail the logs from
+`/var/log/nginx/access.log`. You'll see access logs like the following:
+
+
+  With gRPC load balancing, each request can hit a different Alpha node. This
+  can increase read throughput.
+
+
+```sh
+nginx_1 | [15/Jan/2020:03:12:02 +0000] 172.20.0.9 - - - nginx to: 172.20.0.7:9080: POST /api.Dgraph/Query HTTP/2.0 200 upstream_response_time 0.008 msec 1579057922.135 request_time 0.009
+nginx_1 | [15/Jan/2020:03:12:02 +0000] 172.20.0.9 - - - nginx to: 172.20.0.2:9080: POST /api.Dgraph/Query HTTP/2.0 200 upstream_response_time 0.012 msec 1579057922.149 request_time 0.013
+nginx_1 | [15/Jan/2020:03:12:02 +0000] 172.20.0.9 - - - nginx to: 172.20.0.5:9080: POST /api.Dgraph/Query HTTP/2.0 200 upstream_response_time 0.008 msec 1579057922.162 request_time 0.012
+nginx_1 | [15/Jan/2020:03:12:02 +0000] 172.20.0.9 - - - nginx to: 172.20.0.7:9080: POST /api.Dgraph/Query HTTP/2.0 200 upstream_response_time 0.012 msec 1579057922.176 request_time 0.013
+nginx_1 | [15/Jan/2020:03:12:02 +0000] 172.20.0.9 - - - nginx to: 172.20.0.2:9080: POST /api.Dgraph/Query HTTP/2.0 200 upstream_response_time 0.012 msec 1579057922.188 request_time 0.011
+nginx_1 | [15/Jan/2020:03:12:02 +0000] 172.20.0.9 - - - nginx to: 172.20.0.5:9080: POST /api.Dgraph/Query HTTP/2.0 200 upstream_response_time 0.016 msec 1579057922.202 request_time 0.013
+```
+
+These logs show that traffic is being load balanced to the following upstream
+addresses defined in `alpha_grpc` in `nginx.conf`:
+
+- `nginx to: 172.20.0.7`
+- `nginx to: 172.20.0.2`
+- `nginx to: 172.20.0.5`
+
+## Load balancing methods
+
+By default, NGINX load balancing is done round-robin. There are other
+load-balancing methods available, such as least connections or IP hashing. To
+use a different method than round-robin, specify the desired load-balancing
+method in the upstream section of `load-balancer.conf`.
+
+```sh
+# use least connection method
+upstream alpha_grpc {
+  least_conn;
+  server alpha1:9080;
+  server alpha2:9080;
+  server alpha3:9080;
+}
+
+upstream alpha_http {
+  least_conn;
+  server alpha1:8080;
+  server alpha2:8080;
+  server alpha3:8080;
+}
+```
diff --git a/dgraph/reference/howto/login-system.mdx b/dgraph/reference/howto/login-system.mdx
new file mode 100644
index 00000000..64b28a8b
--- /dev/null
+++ b/dgraph/reference/howto/login-system.mdx
@@ -0,0 +1,59 @@
+---
+title: A Simple Login System
+---
+
+
+  This example is based on part of the [transactions in
+  v0.9](https://blog.dgraph.io/post/v0.9/) blogpost. Error checking has been
+  omitted for brevity.
+
+
+Schema is assumed to be:
+
+```
+# The @upsert directive is important to detect conflicts.
+email: string @index(exact) @upsert .  # @index(hash) would also work
+pass: password .
+```
+
+```
+// Create a new transaction. The deferred call to Discard
+// ensures that server-side resources are cleaned up.
+txn := client.NewTxn()
+defer txn.Discard(ctx)
+
+// Create and execute a query that looks up an email and checks if the password
+// matches.
+q := fmt.Sprintf(`
+    {
+        login_attempt(func: eq(email, %q)) {
+            checkpwd(pass, %q)
+        }
+    }
+`, email, pass)
+resp, err := txn.Query(ctx, q)
+
+// Unmarshal the response into a struct. It will be empty if the email couldn't
+// be found. Otherwise it will contain a bool to indicate if the password matched.
+var login struct {
+    Account []struct {
+        CheckPwd bool `json:"checkpwd(pass)"`
+    } `json:"login_attempt"`
+}
+err = json.Unmarshal(resp.GetJson(), &login)
+
+// Now perform the upsert logic.
+if len(login.Account) == 0 {
+    fmt.Println("Account doesn't exist! Creating new account.")
+    mu := &protos.Mutation{
+        SetJson: []byte(fmt.Sprintf(`{ "email": %q, "pass": %q }`, email, pass)),
+    }
+    _, err = txn.Mutate(ctx, mu)
+    // Commit the mutation, making it visible outside of the transaction.
+    err = txn.Commit(ctx)
+} else if login.Account[0].CheckPwd {
+    fmt.Println("Login successful!")
+} else {
+    fmt.Println("Wrong email or password.")
+}
+```
diff --git a/dgraph/reference/howto/retrieving-debug-information.mdx b/dgraph/reference/howto/retrieving-debug-information.mdx
new file mode 100644
index 00000000..ac6ef025
--- /dev/null
+++ b/dgraph/reference/howto/retrieving-debug-information.mdx
@@ -0,0 +1,224 @@
+---
+title: Retrieving Debug Information
+---
+
+Each Dgraph data node exposes profiling information over the `/debug/pprof`
+endpoint and metrics over the `/debug/vars` endpoint. Each Dgraph data node has
+its own profiling and metrics information. Below is a list of debugging
+information exposed by Dgraph and the corresponding commands to retrieve them.
+
+## Metrics Information
+
+If you are collecting these metrics from outside the Dgraph instance, you need
+to pass the `--expose_trace=true` flag; otherwise, these metrics can be
+collected by connecting to the instance over localhost.
+
+```
+curl http://:/debug/vars
+```
+
+Metrics can also be retrieved in the Prometheus format at
+`/debug/prometheus_metrics`. See the [Metrics](./metrics) section for the full
+list of metrics.
+
+## Profiling Information
+
+Profiling information is available via the `go tool pprof` profiling tool built
+into Go. The
+["Profiling Go programs"](https://blog.golang.org/profiling-go-programs) Go blog
+post will help you get started with using pprof. Each Dgraph Zero and Dgraph
+Alpha exposes a debug endpoint at `/debug/pprof/` via the HTTP port.
+
+```
+go tool pprof http://:/debug/pprof/heap
+Fetching profile from ...
+Saved Profile in ...
+```
+
+The output of the command shows the location where the profile is stored.
+
+In the interactive pprof shell, you can use commands like `top` to get a listing
+of the top functions in the profile, `web` to get a visual graph of the profile
+opened in a web browser, or `list` to display a code listing with profiling
+information overlaid.
+
+### CPU Profile
+
+```
+go tool pprof http://:/debug/pprof/profile
+```
+
+### Memory Profile
+
+```
+go tool pprof http://:/debug/pprof/heap
+```
+
+### Block Profile
+
+By default, Dgraph doesn't collect the block profile. Dgraph must be started
+with `--profile_mode=block` and `--block_rate=<N>` with N > 1.
+
+```
+go tool pprof http://:/debug/pprof/block
+```
+
+### Goroutine stack
+
+The HTTP page `/debug/pprof/` is available at the HTTP port of a Dgraph Zero or
+Dgraph Alpha. From this page a link to the "full goroutine stack dump" is
+available (e.g., on a Dgraph Alpha this page would be at
+`http://localhost:8080/debug/pprof/goroutine?debug=2`). Looking at the full
+goroutine stack can be useful to understand goroutine usage at that moment.
+
+## Profiling Information with `debuginfo`
+
+Instead of sending a request to the server for each CPU, memory, and `goroutine`
+profile, you can use the `debuginfo` command to collect all of these profiles,
+along with several metrics.
+
+You can run the command like this:
+
+```sh
+dgraph debuginfo -a  -z  -d 
+```
+
+Your output should look like this:
+
+```log
+I0311 14:13:53.243667 32654 run.go:118] using directory /tmp/dgraph-debuginfo037351492 for debug info dump.
+I0311 14:13:53.243864 32654 debugging.go:68] fetching information over HTTP from http://localhost:8080/debug/pprof/heap +I0311 14:13:53.243872 32654 debugging.go:70] please wait... (30s) +I0311 14:13:53.245338 32654 debugging.go:58] saving heap metric in /tmp/dgraph-debuginfo037351492/alpha_heap.gz +I0311 14:13:53.245349 32654 debugging.go:68] fetching information over HTTP from http://localhost:8080/debug/pprof/profile?seconds=30 +I0311 14:13:53.245357 32654 debugging.go:70] please wait... (30s) +I0311 14:14:23.250079 32654 debugging.go:58] saving cpu metric in /tmp/dgraph-debuginfo037351492/alpha_cpu.gz +I0311 14:14:23.250148 32654 debugging.go:68] fetching information over HTTP from http://localhost:8080/state +I0311 14:14:23.250173 32654 debugging.go:70] please wait... (30s) +I0311 14:14:23.255467 32654 debugging.go:58] saving state metric in /tmp/dgraph-debuginfo037351492/alpha_state.gz +I0311 14:14:23.255507 32654 debugging.go:68] fetching information over HTTP from http://localhost:8080/health +I0311 14:14:23.255528 32654 debugging.go:70] please wait... (30s) +I0311 14:14:23.257453 32654 debugging.go:58] saving health metric in /tmp/dgraph-debuginfo037351492/alpha_health.gz +I0311 14:14:23.257507 32654 debugging.go:68] fetching information over HTTP from http://localhost:8080/jemalloc +I0311 14:14:23.257548 32654 debugging.go:70] please wait... (30s) +I0311 14:14:23.259009 32654 debugging.go:58] saving jemalloc metric in /tmp/dgraph-debuginfo037351492/alpha_jemalloc.gz +I0311 14:14:23.259055 32654 debugging.go:68] fetching information over HTTP from http://localhost:8080/debug/pprof/trace?seconds=30 +I0311 14:14:23.259091 32654 debugging.go:70] please wait... (30s) +I0311 14:14:53.266092 32654 debugging.go:58] saving trace metric in /tmp/dgraph-debuginfo037351492/alpha_trace.gz +I0311 14:14:53.266152 32654 debugging.go:68] fetching information over HTTP from http://localhost:8080/metrics +I0311 14:14:53.266181 32654 debugging.go:70] please wait... (30s) +I0311 14:14:53.276357 32654 debugging.go:58] saving metrics metric in /tmp/dgraph-debuginfo037351492/alpha_metrics.gz +I0311 14:14:53.276414 32654 debugging.go:68] fetching information over HTTP from http://localhost:8080/debug/vars +I0311 14:14:53.276439 32654 debugging.go:70] please wait... (30s) +I0311 14:14:53.278295 32654 debugging.go:58] saving vars metric in /tmp/dgraph-debuginfo037351492/alpha_vars.gz +I0311 14:14:53.278340 32654 debugging.go:68] fetching information over HTTP from http://localhost:8080/debug/pprof/trace?seconds=30 +I0311 14:14:53.278366 32654 debugging.go:70] please wait... (30s) +I0311 14:15:23.286770 32654 debugging.go:58] saving trace metric in /tmp/dgraph-debuginfo037351492/alpha_trace.gz +I0311 14:15:23.286830 32654 debugging.go:68] fetching information over HTTP from http://localhost:8080/debug/pprof/goroutine?debug=2 +I0311 14:15:23.286886 32654 debugging.go:70] please wait... (30s) +I0311 14:15:23.291120 32654 debugging.go:58] saving goroutine metric in /tmp/dgraph-debuginfo037351492/alpha_goroutine.gz +I0311 14:15:23.291164 32654 debugging.go:68] fetching information over HTTP from http://localhost:8080/debug/pprof/block +I0311 14:15:23.291192 32654 debugging.go:70] please wait... (30s) +I0311 14:15:23.304562 32654 debugging.go:58] saving block metric in /tmp/dgraph-debuginfo037351492/alpha_block.gz +I0311 14:15:23.304664 32654 debugging.go:68] fetching information over HTTP from http://localhost:8080/debug/pprof/mutex +I0311 14:15:23.304706 32654 debugging.go:70] please wait... 
(30s)
+I0311 14:15:23.309171 32654 debugging.go:58] saving mutex metric in /tmp/dgraph-debuginfo037351492/alpha_mutex.gz
+I0311 14:15:23.309228 32654 debugging.go:68] fetching information over HTTP from http://localhost:8080/debug/pprof/threadcreate
+I0311 14:15:23.309256 32654 debugging.go:70] please wait... (30s)
+I0311 14:15:23.313026 32654 debugging.go:58] saving threadcreate metric in /tmp/dgraph-debuginfo037351492/alpha_threadcreate.gz
+I0311 14:15:23.385359 32654 run.go:150] Debuginfo archive successful: dgraph-debuginfo037351492.tar.gz
+```
+
+When the command finishes, `debuginfo` returns the tarball's file name. If no
+destination has been specified, the file will be created in the same directory
+from where you ran the `debuginfo` command.
+
+The following files contain the metrics collected by the `debuginfo` command:
+
+```
+dgraph-debuginfo639541060
+├── alpha_block.gz
+├── alpha_goroutine.gz
+├── alpha_health.gz
+├── alpha_heap.gz
+├── alpha_jemalloc.gz
+├── alpha_mutex.gz
+├── alpha_profile.gz
+├── alpha_state.gz
+├── alpha_threadcreate.gz
+├── alpha_trace.gz
+├── zero_block.gz
+├── zero_goroutine.gz
+├── zero_health.gz
+├── zero_heap.gz
+├── zero_jemalloc.gz
+├── zero_mutex.gz
+├── zero_profile.gz
+├── zero_state.gz
+├── zero_threadcreate.gz
+└── zero_trace.gz
+```
+
+### Command parameters
+
+```txt
+  -a, --alpha string       Address of running dgraph alpha. (default "localhost:8080")
+  -x, --archive            Whether to archive the generated report (default true)
+  -d, --directory string   Directory to write the debug info into.
+  -h, --help               help for debuginfo
+  -m, --metrics strings    List of metrics & profiles to dump in the report. (default [heap,cpu,state,health,jemalloc,trace,metrics,vars,trace,goroutine,block,mutex,threadcreate])
+  -s, --seconds uint32     Duration for time-based metric collection. (default 30)
+  -z, --zero string        Address of running dgraph zero.
+```
+
+#### The metrics flag (`-m`)
+
+By default, `debuginfo` collects:
+
+- `heap`
+- `cpu`
+- `state`
+- `health`
+- `jemalloc`
+- `trace`
+- `metrics`
+- `vars`
+- `trace`
+- `goroutine`
+- `block`
+- `mutex`
+- `threadcreate`
+
+If needed, you can collect some of them (not necessarily all). For example, this
+command will collect only the `jemalloc` and `health` profiles:
+
+```sh
+dgraph debuginfo -m jemalloc,health
+```
+
+### Profiles details
+
+- `cpu profile`: CPU profile determines where a program spends its time while
+  actively consuming CPU cycles (as opposed to while sleeping or waiting for
+  I/O).
+
+- `heap`: Heap profile reports memory allocation samples; used to monitor
+  current and historical memory usage, and to check for memory leaks.
+
+- `threadcreate`: Thread creation profile reports the sections of the program
+  that lead to the creation of new OS threads.
+
+- `goroutine`: Goroutine profile reports the stack traces of all current
+  goroutines.
+
+- `block`: Block profile shows where goroutines block waiting on synchronization
+  primitives (including timer channels).
+
+- `mutex`: Mutex profile reports the lock contentions. When you think your CPU
+  is not fully utilized due to a mutex contention, use this profile.
+
+- `trace`: captures a wide range of runtime events. The execution tracer is a
+  tool to detect latency and utilization problems. You can examine how well the
+  CPU is utilized, and when networking or syscalls are a cause of preemption for
+  the goroutines. The tracer is useful for identifying poorly parallelized
+  execution, understanding some of the core runtime events, and seeing how your
+  goroutines execute.
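+
+Once the archive is extracted, each collected profile can be opened with Go's
+pprof tool, just as with profiles fetched over HTTP. A sketch (the directory
+name follows the example above):
+
+```sh
+# Open the heap profile collected by debuginfo; `top`, `list`, and `web`
+# work in the interactive pprof shell as usual.
+go tool pprof dgraph-debuginfo639541060/alpha_heap.gz
+```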
diff --git a/dgraph/reference/howto/update-dgraph-types.mdx b/dgraph/reference/howto/update-dgraph-types.mdx
new file mode 100644
index 00000000..217502f8
--- /dev/null
+++ b/dgraph/reference/howto/update-dgraph-types.mdx
@@ -0,0 +1,84 @@
+---
+title: Update Dgraph types
+---
+
+You modify Dgraph types (node types and predicate types) by
+
+- issuing a request to the `/alter` endpoint using the
+  [HTTP Client](/raw-http#alter-the-dql-schema)
+- using an `alter` operation of any [DQL client library](/dql/clients).
+- using [Ratel UI](./ratel/schema)
+- using the Cloud console through the
+  [DQL Schema](https://cloud.dgraph.io/_/schema?tab=dqlschema) tab of the Schema
+  section.
+
+### Notes about predicate type change
+
+If data is already stored, existing values are not checked to conform to the
+updated predicate type.
+
+On query, Dgraph tries to convert existing values to the new predicate type and
+ignores any that fail conversion.
+
+If data exists and new indexes are specified, any old index not in the updated
+schema is dropped. New indexes are created.
+
+## Indexes in Background
+
+Indexes may take a long time to compute, depending on the size of the data.
+
+Indexes can be computed in the background, and thus indexing may still be
+running after an Alter operation returns.
+
+To run index computation in the background, set the flag `runInBackground` to
+`true`.
+
+```sh
+curl localhost:8080/alter?runInBackground=true -XPOST -d $'
+    name: string @index(fulltext, term) .
+    age: int @index(int) @upsert .
+    friend: [uid] @count @reverse .
+' | python -m json.tool | less
+```
+
+```go
+op := &api.Operation{}
+op.Schema = `
+  name: string @index(fulltext, term) .
+  age: int @index(int) @upsert .
+  friend: [uid] @count @reverse .
+`
+op.RunInBackground = true
+err = dg.Alter(context.Background(), op)
+```
+
+### Notes
+
+Queries that require the new indices will fail if executed before indexing
+finishes, with an error noting that a given predicate is not indexed or doesn't
+have reverse edges.
+
+In a multi-node cluster, it is possible that the Alphas will finish computing
+indexes at different times. In such a case, the Alphas may return different
+schemas until all the indexes are done computing on all the Alphas.
+
+You can check the background indexing status using the
+[Health](./dgraph-alpha#querying-health) query on the `/admin` endpoint.
+
+An alter operation will fail if one is already in progress, with the error
+`schema is already being modified. Please retry`.
+
+Dgraph will report the indexes in the schema only when the indexes are done
+computing.
+
+## Deleting a node type
+
+Type definitions can be deleted using the Alter endpoint.
+
+Below is an example deleting the type `Person` using the Go client:
+
+```go
+err := c.Alter(context.Background(), &api.Operation{
+    DropOp:    api.Operation_TYPE,
+    DropValue: "Person"})
+```
diff --git a/dgraph/reference/howto/upserts.mdx b/dgraph/reference/howto/upserts.mdx
new file mode 100644
index 00000000..09d34b2b
--- /dev/null
+++ b/dgraph/reference/howto/upserts.mdx
@@ -0,0 +1,75 @@
+---
+title: Upserts
+---
+
+Upsert-style operations are operations where:
+
+1. A node is searched for, and then
+2. Depending on whether it is found, either:
+   - some of its attributes are updated, or
+   - a new node is created with those attributes.
+
+The upsert has to be an atomic operation such that either a new node is created,
+or an existing node is modified. Two concurrent upserts are not allowed to
+both create a new node.
+
+There are many examples where upserts are useful. Most examples involve creating
+a 1-to-1 mapping between two different entities, e.g. associating email
+addresses with user accounts.
+
+Upserts are common in both traditional RDBMSs and newer NoSQL databases. Dgraph
+is no exception.
+
+## Upsert Procedure
+
+In Dgraph, upsert-style behavior can be implemented by users on top of
+transactions. The steps are as follows:
+
+1. Create a new transaction.
+
+2. Query for the node. This will usually be as simple as
+   `{ q(func: eq(email, "bob@example.com")) { uid }}`. If a `uid` result is
+   returned, then that's the `uid` for the existing node. If no results are
+   returned, then the user account doesn't exist.
+
+3. In the case where the user account doesn't exist, then a new node has to be
+   created. This is done in the usual way by making a mutation (inside the
+   transaction), e.g. the RDF `_:newAccount <email> "bob@example.com" .`. The
+   `uid` assigned can be accessed by looking up the blank node name `newAccount`
+   in the `Assigned` object returned from the mutation.
+
+4. Now that you have the `uid` of the account (either new or existing), you can
+   modify the account (using additional mutations) or perform queries on it in
+   whichever way you wish.
+
+## Upserts in DQL and GraphQL
+
+You can also use the `Upsert Block` in DQL to achieve the upsert procedure in a
+single mutation. The request will contain both the query and the mutation as
+explained [here](./dql-mutation.md#Update data with upsert block).
+
+In GraphQL, you can use the `upsert` input variable in an `add` mutation, as
+explained [here](./graphql/mutations/upsert.md).
+
+## Conflicts
+
+Upsert operations are intended to be run concurrently, as per the needs of the
+application. As such, it's possible that two concurrently running operations
+could try to add the same node at the same time. For example, both try to add a
+user with the same email address. If they do, then one of the transactions will
+fail with an error indicating that the transaction was aborted.
+
+If this happens, the transaction is rolled back and it's up to the user's
+application logic to retry the whole operation. The transaction has to be
+retried in its entirety, all the way from creating a new transaction.
+
+The choice of index placed on the predicate is important for performance. **Hash
+is almost always the best choice of index for equality checking.**
+
+
+  It's the _index_ that typically causes upsert conflicts to occur. The index is
+  stored as many key/value pairs, where each key is a combination of the
+  predicate name and some function of the predicate value (e.g. its hash for the
+  hash index). If two transactions modify the same key concurrently, then one
+  will fail.
+
diff --git a/dgraph/reference/howto/using-debug-tool.mdx b/dgraph/reference/howto/using-debug-tool.mdx
new file mode 100644
index 00000000..c47ab314
--- /dev/null
+++ b/dgraph/reference/howto/using-debug-tool.mdx
@@ -0,0 +1,273 @@
+---
+title: Using the Debug Tool
+---
+
+
+  To debug a running Dgraph cluster, first copy the postings ("p") directory to
+  another location. If the Dgraph cluster isn't running, then you can use the
+  same postings directory with the debug tool. If the "p" directory has been
+  encrypted, then the debug tool will need to use the `--keyfile` option. This
+  file must contain the same key that was used to encrypt the "p" directory.
+
+
+The `dgraph debug` tool can be used to inspect Dgraph's posting list structure.
+You can use the debug tool to inspect the data, schema, and indices of your
+Dgraph cluster.
+
+Some scenarios where the debug tool is useful:
+
+- Verify that mutations committed to Dgraph have been persisted to disk.
+- Verify that indices are created.
+- Inspect the history of a posting list.
+- Parse a Badger key into a meaningful struct.
+
+## Example Usage
+
+Debug the p directory.
+
+```sh
+dgraph debug --postings ./p
+```
+
+Debug the p directory, not opening in read-only mode. This is typically
+necessary when the database was not closed properly.
+
+```sh
+dgraph debug --postings ./p --readonly=false
+```
+
+Debug the p directory, only outputting the keys for the predicate `0-name`. Note
+that 0 is the namespace and name is the predicate.
+
+```sh
+dgraph debug --postings ./p --readonly=false --pred=0-name
+```
+
+Debug the p directory, looking up a particular key:
+
+```sh
+dgraph debug --postings ./p --lookup 01000000000000000000046e616d65
+```
+
+Debug the p directory, inspecting the history of a particular key:
+
+```sh
+dgraph debug --postings ./p --lookup 01000000000000000000046e616d65 --history
+```
+
+Debug an encrypted p directory with the key in a local file at the path
+./key_file:
+
+```sh
+dgraph debug --postings ./p --encryption=key-file=./key_file
+```
+
+
+
+The key file contains the key used to decrypt/encrypt the db. This key should be
+kept secret. As a best practice,
+
+- Do not store the key file on the disk permanently. Back it up in a safe place
+  and delete it after using it with the debug tool.
+
+- If the above is not possible, make sure correct privileges are set on the
+  keyfile. Only the user who owns the dgraph process should be able to read /
+  write the key file: `chmod 600`
+
+
+
+## Debug Tool Output
+
+Let's go over an example: a Dgraph cluster with the following schema (using a
+term index and a full-text index) and two separately committed mutations:
+
+```sh
+$ curl localhost:8080/alter -d '
+  name: string @index(term) .
+  url: string .
+  description: string @index(fulltext) .
+'
+```
+
+```sh
+$ curl -H "Content-Type: application/rdf" "localhost:8080/mutate?commitNow=true" -d '{
+  set {
+    _:dgraph <name> "Dgraph" .
+    _:dgraph <dgraph.type> "Software" .
+    _:dgraph <url> "https://github.com/dgraph-io/dgraph" .
+    _:dgraph <description> "Fast, Transactional, Distributed Graph Database." .
+  }
+}'
+```
+
+```sh
+$ curl -H "Content-Type: application/rdf" "localhost:8080/mutate?commitNow=true" -d '{
+  set {
+    _:badger <name> "Badger" .
+    _:badger <dgraph.type> "Software" .
+    _:badger <url> "https://github.com/dgraph-io/badger" .
+    _:badger <description> "Embeddable, persistent and fast key-value (KV) database written in pure Go." .
+  }
+}'
+```
+
+After stopping Dgraph, you can run the debug tool to inspect the postings
+directory:
+
+
+  The debug output can be very large. Typically you would redirect the debug
+  tool to a file first for easier analysis.
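+
+For example, a sketch of capturing the output to a file:
+
+```sh
+# Redirect stdout and stderr for later inspection.
+dgraph debug --postings ./p > debug_out.txt 2>&1
+```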
+ + +```sh +dgraph debug --postings ./p +``` + +```text +Opening DB: ./p + +prefix = +{d} ns: 0x0 attr: url uid: 1 ts: 5 item: [79, b0100] sz: 79 dcnt: 1 key: 000000000000000000000375726c000000000000000001 +{d} ns: 0x0 attr: url uid: 2 ts: 8 item: [108, b1000] sz: 108 dcnt: 0 isz: 187 icount: 2 key: 000000000000000000000375726c000000000000000002 +{d} ns: 0x0 attr: name uid: 1 ts: 5 item: [51, b0100] sz: 51 dcnt: 1 key: 00000000000000000000046e616d65000000000000000001 +{d} ns: 0x0 attr: name uid: 2 ts: 8 item: [80, b1000] sz: 80 dcnt: 0 isz: 131 icount: 2 key: 00000000000000000000046e616d65000000000000000002 +{i} ns: 0x0 attr: name term: [1] [badger] ts: 8 item: [41, b1000] sz: 41 dcnt: 0 isz: 79 icount: 2 key: 00000000000000000000046e616d650201626164676572 +{i} ns: 0x0 attr: name term: [1] [dgraph] ts: 5 item: [38, b0100] sz: 38 dcnt: 1 key: 00000000000000000000046e616d650201646772617068 +{d} ns: 0x0 attr: description uid: 1 ts: 5 item: [100, b0100] sz: 100 dcnt: 1 key: 000000000000000000000b6465736372697074696f6e000000000000000001 +{d} ns: 0x0 attr: description uid: 2 ts: 8 item: [156, b1000] sz: 156 dcnt: 0 isz: 283 icount: 2 key: 000000000000000000000b6465736372697074696f6e000000000000000002 +{i} ns: 0x0 attr: description term: [8] [databas] ts: 8 item: [49, b1000] sz: 49 dcnt: 0 isz: 141 icount: 3 key: 000000000000000000000b6465736372697074696f6e020864617461626173 +{i} ns: 0x0 attr: description term: [8] [distribut] ts: 5 item: [48, b0100] sz: 48 dcnt: 1 key: 000000000000000000000b6465736372697074696f6e0208646973747269627574 +{i} ns: 0x0 attr: description term: [8] [embedd] ts: 8 item: [48, b1000] sz: 48 dcnt: 0 isz: 93 icount: 2 key: 000000000000000000000b6465736372697074696f6e0208656d62656464 +{i} ns: 0x0 attr: description term: [8] [fast] ts: 8 item: [46, b1000] sz: 46 dcnt: 0 isz: 132 icount: 3 key: 000000000000000000000b6465736372697074696f6e020866617374 +{i} ns: 0x0 attr: description term: [8] [go] ts: 8 item: [44, b1000] sz: 44 dcnt: 0 isz: 85 icount: 2 key: 000000000000000000000b6465736372697074696f6e0208676f +{i} ns: 0x0 attr: description term: [8] [graph] ts: 5 item: [44, b0100] sz: 44 dcnt: 1 key: 000000000000000000000b6465736372697074696f6e02086772617068 +{i} ns: 0x0 attr: description term: [8] [kei] ts: 8 item: [45, b1000] sz: 45 dcnt: 0 isz: 87 icount: 2 key: 000000000000000000000b6465736372697074696f6e02086b6569 +{i} ns: 0x0 attr: description term: [8] [kv] ts: 8 item: [44, b1000] sz: 44 dcnt: 0 isz: 85 icount: 2 key: 000000000000000000000b6465736372697074696f6e02086b76 +{i} ns: 0x0 attr: description term: [8] [persist] ts: 8 item: [49, b1000] sz: 49 dcnt: 0 isz: 95 icount: 2 key: 000000000000000000000b6465736372697074696f6e020870657273697374 +{i} ns: 0x0 attr: description term: [8] [pure] ts: 8 item: [46, b1000] sz: 46 dcnt: 0 isz: 89 icount: 2 key: 000000000000000000000b6465736372697074696f6e020870757265 +{i} ns: 0x0 attr: description term: [8] [transact] ts: 5 item: [47, b0100] sz: 47 dcnt: 1 key: 000000000000000000000b6465736372697074696f6e02087472616e73616374 +{i} ns: 0x0 attr: description term: [8] [valu] ts: 8 item: [46, b1000] sz: 46 dcnt: 0 isz: 89 icount: 2 key: 000000000000000000000b6465736372697074696f6e020876616c75 +{i} ns: 0x0 attr: description term: [8] [written] ts: 8 item: [49, b1000] sz: 49 dcnt: 0 isz: 95 icount: 2 key: 000000000000000000000b6465736372697074696f6e02087772697474656e +{d} ns: 0x0 attr: dgraph.type uid: 1 ts: 5 item: [60, b0100] sz: 60 dcnt: 1 key: 000000000000000000000b6467726170682e74797065000000000000000001 +{d} ns: 0x0 attr: 
dgraph.type uid: 2 ts: 8 item: [88, b1000] sz: 88 dcnt: 0 isz: 148 icount: 2 key: 000000000000000000000b6467726170682e74797065000000000000000002 +{i} ns: 0x0 attr: dgraph.type term: [2] [Software] ts: 8 item: [50, b1000] sz: 50 dcnt: 0 isz: 144 icount: 3 key: 000000000000000000000b6467726170682e747970650202536f667477617265 +{s} ns: 0x0 attr: url ts: 3 item: [23, b0001] sz: 23 dcnt: 0 isz: 23 icount: 1 key: 010000000000000000000375726c +{s} ns: 0x0 attr: name ts: 3 item: [33, b0001] sz: 33 dcnt: 0 isz: 33 icount: 1 key: 01000000000000000000046e616d65 +{s} ns: 0x0 attr: description ts: 3 item: [51, b0001] sz: 51 dcnt: 0 isz: 51 icount: 1 key: 010000000000000000000b6465736372697074696f6e +{s} ns: 0x0 attr: dgraph.type ts: 1 item: [50, b0001] sz: 50 dcnt: 0 isz: 50 icount: 1 key: 010000000000000000000b6467726170682e74797065 +{s} ns: 0x0 attr: dgraph.drop.op ts: 1 item: [45, b0001] sz: 45 dcnt: 0 isz: 45 icount: 1 key: 010000000000000000000e6467726170682e64726f702e6f70 +{s} ns: 0x0 attr: dgraph.graphql.xid ts: 1 item: [64, b0001] sz: 64 dcnt: 0 isz: 64 icount: 1 key: 01000000000000000000126467726170682e6772617068716c2e786964 +{s} ns: 0x0 attr: dgraph.graphql.schema ts: 1 item: [59, b0001] sz: 59 dcnt: 0 isz: 59 icount: 1 key: 01000000000000000000156467726170682e6772617068716c2e736368656d61 +{s} ns: 0x0 attr: dgraph.graphql.p_query ts: 1 item: [71, b0001] sz: 71 dcnt: 0 isz: 71 icount: 1 key: 01000000000000000000166467726170682e6772617068716c2e705f7175657279 + ns: 0x0 attr: dgraph.graphql ts: 1 item: [98, b0001] sz: 98 dcnt: 0 isz: 98 icount: 1 key: 020000000000000000000e6467726170682e6772617068716c + ns: 0x0 attr: dgraph.graphql.persisted_query ts: 1 item: [105, b0001] sz: 105 dcnt: 0 isz: 105 icount: 1 key: 020000000000000000001e6467726170682e6772617068716c2e7065727369737465645f7175657279 + +Found 34 keys +``` + +Each line in the debug output contains a prefix indicating the type of the key: +`{d}`: Data key; `{i}`: Index key; `{c}`: Count key; `{r}`: Reverse key; `{s}`: +Schema key. In the debug output above, we see data keys, index keys, and schema +keys. + +Each index key has a corresponding index type. For example, in +`attr: name term: [1] [dgraph]` the `[1]` shows that this is the term index +([0x1][tok_term]); in `attr: description term: [8] [fast]`, the `[8]` shows that +this is the full-text index ([0x8][tok_fulltext]). These IDs match the index IDs +in [tok.go][tok]. + +[tok_term]: + https://github.com/dgraph-io/dgraph/blob/ce82aaafba3d9e57cf5ea1aeb9b637193441e1e2/tok/tok.go#L39 +[tok_fulltext]: + https://github.com/dgraph-io/dgraph/blob/ce82aaafba3d9e57cf5ea1aeb9b637193441e1e2/tok/tok.go#L48 +[tok]: + https://github.com/dgraph-io/dgraph/blob/ce82aaafba3d9e57cf5ea1aeb9b637193441e1e2/tok/tok.go#L37-L53 + +## Key Lookup + +Every key can be inspected further with the `--lookup` flag for the specific +key. + +```sh +dgraph debug --postings ./p --lookup 000000000000000000000b6465736372697074696f6e020866617374 +``` + +```text +Opening DB: ./p + +Key: 000000000000000000000b6465736372697074696f6e020866617374 Length: 2 Is multi-part list? false Uid: 1 Op: 0 + Uid: 2 Op: 0 +``` + +For data keys, a lookup shows its type and value. Below, we see that the key for +`attr: url uid: 1` is a string value. + +```sh +$ dgraph debug --postings ./p --lookup 000000000000000000000375726c000000000000000001 +``` + +```text +Opening DB: ./p + +Key: 000000000000000000000375726c000000000000000001 Length: 1 Is multi-part list? false Uid: 18446744073709551615 Op: 1 Type: STRING. 
String Value: "https://github.com/dgraph-io/dgraph"
+```
+
+For index keys, a lookup shows the UIDs that are part of this index. Below, we
+see that the `fast` index for the `description` predicate has UIDs 0x1 and
+0x2.
+
+```sh
+$ dgraph debug --postings ./p --lookup 000000000000000000000b6465736372697074696f6e020866617374
+```
+
+```text
+Opening DB: ./p
+Key: 000000000000000000000b6465736372697074696f6e020866617374 Length: 2 Is multi-part list? false Uid: 1 Op: 0
+ Uid: 2 Op: 0
+```
+
+## Key history
+
+You can also look up the history of values for a key using the `--history`
+option.
+
+```sh
+dgraph debug --postings ./p --lookup 000000000000000000000b6465736372697074696f6e020866617374 --history
+```
+
+```text
+Opening DB: ./p
+
+==> key: 000000000000000000000b6465736372697074696f6e020866617374. PK: UID: 0, Attr: 0-description, IsIndex: true, Term: 0
+ts: 8 {item}{discard}{complete}
+ Num uids = 2. Size = 16
+ Uid = 1
+ Uid = 2
+
+ts: 7 {item}{delta}
+ Uid: 2 Op: 1
+
+ts: 5 {item}{delta}
+ Uid: 1 Op: 1
+```
+
+Above, we see that UID 0x1 was committed to this index at ts 5, and UID 0x2 was
+committed to this index at ts 7.
+
+The debug output also shows UserMeta information:
+
+- `{complete}`: Complete posting list
+- `{uid}`: UID posting list
+- `{delta}`: Delta posting list
+- `{empty}`: Empty posting list
+- `{item}`: Item posting list
+- `{deleted}`: Delete marker
+
+## Parse Key
+
+You can parse a key into its constituent components using `--parse_key`. This
+does not require a `p` directory.
+
+```sh
+dgraph debug --parse_key 000000000000000000000b6467726170682e74797065000000000000000001
+```
+
+```text
+{d} Key: UID: 1, Attr: 0-dgraph.type, Data key
+```
diff --git a/dgraph/reference/howto/using-increment-tool.mdx b/dgraph/reference/howto/using-increment-tool.mdx
new file mode 100644
index 00000000..3a0d9344
--- /dev/null
+++ b/dgraph/reference/howto/using-increment-tool.mdx
@@ -0,0 +1,105 @@
+---
+title: Using the Increment Tool
+---
+
+The `dgraph increment` tool increments a counter value transactionally. The
+increment tool can be used as a health check to verify that an Alpha is able to
+service transactions for both queries and mutations.
+
+## Example Usage
+
+Increment the default predicate (`counter.val`) once. If the predicate doesn't
+yet exist, it is created starting at counter 0.
+
+```sh
+$ dgraph increment
+```
+
+Increment the counter predicate against the Alpha running at the address given
+by `--alpha` (default: `localhost:9080`):
+
+```sh
+$ dgraph increment --alpha=192.168.1.10:9080
+```
+
+Increment the counter predicate specified by `--pred` (default: `counter.val`):
+
+```sh
+$ dgraph increment --pred=counter.val.healthcheck
+```
+
+Run a read-only query for the counter predicate without running a mutation to
+increment it:
+
+```sh
+$ dgraph increment --ro
+```
+
+Run a best-effort query for the counter predicate without running a mutation to
+increment it:
+
+```sh
+$ dgraph increment --be
+```
+
+Run the increment tool 1000 times, waiting 1 second between each increment:
+
+```sh
+$ dgraph increment --num=1000 --wait=1s
+```
+
+## Increment Tool Output
+
+```sh
+# Run increment a few times
+$ dgraph increment
+0410 10:31:16.379 Counter VAL: 1 [ Ts: 1 ]
+$ dgraph increment
+0410 10:34:53.017 Counter VAL: 2 [ Ts: 3 ]
+$ dgraph increment
+0410 10:34:53.648 Counter VAL: 3 [ Ts: 5 ]
+
+# Run read-only queries to read the counter a few times
+$ dgraph increment --ro
+0410 10:34:57.35 Counter VAL: 3 [ Ts: 7 ]
+$ dgraph increment --ro
+0410 10:34:57.886 Counter VAL: 3 [ Ts: 7 ]
+$ dgraph increment --ro
+0410 10:34:58.129 Counter VAL: 3 [ Ts: 7 ]
+
+# Run best-effort queries to read the counter a few times
+$ dgraph increment --be
+0410 10:34:59.867 Counter VAL: 3 [ Ts: 7 ]
+$ dgraph increment --be
+0410 10:35:01.322 Counter VAL: 3 [ Ts: 7 ]
+$ dgraph increment --be
+0410 10:35:02.674 Counter VAL: 3 [ Ts: 7 ]
+
+# Run a read-only query to read the counter 5 times
+$ dgraph increment --ro --num=5
+0410 10:35:18.812 Counter VAL: 3 [ Ts: 7 ]
+0410 10:35:18.813 Counter VAL: 3 [ Ts: 7 ]
+0410 10:35:18.815 Counter VAL: 3 [ Ts: 7 ]
+0410 10:35:18.817 Counter VAL: 3 [ Ts: 7 ]
+0410 10:35:18.818 Counter VAL: 3 [ Ts: 7 ]
+
+# Increment the counter 5 times
+$ dgraph increment --num=5
+0410 10:35:24.028 Counter VAL: 4 [ Ts: 8 ]
+0410 10:35:24.061 Counter VAL: 5 [ Ts: 10 ]
+0410 10:35:24.104 Counter VAL: 6 [ Ts: 12 ]
+0410 10:35:24.145 Counter VAL: 7 [ Ts: 14 ]
+0410 10:35:24.178 Counter VAL: 8 [ Ts: 16 ]
+
+# Increment the counter 5 times, once every second
+$ dgraph increment --num=5 --wait=1s
+0410 10:35:26.95 Counter VAL: 9 [ Ts: 18 ]
+0410 10:35:27.975 Counter VAL: 10 [ Ts: 20 ]
+0410 10:35:28.999 Counter VAL: 11 [ Ts: 22 ]
+0410 10:35:30.028 Counter VAL: 12 [ Ts: 24 ]
+0410 10:35:31.054 Counter VAL: 13 [ Ts: 26 ]
+
+# If the Alpha is too busy or unhealthy, the tool times out and retries
+$ dgraph increment
+0410 10:36:50.857 While trying to process counter: Query error: rpc error: code = DeadlineExceeded desc = context deadline exceeded. Retrying...
+```
diff --git a/dgraph/reference/learn/administrator/index.mdx b/dgraph/reference/learn/administrator/index.mdx
new file mode 100644
index 00000000..4d891330
--- /dev/null
+++ b/dgraph/reference/learn/administrator/index.mdx
@@ -0,0 +1,19 @@
+---
+title: Dgraph for Administrators
+description:
+  From learning the basics of graph databases to advanced functions and
+  capabilities, Dgraph docs have the information you need.
+---
+
+### Recommended learning path
+
+- See [Dgraph Overview](./dgraph-overview) for an introduction to the Dgraph
+  database and a presentation of the Dgraph cluster architecture.
+- Get familiar with some terms in the [Glossary](./dgraph-glossary).
+- **Dgraph Cloud**
+  - Learn from the [Dgraph Cloud](./cloud) section of our documentation.
+  - Refer to [Administering Tasks](./cloud/admin).
+- **Dgraph Community and Dgraph Enterprise** (self-managed)
+  - Refer to [Self-managed cluster](./deploy) to learn how to deploy and manage
+    Dgraph database in a variety of self-managed deployment scenarios.
+  - [Dgraph Administration](./deploy/admin) describes the admin operations.
diff --git a/dgraph/reference/learn/data-engineer/data-model-101/01-dm-101-introduction.mdx b/dgraph/reference/learn/data-engineer/data-model-101/01-dm-101-introduction.mdx
new file mode 100644
index 00000000..e2d24732
--- /dev/null
+++ b/dgraph/reference/learn/data-engineer/data-model-101/01-dm-101-introduction.mdx
@@ -0,0 +1,48 @@
+---
+title: Graphs and Natural Data Modeling
+description:
+  Graphs provide an alternative to tabular data structures, allowing for a more
+  natural way to store and retrieve data
+---
+
+Graphs provide an alternative to tabular data structures, allowing for a more
+natural way to store and retrieve data.
+
+For example, imagine that we are modeling a conversation within a family:
+
+- A `father`, who starts a conversation about going to get ice cream.
+- A `mother`, who comments that she would also like ice cream.
+- A `child`, who likes the idea of the family going to get ice cream.
+
+This conversation could easily occur in the context of a modern social media or
+messaging app, so you can imagine the data model for such an app as follows:
+
+![A graph diagram for a social media app's data model](/images/data-model/evolution-3.png)
+
+For the remainder of this module, we will use this as our example application: a
+basic social media or messaging app, with a data model that includes `people`,
+`posts`, `comments`, and `reactions`.
+
+A graph data model is different from a relational model. A graph focuses on the
+relationships between information, whereas a relational model focuses on storing
+similar information in a list. The graph model received its name because it
+resembles a graph when illustrated.
+
+- Data objects are called _nodes_ and are illustrated with a circle.
+- Properties of nodes are called _predicates_ and are illustrated as a panel on
+  the node.
+- Relationships between nodes are called _edges_ and are illustrated as
+  connecting lines. Edges are named to describe the relationship between two
+  nodes. A `reaction` is an example of an edge, in which a person reacts to a
+  post.
+
+Some illustrations omit the predicates panel and show only the nodes and edges.
+
+Referring back to the example app, the `father`, `mother`, `child`, `post`, and
+`comment` are nodes. The names of the people, the post's title, and the text of
+the comment are the predicates. The natural relationships between the authors of
+the posts, the authors of the comments, and the comments' topics are edges.
+
+As you can see, a graph models data in a natural way that shows the
+relationships (edges) between the entities (nodes) that contain predicates.
diff --git a/dgraph/reference/learn/data-engineer/data-model-101/02-relational-data-model.mdx b/dgraph/reference/learn/data-engineer/data-model-101/02-relational-data-model.mdx
new file mode 100644
index 00000000..f14091be
--- /dev/null
+++ b/dgraph/reference/learn/data-engineer/data-model-101/02-relational-data-model.mdx
@@ -0,0 +1,77 @@
+---
+title: Relational Data Modeling
+description:
+  Changing the schema in a relational model directly affects the data that is
+  held by the model, and can impact database query performance.
+---
+
+This section considers the example social media app introduced in the previous
+section and discusses how it could be modeled with a traditional relational data
+model, such as those used by SQL databases.
+
+With relational data models, you create lists of each type of data in tables,
+and then add columns in those tables to track the attributes of that table's
+data. Looking back on our data, we remember that we have three main types:
+`People`, `Posts`, and `Comments`.
+
+![Three tables](/images/data-model/evolution-4.png)
+
+To define relationships between records in two tables, a relational data model
+uses numeric identifiers called _foreign keys_, which take the form of table
+columns. Foreign keys can only model one-to-many relationship types, such as the
+following:
+
+- The relationship from `Posts` to `People`, to track contributors (authors,
+  editors, etc.) of a `Post`
+- The relationship from `Comments` to `People`, to track the author of the
+  comment
+- The relationship from `Comments` to `Posts`, to track on which post comments
+  were made
+- The relationship between rows in the `Comments` table, to track comments made
+  in reply to other comments (a self-reference relationship)
+
+![Relationships between rows in tables](/images/data-model/evolution-5.png)
+
+The limitations of foreign keys become apparent when your app requires you to
+model many-to-many relationships. In our example app, a person can like many
+posts or comments, and posts and comments can be liked by many people. The only
+way to model this relationship in a relational database is to create a new
+table. This so-called _pivot table_ usually does not store any information
+itself; it just stores links between two other tables.
+
+In our example app, we decided to limit the number of tables by having a single
+“Likes” table instead of having `people_like_posts` and `people_like_comments`
+tables. None of these solutions is perfect, though, and there is a trade-off
+between having a lower table count or having more empty fields in our tables
+(also known as "sparse data").
+
+![An illustration of sparse data when creating a Likes table](/images/data-model/evolution-6.png)
+
+Because foreign keys cannot be added in reference to entities that do not exist,
+adding new posts and authors requires additional work. To add a new post and a
+new author at the same time (in the `Posts` and `People` tables), we must first
+add a row to the `People` table, and then retrieve its primary key and associate
+it with the new row in the `Posts` table.
+
+![Adding a post and an author at the same time](/images/data-model/evolution-7.png)
+
+By now, you might ask yourself: How does a relational model expand to handle new
+data, new types of data, and new data relationships?
+
+When new data is added to the model, the model will change to accept the data.
+The simplest type of change is when you add a new row to a table. The new row
+adopts all of the columns from the table. When you add a new property to a
+table, the model changes and adds the new property as a column on every existing
+and future row for the table. And when you add a new data type to the database,
+you create a new table with its own pre-defined columns. This new data type
+might link to existing tables or need more pivot tables for a new many-to-many
+relationship.
+So, with each data type added to your relational data model, the need to add
+foreign keys and pivot tables increases, making support for querying every
+potential data relationship increasingly unwieldy.
+
+![Expanding a relational data model means more pivot tables](/images/data-model/evolution-8.png)
+
+Properties are stored as new columns, and relationships require new columns and
+sometimes new pivot tables. Changing the schema in a relational model directly
+affects the data that is held by the model, and can impact database query
+performance.
diff --git a/dgraph/reference/learn/data-engineer/data-model-101/03-graph-data-model.mdx b/dgraph/reference/learn/data-engineer/data-model-101/03-graph-data-model.mdx
new file mode 100644
index 00000000..410c6f8e
--- /dev/null
+++ b/dgraph/reference/learn/data-engineer/data-model-101/03-graph-data-model.mdx
@@ -0,0 +1,66 @@
+---
+title: Graph Data Modeling
+description:
+  When modeling a graph, focus on the relationships between nodes. In a graph,
+  you can change the model without affecting the underlying data
+---
+
+In this section we will take our example social media app and see how it could
+be modeled in a graph.
+
+The concept of modeling data in a graph starts by placing dots, which represent
+nodes. Nodes can have one or more predicates (properties). A `person` may have
+predicates for their name, age, and gender. A `post` might have a predicate
+value showing when it was posted, and a value containing the contents of the
+post. A `comment` would most likely have a predicate containing the comment
+string. However, any one node could have other predicates that are not contained
+on any other node. Each node represents an individual item, hence the singular
+naming structure.
+
+![Nodes used in the example social media app](/images/data-model/evolution-9.png)
+
+As graphs naturally resemble the data you are modeling, the individual nodes can
+be moved around this conceptual space to clearly show the relationships between
+these data nodes. Relationships are formed in graphs by creating an edge between
+them. In our app, a post has an author, a post can have comments, a comment has
+an author, and a comment can have a reply.
+
+For the sake of illustration, we will also show the family tree information. The
+`father` and the `mother` are linked together with a `spouse` edge, and both
+parents are related to the child along a `child` edge.
+
+![Illustration of relationships as edges](/images/data-model/evolution-10.png)
+
+With a graph, you can also name the inverse relations, so the inverse
+relationships are immediately visible. A `Post` has an `Author` and a `Person`
+has `Posts`. A `Post` has `Comments` and a `Comment` is on a `Post`. A `Comment`
+has an `Author`, and a `Person` has `Comments`. A `Parent` has a `Child`, and a
+`Child` has a `Parent`.
+
+You create many-to-many relationships in the same way that you make one-to-many
+relationships: with an edge between nodes.
+
+Adding groups of related data occurs naturally within a graph. The data is sent
+as a complete object instead of separate pieces of information that need to be
+connected afterwards. Adding a new person and a new post to our graph is a
+one-step process. New data coming in does not have to be related to any existing
+data. You can insert this whole data object with 3 people, a post, and a
+comment, all in one step, as the sketch below shows.
+
+When new data is added to the model, the model will change to accept the data.
+Every change to a graph model is received naturally.
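+As an illustration, that one-step insert might look like the following in
+Dgraph's JSON mutation format. This is only a sketch: the predicate names
+(`spouse`, `child`, `posts`, `comments`, `author`) are illustrative, and the
+`_:mother` entry is a blank-node reference that lets the same new node be used
+in two places.
+
+```json
+{
+  "set": [
+    {
+      "name": "Father",
+      "spouse": { "uid": "_:mother", "name": "Mother" },
+      "child": { "name": "Child" },
+      "posts": [
+        {
+          "title": "Ice Cream?",
+          "comments": [
+            { "text": "Yes!", "author": { "uid": "_:mother" } }
+          ]
+        }
+      ]
+    }
+  ]
+}
+```
+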
+When you add a new node with a data type, you are simply creating a new dot in
+space and applying a type to it. The new node does not include any predicates or
+relationships other than what you define for it. When you want to add a new
+predicate onto an existing data type, the model changes and adds the new
+property onto the items that you define. Other items not specifically given the
+new property type are not changed. When you add a new data type to the database,
+a new node is created, ready to receive new edges and predicates.
+
+![Illustration of expanding a graph data model](/images/data-model/evolution-11.png)
+
+The key to remember when modeling a graph is to focus on the relationships
+between nodes. In a graph you can change the model without affecting the
+underlying data. Because the graph is stored as individual nodes, you can adjust
+predicates of individual nodes, create edges between sets of nodes, and add new
+node types without affecting any of the other nodes.
diff --git a/dgraph/reference/learn/data-engineer/data-model-101/04-rel-query.mdx b/dgraph/reference/learn/data-engineer/data-model-101/04-rel-query.mdx
new file mode 100644
index 00000000..27c6392d
--- /dev/null
+++ b/dgraph/reference/learn/data-engineer/data-model-101/04-rel-query.mdx
@@ -0,0 +1,69 @@
+---
+title: Query Data in a Relational Model
+description:
+  In a relational model, tables are stored in files. When you request data from
+  a file, either a table scan takes place or an index is invoked
+---
+
+Storing our data is great, but the best data model would be useless without the
+ability to query the data our app requires. So, how does information get
+retrieved in a relational model compared to a graph model?
+
+In a relational model, tables are stored in files. To support the sample social
+media app described in this tutorial, you would need four files: `People`,
+`Posts`, `Comments`, and `Likes`.
+
+![Visualization of four files](/images/data-model/evolution-12.png)
+
+When you request data from a file, one of two things happens: either a table
+scan takes place or an index is invoked. A table scan happens when filtering
+upon data that is not indexed. To find this data, the whole file must be read
+until the data is found or the end of the file is reached. In our example app,
+we have a post titled “Ice Cream?”. If the title is not indexed, every post in
+the file would need to be read until the database finds the post entitled “Ice
+Cream?”. This method would be like reading the entire dictionary to find the
+definition of a single word: very time-consuming. This process could be
+optimized by creating an index on the post's title column. Using an index speeds
+up searches for data, but it can still be time-consuming.
+
+### What is an index?
+
+An index is an algorithm used to find the location of data. Instead of scanning
+an entire file looking for a piece of data, an index is used to aggregate the
+data into "chunks" and then create a decision tree pointing to the individual
+chunks of data. Such a decision tree could look like the following:
+
+![Image showing a tree to look up the term graph from an index](/images/data-model/evolution-13.png)
+
+Relational data models rely heavily on indexes to quickly find the requested
+data. Because the data required to answer a single question will usually live in
+multiple tables, you must use multiple indexes each time that related data is
+joined together.
+And because you can't index every column, some types of queries won't benefit
+from indexing.
+
+### How data is joined in a relational model
+
+In a relational model, the request's response must be returned as a single table
+consisting of columns and rows. To form this single table response, data from
+multiple tables must be joined together. In our app example, we found the post
+entitled “Ice Cream?” and also found the comments, “Yes!”, “When?”, and “After
+Lunch”. Each of these comments also has a corresponding author: `Mother`,
+`Child`, and `Father`. Because there is only one post as the root of the join,
+the post is duplicated to join to each comment.
+
+![TBD alt text](/images/data-model/evolution-15.png)
+
+Flattening query results can lead to many duplicate rows. Consider the case
+where you also want to query which people liked the comments on this example
+post. This query requires mapping a many-to-many relationship, which invokes two
+additional index searches to get the list of likes by `person`.
+
+![TBD alt text](/images/data-model/evolution-16.png)
+
+Joining all of this together would form a single table containing many
+duplicates: duplicate `posts` and duplicate `comments`. Another side effect of
+this response approach is that it is likely that empty data will exist in the
+response.
+
+In the next section, you will see that querying a graph data model avoids the
+issues that you would face when querying a relational data model.
diff --git a/dgraph/reference/learn/data-engineer/data-model-101/05-graph-query.mdx b/dgraph/reference/learn/data-engineer/data-model-101/05-graph-query.mdx
new file mode 100644
index 00000000..64cbd6ea
--- /dev/null
+++ b/dgraph/reference/learn/data-engineer/data-model-101/05-graph-query.mdx
@@ -0,0 +1,86 @@
+---
+title: Query Data in a Graph Model
+description:
+  When data is requested from a graph, a root function determines which nodes
+  are the starting points. This function uses indexes to match nodes quickly.
+---
+
+As you will see in this section, the data model we use determines the ease with
+which we can query for different types of data. The more your app relies on
+queries about the relationships between different types of data, the more you
+will benefit from querying data using a graph data model.
+
+In a graph data model, each record (i.e., a `person`, `post` or `comment`) is
+stored as a data _object_ (sometimes also called a _node_). In the example
+social media app described in this tutorial, we have objects for individual
+people, posts, and comments.
+
+![Image of many objects of people, posts, and comments (not showing the relationships, for clarity of the objects themselves)](/images/data-model/evolution-18.png)
+
+When data is requested from the graph, a root function determines which nodes
+are selected as the starting points. This root function uses indexes to
+determine quickly which nodes match. In our app example, we want to start with
+the root being the post with the title “Ice Cream?”. This type of lookup will
+invoke an index on the post's title, much like indexes work in a relational
+model. The indexes at the root of the graph use the full index tree to find the
+data.
+
+Connecting edges together to form a connected graph is called _traversal_. After
+arriving at our `post`, “Ice Cream?”, we traverse the graph to arrive at the
+post's `comments`. To find each comment's `author`, we traverse the next step to
+arrive at the people who authored the comments, as in the sketch below.
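+In Dgraph's query language, such a traversal might look like the following
+sketch. The predicate names (`title`, `comments`, `text`, `author`, `name`) are
+illustrative and assume a matching schema with an index on `title`:
+
+```graphql
+{
+  post(func: eq(title, "Ice Cream?")) { # root function: index lookup on title
+    title
+    comments {                          # traverse to the post's comments
+      text
+      author {                          # traverse on to each comment's author
+        name
+      }
+    }
+  }
+}
+```
+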
+This process follows the natural progression of related data, and graph data
+models allow us to query our data to follow this progression efficiently.
+
+What do we mean by efficiently? A graph data model lets you traverse from one
+node to a distantly-related node without the need for anything like pivot
+tables. This means that queries based on edges can be updated easily, with no
+need to change the schema to support new many-to-many relationships. And, with
+no need to build tables specifically for query optimization, you can adjust your
+schema quickly to accommodate new types of data without adversely impacting
+existing queries.
+
+![Image of post with connected comments and author](/images/data-model/evolution-19.png)
+
+A feature of a graph model is that related edges can be filtered at any point
+during the graph traversal. When you want to know the most recent `comment` on
+your post or the last `person` to like the comment, filters can be applied to
+the edge.
+
+![Image of filters along an edge](/images/data-model/evolution-21.png)
+
+When filters get applied along an edge, only the nodes that match the edge are
+filtered, not all of the nodes in the graph. Applying this logic reduces the
+size of the graph and makes index trees smaller. The smaller an index tree is,
+the faster it can be resolved.
+
+In a graph model, data is returned in an object-oriented format. Any related
+data is joined to its parent within the object in a nested structure.
+
+```json
+{
+  "title": "Ice Cream?",
+  "comments": [
+    {
+      "title": "Yes!",
+      "author": {
+        "name": "Mother"
+      }
+    },
+    {
+      "title": "When?",
+      "author": {
+        "name": "Child"
+      }
+    },
+    {
+      "title": "After Lunch",
+      "author": {
+        "name": "Father"
+      }
+    }
+  ]
+}
+```
+
+This object-oriented structure allows data to be joined without duplication.
diff --git a/dgraph/reference/learn/data-engineer/data-model-101/06-dm-101-conclusion.mdx b/dgraph/reference/learn/data-engineer/data-model-101/06-dm-101-conclusion.mdx
new file mode 100644
index 00000000..6497a23d
--- /dev/null
+++ b/dgraph/reference/learn/data-engineer/data-model-101/06-dm-101-conclusion.mdx
@@ -0,0 +1,20 @@
+---
+title: Conclusion
+description:
+  Congratulations on finishing Graph Data Models 101. Here is further reading
+  and where you can learn more.
+---
+
+Congratulations on finishing the Dgraph learn course: **Graph Data Models 101**!
+
+Now that you have an overview and understanding of
+
+- [what a graph is](./01-dm-101-introduction),
+- how a graph differs from a [relational model](./02-relational-data-model),
+- [how to model a graph](./03-graph-data-model),
+- and [how to query a graph](./05-graph-query),
+
+you are ready to jump into using Dgraph, the only truly native distributed graph
+database.
+
+Check out [Dgraph Cloud](https://dgraph.io/cloud).
diff --git a/dgraph/reference/learn/data-engineer/data-model-101/index.mdx b/dgraph/reference/learn/data-engineer/data-model-101/index.mdx
new file mode 100644
index 00000000..f4e9581b
--- /dev/null
+++ b/dgraph/reference/learn/data-engineer/data-model-101/index.mdx
@@ -0,0 +1,47 @@
+---
+title: Graph Data Models 101
+description:
+  Learn data modeling using relational databases compared to graph databases
+  such as Dgraph
+---
+
+When building an app, you might wonder which database is the best choice. A
+traditional relational database that you can query using SQL is a familiar
+choice, but does a relational database really provide a natural fit for your
+data model, and the performance that you need if your app goes viral and needs
+to scale up rapidly?
+
+This tutorial takes a deeper look at data modeling using relational databases
+compared to graph databases like Dgraph, to give you a better understanding of
+the advantages of using a graph database to power your app. If you aren't
+familiar with graph data models or graph databases, this tutorial was written
+for you.
+
+### Learning Goals
+
+In this tutorial, you will learn about graphs, and how a graph database is
+different from a database built on a relational data model. Aside from a few
+small illustrative sketches, you will not find code or syntax in this tutorial,
+but rather a comparison of graphs and relational data models. By the end of this
+tutorial, you will be able to answer the following questions:
+
+- What is a graph?
+- How are graphs different from relational models?
+- How is data modeled in a graph?
+- How is data queried from a graph?
+
+Along the way, you might find that a graph is the right fit for the data model
+used by your app. Any data model that tracks lots of different relationships (or
+_edges_) between various data types is a good candidate for a graph model.
+
+Whether this is the first time you are learning about graphs or looking to
+deepen your understanding of graphs with some concrete examples, this tutorial
+will help you along your journey.
+
+If you are already familiar with graphs, you can jump right into our coding
+example for [React](/courses/messageboardapp/react/overview/introduction/).
+
+If you are a SQL user and you'd like to learn how common SQL syntax maps to
+similar GraphQL syntax so you can use your SQL knowledge to jump-start your
+GraphQL learning journey, see
+[Introduction to Dgraph for SQL Users](/courses/datamodel/sql-to-dgraph/overview/introduction).
diff --git a/dgraph/reference/learn/data-engineer/get-started-with-dgraph/index.mdx b/dgraph/reference/learn/data-engineer/get-started-with-dgraph/index.mdx
new file mode 100644
index 00000000..7b207ae2
--- /dev/null
+++ b/dgraph/reference/learn/data-engineer/get-started-with-dgraph/index.mdx
@@ -0,0 +1,107 @@
+---
+title: Get Started with Dgraph
+description:
+  From learning the basics of graph databases to advanced functions and
+  capabilities, Dgraph docs have the information you need.
+---
+
+**Welcome to Dgraph. Here is a series of tutorials for getting started:**
+
+1. Run dgraph and learn about nodes and edges, as well as basic queries and
+   mutations.
+2. Learn about UID operations, updating nodes, and traversals.
+3. Learn about data types, indexing, filtering, and reverse traversals.
+4. Learn about multi-language strings and operations on them using the language
+   tags.
+5. Learn about string indices, modeling tweet graph, and keyword-based
+   searching.
+6. Learn about full-text search and regular expression search.
+7. Learn about searching user names using fuzzy search on social graphs.
+8. Easily build location-aware apps using native geolocation features.
diff --git a/dgraph/reference/learn/data-engineer/get-started-with-dgraph/tutorial-1/index.mdx b/dgraph/reference/learn/data-engineer/get-started-with-dgraph/tutorial-1/index.mdx
new file mode 100644
index 00000000..abcee964
--- /dev/null
+++ b/dgraph/reference/learn/data-engineer/get-started-with-dgraph/tutorial-1/index.mdx
@@ -0,0 +1,236 @@
+---
+title: Get Started with Dgraph - Introduction
+---
+
+**Welcome to getting started with Dgraph.**
+
+[Dgraph](https://dgraph.io) is an open-source, transactional, distributed,
+native Graph Database. This is the first tutorial of the Get Started series on
+using Dgraph.
+
+In this tutorial, we'll learn about:
+
+- Running Dgraph using the `dgraph/standalone` docker image.
+- Running the following basic operations using Dgraph's UI Ratel:
+  - Creating a node.
+  - Creating an edge between two nodes.
+  - Querying for the nodes.
+
+Our use case will represent a person named "Ann", age 28, who "follows" on
+social media a person named "Ben", age 31.
+
+You can see the accompanying video below.
+
+
+
+---
+
+## Running Dgraph
+
+Running the `dgraph/standalone` docker image is the quickest way to get started
+with Dgraph. This standalone image is meant for quickstart purposes only. It is
+not recommended for production environments.
+
+Ensure that [Docker](https://docs.docker.com/install/) is installed and running
+on your machine.
+
+Now, it's just a matter of running the following command, and you have Dgraph up
+and running.
+
+```sh
+docker run --rm -it -p 8080:8080 -p 9080:9080 dgraph/standalone:latest
+```
+
+### Nodes and Relationships
+
+The mental picture of the use case may be a graph where we have two nodes
+representing the two persons and a relationship representing the fact that "Ann"
+follows "Ben":
+
+![A simple graph](/images/tutorials/1/gs-1.png)
+
+Dgraph uses those very same concepts, making it simple to store and manipulate
+your data.
+
+We will then create two nodes, one representing the information we know about
+`Ann` and one holding the information about `Ben`.
+
+What we know is the `name` and the `age` of those persons.
+
+We also know that Ann follows Ben. This will also be stored as a relationship
+between the two nodes.
+
+### Using Ratel
+
+Launch the Ratel image:
+
+```sh
+docker run --rm -d -p 8000:8000 dgraph/ratel:latest
+```
+
+Just visit [http://localhost:8000](http://localhost:8000) from your browser, and
+you will be able to access it.
+
+![ratel-1](/images/tutorials/1/gs-2.png)
+
+We'll be using the Console tab of Ratel.
+
+![ratel-2](/images/tutorials/1/gs-3.png)
+
+### Mutations using Ratel
+
+The create, update, and delete operations in Dgraph are called mutations.
+
+In the Ratel console, select the `Mutate` tab and paste the following mutation
+into the text area.
+
+```json
+{
+  "set": [
+    {
+      "name": "Ann",
+      "age": 28,
+      "follows": {
+        "name": "Ben",
+        "age": 31
+      }
+    }
+  ]
+}
+```
+
+The mutation above creates an entity and saves the predicates `name` and `age`
+with the corresponding values.
+
+It also creates a predicate `follows` for that entity, but the value is not a
+literal (string, int, float, bool).
+
+So Dgraph also creates a second entity that is the object of this predicate.
+This second entity itself has some predicates (`name` and `age`).
+
+Let's execute this mutation. Click Run!
+
+![Query-gif](/images/tutorials/1/mutate-example.gif)
+
+You can see in the response that two UIDs (Universal IDentifiers) have been
+created.
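+The response will look something like the following sketch. The exact keys in
+the `"uids"` map depend on how the mutation was submitted, and the UID values
+(`0x8`, `0x9`) are illustrative; yours will differ:
+
+```json
+{
+  "data": {
+    "code": "Success",
+    "message": "Done",
+    "uids": {
+      "blank-0": "0x8",
+      "blank-1": "0x9"
+    }
+  }
+}
+```
+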
+The two values in the `"uids"` field of the response correspond to the two
+entities created for "Ann" and "Ben".
+
+### Querying using the has function
+
+Now, let's run a query to visualize the graph which we just created. We'll be
+using Dgraph's `has` function. The expression `has(name)` returns all the
+entities with a predicate `name` associated with them.
+
+```graphql
+{
+  people(func: has(name)) {
+    name
+    age
+  }
+}
+```
+
+Go to the `Query` tab this time and type in the query above. Then, click `Run`
+on the top right of the screen.
+
+![query-1](/images/tutorials/1/query-1.png)
+
+Ratel renders a graph visualization of the result.
+
+Click on any of the nodes, and notice that they have been assigned UIDs matching
+the ones we saw in the mutation's response.
+
+You can also view the JSON results in the JSON tab on the right.
+
+![query-2](/images/tutorials/1/query-2.png)
+
+#### Understanding the query
+
+![Illustration with explanation](/images/tutorials/1/explain-query-2.JPG)
+
+The first part of the query is the user-defined function name. In our query, we
+have named it `people`. However, you could use any other name.
+
+The `func` parameter has to be associated with a built-in function of Dgraph.
+Dgraph offers a variety of built-in functions. The `has` function is one of
+them. Check out the
+[query language guide](https://dgraph.io/docs/query-language) to learn more
+about other built-in functions in Dgraph.
+
+The inner fields of the query are similar to the column names in a SQL select
+statement or to a GraphQL query!
+
+You can easily specify which predicates you want to get back.
+
+```graphql
+{
+  people(func: has(name)) {
+    name
+  }
+}
+```
+
+Similarly, you can use the `has` function to find all entities with the `age`
+predicate.
+
+```graphql
+{
+  people(func: has(age)) {
+    name
+  }
+}
+```
+
+### Flexible schema
+
+Dgraph doesn't enforce a structure or a schema. Instead, you can start entering
+your data immediately and add constraints as needed.
+
+Let's look at this mutation.
+
+```json
+{
+  "set": [
+    {
+      "name": "Balaji",
+      "age": 23,
+      "country": "India"
+    },
+    {
+      "name": "Daniel",
+      "age": 25,
+      "city": "San Diego"
+    }
+  ]
+}
+```
+
+We are creating two entities: while the first entity has predicates `name`,
+`age`, and `country`, the second one has `name`, `age`, and `city`.
+
+Schemas are not needed initially. Dgraph creates new predicates as they appear
+in your mutations. This flexibility can be beneficial, but if you prefer to
+force your mutations to follow a given schema, there are options available that
+we will explore in the next tutorial.
+
+## Wrapping up
+
+In this tutorial, we learned the basics of Dgraph, including how to run the
+database, add new entities and predicates, and query them back.
+
+Check out our next tutorial of the getting started series
+[here](./tutorial-2/index).
+
+## Need Help
+
+- Please use [discuss.dgraph.io](https://discuss.dgraph.io) for questions,
+  feature requests, bugs, and discussions.
diff --git a/dgraph/reference/learn/data-engineer/get-started-with-dgraph/tutorial-2/index.mdx b/dgraph/reference/learn/data-engineer/get-started-with-dgraph/tutorial-2/index.mdx
new file mode 100644
index 00000000..f0e5e7c0
--- /dev/null
+++ b/dgraph/reference/learn/data-engineer/get-started-with-dgraph/tutorial-2/index.mdx
@@ -0,0 +1,395 @@
+---
+title: Get Started with Dgraph - Basic Operations
+---
+
+**Welcome to the second tutorial of getting started with Dgraph.**
+
+In the [previous tutorial](./tutorial-1/index) of getting started, we learned
+some of the basics of Dgraph, including how to run the database, add new nodes
+and predicates, and query them back.
+
+![Graph](/images/tutorials/2/graph-1.jpg)
+
+In this tutorial, we'll build the above Graph and learn more about operations
+using the UID (Universal Identifier) of the nodes. Specifically, we'll learn
+about:
+
+- Querying and updating nodes, and deleting predicates using their UIDs.
+- Adding an edge between existing nodes.
+- Adding a new predicate to an existing node.
+- Traversing the Graph.
+
+You can see the accompanying video below.
+
+
+---
+
+First, let's create our Graph.
+
+Go to Ratel's mutate tab, paste the mutation below in the text area, and click
+Run.
+
+```json
+{
+  "set": [
+    {
+      "name": "Michael",
+      "age": 40,
+      "follows": {
+        "name": "Pawan",
+        "age": 28,
+        "follows": {
+          "name": "Leyla",
+          "age": 31
+        }
+      }
+    }
+  ]
+}
+```
+
+![mutation-1](/images/tutorials/2/a-add-data.gif)
+
+## Query using UIDs
+
+The UID of a node can be used to query it back. The built-in function `uid`
+takes a list of UIDs as a variadic argument, so you can pass one (e.g.
+`uid(0x1)`) or as many as you need (e.g. `uid(0x1, 0x2)`).
+
+It returns the same UIDs that were passed as input, whether they exist in the
+database or not. But the predicates asked for will be returned only if both the
+UIDs and their predicates exist.
+
+Let's see the `uid` function in action.
+
+First, let's copy the UID of the node created for `Michael`.
+
+Go to the query tab, type in the query below, and click Run.
+
+```graphql
+{
+  people(func: has(name)) {
+    uid
+    name
+    age
+  }
+}
+```
+
+Now, from the result, copy the UID of Michael's node.
+
+![get-uid](/images/tutorials/2/b-get-uid-1.png)
+
+In the query below, replace the placeholder `MICHAELS_UID` with the UID you just
+copied, and run the query.
+
+```graphql
+{
+  find_using_uid(func: uid(MICHAELS_UID)){
+    uid
+    name
+    age
+  }
+}
+```
+
+![get_node_from_uid](/images/tutorials/2/c-query-uid.png)
+
+_Note: `MICHAELS_UID` appears as `0x8` in the images. The UID you get on your
+machine might have a different value._
+
+You can see that the `uid` function returns the node matching the UID for
+Michael's node.
+
+Refer to the [previous tutorial](./tutorial-1/index) if you have questions
+related to the structure of the query in general.
+
+## Updating predicates
+
+You can also update one or more predicates of a node using its UID.
+
+Michael recently celebrated his 41st birthday. Let's update his age to 41.
+
+Go to the mutate tab and execute the mutation. Again, don't forget to replace
+the placeholder `MICHAELS_UID` with the actual UID of the node for `Michael`.
+
+```json
+{
+  "set": [
+    {
+      "uid": "MICHAELS_UID",
+      "age": 41
+    }
+  ]
+}
+```
+
+We had earlier used `set` to create new nodes. But when the UID of an existing
+node is used, `set` updates the node's predicates instead of creating a new
+node.
+
+You can see that Michael's age is updated to 41.
+
+```graphql
+{
+  find_using_uid(func: uid(MICHAELS_UID)){
+    name
+    age
+  }
+}
+```
+
+![update check](/images/tutorials/2/d-update-check.png)
+
+Similarly, you can also add new predicates to an existing node. Since the
+predicate `country` doesn't exist for the node for `Michael`, it creates a new
+one.
+
+```json
+{
+  "set": [
+    {
+      "uid": "MICHAELS_UID",
+      "country": "Australia"
+    }
+  ]
+}
+```
+
+## Adding an edge between existing nodes
+
+You can also add an edge between existing nodes using their UIDs.
+
+Let's say `Leyla` starts to follow `Michael`.
+
+We know that this relationship between them has to be represented by creating
+the `follows` edge between them.
+
+![Graph](/images/tutorials/2/graph-2.jpg)
+
+First, let's copy the UIDs of the nodes for `Leyla` and `Michael` from Ratel.
+
+Now, replace the placeholders `LEYLAS_UID` and `MICHAELS_UID` with the ones you
+copied, and execute the mutation.
+
+```json
+{
+  "set": [
+    {
+      "uid": "LEYLAS_UID",
+      "follows": {
+        "uid": "MICHAELS_UID"
+      }
+    }
+  ]
+}
+```
+
+## Traversing the edges
+
+Graph databases offer many distinct capabilities. `Traversals` are among them.
+
+Traversals answer questions or queries related to the relationships between
+nodes. Hence, queries like "who does Michael follow?" are answered by traversing
+the `follows` relationship.
+
+Let's run a traversal query and then understand it in detail.
+
+```graphql
+{
+  find_follower(func: uid(MICHAELS_UID)){
+    name
+    age
+    follows {
+      name
+      age
+    }
+  }
+}
+```
+
+Here's the result.
+
+![traversal-result](/images/tutorials/2/e-traversal.png)
+
+The query has three parts:
+
+- **Selecting the root nodes.**
+
+First, you need to select one or more nodes as the starting point for
+traversals. These are called the root nodes. In the query above, we use the
+`uid()` function to select the node created for `Michael` as the root node.
+
+- **Choosing the edge to be traversed.**
+
+You need to specify the edge to be traversed, starting from the selected root
+nodes. The traversal then travels along these edges, from one end to the nodes
+at the other end.
+
+In our query, we chose to traverse the `follows` edge starting from the node for
+`Michael`. The traversal returns all the nodes connected to the node for
+`Michael` via the `follows` edge.
+
+- **Specifying the predicates to get back.**
+
+Since Michael follows only one person, the traversal returns just one node.
+These are `level-2` nodes. The root nodes constitute the nodes for `level-1`.
+Again, you need to specify which predicates you want to get back from `level-2`
+nodes.
+
+![get_node_from_uid](/images/tutorials/2/j-explain.JPG)
+
+You can extend the query to make use of `level-2` nodes and traverse the Graph
+further and deeper. Let's explore that in the next section.
+
+#### Multi-level traversals
+
+The first level of traversal returns the people followed by Michael. The next
+level of traversal further returns the people they in turn follow.
+
+This pattern can be repeated multiple times to achieve multi-level traversals.
+The depth of the query increases by one as we traverse each level of the Graph.
+That's when we say that the query is deep!
+
+```graphql
+{
+  find_follower(func: uid(MICHAELS_UID)) {
+    name
+    age
+    follows {
+      name
+      age
+      follows {
+        name
+        age
+      }
+    }
+  }
+}
+```
+
+![level-3-query](/images/tutorials/2/f-level-3-traverse.png)
+
+Here is one more example, extending the last query.
+
+```graphql
+{
+  find_follower(func: uid(MICHAELS_UID)) {
+    name
+    age
+    follows {
+      name
+      age
+      follows {
+        name
+        age
+        follows {
+          name
+          age
+        }
+      }
+    }
+  }
+}
+```
+
+![level 3](/images/tutorials/2/g-level-4-traversal.png)
+
+This query is really long: it is four levels deep. In other words, the depth of
+the query is four. You might ask, isn't there a built-in function that makes
+multi-level deep queries or traversals easy?
+
+The answer is yes! That's what the `recurse()` function does. Let's explore it
+in our next section.
+
+#### Recursive traversals
+
+Recursive queries make it easier to perform multi-level deep traversals. They
+let you easily traverse a subset of the Graph.
+
+The following recursive query achieves the same result as our last query, but
+with a much better querying experience.
+
+```graphql
+{
+  find_follower(func: uid(MICHAELS_UID)) @recurse(depth: 4) {
+    name
+    age
+    follows
+  }
+}
+```
+
+In the query above, the `recurse` function traverses the graph starting from the
+node for `Michael`. You can choose any other node to be the starting point. The
+depth parameter specifies the maximum depth the traversal query should consider.
+
+Let's run the recursive traversal query after replacing the placeholder with the
+UID of the node for Michael.
+
+![recurse](/images/tutorials/2/h-recursive-traversal.png)
+
+[Check out the docs](https://dgraph.io/docs/query-language/#recurse-query) for
+detailed instructions on using the `recurse` directive.
+
+#### Edges have directions
+
+Edges in Dgraph have directions.
+
+For instance, the `follows` edge emerging from the node for `Michael` points at
+the node for `Pawan`. They have a notion of direction.
+
+Traversing along the direction of an edge is natural to Dgraph. We'll learn
+about traversing edges in the reverse direction in our next tutorial.
+
+## Deleting a predicate
+
+Predicates of a node can be deleted using the `delete` mutation. Here's the
+syntax of the delete mutation to delete any predicate of a node:
+
+```graphql
+{
+  delete {
+    <UID> <predicate_name> * .
+  }
+}
+```
+
+Using the mutation syntax above, let's compose a delete mutation. Let's delete
+the `age` predicate of the node for `Michael`.
+
+```graphql
+{
+  delete {
+    <MICHAELS_UID> <age> * .
+  }
+}
+```
+
+![recurse](/images/tutorials/2/i-delete.png)
+
+## Wrapping up
+
+In this tutorial, we learned about the CRUD operations using UIDs. We also
+learned about the `recurse()` function.
+
+Before we wrap up, here's a sneak peek into our next tutorial.
+
+Did you know that you could search predicates based on their value?
+
+Sounds interesting?
+
+Check out our next tutorial of the getting started series
+[here](./tutorial-3/index).
+
+## Need Help
+
+- Please use [discuss.dgraph.io](https://discuss.dgraph.io) for questions,
+  feature requests, bugs, and discussions.
diff --git a/dgraph/reference/learn/data-engineer/get-started-with-dgraph/tutorial-3/index.mdx b/dgraph/reference/learn/data-engineer/get-started-with-dgraph/tutorial-3/index.mdx
new file mode 100644
index 00000000..17d22fa5
--- /dev/null
+++ b/dgraph/reference/learn/data-engineer/get-started-with-dgraph/tutorial-3/index.mdx
@@ -0,0 +1,649 @@
+---
+title: Get Started with Dgraph - Types and Operations
+---
+
+**Welcome to the third tutorial of getting started with Dgraph.**
+
+In the [previous tutorial](./tutorial-2/index), we learned about CRUD operations
+using UIDs. We also learned about traversals and recursive traversals.
+
+In this tutorial, we'll learn about Dgraph's basic types and how to query for
+them.
Specifically, we'll learn about: + +- Basic data types in Dgraph. +- Querying for predicate values. +- Indexing. +- Filtering nodes. +- Reverse traversing. + +Check out the accompanying video: + + + +Let's start by building the graph of a simple blog application. Here's the Graph +model of our application: + +![main graph model](/images/tutorials/3/a-main-graph.JPG) + +The above graph has three entities: Author, Blog posts, and Tags. The nodes in +the graph represent these entities. For the rest of the tutorial, we'll call the +nodes representing a blog as a `blog post` node and the node presenting a `tag` +as a `tag node`, and so on. + +You can see from the graph model that these entities are related: + +- Every Author has one or more blog posts. + +The `published` edge relates the blogs to their authors. These edges start from +an `author node` and point to a `blog post` node. + +- Every Blog post has one or more tags. + +The `tagged` edge relates the blog posts to their tags. These edges emerge from +a `blog post node` and point to a `tag node`. + +Let's build our graph. + +Go to Ratel, click on the mutate tab, paste the following mutation, and click +Run. + +```json +{ + "set": [ + { + "author_name": "John Campbell", + "rating": 4.1, + "published": [ + { + "title": "Dgraph's recap of GraphQL Conf - Berlin 2019", + "url": "https://blog.dgraph.io/post/graphql-conf-19/", + "content": "We took part in the recently held GraphQL conference in Berlin. The experience was fascinating, and we were amazed by the high voltage enthusiasm in the GraphQL community. Now, we couldn’t help ourselves from sharing this with Dgraph’s community! This is the story of the GraphQL conference in Berlin.", + "likes": 100, + "dislikes": 4, + "publish_time": "2018-06-25T02:30:00", + "tagged": [ + { + "uid": "_:graphql", + "tag_name": "graphql" + }, + { + "uid": "_:devrel", + "tag_name": "devrel" + } + ] + }, + { + "title": "Dgraph Labs wants you!", + "url": "https://blog.dgraph.io/post/hiring-19/", + "content": "We recently announced our successful Series A fundraise and, since then, many people have shown interest to join our team. We are very grateful to have so many people interested in joining our team! We also realized that the job openings were neither really up to date nor covered all of the roles that we are looking for. This is why we decided to spend some time rewriting them and the result is these six new job openings!.", + "likes": 60, + "dislikes": 2, + "publish_time": "2018-08-25T03:45:00", + "tagged": [ + { + "uid": "_:hiring", + "tag_name": "hiring" + }, + { + "uid": "_:careers", + "tag_name": "careers" + } + ] + } + ] + }, + { + "author_name": "John Travis", + "rating": 4.5, + "published": [ + { + "title": "How Dgraph Labs Raised Series A", + "url": "https://blog.dgraph.io/post/how-dgraph-labs-raised-series-a/", + "content": "I’m really excited to announce that Dgraph has raised $11.5M in Series A funding. This round is led by Redpoint Ventures, with investment from our previous lead, Bain Capital Ventures, and participation from all our existing investors – Blackbird, Grok and AirTree. With this round, Satish Dharmaraj joins Dgraph’s board of directors, which includes Salil Deshpande from Bain and myself. Their guidance is exactly what we need as we transition from building a product to bringing it to market. 
So, thanks to all our investors!.", + "likes": 139, + "dislikes": 6, + "publish_time": "2019-07-11T01:45:00", + "tagged": [ + { + "uid": "_:announcement", + "tag_name": "announcement" + }, + { + "uid": "_:funding", + "tag_name": "funding" + } + ] + }, + { + "title": "Celebrating 10,000 GitHub Stars", + "url": "https://blog.dgraph.io/post/10k-github-stars/", + "content": "Dgraph is celebrating the milestone of reaching 10,000 GitHub stars 🎉. This wouldn’t have happened without all of you, so we want to thank the awesome community for being with us all the way along. This milestone comes at an exciting time for Dgraph.", + "likes": 33, + "dislikes": 12, + "publish_time": "2017-03-11T01:45:00", + "tagged": [ + { + "uid": "_:devrel" + }, + { + "uid": "_:announcement" + } + ] + } + ] + }, + { + "author_name": "Katie Perry", + "rating": 3.9, + "published": [ + { + "title": "Migrating data from SQL to Dgraph!", + "url": "https://blog.dgraph.io/post/migrating-from-sql-to-dgraph/", + "content": "Dgraph is rapidly gaining reputation as an easy to use database to build apps upon. Many new users of Dgraph have existing relational databases that they want to migrate from. In particular, we get asked a lot about how to migrate data from MySQL to Dgraph. In this article, we present a tool that makes this migration really easy: all a user needs to do is write a small 3 lines configuration file and type in 2 commands. In essence, this tool bridges one of the best technologies of the 20th century with one of the best ones of the 21st (if you ask us).", + "likes": 20, + "dislikes": 1, + "publish_time": "2018-08-25T01:44:00", + "tagged": [ + { + "uid": "_:tutorial", + "tag_name": "tutorial" + } + ] + }, + { + "title": "Building a To-Do List React App with Dgraph", + "url": "https://blog.dgraph.io/post/building-todo-list-react-dgraph/", + "content": "In this tutorial we will build a To-Do List application using React JavaScript library and Dgraph as a backend database. We will use dgraph-js-http — a library designed to greatly simplify the life of JavaScript developers when accessing Dgraph databases.", + "likes": 97, + "dislikes": 5, + "publish_time": "2019-02-11T03:33:00", + "tagged": [ + { + "uid": "_:tutorial" + }, + { + "uid": "_:devrel" + }, + { + "uid": "_:javascript", + "tag_name": "javascript" + } + ] + } + ] + } + ] +} +``` + +Our Graph is ready! + +![rating-blog-rating](/images/tutorials/3/l-fullgraph-2.png) + +Our Graph has: + +- Three blue author nodes. +- Each author has two blog posts each - six in total - which are represented by + the green nodes. +- The tags of the blog posts are in pink. You can see that there are 8 unique + tags, and some of the blogs share a common tag. + +## Data types for predicates + +Dgraph automatically detects the data type of its predicates. You can see the +auto-detected data types using the Ratel UI. + +Click on the schema tab on the left and then check the `Type` column. You'll see +the predicate names and their corresponding data types. + +![rating-blog-rating](/images/tutorials/3/a-initial.png) + +These data types include `string`, `float`, and `int`, and `uid`. Besides them, +Dgraph also offers three more basic data types: `geo`, `dateTime`, and `bool`. + +The `uid` types represent predicates between two nodes. In other words, they +represent edges connecting two nodes. + +You might have noticed that the `published` and `tagged` predicates are of type +`uid` array (`[uid]`). UID arrays represent a collection of UIDs. 
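+In DQL schema syntax, such list-type predicates are declared with the type
+wrapped in square brackets. A sketch of what the relevant declarations might
+look like (the exact entries are visible on Ratel's schema tab):
+
+```
+published: [uid] .
+tagged: [uid] .
+```
+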
+UID arrays like these are used to represent one-to-many relationships.
+
+For instance, we know that an author can publish more than one blog. Hence,
+there could be more than one `published` edge emerging from a given `author`
+node, each pointing to a different blog post of the author.
+
+Dgraph's [v1.1 release](https://blog.dgraph.io/post/release-v1.1.0/) introduced
+the type system feature. This feature made it possible to create custom data
+types by grouping one or more predicates. But in this tutorial, we'll only focus
+on the basic data types.
+
+Also, notice that there are no entries in the indexes column. We'll talk about
+indexes in detail shortly.
+
+## Querying for predicate values
+
+First, let's query for all the Authors and their ratings:
+
+```graphql
+{
+  authors_and_ratings(func: has(author_name)) {
+    uid
+    author_name
+    rating
+  }
+}
+```
+
+![authors](/images/tutorials/3/a-find-rating-2.png)
+
+Refer to the [first episode](./tutorial-1/index) if you have any questions
+related to the structure of the query in general.
+
+We have 3 authors in total in our dataset. Now, let's find the best authors:
+let's query for authors whose rating is 4.0 or more.
+
+In order to achieve our goal, we need a way to select nodes that meet certain
+criteria (e.g., rating > 4.0). You can do this by using Dgraph's built-in
+comparator functions. Here's the list of comparator functions available in
+Dgraph:
+
+| comparator function name | Full form                |
+| ------------------------ | ------------------------ |
+| eq                       | equals to                |
+| lt                       | less than                |
+| le                       | less than or equal to    |
+| gt                       | greater than             |
+| ge                       | greater than or equal to |
+
+There are a total of five comparator functions in Dgraph. You can use any of
+them alongside the `func` keyword in your queries.
+
+A comparator function takes two arguments: the predicate name and a value to
+compare against. Here are a few examples.
+
+| Example usage          | Description                                                                  |
+| ---------------------- | ---------------------------------------------------------------------------- |
+| func: eq(age, 60)      | Return nodes with `age` predicate equal to 60.                               |
+| func: gt(likes, 100)   | Return nodes with a value of `likes` predicate greater than 100.             |
+| func: le(dislikes, 10) | Return nodes with a value of `dislikes` predicate less than or equal to 10.  |
+
+Now, guess the comparator function we should use to select `author nodes` with a
+rating of 4.0 or more.
+
+If you think it should be the `greater than or equal to (ge)` function, then
+you're right!
+
+Let's try it out.
+
+```graphql
+{
+  best_authors(func: ge(rating, 4.0)) {
+    uid
+    author_name
+    rating
+  }
+}
+```
+
+![index missing](/images/tutorials/3/b-index-missing.png)
+
+We got an error! The index for the `rating` predicate is missing. You cannot
+query for the value of a predicate unless you've added an index for it.
+
+Let's learn more about indexes in Dgraph and also how to add them.
+
+## Indexing in Dgraph
+
+Indexes are used to speed up your queries on predicates. They have to be
+explicitly added to a predicate when they are required; that is, only when you
+need to query for the value of a predicate.
+
+Also, there's no need to anticipate all the indexes right at the beginning. You
+can add them as you go along.
+
+Dgraph offers different types of indexes. The choice of index depends on the
+data type of the predicate.
+
+Here is the table containing data types and the set of indexes that can be
+applied to them.
+ +| Data type | Available index types | +| --------- | ------------------------------------ | +| int | int | +| float | float | +| string | hash, exact, term, fulltext, trigram | +| bool | bool | +| geo | geo | +| dateTime | year, month, day, hour | + +Only `string` and `dateTime` data types have an option for more than one index +type. + +Let's create an index on the rating predicate. Ratel UI makes it super simple to +add an index. + +Here's the sequence of steps: + +- Go to the schema tab on the left. +- Click on the `rating` predicate from the list. +- Tick the index option in the Properties UI on the right. + +![Add schema](/images/tutorials/3/c-add-schema.png) + +We successfully added the index for `rating` predicate! Let's rerun our previous +query. + +![rating](/images/tutorials/3/d-rating-query.png) + +We successfully queried for Author nodes with a rating of 4.0 or more. How about +we also fetch the Blog posts of these authors? + +We already know that the `published` edge points from an `author` node to a +`blog post` node. So fetching the blog posts of the `author` nodes is simple. We +need to traverse the `published` edge starting from the `author` nodes. + +```graphql +{ + authors_and_ratings(func: ge(rating, 4.0)) { + uid + author_name + rating + published { + title + content + dislikes + } + } +} +``` + +![rating-blog-rating](/images/tutorials/3/e-rating-blog.png) + +_Check out our [previous tutorial](./tutorial-2/index.md) if you have questions +around graph traversal queries._ + +Similarly, let's extend our previous query to fetch the tags of these blog +posts. + +```graphql +{ + authors_and_ratings(func: ge(rating, 4.0)) { + uid + author_name + rating + published { + title + content + dislikes + tagged { + tag_name + } + } + } +} +``` + +![rating-blog-rating](/images/tutorials/3/m-four-blogs.png) + +_Note: Author nodes are in blue, blogs posts in green, and tags in pink._ + +We have two authors, four blog posts, and their tags in the result. If you take +a closer look at the result, there's a blog post with 12 dislikes. + +![Dislikes](/images/tutorials/3/i-dislikes-2.png) + +Let's filter and fetch only the popular blog posts. Let's query for only those +blog posts with fewer than 10 dislikes. + +To achieve that, we need to express the following statement as a query to +Dgraph: + +_Hey, traverse the `published` edge, but only return those blogs with fewer than +10 dislikes_ + +Can we also filter the nodes during traversals? Yes, we can! Let's learn how to +do that in our next section. + +## Filtering traversals + +We can filter the result of traversals by using the `@filter` directive. You can +use any of the Dgraph's comparator functions with the `@filter` directive. You +should use the `lt` comparator to filter for only those blog posts with fewer +than 10 dislikes. + +Here's the query. + +```graphql +{ + authors_and_ratings(func: ge(rating, 4.0)) { + author_name + rating + + published @filter(lt(dislikes, 10)) { + title + likes + dislikes + tagged { + tag_name + } + } + } +} +``` + +The query returns: + +![rating-blog-rating](/images/tutorials/3/n-three-blogs.png) + +Now, we only have three blogs in the result. The blog with 12 dislikes is +filtered out. + +Notice that the blog posts are associated with a series of tags. + +Let's run the following query and find all the tags in the database. + +```sh +{ + all_tags(func: has(tag_name)) { + tag_name + } +} +``` + +![tags](/images/tutorials/3/o-tags.png) + +We got all the tags in the database. My favorite tag is `devrel`. 
What's yours? + +In our next section, let's find all the blog posts which are tagged `devrel`. + +## Querying string predicates + +The `tag_name` predicate represents the name of a tag. It is of type `string`. +Here are the steps to fetch all blog posts which are tagged `devrel`. + +- Find the root node with the value of `tag_name` predicate set to `devrel`. We + can use the `eq` comparator function to do so. +- Don't forget to add an index to the `tag_name` predicate before you run the + query. +- Traverse starting from the node for `devrel` tag along the `tagged` edge. + +Let's start by adding an index to the `tag_name` predicate. Go to Ratel, click +`tag_name` predicate from the list. + +![string index](/images/tutorials/3/p-string-index-2.png) + +You can see that there are five choices for indexes that can be applied to any +`string` predicate. The `fulltext`, `term`, and `trigram` are advanced string +indexes. We'll discuss them in detail in our next episode. + +There are a few constraints around the use of string type indexes and the +comparator functions. + +For example, only the `exact` index is compatible with the `le`, `ge`,`lt`, and +`gt` built-in functions. If you set a string predicate with any other index and +run the above comparators, the query fails. + +Although, any of the five string type indexes are compatible with the `eq` +function, the `hash` index used with the `eq` comparator would normally be the +most performant. + +Let's add the `hash` index to the `tag_name` predicate. + +![string index](/images/tutorials/3/m-hash.png) + +Let's use the `eq` comparator and fetch the root node with `tag_name` set to +`devrel`. + +```graphql +{ + devrel_tag(func: eq(tag_name,"devrel")) { + tag_name + } +} +``` + +![string index](/images/tutorials/3/q-devrel-2.png) + +We finally have the node we wanted! + +We know that the `blog post` nodes are connected to their `tag nodes` via the +`tagged` edges. Do you think that a traversal from the node for `devrel` tag +should give us the blog posts? Let's try it out! + +```graphql +{ + devrel_tag(func: eq(tag_name,"devrel")) { + tag_name + tagged { + title + content + } + } +} +``` + +Looks like the query didn't work! It didn't return us the blog posts! Don't be +surprised as this is expected. + +Let's observe our Graph model again. + +![main graph model](/images/tutorials/3/a-main-graph.JPG) + +We know that the edges in Dgraph have directions. You can see that the `tagged` +edge points from a `blog post` node to a `tag` node. + +Traversing along the direction of an edge is natural to Dgraph. Hence, you can +traverse from any `blog post node` to its `tag node` via the `tagged` edge. + +But to traverse the other way around requires you to move opposite to the +direction of the edge. You can still do so by adding a tilde(~) sign in your +query. The tilde(~) has to be added at the beginning of the name of the edge to +be traversed. + +Let's add the `tilde (~)` at the beginning of the `tagged` edge and initiate a +reverse edge traversal. + +```graphql +{ + devrel_tag(func: eq(tag_name,"devrel")) { + tag_name + + ~tagged { + title + content + } + } +} +``` + +![string index](/images/tutorials/3/r-reverse-2.png) + +We got an error! + +Reverse traversals require an index on their predicate. + +Let's go to Ratel and add the `reverse` index to the edge. + +![string index](/images/tutorials/3/r-reverse-1.png) + +Let's re-run the reverse edge traversal. 
+ +```graphql +{ + devrel_tag(func: eq(tag_name, "devrel")) { + tag_name + + ~tagged { + title + content + } + } +} +``` + +![uid index](/images/tutorials/3/s-devrel-blogs.png) + +![uid index](/images/tutorials/3/s-devrel-blogs-2.png) + +Phew! Now we got all the blog posts that are tagged `devrel`. + +Similarly, you can extend the query to also find the authors of these blog +posts. It requires you to reverse traverse the `published` predicate. + +Let's add the reverse index to the `published` edge. + +![uid index](/images/tutorials/3/t-reverse-published.png) + +Now, let's run the following query. + +```graphql +{ + devrel_tag(func: eq(tag_name,"devrel")) { + tag_name + + ~tagged { + title + content + + ~published { + author_name + } + } + } +} +``` + +![uid index](/images/tutorials/3/u-author-reverse-1.png) + +![uid index](/images/tutorials/3/u-author-reverse-2.png) + +With our previous query, we traversed the entire graph in reverse order. +Starting from the tag nodes, we traversed up to the author nodes. + +## Summary + +In this tutorial, we learned about basic types, indexes, filtering, and reverse +edge traversals. + +Before we wrap up, here’s a sneak peek into our next tutorial. + +Did you know that Dgraph offers advanced text search capabilities? How about the +geo-location querying capabilities? + +Sounds interesting? + +Check out our next tutorial of the getting started series +[here](./tutorial-4/index). + +## Need Help + +- Please use [discuss.dgraph.io](https://discuss.dgraph.io) for questions, + feature requests, bugs, and discussions. diff --git a/dgraph/reference/learn/data-engineer/get-started-with-dgraph/tutorial-4/index.mdx b/dgraph/reference/learn/data-engineer/get-started-with-dgraph/tutorial-4/index.mdx new file mode 100644 index 00000000..d8d7c1c1 --- /dev/null +++ b/dgraph/reference/learn/data-engineer/get-started-with-dgraph/tutorial-4/index.mdx @@ -0,0 +1,441 @@ +--- +title: Get Started with Dgraph - Multi-language strings +--- + +**Welcome to the fourth tutorial of getting started with Dgraph.** + +In the [previous tutorial](./tutorial-3/index), we learned about Datatypes, +Indexing, Filtering, and Reverse traversals in Dgraph. + +In this tutorial, we'll learn about using multi-language strings and operations +on them using the language tags. + +You can see the accompanying video below. + + +--- + +## Strings and languages + +Strings values in Dgraph are of UTF-8 format. Dgraph also supports values for +string predicate types in multiple languages. The multi-lingual capability is +particularly useful to build features, which requires you to store the same +information in multiple languages. + +Let's learn more about them! + +Let's start with building a simple food review Graph. Here's the Graph model. + +![model](/images/tutorials/4/a-graph-model.jpg) + +The above Graph has three entities: Food, Comment, and Country. + +The nodes in the Graph represent these entities. + +For the rest of the tutorial, let's call the node representing a food item as a +`food` node. The node representing a review comment as a `review` node, and the +node representing the country of origin as a `country` node. + +Here's the relationship between them: + +- Every food item is connected to its reviews via the `review` edge. +- Every food item is connected to its country of origin via the `origin` edge. + +Let's add some reviews for some fantastic dishes! + +How about spicing it up a bit before we do that? 
+ +Let's add the reviews for these dishes in the native language of their country +of origin. + +Let's go, amigos! + +```json +{ + "set": [ + { + "food_name": "Hamburger", + "review": [ + { + "comment": "Tastes very good" + } + ], + "origin": [ + { + "country": "United states of America" + } + ] + }, + { + "food_name": "Carrillada", + "review": [ + { + "comment": "Sabe muy sabroso" + } + ], + "origin": [ + { + "country": "Spain" + } + ] + }, + { + "food_name": "Pav Bhaji", + "review": [ + { + "comment": "स्वाद बहुत अच्छा है" + } + ], + "origin": [ + { + "country": "India" + } + ] + }, + { + "food_name": "Borscht", + "review": [ + { + "comment": "очень вкусно" + } + ], + "origin": [ + { + "country": "Russia" + } + ] + }, + { + "food_name": "mapo tofu", + "review": [ + { + "comment": "真好吃" + } + ], + "origin": [ + { + "country": "China" + } + ] + } + ] +} +``` + +_Note: If this mutation syntax is new to you, refer to the +[first tutorial](/tutorial-1/index.md) to learn basics of mutation in Dgraph._ + +Here's our Graph! + +![full graph](/images/tutorials/4/a-full-graph.png) + +Our Graph has: + +- Five blue food nodes. +- The green nodes represent the country of origin of these food items. +- The reviews of the food items are in pink. + +You can also see that Dgraph has auto-detected the data types of the predicates. +You can check that out from the schema tab. + +![full graph](/images/tutorials/4/c-schema.png) + +_Note: Check out the [previous tutorial](./tutorial-3/index.md) to know more +about data types in Dgraph._ + +Let's write a query to fetch all the food items, their reviews, and their +country of origin. + +Go to the query tab, paste the query, and click Run. + +```graphql +{ + food_review(func: has(food_name)) { + food_name + review { + comment + } + origin { + country + } + } +} +``` + +_Note: Check the [second tutorial](./tutorial-2/index.md) if you want to learn +more about traversal queries like the above one_ + +Now, Let's fetch only the food items and their reviews, + +```graphql +{ + food_review(func: has(food_name)) { + food_name + review { + comment + } + } +} +``` + +As expected, these comments are in different languages. + +![full graph](/images/tutorials/4/b-comments.png) + +But can we fetch the reviews based on their language? Can we write a query which +says: _Hey Dgraph, can you give me only the reviews written in Chinese?_ + +That's possible, but only if you provide additional information about the +language of the string data. You can do so by using language tags. While adding +the string data using mutations, you can use the language tags to specify the +language of the string predicates. + +Let's see the language tags in action! + +I've heard that Sushi is yummy! Let's add a review for `Sushi` in more than one +language. We'll be writing the review in three different languages: English, +Japanese, and Russian. + +Here's the mutation to do so. + +```json +{ + "set": [ + { + "food_name": "Sushi", + "review": [ + { + "comment": "Tastes very good", + "comment@jp": "とても美味しい", + "comment@ru": "очень вкусно" + } + ], + "origin": [ + { + "country": "Japan" + } + ] + } + ] +} +``` + +Let's take a closer look at how we assigned values for the `comment` predicate +in different languages. + +We used the language tags (@ru, @jp) as a suffix for the `comment` predicate. + +In the above mutation: + +- We used the `@ru` language tag to add the comment in Russian: + `"comment@ru": "очень вкусно"`. 
+ +- We used the `@jp` language tag to add the comment in Japanese: + `"comment@jp": "とても美味しい"`. + +- The comment in `English` is untagged: `"comment": "Tastes very good"`. + +In the mutation above, Dgraph creates a new node for the reviews, and stores +`comment`, `comment@ru`, and `comment@jp` in different predicates inside the +same node. + +_Note: If you're not clear about basic terminology like `predicates`, do read +the [first tutorial](./tutorial-1/index.md)._ + +Let's run the above mutation. + +Go to the mutate tab, paste the mutation, and click Run. + +![lang error](/images/tutorials/4/d-lang-error.png) + +We got an error! Using the language tag requires you to add the `@lang` +directive to the schema. + +Follow the instructions below to add the `@lang` directive to the `comment` +predicate. + +- Go to the Schema tab. +- Click on the `comment` predicate. +- Tick mark the `lang` directive. +- Click on the `Update` button. + +![lang error](/images/tutorials/4/e-update-lang.png) + +Let's re-run the mutation. + +![lang error](/images/tutorials/4/f-mutation-success.png) + +Success! + +Again, remember that using the above mutation, we have added only one review for +Sushi, not three different reviews! + +But, if you want to add three different reviews, here's how you do it. + +Adding the review in the format below creates three nodes, one for each of the +comments. But, do it only when you're adding a new review, not to represent the +same review in different languages. + +``` +"review": [ + { + "comment": "Tastes very good" + }, + { + "comment@jp": "とても美味しい" + }, + { + "comment@ru": "очень вкусно" + } +] +``` + +Dgraph allows any strings to be used as language tags. But, it is highly +recommended only to use the ISO standard code for language tags. + +By following the standard, you eliminate the need to communicate the tags to +your team or to document it somewhere. +[Click here](https://www.w3schools.com/tags/ref_language_codes.asp) to see the +list of ISO standard codes for language tags. + +In our next section, let's make use of the language tags in our queries. + +## Querying using language tags. + +Let's obtain the review comments only for `Sushi`. + +In the [previous article](./tutorial-3/index.md), we learned about using the +`eq` operator and the `hash` index to query for string predicate values. + +Using that knowledge, let's first add the `hash` index for the `food_name` +predicate. + +![hash index](/images/tutorials/4/g-hash.png) + +Now, go to the query tab, paste the query in the text area, and click Run. + +```graphql +{ + food_review(func: eq(food_name,"Sushi")) { + food_name + review { + comment + } + } +} +``` + +![hash index](/images/tutorials/4/h-comment.png) + +By default, the query only returns the untagged comment. + +But you can use the language tag to query specifically for a review comment in a +given language. + +Let's query for a review for `Sushi` in Japanese. + +```graphql +{ + food_review(func: eq(food_name,"Sushi")) { + food_name + review { + comment@jp + } + } +} +``` + +![Japanese](/images/tutorials/4/i-japanese.png) + +Now, let's query for a review for `Sushi` in Russian. + +```graphql +{ + food_review(func: eq(food_name,"Sushi")) { + food_name + review { + comment@ru + } + } +} +``` + +![Russian](/images/tutorials/4/j-russian.png) + +You can also fetch all the comments for `Sushi` written in any language. 
+
+```graphql
+{
+  food_review(func: eq(food_name, "Sushi")) {
+    food_name
+    review {
+      comment@*
+    }
+  }
+}
+```
+
+![Russian](/images/tutorials/4/k-star.png)
+
+Here is the table with the syntax for the various ways of using language tags
+while querying.
+
+| Syntax          | Result                                                                                                                                                            |
+| --------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| comment         | Look for an untagged string; return nothing if no untagged review exists.                                                                                          |
+| comment@.       | Look for an untagged string; if not found, return a review in any language. This returns only a single value.                                                      |
+| comment@jp      | Look for a comment tagged `@jp`. If not found, the query returns nothing.                                                                                          |
+| comment@ru      | Look for a comment tagged `@ru`. If not found, the query returns nothing.                                                                                          |
+| comment@jp:.    | Look for a comment tagged `@jp` first. If not found, find the untagged comment. If that's not found either, return any one comment in another language.            |
+| comment@jp:ru   | Look for a comment tagged `@jp`, then `@ru`. If neither is found, return nothing.                                                                                  |
+| comment@jp:ru:. | Look for a comment tagged `@jp`, then `@ru`. If neither is found, find the untagged comment. If that's not found either, return any other comment if it exists.    |
+| comment@\*      | Return the comments in all the language tags, including the untagged one.                                                                                          |
+
+If you remember, we initially added a Russian dish, `Borscht`, with its review
+written in Russian.
+
+![Russian](/images/tutorials/4/l-russian.png)
+
+Notice that we didn't use the language tag `@ru` for that review, even though
+it is written in Russian.
+
+Hence, if we query for all the reviews tagged `@ru`, the review for `Borscht`
+doesn't make it to the list.
+
+Only the review for `Sushi`, written in Russian, makes it to the list.
+
+![Russian](/images/tutorials/4/m-sushi.png)
+
+So, here's the lesson of the day!
+
+> If you are representing the same information in different languages, don't
+> forget to add your language tags!
+
+## Summary
+
+In this tutorial, we learned about using multi-language strings and operating
+on them using language tags.
+
+The usage of tags is not restricted to multi-lingual strings. Language tags are
+just one use case of Dgraph's capability to tag data.
+
+In the next tutorial, we'll continue our quest into the string types in Dgraph.
+We'll explore the string type indices in detail.
+
+Sounds interesting?
+
+Check out our next tutorial of the getting started series
+[here](./tutorial-5/index).
+
+## Need Help
+
+- Please use [discuss.dgraph.io](https://discuss.dgraph.io) for questions,
+  feature requests, bugs, and discussions.
diff --git a/dgraph/reference/learn/data-engineer/get-started-with-dgraph/tutorial-5/index.mdx b/dgraph/reference/learn/data-engineer/get-started-with-dgraph/tutorial-5/index.mdx
new file mode 100644
index 00000000..ecf4f1b0
--- /dev/null
+++ b/dgraph/reference/learn/data-engineer/get-started-with-dgraph/tutorial-5/index.mdx
@@ -0,0 +1,665 @@
+---
+title: Get Started with Dgraph - String Indices
+---
+
+**Welcome to the fifth tutorial of getting started with Dgraph.**
+
+In the [previous tutorial](./tutorial-4/index), we learned about using
+multi-language strings and operations on them using
+[language tags](https://www.w3schools.com/tags/ref_language_codes.asp).
+
+In this tutorial, we'll model tweets in Dgraph and, using that model, we'll
+learn more about string indices in Dgraph.
+
+We'll specifically learn about:
+
+- Modeling tweets in Dgraph.
+- Using String indices in Dgraph + - Querying twitter users using the `hash` index. + - Comparing strings using the `exact` index. + - Searching for tweets based on keywords using the `term` index. + +Here's the complimentary video for this blog post. It'll walk you through the +steps of this getting started episode. + + + +Let's start analyzing the anatomy of a real tweet and figure out how to model it +in Dgraph. + +The accompanying video of the tutorial will be out shortly, so stay tuned to +[our YouTube channel](https://www.youtube.com/channel/UCghE41LR8nkKFlR3IFTRO4w). + +## Modeling a tweet in Dgraph + +Here's a sample tweet. + +```Test tweet for the fifth episode of getting started series with @dgraphlabs. +Wait for the video of the fourth one by @francesc the coming Wednesday! #GraphDB #GraphQL + +— Karthic Rao | karthic.eth (@hackintoshrao) November 13, 2019 +``` + +Let's dissect the tweet above. Here are the components of the tweet: + +- **The Author** + + The author of the tweet is the user `@hackintoshrao`. + +- **The Body** + + This component is the content of the tweet. + + > Test tweet for the fifth episode of getting started series with @dgraphlabs. + > Wait for the video of the fourth one by @francesc the coming Wednesday! + > #GraphDB #GraphQL + +- **The Hashtags** + + Here are the hashtags in the tweet: `#GraphQL` and `#GraphDB`. + +- **The Mentions** + + A tweet can mention other twitter users. + + Here are the mentions in the tweet above: `@dgraphlabs` and `@francesc`. + +Before we model tweets in Dgraph using these components, let's recap the design +principles of a graph model: + +> `Nodes` and `Edges` are the building blocks of a graph model. May it be a +> sale, a tweet, user info, any concept or an entity is represented as a node. +> If any two nodes are related, represent that by creating an edge between them. + +With the above design principles in mind, let's go through components of a tweet +and see how we could fit them into Dgraph. + +**The Author** + +The Author of a tweet is a twitter user. We should use a node to represent this. + +**The Body** + +We should represent every tweet as a node. + +**The Hashtags** + +It is advantageous to represent a hashtag as a node of its own. It gives us +better flexibility while querying. + +Though you can search for hashtags from the body of a tweet, it's not efficient +to do so. Creating unique nodes to represent a hashtag, allows you to write +performant queries like the following: _Hey Dgraph, give me all the tweets with +hashtag #graphql_ + +**The Mentions** + +A mention represents a twitter user, and we've already modeled a user as a node. +Therefore, we represent a mention as an edge between a tweet and the users +mentioned. + +### The Relationships + +We have three types of nodes: `User`, `Tweet,` and `Hashtag`. + +![graph nodes](/images/tutorials/5/a-nodes.jpg) + +Let's look at how these nodes might be related to each other and model their +relationship as an edge between them. + +**The User and Tweet nodes** + +There's a two-way relationship between a `Tweet` and a `User` node. + +- Every tweet is authored by a user, and a user can author many tweets. + +Let's name the edge representing this relationship as `authored` . + +An `authored` edge points from a `User` node to a `Tweet` node. + +- A tweet can mention many users, and users can be mentioned in many tweets. + +Let's name the edge which represents this relationship as `mentioned`. + +A `mentioned` edge points from a `Tweet` node to a `User` node. 
These users are +the ones who are mentioned in the tweet. + +![graph nodes](/images/tutorials/5/a-tweet-user.jpg) + +**The tweet and the hashtag nodes** + +A tweet can have one or more hashtags. Let's name the edge, which represents +this relationship as `tagged_with`. + +A `tagged_with` edge points from a `Tweet` node to a `Hashtag` node. These +hashtag nodes correspond to the hashtags in the tweets. + +![graph nodes](/images/tutorials/5/a-tagged.jpg) + +**The Author and hashtag nodes** + +There's no direct relationship between an author and a hashtag node. Hence, we +don't need a direct edge between them. + +Our graph model of a tweet is ready! Here's it is. + +![tweet model](/images/tutorials/5/a-graph-model.jpg) + +Here is the graph of our sample tweet. + +![tweet model](/images/tutorials/5/c-tweet-model.jpg) + +Let's add a couple of tweets to the list. + +``` +So many good talks at #graphqlconf, next year I'll make sure to be *at least* in the audience! + +Also huge thanks to the live tweeting by @dgraphlabs for alleviating the FOMO 😊#GraphDB ♥️ #GraphQL https://t.co/5uDpbswFZi + +— francesc (@francesc) June 21, 2019 +Let's Go and catch @francesc at @Gopherpalooza today, as he scans into Go source code by building its Graph in Dgraph! + +Be there, as he Goes through analyzing Go source code, using a Go program, that stores data in the GraphDB built in Go!#golang #GraphDB #Databases #Dgraph pic.twitter.com/sK90DJ6rLs + +— Dgraph Labs (@dgraphlabs) November 8, 2019 +``` + +We'll be using these two tweets and the sample tweet, which we used in the +beginning as our dataset. Open Ratel, go to the mutate tab, paste the mutation, +and click Run. + +```json +{ + "set": [ + { + "user_handle": "hackintoshrao", + "user_name": "Karthic Rao", + "uid": "_:hackintoshrao", + "authored": [ + { + "tweet": "Test tweet for the fifth episode of getting started series with @dgraphlabs. 
Wait for the video of the fourth one by @francesc the coming Wednesday!\n#GraphDB #GraphQL", + "tagged_with": [ + { + "uid": "_:graphql", + "hashtag": "GraphQL" + }, + { + "uid": "_:graphdb", + "hashtag": "GraphDB" + } + ], + "mentioned": [ + { + "uid": "_:francesc" + }, + { + "uid": "_:dgraphlabs" + } + ] + } + ] + }, + { + "user_handle": "francesc", + "user_name": "Francesc Campoy", + "uid": "_:francesc", + "authored": [ + { + "tweet": "So many good talks at #graphqlconf, next year I'll make sure to be *at least* in the audience!\nAlso huge thanks to the live tweeting by @dgraphlabs for alleviating the FOMO😊\n#GraphDB ♥️ #GraphQL", + "tagged_with": [ + { + "uid": "_:graphql" + }, + { + "uid": "_:graphdb" + }, + { + "hashtag": "graphqlconf" + } + ], + "mentioned": [ + { + "uid": "_:dgraphlabs" + } + ] + } + ] + }, + { + "user_handle": "dgraphlabs", + "user_name": "Dgraph Labs", + "uid": "_:dgraphlabs", + "authored": [ + { + "tweet": "Let's Go and catch @francesc at @Gopherpalooza today, as he scans into Go source code by building its Graph in Dgraph!\nBe there, as he Goes through analyzing Go source code, using a Go program, that stores data in the GraphDB built in Go!\n#golang #GraphDB #Databases #Dgraph ", + "tagged_with": [ + { + "hashtag": "golang" + }, + { + "uid": "_:graphdb" + }, + { + "hashtag": "Databases" + }, + { + "hashtag": "Dgraph" + } + ], + "mentioned": [ + { + "uid": "_:francesc" + }, + { + "uid": "_:dgraphlabs" + } + ] + }, + { + "uid": "_:gopherpalooza", + "user_handle": "gopherpalooza", + "user_name": "Gopherpalooza" + } + ] + } + ] +} +``` + + + {" "} + If you're new to Dgraph, and yet to figure out how to run the database and use + Ratel, we highly recommend reading the [first article of the + series](/tutorial-1/index) + + +Here is the graph we built. + +![tweet graph](/images/tutorials/5/x-all-tweets.png) + +Our graph has: + +- Five blue twitter user nodes. +- The green nodes are the tweets. +- The blue ones are the hashtags. + +Let's start our tweet exploration by querying for the twitter users in the +database. + +``` +{ + tweet_graph(func: has(user_handle)) { + user_handle + } +} +``` + +![tweet model](/images/tutorials/5/j-users.png) + +_Note: If the query syntax above looks not so familiar to you, check out the +[first tutorial](./tutorial-1/index)._ + +We have four twitter users: `@hackintoshrao`, `@francesc`, `@dgraphlabs`, and +`@gopherpalooza`. + +Now, let's find their tweets and hashtags too. + +```graphql +{ + tweet_graph(func: has(user_handle)) { + user_name + authored { + tweet + tagged_with { + hashtag + } + } + } +} +``` + +![tweet model](/images/tutorials/5/y-author-tweet.png) + +_Note: If the traversal query syntax in the above query is not familiar to you, +[check out the third tutorial](./tutorial-3/index) of the series._ + +Before we start querying our graph, let's learn a bit about database indices +using a simple analogy. + +### What are indices? + +Indexing is a way to optimize the performance of a database by minimizing the +number of disk accesses required when a query is processed. + +Consider a "Book" of 600 pages, divided into 30 sections. Let's say each section +has a different number of pages in it. + +Now, without an index page, to find a particular section that starts with the +letter "F", you have no other option than scanning through the entire book. i.e: +600 pages. + +But with an index page at the beginning makes it easier to access the intended +information. 
You just need to look over the index page; after finding the
+matching entry, you can efficiently jump to the relevant section, skipping all
+the others.
+
+But remember that the index page also takes disk space! Use indexes only when
+necessary.
+
+In our next section, let's run some interesting queries on our twitter graph.
+
+## String indices and querying
+
+### Hash index
+
+Let's compose a query which says: _Hey Dgraph, find me the tweets of the user
+whose twitter handle equals `hackintoshrao`._
+
+Before we do so, we first need to add an index to the `user_handle` predicate.
+We know that there are five types of string indices: `hash`, `exact`, `term`,
+`fulltext`, and `trigram`.
+
+The type of string index to use depends on the kind of queries you want to run
+on the string predicate.
+
+In this case, we want to search for a node based on the exact string value of a
+predicate. For a use case like this one, the `hash` index is recommended.
+
+Let's first add the `hash` index to the `user_handle` predicate.
+
+![tweet model](/images/tutorials/5/k-hash.png)
+
+Now, let's use the `eq` comparator to find all the tweets of `hackintoshrao`.
+
+Go to the query tab, type in the query, and click Run.
+
+```graphql
+{
+  tweet_graph(func: eq(user_handle, "hackintoshrao")) {
+    user_name
+    authored {
+      tweet
+    }
+  }
+}
+```
+
+![tweet model](/images/tutorials/5/z-exact.png)
+
+_Note: Refer to [the third tutorial](./tutorial-3/index) if you want to know
+about comparator functions like `eq` in detail._
+
+Let's extend the last query to also fetch the hashtags and the mentions.
+
+```graphql
+{
+  tweet_graph(func: eq(user_handle, "hackintoshrao")) {
+    user_name
+    authored {
+      tweet
+      tagged_with {
+        hashtag
+      }
+      mentioned {
+        user_name
+      }
+    }
+  }
+}
+```
+
+![tweet model](/images/tutorials/5/l-hash-query.png)
+
+_Note: If the traversal query syntax in the above query is not familiar to you,
+[check out the third tutorial](./tutorial-3/index) of the series._
+
+Did you know that string values in Dgraph can also be compared using comparators
+like greater-than or less-than?
+
+In our next section, let's see how to run the comparison functions other than
+`equals to (eq)` on string predicates.
+
+### Exact Index
+
+We discussed in the [third tutorial](./tutorial-3/index) that there are five
+comparator functions in Dgraph.
+
+Here's a quick recap:
+
+| comparator function name | Full form                |
+| ------------------------ | ------------------------ |
+| eq                       | equals to                |
+| lt                       | less than                |
+| le                       | less than or equal to    |
+| gt                       | greater than             |
+| ge                       | greater than or equal to |
+
+All five comparator functions can be applied to string predicates.
+
+We have already used the `eq` operator. The other four are useful for
+operations that depend on the alphabetical ordering of strings.
+
+Let's learn about this with a simple example.
+
+Let's find the twitter accounts which come after `dgraphlabs` in alphabetically
+sorted order.
+
+```graphql
+{
+  using_greater_than(func: gt(user_handle, "dgraphlabs")) {
+    user_handle
+  }
+}
+```
+
+![tweet model](/images/tutorials/5/n-exact-error.png)
+
+Oops, we have an error!
+
+You can see from the error that the current `hash` index on the `user_handle`
+predicate doesn't support the `gt` function.
+
+To be able to do string comparison operations like the one above, you first
+need to set the `exact` index on the string predicate.
+
+The `exact` index is the only string index that allows you to use the `ge`,
+`gt`, `le`, and `lt` comparators on string predicates.
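+
+By the way, Ratel's schema tab is just a convenient front end for Dgraph's
+schema. If you prefer working with the raw DQL schema syntax, the same index
+change can be expressed as a single schema line and applied through Dgraph's
+`/alter` HTTP endpoint or any client library. A minimal sketch, assuming the
+predicate name used in this tutorial:
+
+```
+user_handle: string @index(exact) .
+```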
+
+Keep in mind that the `exact` index also allows you to use the `equals to (eq)`
+comparator. But if you only want to use the `equals to (eq)` comparator on a
+string predicate, the `exact` index would be overkill. The `hash` index would
+be a better option, as it is, in general, much more space-efficient.
+
+Let's see the `exact` index in action.
+
+![set exact](/images/tutorials/5/o-exact-conflict.png)
+
+We again have an error!
+
+Though a string predicate can have more than one index, some of them are not
+compatible with each other. One such example is the combination of the `hash`
+and the `exact` indices.
+
+The `user_handle` predicate already has the `hash` index, so trying to set the
+`exact` index gives you an error.
+
+Let's uncheck the `hash` index for the `user_handle` predicate, select the
+`exact` index, and click Update.
+
+![set exact](/images/tutorials/5/p-set-exact.png)
+
+Though Dgraph allows you to change the index type of a predicate, do it only if
+it's necessary. When the indices are changed, the data needs to be re-indexed,
+and this takes some compute time. While the re-indexing operation is running,
+all mutations are put on hold.
+
+Now, let's re-run the query.
+
+![tweet model](/images/tutorials/5/q-exact-gt.png)
+
+The result contains three twitter handles: `francesc`, `gopherpalooza`, and
+`hackintoshrao`.
+
+In alphabetically sorted order, these twitter handles are greater than
+`dgraphlabs`.
+
+Some tweets appeal to us more than others. For instance, I love `Graphs` and
+`Go`, so I would surely enjoy tweets related to these topics. A keyword-based
+search is a useful way to find such relevant information.
+
+Can we search for tweets based on one or more keywords related to our
+interests?
+
+Yes, we can! Let's do that in our next section.
+
+### The Term index
+
+The `term` index lets you search string predicates based on one or more
+keywords. These keywords are called terms.
+
+To be able to search tweets for specific keywords or terms, we first need to
+set the `term` index on the `tweet` predicate.
+
+Adding the `term` index is similar to adding any other string index.
+
+![term set](/images/tutorials/5/r-term-set.png)
+
+Dgraph provides two built-in functions specifically to search for terms:
+`allofterms` and `anyofterms`.
+
+Apart from these two functions, the `term` index only supports the `eq`
+comparator. This means any other query function (like `lt`, `gt`, `le`...)
+fails when run on a string predicate with the `term` index.
+
+We'll soon take a look at the table containing the string indices and their
+supported query functions. But first, let's learn how to use the `anyofterms`
+and `allofterms` query functions. Let's write a query to find all tweets with
+the terms (keywords) `Go` or `Graph` in them.
+
+Go to the query tab, paste the query, and click Run.
+ +```graphql +{ + find_tweets(func: anyofterms(tweet, "Go Graph")) { + tweet + } +} +``` + +Here's the matched tweet from the query response: + +```json +{ + "tweet": "Let's Go and catch @francesc at @Gopherpalooza today, as he scans into Go source code by building its Graph in Dgraph!\nBe there, as he Goes through analyzing Go source code, using a Go program, that stores data in the GraphDB built in Go!\n#golang #GraphDB #Databases #Dgraph " +} +``` + +![go graph set](/images/tutorials/5/s-go-graph.png) + +_Note: Check out [the first tutorial](./tutorial-1/index) if the query syntax, +in general, is not familiar to you_ + +The `anyofterms` function returns tweets which have either of `Go` or `Graph` +keyword. + +In this case, we've used only two terms to search for (`Go` and `Graph`), but +you can extend for any number of terms to be searched or matched. + +The result has one of the three tweets in the database. The other two tweets +don't make it to the result since they don't have either of the terms `Go` or +`Graph`. + +It's also important to notice that the term search functions (`anyofterms` and +`allofterms`) are insensitive to case and special characters. + +This means, if you search for the term `GraphQL`, the query returns a positive +match for all of the following terms found in the tweets: `graphql`, `graphQL`, +`#graphql`, `#GraphQL`. + +Now, let's find tweets that have either of the terms `Go` or `GraphQL` in them. + +```graphql +{ + find_tweets(func: anyofterms(tweet, "Go GraphQL")) { + tweet + } +} +``` + +![Go Graphql](/images/tutorials/5/t-go-graphql-all.png) + +Oh wow, we have all the three tweets in the result. This means, all of the three +tweets have either of the terms `Go` or `GraphQL`. + +Now, how about finding tweets that contain both the terms `Go` and `GraphQL` in +them. We can do it by using the `allofterms` function. + +```graphql +{ + find_tweets(func: allofterms(tweet, "Go GraphQL")) { + tweet + } +} +``` + +![Go Graphql](/images/tutorials/5/u-allofterms.png) + +We have an empty result. None of the tweets have both the terms `Go` and +`GraphQL` in them. + +Besides `Go` and `Graph`, I'm also a big fan of `GraphQL` and `GraphDB`. + +Let's find out tweets that contain both the keywords `GraphQL` and `GraphDB` in +them. + +![Graphdb-GraphQL](/images/tutorials/5/v-graphdb-graphql.png) + +We have two tweets in a result which has both the terms `GraphQL` and `GraphDB`. + +``` +{ + "tweet": "Test tweet for the fifth episode of getting started series with @dgraphlabs. Wait for the video of the fourth one by @francesc the coming Wednesday!\n#GraphDB #GraphQL" +}, +{ + "tweet": "So many good talks at #graphqlconf, next year I'll make sure to be *at least* in the audience!\nAlso huge thanks to the live tweeting by @dgraphlabs for alleviating the FOMO😊\n#GraphDB ♥️ #GraphQL" +} +``` + +Before we wrap up, here's the table containing the three string indices we +learned about, and their compatible built-in functions. + +| Index | Valid query functions | +| ----- | -------------------------- | +| hash | eq | +| exact | eq, lt, gt, le, ge | +| term | eq, allofterms, anyofterms | + +## Summary + +In this tutorial, we modeled a series of tweets and set up the exact, term, and +hash indices in order to query them. + +Did you know that Dgraph also offers more powerful search capabilities like +full-text search and regular expressions based search? + +In the next tutorial, we'll explore these features and learn about more powerful +ways of searching for your favorite tweets! 
+ +Sounds interesting? Then see you all soon in the next tutorial. Till then, happy +Graphing! + +Check out our next tutorial of the getting started series +[here](./tutorial-6/index). + +## Need Help + +- Please use [discuss.dgraph.io](https://discuss.dgraph.io) for questions, + feature requests, bugs, and discussions. diff --git a/dgraph/reference/learn/data-engineer/get-started-with-dgraph/tutorial-6/index.mdx b/dgraph/reference/learn/data-engineer/get-started-with-dgraph/tutorial-6/index.mdx new file mode 100644 index 00000000..5af6525b --- /dev/null +++ b/dgraph/reference/learn/data-engineer/get-started-with-dgraph/tutorial-6/index.mdx @@ -0,0 +1,455 @@ +--- +title: Get Started with Dgraph - Advanced Text Search +--- + +**Welcome to the sixth tutorial of getting started with Dgraph.** + +In the [previous tutorial](./tutorial-5/index), we learned about building social +graphs in Dgraph, by modeling tweets as an example. We queried the tweets using +the `hash` and `exact` indices, and implemented a keyword-based search to find +your favorite tweets using the `term` index and its functions. + +In this tutorial, we'll continue from where we left off and learn about advanced +text search features in Dgraph. + +Specifically, we'll focus on two advanced feature: + +- Searching for tweets using Full-text search. +- Searching for hashtags using the regular expression search. + +The accompanying video of the tutorial will be out shortly, so stay tuned to +[our YouTube channel](https://www.youtube.com/channel/UCghE41LR8nkKFlR3IFTRO4w). + +--- + +Before we dive in, let's do a quick recap of how to model the tweets in Dgraph. + +![tweet model](/images/tutorials/5/a-graph-model.jpg) + +In the previous tutorial, we took three real tweets as a sample dataset and +stored them in Dgraph using the above graph as a model. + +In case you haven't stored the tweets from the +[previous tutorial](./tutorial-5/index) into Dgraph, here's the sample dataset +again. + +Copy the mutation below, go to the mutation tab and click Run. + +```json +{ + "set": [ + { + "user_handle": "hackintoshrao", + "user_name": "Karthic Rao", + "uid": "_:hackintoshrao", + "authored": [ + { + "tweet": "Test tweet for the fifth episode of getting started series with @dgraphlabs. 
Wait for the video of the fourth one by @francesc the coming Wednesday!\n#GraphDB #GraphQL", + "tagged_with": [ + { + "uid": "_:graphql", + "hashtag": "GraphQL" + }, + { + "uid": "_:graphdb", + "hashtag": "GraphDB" + } + ], + "mentioned": [ + { + "uid": "_:francesc" + }, + { + "uid": "_:dgraphlabs" + } + ] + } + ] + }, + { + "user_handle": "francesc", + "user_name": "Francesc Campoy", + "uid": "_:francesc", + "authored": [ + { + "tweet": "So many good talks at #graphqlconf, next year I'll make sure to be *at least* in the audience!\nAlso huge thanks to the live tweeting by @dgraphlabs for alleviating the FOMO😊\n#GraphDB ♥️ #GraphQL", + "tagged_with": [ + { + "uid": "_:graphql" + }, + { + "uid": "_:graphdb" + }, + { + "hashtag": "graphqlconf" + } + ], + "mentioned": [ + { + "uid": "_:dgraphlabs" + } + ] + } + ] + }, + { + "user_handle": "dgraphlabs", + "user_name": "Dgraph Labs", + "uid": "_:dgraphlabs", + "authored": [ + { + "tweet": "Let's Go and catch @francesc at @Gopherpalooza today, as he scans into Go source code by building its Graph in Dgraph!\nBe there, as he Goes through analyzing Go source code, using a Go program, that stores data in the GraphDB built in Go!\n#golang #GraphDB #Databases #Dgraph ", + "tagged_with": [ + { + "hashtag": "golang" + }, + { + "uid": "_:graphdb" + }, + { + "hashtag": "Databases" + }, + { + "hashtag": "Dgraph" + } + ], + "mentioned": [ + { + "uid": "_:francesc" + }, + { + "uid": "_:dgraphlabs" + } + ] + }, + { + "uid": "_:gopherpalooza", + "user_handle": "gopherpalooza", + "user_name": "Gopherpalooza" + } + ] + } + ] +} +``` + +_Note: If you're new to Dgraph, and this is the first time you're running a +mutation, we highly recommend reading the +[first tutorial of the series before proceeding.](./tutorial-1/index)_ + +Voilà! Now you have a graph with `tweets`, `users`, and `hashtags`. It is ready +for us to explore. + +![tweet graph](/images/tutorials/5/x-all-tweets.png) + +_Note: If you're curious to know how we modeled the tweets in Dgraph, refer to +[the previous tutorial.](./tutorial-5/index)_ + +Let's start by finding your favorite tweets using the full-text search feature +first. + +## Full text search + +Before we learn how to use the Full-text search feature, it's important to +understand when to use it. + +The length and the number of words in a string predicate value vary based on +what the predicates represent. + +Some string predicate values have only a few terms (words) in them. Predicates +representing `names`, `hashtags`, `twitter handle`, `city names` are a few good +examples. These predicates are easy to query using their exact values. + +For instance, here is an example query. + +_Give me all the tweets where the user name is equal to `John Campbell`_. + +You can easily compose queries like these after adding either the `hash` or an +`exact` index to the string predicates. + +But, some of the string predicates store sentences. Sometimes even one or more +paragraphs of text data in them. Predicates representing a tweet, a bio, a blog +post, a product description, or a movie review are just some examples. It's +relatively hard to query these predicates. + +It's not practical to query such predicates using the `hash` or `exact` string +indices. A keyword-based search using the `term` index is a good starting point +to query such predicates. We used it in our +[previous tutorial](./tutorial-5/index) to find the tweets with an exact match +for keywords like `GraphQL`, `Graphs`, and `Go`. 
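+
+To make the exact-value case above concrete: once a `hash` or an `exact` index
+is in place on the `user_name` predicate, the English query _give me all the
+tweets where the user name is equal to John Campbell_ translates into a short
+DQL query. Here's a sketch; the block name `tweets_by_author` is arbitrary, and
+`John Campbell` is just the illustrative name used above:
+
+```graphql
+{
+  tweets_by_author(func: eq(user_name, "John Campbell")) {
+    user_name
+    authored {
+      tweet
+    }
+  }
+}
+```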
+ +But, for some of the use cases, just the keyword-based search may not be +sufficient. You might need a more powerful search capability, and that's when +you should consider using Full-text search. + +Let's write some queries and understand Dgraph's Full-text search capability in +detail. + +To be able to do a Full-text search, you need to first set a `fulltext` index on +the `tweet` predicate. + +Creating a `fulltext` index on any string predicate is similar to creating any +other string indices. + +![full text](/images/tutorials/6/a-set-index.png) + +_Note: Refer to the [previous tutorial](./tutorial-5/index) if you're not sure +about creating an index on a string predicate._ + +Now, let's do a Full-text search query to find tweets related to the following +topic: `graph data and analyzing it in graphdb`. + +You can do so by using either of `alloftext` or `anyoftext` in-built functions. +Both functions take two arguments. The first argument is the predicate to +search. The second argument is the space-separated string values to search for, +and we call these as the `search strings`. + +```sh +- alloftext(predicate, "space-separated search strings") +- anyoftext(predicate, "space-separated search strings") +``` + +We'll look at the difference between these two functions later. For now, let's +use the `alloftext` function. + +Go to the query tab, paste the query below, and click Run. Here is our search +string: `graph data and analyze it in graphdb`. + +```graphql +{ + search_tweet(func: alloftext(tweet, "graph data and analyze it in graphdb")) { + tweet + } +} +``` + +![tweet graph](/images/tutorials/6/b-full-text-query-1.png) + +Here's the matched tweet, which made it to the result. + +```Let's Go and catch @francesc at @Gopherpalooza today, as he scans into Go source code by building its Graph in Dgraph! + +Be there, as he Goes through analyzing Go source code, using a Go program, that stores data in the GraphDB built in Go!#golang #GraphDB #Databases #Dgraph pic.twitter.com/sK90DJ6rLs + +— Dgraph Labs (@dgraphlabs) November 8, 2019 +``` + +If you observe, you can see some of the words from the search strings are not +present in the matched tweet, but the tweet has still made it to the result. + +To be able to use the Full-text search capability effectively, we must +understand how it works. + +Let's understand it in detail. + +Once you set a `fulltext` index on the tweets, internally, the tweets are +processed, and `fulltext` tokens are generated. These `fulltext` tokens are then +indexed. + +The search string also goes through the same processing pipeline, and `fulltext` +tokens generated them too. + +Here are the steps to generate the `fulltext` tokens: + +- Split the tweets into chunks of words called tokens (tokenizing). +- Convert these tokens to lowercase. +- [Unicode-normalize](http://unicode.org/reports/tr15/#Norm_Forms) the tokens. +- Reduce the tokens to their root form, this is called + [stemming](https://en.wikipedia.org/wiki/Stemming) (running to run, faster to + fast and so on). +- Remove the [stop words](https://en.wikipedia.org/wiki/Stop_words). + +You would have seen in [the fourth tutorial](./tutorial-4/index) that Dgraph +allows you to build multi-lingual apps. + +The stemming and stop words removal are not supported for all the languages. +Here is +[the link to the docs](https://dgraph.io/docs/query-language/#full-text-search) +that contains the list of languages and their support for stemming and stop +words removal. 
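+
+To see the difference between the two functions side by side, you can run both
+in a single request. A small sketch using the same search string as before (the
+block names `all_match` and `any_match` are arbitrary):
+
+```graphql
+{
+  all_match(func: alloftext(tweet, "graph data and analyze it in graphdb")) {
+    tweet
+  }
+  any_match(func: anyoftext(tweet, "graph data and analyze it in graphdb")) {
+    tweet
+  }
+}
+```
+
+`all_match` returns a tweet only when every `fulltext` token of the search
+string is present in the tweet's tokens, while `any_match` settles for at least
+one common token.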
+ +Here is the table with the matched tweet and its search string in the first +column. The second column contains their corresponding `fulltext` tokens +generated by Dgraph. + +| Actual text data | fulltext tokens generated by Dgraph | +| ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ---------------------------------------------------------------------------------------------------------------------------------------- | +| Let's Go and catch @francesc at @Gopherpalooza today, as he scans into Go source code by building its Graph in Dgraph!\nBe there, as he Goes through analyzing Go source code, using a Go program, that stores data in the GraphDB built in Go!\n#golang #GraphDB #Databases #Dgraph | [analyz build built catch code data databas dgraph francesc go goe golang gopherpalooza graph graphdb program scan sourc store todai us] | +| graph data and analyze it in graphdb | [analyz data graph graphdb] | + +From the table above, you can see that the tweets are reduced to an array of +strings or tokens. + +Dgraph internally uses [Bleve package](https://github.com/blevesearch/bleve) to +do the stemming. + +Here are the `fulltext` tokens generated for our search string: [`analyz`, +`data`, `graph`, `graphdb`]. + +As you can see from the table above, all of the `fulltext` tokens generated for +the search string exist in the matched tweet. Hence, the `alloftext` function +returns a positive match for the tweet. It would not have returned a positive +match even if one of the tokens in the search string is missing for the tweet. +But, the `anyoftext` function would've returned a positive match as long as the +tweets and the search string have at least one of the tokens in common. + +If you're interested to see Dgraph's `fulltext` tokenizer in action, +[here is the gist](https://gist.github.com/hackintoshrao/0e8d715d8739b12c67a804c7249146a3) +containing the instructions to use it. + +Dgraph generates the same `fulltext` tokens even if the words in a search string +is differently ordered. Hence, using the same search string with different order +would not impact the query result. + +As you can see, all three queries below are the same for Dgraph. + +```graphql +{ + search_tweet(func: alloftext(tweet, "graph analyze and it in graphdb data")) { + tweet + } +} +``` + +```graphql +{ + search_tweet(func: alloftext(tweet, "data and data analyze it graphdb in")) { + tweet + } +} +``` + +```graphql +{ + search_tweet(func: alloftext(tweet, "analyze data and it in graph graphdb")) { + tweet + } +} +``` + +Now, let's move onto the next advanced text search feature of Dgraph: regular +expression based queries. + +Let's use them to find all the hashtags containing the following substring: +`graph`. + +## Regular expression search + +[Regular expressions](https://www.geeksforgeeks.org/write-regular-expressions/) +are powerful ways of expressing search patterns. Dgraph allows you to search for +string predicates based on regular expressions. You need to set the `trigram` +index on the string predicate to be able to perform regex-based queries. + +Using regular expression based search, let's match all the hashtags that have +this particular pattern: +`Starts and ends with any characters of indefinite length, but with the substring graph in it`. 
+
+Here is the regex expression we can use: `^.*graph.*$`
+
+Check out
+[this tutorial](https://www.geeksforgeeks.org/write-regular-expressions/) if
+you're not familiar with writing regular expressions.
+
+Let's first find all the hashtags in the database using the `has()` function.
+
+```graphql
+{
+  hash_tags(func: has(hashtag)) {
+    hashtag
+  }
+}
+```
+
+![The hashtags](/images/tutorials/6/has-hashtag.png)
+
+_If you're not familiar with using the `has()` function, refer to
+[the first tutorial](./tutorial-1/index) of the series._
+
+You can see that we have six hashtags in total, and four of them have the
+substring `graph` in them: `Dgraph`, `GraphQL`, `graphqlconf`, and `GraphDB`.
+
+To search for predicates using regular expressions, we use the built-in
+function `regexp`. This function takes two arguments: the first is the name of
+the predicate, and the second is the regular expression.
+
+Here is the syntax of the `regexp` function:
+`regexp(predicate, /regular-expression/)`
+
+Let's execute the following query to find the hashtags that have the substring
+`graph`.
+
+Go to the query tab, type in the query, and click Run.
+
+```graphql
+{
+  reg_search(func: regexp(hashtag, /^.*graph.*$/)) {
+    hashtag
+  }
+}
+```
+
+Oops! We have an error! It looks like we forgot to set the `trigram` index on
+the `hashtag` predicate.
+
+![The hashtags](/images/tutorials/6/trigram-error.png)
+
+Again, setting a `trigram` index is similar to setting any other string index.
+Let's do that for the `hashtag` predicate.
+
+![The hashtags](/images/tutorials/6/set-trigram.png)
+
+_Note: Refer to the [previous tutorial](./tutorial-5/index) if you're not sure
+about creating an index on a string predicate._
+
+Now, let's re-run the `regexp` query.
+
+![regex-1](/images/tutorials/6/regex-query-1.png)
+
+_Note: Refer to [the first tutorial](./tutorial-1/index) if you're not familiar
+with the query structure in general._
+
+Success!
+
+But we only have the following hashtags in the result: `Dgraph` and
+`graphqlconf`.
+
+That's because the `regexp` function is case-sensitive by default.
+
+Add the character `i` at the end of the second argument of the `regexp`
+function to make it case-insensitive: `regexp(predicate, /regular-expression/i)`
+
+![regex-2](/images/tutorials/6/regex-query-2.png)
+
+Now we have all four hashtags with the substring `graph` in them.
+
+Let's modify the regular expression to match only the hashtags which have the
+prefix `graph`.
+
+```graphql
+{
+  reg_search(func: regexp(hashtag, /^graph.*$/i)) {
+    hashtag
+  }
+}
+```
+
+![regex-3](/images/tutorials/6/regex-query-3.png)
+
+## Summary
+
+In this tutorial, we learned about the full-text search and regular expression
+search capabilities in Dgraph.
+
+Did you know that Dgraph also offers fuzzy search capabilities, which can be
+used to power features like product search in an e-commerce store?
+
+Let's learn about the fuzzy search in our next tutorial.
+
+Sounds interesting?
+
+Check out our next tutorial of the getting started series
+[here](./tutorial-7/index).
+
+## Need Help
+
+- Please use [discuss.dgraph.io](https://discuss.dgraph.io) for questions,
+  feature requests, bugs, and discussions.
diff --git a/dgraph/reference/learn/data-engineer/get-started-with-dgraph/tutorial-7/index.mdx b/dgraph/reference/learn/data-engineer/get-started-with-dgraph/tutorial-7/index.mdx new file mode 100644 index 00000000..dd500d02 --- /dev/null +++ b/dgraph/reference/learn/data-engineer/get-started-with-dgraph/tutorial-7/index.mdx @@ -0,0 +1,325 @@ +--- +title: Get Started with Dgraph - Fuzzy Search +--- + +**Welcome to the seventh tutorial of getting started with Dgraph.** + +In the [previous tutorial](./tutorial-6/index), we learned about building +advanced text searches on social graphs in Dgraph, by modeling tweets as an +example. We queried the tweets using the `fulltext` and `trigram` indices and +implemented full-text and regular expression search on the tweets. + +In this tutorial, we'll continue exploring Dgraph's string querying capabilities +using the twitter model from [the fifth](./tutorial-5/index) and +[the sixth](./tutorial-6/index) tutorials. In particular, we'll implement a +`twitter username` search feature using the Dgraph's fuzzy search function. + +The accompanying video of the tutorial will be out shortly, so stay tuned to +[our YouTube channel](https://www.youtube.com/channel/UCghE41LR8nkKFlR3IFTRO4w). + +--- + +Before we dive in, let's review of how we modeled the tweets in the previous two +tutorials: + +![tweet model](/images/tutorials/5/a-graph-model.jpg) + +We used three real-life example tweets as a sample dataset and stored them in +Dgraph using the above graph as a model. + +Here is the sample dataset again if you skipped the previous tutorials. Copy the +mutation below, go to the mutation tab and click Run. + +```json +{ + "set": [ + { + "user_handle": "hackintoshrao", + "user_name": "Karthic Rao", + "uid": "_:hackintoshrao", + "authored": [ + { + "tweet": "Test tweet for the fifth episode of getting started series with @dgraphlabs. 
Wait for the video of the fourth one by @francesc the coming Wednesday!\n#GraphDB #GraphQL", + "tagged_with": [ + { + "uid": "_:graphql", + "hashtag": "GraphQL" + }, + { + "uid": "_:graphdb", + "hashtag": "GraphDB" + } + ], + "mentioned": [ + { + "uid": "_:francesc" + }, + { + "uid": "_:dgraphlabs" + } + ] + } + ] + }, + { + "user_handle": "francesc", + "user_name": "Francesc Campoy", + "uid": "_:francesc", + "authored": [ + { + "tweet": "So many good talks at #graphqlconf, next year I'll make sure to be *at least* in the audience!\nAlso huge thanks to the live tweeting by @dgraphlabs for alleviating the FOMO😊\n#GraphDB ♥️ #GraphQL", + "tagged_with": [ + { + "uid": "_:graphql" + }, + { + "uid": "_:graphdb" + }, + { + "hashtag": "graphqlconf" + } + ], + "mentioned": [ + { + "uid": "_:dgraphlabs" + } + ] + } + ] + }, + { + "user_handle": "dgraphlabs", + "user_name": "Dgraph Labs", + "uid": "_:dgraphlabs", + "authored": [ + { + "tweet": "Let's Go and catch @francesc at @Gopherpalooza today, as he scans into Go source code by building its Graph in Dgraph!\nBe there, as he Goes through analyzing Go source code, using a Go program, that stores data in the GraphDB built in Go!\n#golang #GraphDB #Databases #Dgraph ", + "tagged_with": [ + { + "hashtag": "golang" + }, + { + "uid": "_:graphdb" + }, + { + "hashtag": "Databases" + }, + { + "hashtag": "Dgraph" + } + ], + "mentioned": [ + { + "uid": "_:francesc" + }, + { + "uid": "_:dgraphlabs" + } + ] + }, + { + "uid": "_:gopherpalooza", + "user_handle": "gopherpalooza", + "user_name": "Gopherpalooza" + } + ] + } + ] +} +``` + +_Note: If you're new to Dgraph, and this is the first time you're running a +mutation, we highly recommend reading the +[first tutorial of the series before proceeding](./tutorial-1/index)._ + +Now you should have a graph with tweets, users, and hashtags, and it is ready +for us to explore. + +![tweet graph](/images/tutorials/5/x-all-tweets.png) + +_Note: If you're curious to know how we modeled the tweets in Dgraph, refer to +[the fifth tutorial](./tutorial-5/index)._ + +Before we show you the fuzzy search in action, let's first understand what it is +and how does it work. + +## Fuzzy search + +Providing search capabilities on products or usernames requires searching for +the closest match to a string, if a full match doesn't exist. This feature helps +you get relevant results even if there's a typo or the user doesn't search based +on the exact name it is stored. This is exactly what the fuzzy search does: it +compares the string values and returns the nearest matches. Hence, it's ideal +for our use case of implementing search on the `twitter usernames`. + +The functioning of the fuzzy search is based on the `Levenshtein distance` +between the value of the user name stored in Dgraph and the search string. + +[`Levenshtein distance`](https://en.wikipedia.org/wiki/Levenshtein_distance) is +a metric that defines the closeness of two strings. `Levenshtein distance` +between two words is the minimum number of single-character edits (insertions, +deletions or substitutions) required to change one word into the other. + +For instance, the `Levenshtein Distance` between the strings `book` and `back` +is 2. The value of 2 is justified because by changing two characters, we changed +the word `book` to `back`. + +Now you've understood what the fuzzy search is and what it can do. Next, let's +learn how to use it on string predicates in Dgraph. 
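+
+One more bit of arithmetic before we do: the `Levenshtein distance` between
+`Dgraph` and `Dgraph Labs` is 5, because five single-character insertions (a
+space, then `L`, `a`, `b`, and `s`) are needed to turn the first string into
+the second. Keep that number in mind when we tune the distance parameter
+below.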
+
+## Implement Fuzzy Search in Dgraph
+
+To use the fuzzy search on a string predicate in Dgraph, you first set the
+`trigram` index.
+
+Go to the Schema tab and set the `trigram` index on the `user_name` predicate.
+
+After setting the `trigram` index on the `user_name` predicate, you can use
+Dgraph's built-in function `match` to run a fuzzy search query.
+
+Here is the syntax of the `match` function:
+`match(predicate, search string, distance)`
+
+The [match function](https://dgraph.io/docs/query-language/#fuzzy-matching)
+takes in three parameters:
+
+1. The name of the string predicate used for querying.
+2. The search string provided by the user.
+3. An integer that represents the maximum `Levenshtein Distance` between the
+   first two parameters. This value should be greater than 0. For example, a
+   distance value of 8 matches predicates whose values are within a
+   `Levenshtein Distance` of 8 or less from the search string.
+
+Using a greater value for the `distance` parameter can potentially match more
+string predicates, but it also yields less accurate results.
+
+Before we use the `match` function, let's first get the list of user names
+stored in the database.
+
+```graphql
+{
+  names(func: has(user_name)) {
+    user_name
+  }
+}
+```
+
+![tweet graph](/images/tutorials/7/e-names.png)
+
+As you can see from the result, we have four user names: `Gopherpalooza`,
+`Karthic Rao`, `Francesc Campoy`, and `Dgraph Labs`.
+
+First, we set the `Levenshtein Distance` parameter to 3. We expect Dgraph to
+return all the `user_name` predicates within a distance of three from the
+provided search string.
+
+Then, we set the second parameter, the search string provided by the user, to
+`graphLabs`.
+
+Go to the query tab, paste the query below and click Run.
+
+```graphql
+{
+  user_names_Search(func: match(user_name, "graphLabs", 3)) {
+    user_name
+  }
+}
+```
+
+![first query](/images/tutorials/7/h-one.png)
+
+We got a positive match! The search string `graphLabs` is at a distance of two
+from the predicate value `Dgraph Labs`, so we see it in the search result.
+
+If you are interested in learning more about how to find the Levenshtein
+Distance between two strings,
+[here is a useful site](https://planetcalc.com/1721/).
+
+Let's run the above query again, but this time we will use the search string
+`graphLab` instead. Go to the query tab, paste the query below and click Run.
+
+```graphql
+{
+  user_names_Search(func: match(user_name, "graphLab", 3)) {
+    user_name
+  }
+}
+```
+
+![first query](/images/tutorials/7/i-two.png)
+
+We still got a positive match with the `user_name` predicate whose value is
+`Dgraph Labs`! That's because the search string `graphLab` is at a distance of
+exactly three from the predicate value `Dgraph Labs`, which is still within our
+limit.
+
+For the last run of the query, let's change the search string to `Dgraph` but
+keep the Levenshtein Distance at 3.
+
+```graphql
+{
+  user_names_Search(func: match(user_name, "Dgraph", 3)) {
+    user_name
+  }
+}
+```
+
+![first query](/images/tutorials/7/j-three.png)
+
+Now `Dgraph Labs` no longer appears in the search result, because the distance
+between `Dgraph` and `Dgraph Labs` is larger than 3. But intuitively, you would
+expect `Dgraph Labs` to appear in the results when searching for `Dgraph`.
+
+This is one of the downsides of fuzzy search based on the
+`Levenshtein Distance` algorithm. The effectiveness of the fuzzy search reduces
+as the value of the distance parameter decreases, and it also reduces with an
+increase in the number of words included in the string predicate.
+
+Therefore, it's not recommended to use fuzzy search on string predicates that
+could contain many words, for instance, predicates that store values for
+`blog posts`, `bio`, `product description`, and so on. The ideal candidates for
+fuzzy search are predicates like `names`, `zipcodes`, and `places`, where the
+number of words in the string predicate is generally between one and three.
+
+Also, depending on the use case, tuning the `distance` parameter is crucial for
+the effectiveness of fuzzy search.
+
+## Fuzzy search scoring because you asked for it
+
+At Dgraph, we're committed to improving the all-round capabilities of the
+distributed graph database. As part of our recent efforts to improve the
+database features, we've taken note of the
+[request on GitHub](https://github.com/dgraph-io/dgraph/issues/3211) by one of
+our community members to integrate a `tf-idf` score-based text search. This
+integration will further enhance the search capabilities of Dgraph.
+
+We've prioritized resolving this issue in our product roadmap. We would like
+to take this opportunity to say thank you to our community of users for helping
+us make the product better.
+
+## Summary
+
+Fuzzy search is a simple yet effective search technique for a wide range of
+use cases. Along with the existing features to query and search string
+predicates, the addition of `tf-idf`-based search will further improve Dgraph's
+capabilities.
+
+This marks the end of our three-tutorial streak exploring string indices and
+their queries using the graph model of tweets.
+
+Check out our next tutorial of the getting started series
+[here](./tutorial-8/index).
+
+Remember to click the “Join our community” button below and subscribe to our
+newsletter to get the latest tutorial right to your inbox.
+
+## Need Help
+
+- Please use [discuss.dgraph.io](https://discuss.dgraph.io) for questions,
+  feature requests, bugs, and discussions.
diff --git a/dgraph/reference/learn/data-engineer/get-started-with-dgraph/tutorial-8/index.mdx b/dgraph/reference/learn/data-engineer/get-started-with-dgraph/tutorial-8/index.mdx
new file mode 100644
index 00000000..95b06528
--- /dev/null
+++ b/dgraph/reference/learn/data-engineer/get-started-with-dgraph/tutorial-8/index.mdx
@@ -0,0 +1,851 @@
+---
+title: Get Started with Dgraph - Geolocation
+---
+
+**Welcome to the eighth tutorial of getting started with Dgraph.**
+
+In the [previous tutorial](./tutorial-7/index.md), we learned about building a
+Twitter-like user-search feature using
+[Dgraph's fuzzy search](./query-language/functions.md#fuzzy-matching).
+
+In this tutorial, we'll build a graph of tourist locations around San Francisco
+and help our zoologist friend, Mary, and her team in their mission to conserve
+birds using Dgraph's geolocation capabilities.
+
+You might have used Google to find the restaurants near you or to find the
+shopping centers within a mile of your current location. Applications like
+these make use of your geolocation data.
+
+Geolocation has become an integral part of mobile applications. With the advent
+of smartphones in the last decade, the list of applications that revolve around
+a user's location to power application features has grown beyond imagination.
+
+Take Uber, for instance: the location data of the driver and the passenger is
+pivotal to the functionality of the application. We're gathering more GPS data
+than ever before, and being able to store and query location data efficiently
+can give you an edge over your competitors.
+
+Real-world data is interconnected, not sparse, and this is especially true of
+location data. Railway networks, maps, and routes are naturally represented as
+graphs.
+
+The good news is that [Dgraph](https://dgraph.io), the world's most advanced
+graph database, comes with functionalities to efficiently store and perform
+useful queries on graphs containing location data. If you want to run queries
+like `find me the hotels near Golden Gate Bridge`, or
+`find me all the tourist locations around Golden Gate Park`, Dgraph has your
+back.
+
+First, let's learn how to represent geolocation data in Dgraph.
+
+## Representing Geolocation data
+
+You can represent location data in Dgraph in two ways:
+
+- **Point location**
+
+A point location contains the geo-coordinate tuple (latitude, longitude) of
+your location of interest.
+
+The following image has the point location with the latitude and longitude for
+the Eiffel Tower in Paris. Point locations are useful for representing a
+precise location; for instance, your location when booking a cab or your
+delivery address.
+
+![model](/images/tutorials/8/b-paris.png)
+
+- **Polygonal location**
+
+It's not possible to represent geographical entities that are spread across
+multiple geo-coordinates using just a point location. To represent geo entities
+like a city, a lake, or a national park, you should use a polygonal location.
+
+Here is an example:
+
+![model](/images/tutorials/8/c-delhi.jpg)
+
+The polygonal fence above represents the city of Delhi, India. This polygonal
+fence, or geo-fence, is formed by connecting multiple straight-line boundaries,
+which are collectively represented as an array of location tuples of the format
+`[(latitude, longitude), (latitude, longitude), ...]`. Each tuple pair (two
+tuples, four coordinates) represents a straight-line boundary of the geo-fence,
+and a polygonal fence can contain any number of lines.
+
+Let's start by building a simple San Francisco tourist graph. Here's the graph
+model.
+
+![model](/images/tutorials/8/a-graph.jpg)
+
+The above graph has three entities represented by the nodes:
+
+- **City**
+
+A `city node` represents the tourist city. Our dataset contains only the city
+of `San Francisco`, and a node in the graph represents it.
+
+- **Location**
+
+A location node contains the name of the location, along with the point or
+polygonal location of the place of interest.
+
+- **Location Type**
+
+A location type consists of the type of location. There are four types of
+location in our dataset: `zoo`, `museum`, `hotel`, and `tourist attraction`.
+
+The `location nodes` representing hotels also contain their pricing
+information.
+
+There are different ways to model the same graph. For instance, the
+`location type` could just be a property or a predicate of the `location node`,
+rather than being a node of its own.
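+
+For instance, here is a minimal sketch of that alternative, with the type
+stored as a hypothetical `loc_type` predicate directly on the location node
+(the coordinates below are illustrative):
+
+```json
+{
+  "set": [
+    {
+      "name": "San Francisco Zoo",
+      "loc_type": "Zoo",
+      "location": {
+        "type": "Point",
+        "coordinates": [-122.503, 37.734]
+      }
+    }
+  ]
+}
+```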
+ +The queries you want to perform or the relationships you like to explore mostly +influence the modeling decisions. The goal of the tutorial is not to arrive at +the ideal graph model, but to use a simple dataset to demonstrate the +geolocation capabilities of Dgraph. + +For the rest of the tutorial, let's call the node representing a `City` as a +`city` node, and the node representing a `Location` as a `location` node, and +the node representing the `Location Type` as a `location type` node. + +Here's the relationship between these nodes: + +- Every `city node` is connected to a `location node` via the `has_location ` + edge. +- Every `location node` is connected to its node representing a `location type` + via the `has_type` edge. + +_Note: Dgraph allows you to associate one or more types for the nodes using its +type system feature, for now, we are using nodes without types, we'll learn +about type system for nodes in a future tutorial. Check +[this page from the documentation site](https://dgraph.io/docs/query-language/#type-system), +if you want to explore type system feature for nodes._ + +Here is our sample dataset. Open Ratel, go to the mutate tab, paste the +mutation, and click Run. + +```json +{ + "set": [ + { + "city": "San Francisco", + "uid": "_:SFO", + "has_location": [ + { + "name": "USS Pampanito", + "location": { + "type": "Polygon", + "coordinates": [ + [ + [-122.4160088, 37.8096674], + [-122.4161147, 37.8097628], + [-122.4162064, 37.8098357], + [-122.4163467, 37.8099312], + [-122.416527, 37.8100471], + [-122.4167504, 37.8101792], + [-122.4168272, 37.8102137], + [-122.4167719, 37.8101612], + [-122.4165683, 37.8100108], + [-122.4163888, 37.8098923], + [-122.4162492, 37.8097986], + [-122.4161469, 37.8097352], + [-122.4160088, 37.8096674] + ] + ] + }, + "has_type": [ + { + "uid": "_:museum", + "loc_type": "Museum" + } + ] + }, + { + "name": "Alameda Naval Air Museum", + "location": { + "type": "Polygon", + "coordinates": [ + [ + [-122.2995054, 37.7813924], + [-122.2988538, 37.7813582], + [-122.2988421, 37.7814972], + [-122.2994937, 37.7815314], + [-122.2995054, 37.7813924] + ] + ] + }, + "street": "Ferry Point Road", + "has_type": [ + { + "uid": "_:museum" + } + ] + }, + { + "name": "Burlingame Museum of PEZ Memorabilia", + "location": { + "type": "Polygon", + "coordinates": [ + [ + [-122.3441509, 37.5792003], + [-122.3438207, 37.5794257], + [-122.3438987, 37.5794587], + [-122.3442289, 37.5792333], + [-122.3441509, 37.5792003] + ] + ] + }, + "street": "California Drive", + "has_type": [ + { + "uid": "_:museum" + } + ] + }, + { + "name": "Carriage Inn", + "location": { + "type": "Polygon", + "coordinates": [ + [ + [-122.3441509, 37.5792003], + [-122.3438207, 37.5794257], + [-122.3438987, 37.5794587], + [-122.3442289, 37.5792333], + [-122.3441509, 37.5792003] + ] + ] + }, + "street": "7th street", + "price_per_night": 350.0, + "has_type": [ + { + "uid": "_:hotel", + "loc_type": "Hotel" + } + ] + }, + { + "name": "Lombard Motor In", + "location": { + "type": "Polygon", + "coordinates": [ + [ + [-122.4260484, 37.8009811], + [-122.4260137, 37.8007969], + [-122.4259083, 37.80081], + [-122.4258724, 37.8008144], + [-122.4257962, 37.8008239], + [-122.4256354, 37.8008438], + [-122.4256729, 37.8010277], + [-122.4260484, 37.8009811] + ] + ] + }, + "street": "Lombard Street", + "price_per_night": 400.0, + "has_type": [ + { + "uid": "_:hotel" + } + ] + }, + { + "name": "Holiday Inn San Francisco Golden Gateway", + "location": { + "type": "Polygon", + "coordinates": [ + [ + 
[-122.4214895, 37.7896108], + [-122.4215628, 37.7899798], + [-122.4215712, 37.790022], + [-122.4215987, 37.7901606], + [-122.4221004, 37.7900985], + [-122.4221044, 37.790098], + [-122.4219952, 37.7895481], + [-122.4218207, 37.78957], + [-122.4216158, 37.7895961], + [-122.4214895, 37.7896108] + ] + ] + }, + "street": "Van Ness Avenue", + "price_per_night": 250.0, + "has_type": [ + { + "uid": "_:hotel" + } + ] + }, + { + "name": "Golden Gate Bridge", + "location": { + "type": "Polygon", + "coordinates": [ + [ + [-122.479784, 37.8288329], + [-122.4775646, 37.8096291], + [-122.4775538, 37.8095165], + [-122.4775465, 37.8093304], + [-122.4775823, 37.8093296], + [-122.4775387, 37.8089749], + [-122.4773545, 37.8089887], + [-122.4773402, 37.8089575], + [-122.4772752, 37.8088285], + [-122.4772084, 37.8087099], + [-122.4771322, 37.8085903], + [-122.4770518, 37.8084793], + [-122.4769647, 37.8083687], + [-122.4766802, 37.8080091], + [-122.4766629, 37.8080195], + [-122.4765701, 37.8080751], + [-122.476475, 37.8081322], + [-122.4764106, 37.8081708], + [-122.476396, 37.8081795], + [-122.4764936, 37.8082814], + [-122.476591, 37.8083823], + [-122.4766888, 37.8084949], + [-122.47677, 37.808598], + [-122.4768444, 37.8087008], + [-122.4769144, 37.8088105], + [-122.4769763, 37.8089206], + [-122.4770373, 37.8090416], + [-122.477086, 37.809151], + [-122.4771219, 37.8092501], + [-122.4771529, 37.809347], + [-122.477179, 37.8094517], + [-122.4772003, 37.809556], + [-122.4772159, 37.8096583], + [-122.4794624, 37.8288561], + [-122.4794098, 37.82886], + [-122.4794817, 37.8294742], + [-122.4794505, 37.8294765], + [-122.4794585, 37.8295453], + [-122.4795423, 37.8295391], + [-122.4796312, 37.8302987], + [-122.4796495, 37.8304478], + [-122.4796698, 37.8306078], + [-122.4796903, 37.830746], + [-122.4797182, 37.8308784], + [-122.4797544, 37.83102], + [-122.479799, 37.8311522], + [-122.4798502, 37.8312845], + [-122.4799025, 37.8314139], + [-122.4799654, 37.8315458], + [-122.4800346, 37.8316718], + [-122.4801231, 37.8318137], + [-122.4802112, 37.8319368], + [-122.4803028, 37.8320547], + [-122.4804046, 37.8321657], + [-122.4805121, 37.8322792], + [-122.4805883, 37.8323459], + [-122.4805934, 37.8323502], + [-122.4807146, 37.8323294], + [-122.4808917, 37.832299], + [-122.4809526, 37.8322548], + [-122.4809672, 37.8322442], + [-122.4808396, 37.8321298], + [-122.4807166, 37.8320077], + [-122.4806215, 37.8319052], + [-122.4805254, 37.8317908], + [-122.4804447, 37.8316857], + [-122.4803548, 37.8315539], + [-122.4802858, 37.8314395], + [-122.4802227, 37.8313237], + [-122.4801667, 37.8312051], + [-122.4801133, 37.8310812], + [-122.4800723, 37.8309602], + [-122.4800376, 37.8308265], + [-122.4800087, 37.8307005], + [-122.4799884, 37.8305759], + [-122.4799682, 37.8304181], + [-122.4799501, 37.8302699], + [-122.4798628, 37.8295146], + [-122.4799157, 37.8295107], + [-122.4798451, 37.8289002], + [-122.4798369, 37.828829], + [-122.479784, 37.8288329] + ] + ] + }, + "street": "Golden Gate Bridge", + "has_type": [ + { + "uid": "_:attraction", + "loc_type": "Tourist Attraction" + } + ] + }, + { + "name": "Carriage Inn", + "location": { + "type": "Polygon", + "coordinates": [ + [ + [-122.3441509, 37.5792003], + [-122.3438207, 37.5794257], + [-122.3438987, 37.5794587], + [-122.3442289, 37.5792333], + [-122.3441509, 37.5792003] + ] + ] + }, + "street": "7th street", + "has_type": [ + { + "uid": "_:attraction" + } + ] + }, + { + "name": "San Francisco Zoo", + "location": { + "type": "Polygon", + "coordinates": [ + [ + [-122.5036126, 37.7308562], + 
[-122.5028991, 37.7305879], + [-122.5028274, 37.7305622], + [-122.5027812, 37.7305477], + [-122.5026992, 37.7305269], + [-122.5026211, 37.7305141], + [-122.5025342, 37.7305081], + [-122.5024478, 37.7305103], + [-122.5023667, 37.7305221], + [-122.5022769, 37.7305423], + [-122.5017546, 37.7307008], + [-122.5006917, 37.7311277], + [-122.4992484, 37.7317075], + [-122.4991414, 37.7317614], + [-122.4990379, 37.7318177], + [-122.4989369, 37.7318762], + [-122.4988408, 37.731938], + [-122.4987386, 37.7320142], + [-122.4986377, 37.732092], + [-122.4978359, 37.7328712], + [-122.4979122, 37.7333232], + [-122.4979485, 37.7333909], + [-122.4980162, 37.7334494], + [-122.4980945, 37.7334801], + [-122.4989553, 37.7337384], + [-122.4990551, 37.7337743], + [-122.4991479, 37.7338184], + [-122.4992482, 37.7338769], + [-122.4993518, 37.7339426], + [-122.4997605, 37.7342142], + [-122.4997578, 37.7343433], + [-122.5001258, 37.7345486], + [-122.5003425, 37.7346621], + [-122.5005576, 37.7347566], + [-122.5007622, 37.7348353], + [-122.500956, 37.7349063], + [-122.5011438, 37.7349706], + [-122.5011677, 37.7349215], + [-122.5013556, 37.7349785], + [-122.5013329, 37.7350294], + [-122.5015181, 37.7350801], + [-122.5017265, 37.7351269], + [-122.5019229, 37.735164], + [-122.5021252, 37.7351953], + [-122.5023116, 37.7352187], + [-122.50246, 37.7352327], + [-122.5026074, 37.7352433], + [-122.5027534, 37.7352501], + [-122.5029253, 37.7352536], + [-122.5029246, 37.735286], + [-122.5033453, 37.7352858], + [-122.5038376, 37.7352855], + [-122.5038374, 37.7352516], + [-122.5054006, 37.7352553], + [-122.5056182, 37.7352867], + [-122.5061792, 37.7352946], + [-122.5061848, 37.7352696], + [-122.5063093, 37.7352671], + [-122.5063297, 37.7352886], + [-122.5064719, 37.7352881], + [-122.5064722, 37.735256], + [-122.506505, 37.7352268], + [-122.5065452, 37.7352287], + [-122.5065508, 37.7351214], + [-122.5065135, 37.7350885], + [-122.5065011, 37.7351479], + [-122.5062471, 37.7351127], + [-122.5059669, 37.7349341], + [-122.5060092, 37.7348205], + [-122.5060405, 37.7347219], + [-122.5060611, 37.734624], + [-122.5060726, 37.7345101], + [-122.5060758, 37.73439], + [-122.5060658, 37.73427], + [-122.5065549, 37.7342676], + [-122.5067262, 37.7340364], + [-122.506795, 37.7340317], + [-122.5068355, 37.733827], + [-122.5068791, 37.7335407], + [-122.5068869, 37.7334106], + [-122.5068877, 37.733281], + [-122.5068713, 37.7329795], + [-122.5068598, 37.7328652], + [-122.506808, 37.7325954], + [-122.5067837, 37.732482], + [-122.5067561, 37.7323727], + [-122.5066387, 37.7319688], + [-122.5066273, 37.731939], + [-122.5066106, 37.7319109], + [-122.506581, 37.7318869], + [-122.5065404, 37.731872], + [-122.5064982, 37.7318679], + [-122.5064615, 37.731878], + [-122.5064297, 37.7318936], + [-122.5063553, 37.7317985], + [-122.5063872, 37.7317679], + [-122.5064106, 37.7317374], + [-122.5064136, 37.7317109], + [-122.5063998, 37.7316828], + [-122.5063753, 37.7316581], + [-122.5061296, 37.7314636], + [-122.5061417, 37.731453], + [-122.5060145, 37.7313791], + [-122.5057839, 37.7312678], + [-122.5054352, 37.7311479], + [-122.5043701, 37.7310447], + [-122.5042805, 37.7310343], + [-122.5041861, 37.7310189], + [-122.5041155, 37.7310037], + [-122.5036126, 37.7308562] + ] + ] + }, + "street": "San Francisco Zoo", + "has_type": [ + { + "uid": "_:zoo", + "loc_type": "Zoo" + } + ] + }, + { + "name": "Flamingo Park", + "location": { + "type": "Polygon", + "coordinates": [ + [ + [-122.5033039, 37.7334601], + [-122.5032811, 37.7334601], + [-122.503261, 37.7334601], + 
[-122.5032208, 37.7334495], + [-122.5031846, 37.7334357], + [-122.5031806, 37.7334718], + [-122.5031685, 37.7334962], + [-122.5031336, 37.7335078], + [-122.503128, 37.7335189], + [-122.5031222, 37.7335205], + [-122.5030954, 37.7335269], + [-122.5030692, 37.7335444], + [-122.5030699, 37.7335677], + [-122.5030813, 37.7335868], + [-122.5031034, 37.7335948], + [-122.5031511, 37.73359], + [-122.5031933, 37.7335916], + [-122.5032228, 37.7336022], + [-122.5032697, 37.7335937], + [-122.5033194, 37.7335874], + [-122.5033515, 37.7335693], + [-122.5033723, 37.7335518], + [-122.503369, 37.7335068], + [-122.5033603, 37.7334702], + [-122.5033462, 37.7334474], + [-122.5033073, 37.733449], + [-122.5033039, 37.7334601] + ] + ] + }, + "street": "San Francisco Zoo", + "has_type": [ + { + "uid": "_:zoo" + } + ] + }, + { + "name": "Peace Lantern", + "location": { + "type": "Point", + "coordinates": [-122.4705776, 37.7701084] + }, + "street": "Golden Gate Park", + "has_type": [ + { + "uid": "_:attraction" + } + ] + }, + { + "name": "Buddha", + "location": { + "type": "Point", + "coordinates": [-122.469942, 37.7703183] + }, + "street": "Golden Gate Park", + "has_type": [ + { + "uid": "_:attraction" + } + ] + }, + { + "name": "Japanese Tea Garden", + "location": { + "type": "Polygon", + "coordinates": [ + [ + [-122.4692131, 37.7705116], + [-122.4698998, 37.7710069], + [-122.4702431, 37.7710137], + [-122.4707248, 37.7708919], + [-122.4708911, 37.7701541], + [-122.4708428, 37.7700354], + [-122.4703492, 37.7695011], + [-122.4699255, 37.7693989], + [-122.4692131, 37.7705116] + ] + ] + }, + "street": "Golden Gate Park", + "has_type": [ + { + "uid": "_:attraction" + } + ] + } + ] + } + ] +} +``` + +_Note: If this mutation syntax is new to you, refer to the +[first tutorial](/tutorial-1/index.md) to learn the basics of mutations in +Dgraph._ + +Run the query below to fetch the entire graph: + +```graphql +{ + entire_graph(func: has(city)) { + city + has_location { + name + has_type { + loc_type + } + } + } +} +``` + +_Note: Check the [second tutorial](./tutorial-2/index.md) if you want to learn +more about traversal queries like the above one._ + +Here's our graph! + +![full graph](/images/tutorials/8/d-full-graph.png) + +Our graph has: + +- One blue `city node`. We just have one node which represents the city of + `San Francisco`. +- The green ones are the the `location` nodes. We have a total of 13 locations. +- The pink nodes represent the `location types`. We have four kinds of locations + in our dataset: `museum`, `zoo`, `hotel`, and `tourist attractions`. + +You can also see that Dgraph has auto-detected the data types of the predicates +from the schema tab, and the location predicate has been auto-assigned `geo` +type. + +![type detection](/images/tutorials/8/e-schema.png) + +_Note: Check out the [previous tutorial](./tutorial-3/index.md) to know more +about data types in Dgraph._ + +Before we start, please say Hello to `Mary`, a zoologist who has dedicated her +research for the cause of conserving various bird species. + +For the rest of the tutorial, let's help Mary and her team of zoologists in +their mission to conserving birds. + +## Enter San Francisco: Hotel booking + +Several research projects done by Mary suggested that Flamingos thrive better +when there are abundant water bodies for their habitat. + +Her team got approval for expanding the water source for the Flamingos in the +San Francisco Zoo, and her team is ready for a trip to San Francisco with Mary +remotely monitoring the progress of the team. 
+
+Her teammates wish to stay close to the `Golden Gate Bridge` so that they can
+cycle around the Golden Gate and enjoy the breeze and the sunrise every
+morning.
+
+Let's help them find a hotel within a reasonable distance of the
+`Golden Gate Bridge`, and we'll do so using Dgraph's geolocation functions.
+
+Dgraph provides a variety of functions to query geolocation data. To use them,
+you have to set the `geo` index first.
+
+Go to the Schema tab and set the index on the `location` predicate.
+
+![geo-index](/images/tutorials/8/f-index.png)
+
+After setting the `geo` index on the `location` predicate, you can use Dgraph's
+built-in function `near` to find the hotels near the Golden Gate Bridge.
+
+Here is the syntax of the `near` function:
+`near(geo-predicate, [long, lat], distance)`.
+
+The [`near` function](https://dgraph.io/docs/query-language/#near) matches and
+returns all the geo-predicates stored in the database that are within
+`distance` meters of the GeoJSON coordinate `[long, lat]` provided by the user.
+
+Let's search for hotels within 7 km of a point on the Golden Gate Bridge.
+
+Go to the query tab, paste the query below and click Run.
+
+```graphql
+{
+  find_hotel(func: near(location, [-122.479784,37.82883295],7000) ) {
+    name
+    has_type {
+      loc_type
+    }
+  }
+}
+```
+
+![geo-index](/images/tutorials/8/g-near-1.png)
+
+Wait! The search returns not just the hotels, but also all other locations
+within 7 km of the point coordinate on the `Golden Gate Bridge`.
+
+Let's use the `@filter` directive to filter the search results for only the
+hotels. You can visit the [third tutorial](./tutorial-3/index.md) of the
+series to review our earlier discussion of the `@filter` directive.
+
+```graphql
+{
+  find_hotel(func: near(location, [-122.479784,37.82883295],7000)) {
+    name
+    has_type @filter(eq(loc_type, "Hotel")){
+      loc_type
+    }
+  }
+}
+```
+
+Oops, we forgot to add the index required to use the `eq` comparator in the
+filter.
+
+![geo-index](/images/tutorials/8/h-near-2.png)
+
+Let's add a `hash` index to the `loc_type` predicate and re-run the query.
+
+![geo-index](/images/tutorials/8/i-near-3.png)
+
+![geo-index](/images/tutorials/8/j-near-4.png)
+
+_Note: Refer to the [third tutorial](./tutorial-3/index.md) of the series to
+learn more about the hash index and comparator functions in Dgraph._
+
+The search result still contains nodes representing locations that are not
+hotels. That's because the root query first finds all the location nodes that
+are within 7 km of the specified point location, and only then applies the
+filter while selectively traversing to the `location type nodes`.
+
+Only the predicates in the location nodes can be filtered at the root level,
+and you cannot filter the `location types` without traversing to the
+`location type nodes`.
+
+We have a filter that selects only the hotels as we traverse the
+`location type nodes`. Can we cascade or bubble up the filter to the root
+level, so that we only have hotels in the final result?
+
+Yes, you can! You can do so using the `@cascade` directive.
+
+The `@cascade` directive helps you `cascade` or `bubble up` the filters applied
+to your inner query traversals to the root-level nodes. By doing so, we get
+only the locations of hotels in our result.
+
+```graphql
+{
+  find_hotel(func: near(location, [-122.479784,37.82883295],7000)) @cascade {
+    name
+    price_per_night
+    has_type @filter(eq(loc_type,"Hotel")){
+      loc_type
+    }
+  }
+}
+```
+
+![geo-index](/images/tutorials/8/k-near-5.png)
+
+Voila! You can see in the result that, after adding the `@cascade` directive to
+the query, only the locations of type `hotel` appear in the result.
+
+We have two hotels in the result, and one of them is over the team's budget of
+$300 per night. Let's add another filter to search for hotels priced at no more
+than $300 per night.
+
+The price information of every hotel is stored in the `location nodes` along
+with their coordinates, so the filter on the pricing should be at the root
+level of the query, not at the level where we traverse the
+`location type nodes`.
+
+Before you run the query, don't forget to add an index on the
+`price_per_night` predicate.
+
+![geo-index](/images/tutorials/8/l-float-index.png)
+
+```graphql
+{
+  find_hotel(func: near(location, [-122.479784,37.82883295],7000)) @cascade @filter(le(price_per_night, 300)){
+    name
+    price_per_night
+    has_type @filter(eq(loc_type,"Hotel")){
+      loc_type
+    }
+  }
+}
+```
+
+![geo-index](/images/tutorials/8/m-final-result.png)
+
+Now we have a hotel well within the budget, and also close to the Golden Gate
+Bridge!
+
+## Summary
+
+In this tutorial, we learned about the geolocation capabilities in Dgraph, and
+helped Mary's team book a hotel near the Golden Gate Bridge.
+
+In the next tutorial, we'll showcase more geolocation functionalities in Dgraph
+and assist Mary's team in their quest to conserve flamingos.
+
+See you all in the next tutorial. Till then, happy Graphing!
+
+Remember to click the "Join our community" button below and subscribe to our
+newsletter to get the latest tutorial right into your inbox.
+
+## What's Next?
+
+- Go to [Clients](./clients) to see how to communicate with Dgraph from your
+  application.
+- Take the [Tour](https://dgraph.io/tour/) for a guided tour of how to write
+  queries in Dgraph.
+- A wider range of queries can also be found in the
+  [Query Language](/query-language/_index) reference.
+- See [Deploy](./deploy/_index) if you wish to run Dgraph in a cluster.
+
+## Need Help
+
+- Please use [discuss.dgraph.io](https://discuss.dgraph.io) for questions,
+  feature requests, bugs, and discussions.
diff --git a/dgraph/reference/learn/data-engineer/index.mdx b/dgraph/reference/learn/data-engineer/index.mdx
new file mode 100644
index 00000000..52f2b60b
--- /dev/null
+++ b/dgraph/reference/learn/data-engineer/index.mdx
@@ -0,0 +1,24 @@
+---
+title: Dgraph for data engineers
+description:
+  From learning the basics of graph databases to advanced functions and
+  capabilities, Dgraph docs have the information you need
+---
+
+### Recommended learning path
+
+- See [Dgraph Overview](./dgraph-overview) for an introduction to the Dgraph
+  database and a presentation of the Dgraph cluster architecture.
+- Get familiar with some terms in the [Glossary](./dgraph-glossary).
+- Follow the [Dgraph Query Language (DQL) Quickstart](./dql/dql-get-started) to
+  execute some queries.
+- Take the interactive [DQL Tour](https://dgraph.io/tour/intro/1/).
+- Follow the
+  [Get Started with Dgraph](./learn/data-engineer/get-started-with-dgraph)
+  tutorial.
+- Use [DQL Syntax](./dql/dql-syntax) and
+  [Query Language](/query-language/_index) as references.
+- Go to [Clients](./clients) to see how to communicate with Dgraph from your
+  application.
+
+### In this section
diff --git a/dgraph/reference/learn/developer/index.mdx b/dgraph/reference/learn/developer/index.mdx
new file mode 100644
index 00000000..8690d411
--- /dev/null
+++ b/dgraph/reference/learn/developer/index.mdx
@@ -0,0 +1,26 @@
+---
+title: Dgraph for application developers
+description:
+  From learning the basics of graph databases to advanced functions and
+  capabilities, Dgraph docs have the information you need.
+---
+
+### Recommended learning path
+
+- See [Dgraph Overview](./dgraph-overview) for an introduction to the Dgraph
+  database and a presentation of the Dgraph cluster architecture.
+- Get familiar with some terms in the [Glossary](./dgraph-glossary).
+- Follow the [GraphQL API Quickstart](./graphql/quick-start) to create a first
+  API.
+- Take the [GraphQL Tour](https://dgraph.io/tour/graphqlintro/).
+- Do the [To-Do List App](./todo-app-tutorial) tutorial or the more advanced
+  [Message Board in React](./learn/developer/react) tutorial.
+- Learn how Dgraph extends the
+  [GraphQL specifications](https://spec.graphql.org/) with
+  [directives](/graphql/schema/directives).
+- Understand how to configure the GraphQL endpoint
+  [Security](/graphql/security).
+- Go further by studying how to customize the behavior of GraphQL operations
+  using [custom resolvers](./custom-overview.md), or write your own resolver
+  logic with [Lambda resolvers](./lambda-overview.md).
+
+### In this section
diff --git a/dgraph/reference/learn/developer/react/graphql/design-app-schema.mdx b/dgraph/reference/learn/developer/react/graphql/design-app-schema.mdx
new file mode 100644
index 00000000..36f4ba88
--- /dev/null
+++ b/dgraph/reference/learn/developer/react/graphql/design-app-schema.mdx
@@ -0,0 +1,168 @@
+---
+title: Design a Schema for the App
+description:
+  "Build a Message Board App in React with Dgraph Learn. Step 2: GraphQL schema
+  design - how graph schemas and graph queries work."
+---
+
+In this section, you'll start designing the schema of the message board app and
+look at how graph schemas and graph queries work.
+
+To design the schema, you won't think in terms of tables or joins or documents,
+you'll think in terms of entities in your app and how they are linked to make a
+graph. Any requirements or design analysis needs iteration and thinking from a
+number of perspectives, so you'll work through some of that process and sketch
+out where you are going.
+
+Graphs tend to model domains like your app really nicely because they naturally
+model things like the subgraph of a `user`, their `posts` and the `comments` on
+those posts, or the network of friends of a user, or the kinds of posts a user
+tends to like; so you'll look at how those kinds of graph queries work.
+
+## UI requirements
+
+Most apps are more than what you can see on the screen, but UI is what you are
+focusing on here, and thinking about the UI you want will help to kick off your
+design process. So, let's start by looking at what you would like to build for
+your app's UI.
+
+Although a single GraphQL query can save you lots of calls and return you a
+subgraph of data, a complete page might be built up of blocks that have
+different data requirements. For example, in a sketch of your app's UI you can
+already see these data requirements forming.
+
+![App UI requirements](/images/message-board/UI-components.gif)
+
+You can start to see the building blocks of the UI and some of the entities
+(users, categories and posts) that will form the data in your app.
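+
+To make that concrete, here is a rough sketch of the kind of query the post
+list block might eventually send. The operation and field names here are
+illustrative; the actual API is generated from the schema later in the
+tutorial:
+
+```graphql
+query {
+  queryPost {
+    title
+    datePublished
+    author {
+      displayName
+    }
+    category {
+      name
+    }
+  }
+}
+```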
+
+## Thinking in Graphs
+
+Designing a graph schema is about designing the things, or entities, that will
+form nodes in the graph, and designing the shape of the graph, or what links
+those entities have to other entities.
+
+There are really two concepts in play here. One is the data itself, often
+called the application data graph. The other is the schema, which is itself
+graph-shaped but really forms the pattern for the data graph. You can think of
+the difference as somewhat similar to objects (or data structure definitions)
+versus instances in a program, or a relational database schema versus rows of
+actual data.
+
+Already you can start to tease out what some of the types of data and
+relationships in your graph are. There are users who write posts, so you know
+there's a relationship between users and the posts they've made. You know the
+posts are going to be assigned to some set of categories and that each post
+might have a list of comments posted by users.
+
+So your schema is going to have these kinds of entities and relationships
+between them.
+![Graph schema sketch](/images/message-board/schema-inital-sketch.png)
+
+I've borrowed some notation from other data modeling patterns here. That's
+pretty much the modeling capability GraphQL allows, so let's start sketching
+with it for now.
+
+A `user` is going to have some number of (zero or more `0..*`) `posts` and a
+`post` can have exactly one `author`. A `post` can be in only a single
+`category`, which, in turn, can contain many `posts`.
+
+How does that translate into the application data graph? Let's sketch out some
+examples.
+
+Let's start with a single user who's posted three posts into a couple of
+different categories. Your graph might start looking like this.
+
+![first-posts-in-graph](/images/message-board/first-posts-in-graph.png)
+
+Then another user joins and makes some posts. Your graph gets a bit bigger and
+more interesting, but the types of things in the graph and the links they can
+have follow what the schema sets out as the pattern --- for example, you aren't
+linking users to categories.
+![more users and posts](/images/message-board/user2-posts-in-graph.png)
+
+Next the users read some posts and start making and replying to comments.
+![users, posts and comments](/images/message-board/comments-in-graph.png)
+
+Each node in the graph will have the data (a bit like a document) that the
+schema says it can have, maybe a username for users and title, text and date
+published for posts, and the links to other nodes (the shape of the graph) as
+per what the schema allows.
+
+While you are still sketching things out here, let's take a look at how queries
+will work.
+
+## How graph queries work
+
+Graph queries in GraphQL are really about entry points and traversals. A query
+picks certain nodes as a starting point and then selects data from the nodes or
+follows edges to traverse to other nodes.
+
+For example, to render a user's information, you might need only to find the
+user. So your use of the graph might be like in the following sketch --- you'll
+find the user as an entry point into the graph, perhaps by searching users by
+username, query some of their data, but not traverse any further.
+![query a user](/images/message-board/user1-search-in-graph.png)
+
+Often, though, even in just presenting a user's information, you need to
+present information like most recent activity or sum up interest in recent
+posts.
So +it's more likely that you'll start by finding the user as an entry point and +then traversing some edges in the graph to explore a subgraph of interesting +data. That might look like this traversal, starting at the user and then +following edges to their posts. + +![query a user and their posts](/images/message-board/user1-post-search-in-graph.png) + +You can really start to see that traversal when it comes to rendering an +individual post. You'll need to find the post, probably by its id when a user +navigates to a url like `/post/0x2`, then you'll follow edges to the post's +author and category, but you'll also need to follow the edges to all the +comments, and from there to the authors of the comments. That'll be a multi-step +traversal like the following sketch. +![query a post and follow edges](/images/message-board/post2-search-in-graph.png) + +Graphs make these kinds of data traversals really clear, as compared to table +joins or navigating your way through a RESTful API. It can also really help to +jot down a quick sketch. + +It's also possible for a query to have multiple entry points and traversals from +all of those entry points. Imagine, for example, the query that renders the post +list on the main page. That's a query that finds multiple posts, maybe ordered +by date or from particular categories, and then, for each, traverses to the +author, category, etc. + +You can now begin to see the GraphQL queries needed to fill out the UI. For +example, in the sketch at the top, there will be a query starting at the logged +in user to find their details, a query finding all the category nodes to fill +out the category dropdown, and a more complex query that will find a number of +posts and make traversals to find the posts' authors and categories. + +## Schema + +Now that you have investigated and considered what you are going to show for +posts and users, you can start to flesh out your schema some more. + +Posts, for example, are going to need a title and some text for the post, both +string valued. Posts will also need some sort of date to record when they were +uploaded. They'll also need links to the author, category and a list of +comments. + +The next iteration of your schema might look like this sketch. +![Graph schema +schema sketch with data](/images/message-board/schema-sketch.png) + +That's your first cut at a schema --- the pattern your application data graph +will follow. + +You'll keep iterating on this as you work through the tutorial, that's what +you'd do in building an app, no use pretending like you have all the answers at +the start. Eventually, you'll want to add likes and dislikes on the posts, maybe +also tags, and you'll also layer in a permissions system so some categories will +require permissions to view. But, those topics are for later sections in the +tutorial. This is enough to start building with. + +## What's next + +Next you'll make your design concrete, by writing it down as a GraphQL schema, +and upload that to Dgraph Cloud. That'll give you a running GraphQL API and +you'll look at the queries and mutations that will form the data of your app. 
diff --git a/dgraph/reference/learn/developer/react/graphql/graphql-operations.mdx b/dgraph/reference/learn/developer/react/graphql/graphql-operations.mdx new file mode 100644 index 00000000..7534fcb4 --- /dev/null +++ b/dgraph/reference/learn/developer/react/graphql/graphql-operations.mdx @@ -0,0 +1,45 @@ +--- +title: GraphQL Operations +description: + Using your schema, Dgraph Cloud generated ways to interact with the graph. In + GraphQL, the API can be inspected with introspection queries. +--- + +The schema that you developed and deployed to Dgraph Cloud in the previous +sections was about the types in our domain and the shape of the application data +graph. From that, Dgraph Cloud generated some ways to interact with the graph. +GraphQL supports the following _operations_, which provide different ways to +interact with a graph: + +- **queries**: used to find a starting point and traverse a subgraph +- **mutations**: used to change the graph and return a result +- **subscriptions**: used to listen for changes in the graph + +In GraphQL, the API can be inspected with special queries called _introspection +queries_. Introspection queries are a type of GraphQL query that provides the +best way to find out what operations you can perform with a GraphQL API. + +## Introspection + +Many GraphQL tools support introspection and generate documentation to help you +explore an API. There are several tools in the GraphQL ecosystem you can use to +explore an API, including +[GraphQL Playground](https://github.com/prisma-labs/graphql-playground), +[Insomnia](https://insomnia.rest/), +[GraphiQL](https://github.com/graphql/graphiql), +[Postman](https://www.postman.com/graphql/), and +[Altair](https://github.com/imolorhe/altair). + +You can also explore your GraphQL API using the API explorer that's included in +the Dgraph Cloud web UI. Navigate to the **GraphQL** tab where you can access +the introspected schema from the "Documentation Explorer" in the right menu. + +_![Dgraph Cloud Schema Explorer](/images/message-board/dgraph-cloud-schema-explorer.png)_ + +From there, you can click through to the queries and mutations and check out the +API. For example, this API includes mutations to add, update and delete users, +posts and comments. + +Next, you'll learn more about the API that Dgraph Cloud created from the schema +by trying out the same kind of queries and mutations you'll use to build the +message board app. diff --git a/dgraph/reference/learn/developer/react/graphql/graphql-schema.mdx b/dgraph/reference/learn/developer/react/graphql/graphql-schema.mdx new file mode 100644 index 00000000..3acec863 --- /dev/null +++ b/dgraph/reference/learn/developer/react/graphql/graphql-schema.mdx @@ -0,0 +1,258 @@ +--- +title: GraphQL Schema +description: + "How to Build a Message Board App in React. Step 2: GraphQL schema - translate + the schema design to the GraphQL SDL (Schema Definition Language)." +--- + +In this section, you'll learn about how to translate the schema design to the +GraphQL SDL (Schema Definition Language). + +## App Schema + +In the schema design section, you saw the following sketch of a graph schema for +the example message board app: +![data model sketch](/images/message-board/schema-sketch.png) + +Using the GraphQL SDL, Dgraph Cloud generates a running GraphQL API from the +description of a schema as GraphQL types. There are two different aspects of a +GraphQL schema: + +- **Type Definitions**: these define the things included in a graph and the + shape of the graph. 
In this tutorial, you will derive the type definitions
+  from the sketch shown above.
+- **Operations**: these define what you can do in the graph using the API, like
+  the search and traversal examples in the previous section. Initially, Dgraph
+  Cloud will generate create, read, update and delete (CRUD) operations for
+  your API. Later in this tutorial, you'll learn how to define other
+  operations for your schema.
+
+You'll start by learning about the GraphQL SDL and then translate the app
+schema sketch into GraphQL SDL.
+
+## GraphQL Schema
+
+The input schema to Dgraph Cloud is a GraphQL schema fragment that contains
+type definitions. Dgraph Cloud builds a GraphQL API from those definitions.
+
+This input schema can contain types, search directives, IDs, and relationships.
+
+### Types
+
+Dgraph Cloud supports pre-defined scalar types (including `Int`, `String`,
+`Float` and `DateTime`) and a schema can define any number of other types. For
+example, you can start to define the `Post` type in the GraphQL SDL by
+translating the following from the app schema sketch shown above:
+
+```graphql
+type Post {
+  title: String
+  text: String
+  datePublished: DateTime
+  author: User
+  ...
+}
+```
+
+A `type TypeName { ... }` definition defines a kind of node in your graph. In
+this case, `Post` nodes. It also gives those nodes what GraphQL calls _fields_,
+which define a node's data values. Those fields can be scalar values: in this
+case a `title`, `text` and `datePublished`. They can also be links to other
+nodes: in this case the `author` edge must link to a node of type `User`.
+
+Edges in the graph can be either singular or multiple. If a field is a name and
+a type, like `author: User`, then a post can have a single `author` edge. If a
+field uses the list notation with square brackets (for example
+`comments: [Comment]`), then a post can have multiple `comments` edges.
+
+GraphQL allows the schema to mark some fields as required. For example, you
+might decide that all users must have a username, but that users aren't
+required to set a preferred display name. If the display name is null, your app
+can choose to display the username instead. In GraphQL, required fields are
+marked using an exclamation mark (`!`) annotation after the field's type.
+
+So, to guarantee that `username` will never be null, but allow `displayName` to
+be null, you would define the `User` type as follows in your schema:
+
+```graphql
+type User {
+  username: String!
+  displayName: String
+  ...
+}
+```
+
+This annotation carries over to lists, so `comments: [Comment]` would allow
+both a null list and a list with some nulls in it, while `comments: [Comment!]!`
+will allow neither a null comments list nor a list that contains any null
+values. The `!` notation lets your UI code make some simplifying assumptions
+about the data that the API returns, reducing the need for client-side error
+handling.
+
+### Search
+
+The GraphQL SDL syntax shown above describes your types and the shape of your
+application data graph, and you can start to make a pretty faithful translation
+of the types in your schema design. However, there's a bit more that you'll
+need in the API for this app.
+
+As well as the shape of the graph, you can use GraphQL directives to tell
+Dgraph Cloud some more about how to interpret the graph and what features you'd
+like in the GraphQL API. Dgraph Cloud uses this information to specialize the
+GraphQL API to fit the requirements of your app.
+
+For example, with just the type definition, Dgraph Cloud doesn't know what
+kinds of search you need your API to support. Adding the `@search` directive to
+the schema tells Dgraph Cloud about the search needed. The following schema
+example shows two ways to add search directives.
+
+```graphql
+type Post {
+  ...
+  title: String! @search(by: [term])
+  text: String! @search(by: [fulltext])
+  ...
+}
+```
+
+These search directives tell Dgraph Cloud that you want your API to support
+searching posts by title using terms, and searching post text using full-text
+search. This syntax supports searches like "all the posts with GraphQL in the
+title" and broader search-engine style searches like "all the posts about
+developing GraphQL apps".
+
+### IDs
+
+Dgraph Cloud supports two types of identifiers: an `ID` type that gives
+auto-generated 64-bit IDs, and an `@id` directive that allows external IDs to
+be used for IDs.
+
+`ID` and `@id` have different purposes, as illustrated by their use in this
+app:
+
+- `ID` is best for things like posts that need a uniquely-generated ID.
+- `@id` is best for types, like `User`, where the ID (their username) is
+  supplied by the user.
+
+```graphql
+type Post {
+  id: ID!
+  ...
+}
+
+type User {
+  username: String! @id
+  ...
+}
+```
+
+A post's `id: ID` gives each post an auto-generated ID. For users, you'll need
+a bit more. The `username` field should be unique; in fact, it should be the ID
+for a user. Adding the `@id` directive like `username: String! @id` tells
+Dgraph Cloud GraphQL that `username` should be a unique ID for the `User` type.
+Dgraph Cloud GraphQL will then generate the GraphQL API such that `username` is
+treated as an ID, and ensure that usernames are unique.
+
+### Relationships
+
+A critical part of understanding GraphQL is learning how it handles
+relationships. A GraphQL schema based around types like those in the following
+example specifies that an author has some posts and each post has an author,
+but the schema doesn't connect them as a two-way edge in the graph. So in this
+case, your app can't assume that the posts it can reach from a particular
+author all have that author as the value of their `author` edge.
+
+```graphql
+type User {
+  ...
+  posts: [Post!]!
+}
+
+type Post {
+  ...
+  author: User!
+}
+```
+
+GraphQL schemas are always under-specified in this way. It's left up to the
+documentation and implementation to make a two-way connection, if it exists.
+There might be multiple connections between two types; for example, an author
+might also be linked to the posts they have commented on. So, it makes sense
+that you need something other than just the types as defined above to specify
+two-way edges.
+
+With Dgraph Cloud you can specify two-way edges by adding the `@hasInverse`
+directive. Two-way edges help your app to untangle situations where types have
+multiple edges. For example, you might need to make sure that the posts a user
+has authored and the ones they've liked are linked correctly.
+
+```graphql
+type User {
+  ...
+  posts: [Post!]!
+  liked: [Post!]!
+}
+
+type Post {
+  ...
+  author: User! @hasInverse(field: posts)
+  likedBy: [User!]! @hasInverse(field: liked)
+}
+```
+
+The `@hasInverse` directive is only needed on one end of a two-way edge, but
+you can add it at both ends if that adds clarity to your documentation and
+makes your schema more "human-readable".
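+
+To see what a two-way edge buys you, here is a sketch of a query that makes
+the round trip, assuming the query operations that Dgraph Cloud generates from
+a schema like this one:
+
+```graphql
+query {
+  getUser(username: "User1") {
+    posts {
+      title
+      author {
+        username
+      }
+    }
+  }
+}
+```
+
+With `@hasInverse` in place, every post reached through a user's `posts` edge
+has that same user as its `author`.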
+ +## Final schema + +Working through the four types in the schema sketch, and then adding `@search` +and `@hasInverse` directives, yields the following schema for your app. + +```graphql +type User { + username: String! @id + displayName: String + avatarImg: String + posts: [Post!] + comments: [Comment!] +} + +type Post { + id: ID! + title: String! @search(by: [term]) + text: String! @search(by: [fulltext]) + tags: String @search(by: [term]) + datePublished: DateTime + author: User! @hasInverse(field: posts) + category: Category! @hasInverse(field: posts) + comments: [Comment!] +} + +type Comment { + id: ID! + text: String! + commentsOn: Post! @hasInverse(field: comments) + author: User! @hasInverse(field: comments) +} + +type Category { + id: ID! + name: String! @search(by: [term]) + posts: [Post!] +} +``` + +Dgraph Cloud is built to allow for iteration of your schema. I'm sure you've +picked up things that could be added to enhance this example app, i.e., the +ability to add up and down votes, or to add "likes" to posts. In this tutorial, +we discuss adding new features using an iterative approach. This approach is the +same one that you take when working on your own project: start by building a +minimal working version, and then iterate from there. + +Some iterations, such as adding likes, will just require a schema change; Dgraph +Cloud GraphQL will update very rapidly to adjust to this change. Some +iterations, such as adding a `@search` directive to comments, can be done by +extending the schema. This will cause Dgraph Cloud to index the new data and +then update the API. Very large iterations, such as extending the model to +include a history of edits on a post, might require a data migration. diff --git a/dgraph/reference/learn/developer/react/graphql/index.mdx b/dgraph/reference/learn/developer/react/graphql/index.mdx new file mode 100644 index 00000000..ffde5eed --- /dev/null +++ b/dgraph/reference/learn/developer/react/graphql/index.mdx @@ -0,0 +1,18 @@ +--- +title: Working in GraphQL +description: + "Developing a Message Board App in React with Dgraph Learn. Step 2: GraphQL + schema design and loading, queries, and mutations." +--- + +To build an app with Dgraph Cloud, you design your application in GraphQL. You +design a set of GraphQL types that describes the app's data requirements. Dgraph +Cloud GraphQL takes those types, prepares graph storage for them and generates a +GraphQL API with queries and mutations. + +In this section of the tutorial, you'll walk through the process of designing a +schema for a message board app, loading the GraphQL schema into Dgraph Cloud, +and then working through the queries and mutations that Dgraph Cloud makes +available from that schema. + +### In this section diff --git a/dgraph/reference/learn/developer/react/graphql/load-schema-to-dgraph-cloud.mdx b/dgraph/reference/learn/developer/react/graphql/load-schema-to-dgraph-cloud.mdx new file mode 100644 index 00000000..08408839 --- /dev/null +++ b/dgraph/reference/learn/developer/react/graphql/load-schema-to-dgraph-cloud.mdx @@ -0,0 +1,20 @@ +--- +title: Deploy the Schema +description: + "Building a Message Board App in React. Step 2: With the schema defined, it’s + just one step to get a running GraphQL backend for the app." +--- + +With the schema defined, it's just one step to get a running GraphQL backend for +the app. + +Copy the schema, navigate to the **Schema** tab in Dgraph Cloud, paste the +schema in, and press **Deploy**. 

![Deploy Dgraph Cloud schema](/images/message-board/dgraph-cloud-deploy-schema-success.png)

As soon as the schema is added, Dgraph Cloud generates and deploys a GraphQL API
for the app.

Next, you'll learn about GraphQL operations like queries, mutations, and
subscriptions.

diff --git a/dgraph/reference/learn/developer/react/graphql/react-graphql-mutations.mdx b/dgraph/reference/learn/developer/react/graphql/react-graphql-mutations.mdx
new file mode 100644
index 00000000..d89eb24a
--- /dev/null
+++ b/dgraph/reference/learn/developer/react/graphql/react-graphql-mutations.mdx
@@ -0,0 +1,302 @@

---
title: GraphQL Mutations
description:
  Before you can query, you need to add data. Use GraphQL Mutations to add a
  user, category, posts, and sample data.
---

It'd be great to start by writing some queries to explore the graph of the app,
like you did in the sketches exploring graph ideas, but there's no data yet, so
queries won't be of much use. So instead, you'll start by adding data.

## Add a User

GraphQL mutations have the useful property of returning a result. That result
can help your UI, for example, to re-render a page without further queries.
Dgraph Cloud lets the result returned from a mutation be as expressive as any
graph traversal.

Let's start by adding a single test user. The user type has the following
fields: `username`, `displayName`, `avatarImg`, `posts` and `comments`, as shown
below:

```graphql
type User {
  username: String! @id
  displayName: String
  avatarImg: String
  posts: [Post!]
  comments: [Comment!]
}
```

In this example, the only required field (marked with `!`) is the username. So,
to generate a new user node in the graph, we only need to supply a username.

The sample app's GraphQL API includes an `addUser` mutation, which can be used
to add multiple users and their data in a single operation. You can add one user
and their `username` with the following mutation:

```graphql
mutation {
  addUser(input: [{ username: "User1" }]) {
    user {
      username
      displayName
    }
  }
}
```

The `mutation` keyword tells a GraphQL server that it's running a mutation. The
mutation (in this case, `addUser`) takes the provided arguments and adds that
data to the graph.

The mutation shown above adds a single user, `User1`, and returns the newly
created user's `username` and `displayName`. The `displayName` will be `null`
because you didn't provide that data. This user also has no `posts` or
`avatarImg`, but we aren't asking for those in the result. Here's how it looks
when run in the Dgraph Cloud API Explorer.

![run a mutation in Dgraph Cloud](/images/message-board/dgraph-cloud-add-a-user.png)

## Add a category

The graph now has a single node. Next, you'll add a category using the
`addCategory` mutation. Categories are a little different from users because the
`id` is auto-generated by Dgraph Cloud. The following mutation creates the
category and returns its `name` and the `id` Dgraph Cloud gave it.

```graphql
mutation {
  addCategory(input: [{ name: "Category1" }]) {
    category {
      id
      name
    }
  }
}
```

When run in Dgraph Cloud's API Explorer, the mutation looks as shown below. Note
that the category's `id` is auto-generated and will be different on any
execution of the mutation.

![run a mutation with auto generated id in Dgraph Cloud](/images/message-board/dgraph-cloud-add-a-category.png)

## Add Some Posts

Dgraph Cloud can do more than add single graph nodes at a time.
The mutations
can add whole subgraphs and link into the existing graph. To show this, let's do
a few things at once. Remember our first sketch of some graph data?

![sub graph with first posts](/images/message-board/first-posts-in-graph.png)

At the moment we only have the `User1` and `Category1` nodes. It's not much of a
graph, so let's flesh out the rest of the graph in a single mutation. We'll use
the `addPost` mutation to add the three posts, link all the posts to `User1`,
link posts 2 and 3 to the existing category, and then create `Category2`. You'll
do all of this in a single operation using the following mutation:

```graphql
mutation {
  addPost(
    input: [
      {
        title: "Post1"
        text: "Post1"
        author: { username: "User1" }
        category: { name: "Category2" }
      }
      {
        title: "Post2"
        text: "Post2"
        author: { username: "User1" }
        category: { id: "0xfffd8d6ab6e7890a" }
      }
      {
        title: "Post3"
        text: "Post3"
        author: { username: "User1" }
        category: { id: "0xfffd8d6ab6e7890a" }
      }
    ]
  ) {
    post {
      id
      title
      author {
        username
      }
      category {
        name
      }
    }
  }
}
```

Because categories are referenced by an auto-generated `ID`, when you run such a
mutation, you'll need to make sure that you use the right `id` value for
`Category1` --- in my run that was `0xfffd8d6ab6e7890a`, but yours might differ.
In the Dgraph Cloud API Explorer, that mutation looked like this:

![dgraph-cloud-deep-mutations](/images/message-board/dgraph-cloud-deep-mutations.png)

A real app probably wouldn't add multiple posts in that way, but this example
shows what you can do with mutations in Dgraph Cloud. For example, you could
create a shopping cart and add the first items to that cart in a single
mutation.

The input format to Dgraph Cloud also shows you another important property that
helps you when you are building an app: serialization of data. In general, you
can serialize your data structures, send them to Dgraph Cloud, and it mutates
the graph. So, you don't need to programmatically add single objects from the
client or work out which bits are in the graph and which aren't --- just
serialize the data and Dgraph Cloud works it out. Dgraph Cloud uses the IDs in
the data to work out how to connect the new data into the existing graph.

## Add sample data

You can run some more mutations, add more users or posts, or add comments to the
posts. To get you started, here's a mutation that adds some more data that we
can use to explore GraphQL queries in the following section.

```graphql
mutation {
  addPost(
    input: [
      {
        title: "A Post about Dgraph Cloud"
        text: "Develop a GraphQL app"
        author: { username: "User1" }
        category: { id: "0xfffd8d6ab6e7890a" }
      }
      {
        title: "A Post about Dgraph"
        text: "It's a GraphQL database"
        author: { username: "User1" }
        category: { id: "0xfffd8d6ab6e7890a" }
      }
      {
        title: "A Post about GraphQL"
        text: "Nice technology for an app"
        author: { username: "User1" }
        category: { id: "0xfffd8d6ab6e7890a" }
      }
    ]
  ) {
    post {
      id
      title
    }
  }
}
```

## GraphQL Variables

A mutation that takes data in its arguments is great to try out in a UI tool,
but an app needs to connect the data in its internal data structures to the API
without building complex query strings. GraphQL _Query Variables_ let a query or
mutation depend on input values that are resolved at run-time.

For example, the following `addOnePost` mutation requires an input `$post` (of
type `AddPostInput!`) that it then passes as the `input` argument to the
`addPost` mutation:

```graphql
mutation addOnePost($post: AddPostInput!) {
  addPost(input: [$post]) {
    post {
      id
      title
      text
    }
  }
}
```

Running this mutation requires a JSON payload that supplies a value for the
needed variable, as follows:

```json
{
  "post": {
    "title": "GraphQL Variables",
    "text": "This post uses variables to input data",
    "author": { "username": "User1" },
    "category": { "id": "0xfffd8d6ab6e7890a" }
  }
}
```

In Dgraph Cloud's UI, there's a **Query Variables** tab that you can use to
enter the variables.

![Mutation with GraphQL Variables](/images/message-board/graphql-variables.png)

GraphQL variables let an app depend on a fixed mutation string and simply inject
the actual data into the mutation when it's executed, meaning the same mutation
can be used over and over with different data.

## Mutations used in the App

The app always uses GraphQL variables so that there's a small set of mutations
and the data can be supplied by serializing client-side data structures.

The app will need a mutation to add users:

```graphql
mutation ($username: String!) {
  addUser(input: [{ username: $username }]) {
    user {
      username
    }
  }
}
```

It will also need a mutation to add posts:

```graphql
mutation addPost($post: AddPostInput!) {
  addPost(input: [$post]) {
    post {
      id
      # ... and other post data
    }
  }
}
```

It will need a mutation to add comments:

```graphql
mutation addComment($comment: AddCommentInput!) {
  addComment(input: [$comment]) {
    comment {
      id
      # ... and other comment data
    }
  }
}
```

And finally, it will need a mutation to update posts:

```graphql
mutation updatePost($id: ID!, $post: PostPatch) {
  updatePost(input: { filter: { id: [$id] }, set: $post }) {
    post {
      id
      # ... and other post data
    }
  }
}
```

The `updatePost` mutation combines a search and a mutation into one. The
mutation first finds the posts to update (the `filter`) and then sets new values
for the post's fields with the `set` argument. To learn how the `filter` works,
let's look at how Dgraph Cloud handles queries.

diff --git a/dgraph/reference/learn/developer/react/graphql/react-graphql-queries.mdx b/dgraph/reference/learn/developer/react/graphql/react-graphql-queries.mdx
new file mode 100644
index 00000000..9a73dbab
--- /dev/null
+++ b/dgraph/reference/learn/developer/react/graphql/react-graphql-queries.mdx
@@ -0,0 +1,302 @@

---
title: GraphQL Queries
description:
  GraphQL queries are about starting points and traversals. From simple queries
  to deep filters, dive into the queries used in the message board app.
---

As we learned earlier, GraphQL queries are about starting points and traversals.
For example, a query can start by finding a post, and then traversing edges from
that post to find the author, category, comments and authors of all the
comments.

![query a post and follow relationships](/images/message-board/post2-search-in-graph.png)

## Dgraph Cloud Query

In the API that Dgraph Cloud built from the schema, queries are named for the
types that they let you query: `queryPost`, `queryUser`, etc. A query starts
with, for example, `queryPost` or by filtering to some subset of posts like
`queryPost(filter: ...)`. This defines a starting set of nodes in the graph.
From there, your query traverses into the graph and returns the subgraph it
finds. You can try this out with some example queries in the next section.

## Simple Queries

The simplest queries find some nodes and only return data about those nodes,
without traversing further into the graph. The query `queryUser` finds all
users. From those nodes, we can query the usernames as follows:

```graphql
query {
  queryUser {
    username
  }
}
```

The result will depend on how many users you have added. If it's just the
`User1` sample, then you'll get a result like the following:

```json
{
  "data": {
    "queryUser": [
      {
        "username": "User1"
      }
    ]
  }
}
```

This says that the returned `data` is the result of the `queryUser` query that
was executed, given as an array of JSON objects describing those users.

## Query by identifier

Because `username` is an identifier, there's also a query that finds users by
ID. To grab the data for a single user if you already know their ID, use the
following query:

```graphql
query {
  getUser(username: "User1") {
    username
  }
}
```

This time the query returns a single object, instead of an array.

```json
{
  "data": {
    "getUser": {
      "username": "User1"
    }
  }
}
```

## Query with traversal

Let's do a bit more traversal into the graph. The example app's UI can display a
user's home page, and rendering it needs the user's data and some of their
posts.

![Graph schema sketch](/images/message-board/user1-post-search-in-graph.png)

Using GraphQL, you can get that data using the following query:

```graphql
query {
  getUser(username: "User1") {
    username
    displayName
    posts {
      title
    }
  }
}
```

This query finds `User1` as the starting point, grabs the `username` and
`displayName`, and then traverses into the graph following the `posts` edges to
get the titles of all the user's posts.

A query could step further into the graph, finding the category of every post,
like this:

```graphql
query {
  getUser(username: "User1") {
    username
    displayName
    posts {
      title
      category {
        name
      }
    }
  }
}
```

Or, a query could traverse even deeper to get the comments on every post and the
authors of those comments, as follows:

```graphql
query {
  getUser(username: "User1") {
    username
    displayName
    posts {
      title
      category {
        name
      }
      comments {
        text
        author {
          username
        }
      }
    }
  }
}
```

## Querying with filters

To render the app's home screen, the app needs to find a list of posts. Knowing
how to find starting points in the graph and traverse with a query means we can
use the following query to grab enough data to display a post list for the home
screen:

```graphql
query {
  queryPost {
    id
    title
    author {
      username
    }
    category {
      name
    }
  }
}
```

We'll also want to limit the number of posts displayed (at least until the user
scrolls) and order them from newest to oldest.

This can be accomplished by passing arguments to `queryPost` that specify how we
want the result sorted and paginated.

```graphql
query {
  queryPost(order: { desc: datePublished }, first: 10) {
    id
    title
    author {
      username
    }
    category {
      name
    }
  }
}
```

The UI for your app also lets users search for posts. To support this, you added
`@search(by: [term])` to your schema so that Dgraph Cloud would build an API for
searching posts.
The nodes found as the starting points in `queryPost` can be
filtered down to match only a subset of posts that have the term "graphql" in
the title by adding `filter: { title: { anyofterms: "graphql" }}` to the query,
as follows:

```graphql
query {
  queryPost(
    filter: { title: { anyofterms: "graphql" } }
    order: { desc: datePublished }
    first: 10
  ) {
    id
    title
    author {
      username
    }
    category {
      name
    }
  }
}
```

## Querying with deep filters

The same filtering works during a traversal. For example, we can combine the
queries we have seen so far to find `User1`, and then traverse to their posts,
but only return those posts that have "graphql" in the title.

```graphql
query {
  getUser(username: "User1") {
    username
    displayName
    posts(filter: { title: { anyofterms: "graphql" } }) {
      title
      category {
        name
      }
    }
  }
}
```

Dgraph Cloud builds filters and ordering into the GraphQL API depending on the
types and the placement of the `@search` directive in the schema. Those filters
are then available at any depth in a query, and in the results returned from
mutations.

## Queries used in the message board app

The message board app used in this tutorial uses a variety of queries, some of
which are described and shown below:

The following query gets a user's information:

```graphql
query getUser($username: String!) {
  getUser(username: $username) {
    username
    displayName
    avatarImg
  }
}
```

The following query gets all categories. It is used to render the categories
selector on the main page, and to allow a user to select categories when adding
new posts:

```graphql
query {
  queryCategory {
    id
    name
  }
}
```

The following query gets an individual post's data when a user navigates to the
post's URL:

```graphql
query getPost($id: ID!) {
  getPost(id: $id) {
    id
    title
    text
    datePublished
    author {
      username
      displayName
      avatarImg
    }
    comments {
      text
      author {
        username
        displayName
        avatarImg
      }
    }
  }
}
```

Next, you'll learn how to build your app's React UI.

diff --git a/dgraph/reference/learn/developer/react/index.mdx b/dgraph/reference/learn/developer/react/index.mdx
new file mode 100644
index 00000000..e18c46f0
--- /dev/null
+++ b/dgraph/reference/learn/developer/react/index.mdx
@@ -0,0 +1,23 @@

---
title: Message Board in React
description:
  From learning the basics of graph databases to advanced functions and
  capabilities, Dgraph docs have the information you need
---

In this tutorial, you will start by looking at the app you are going to build
and how such an app works in Dgraph Cloud. Then, the tutorial moves on to schema
design with GraphQL and implementing a UI.

You can complete this tutorial using [Dgraph Cloud](https://cloud.dgraph.io/).

### Learning goals

In this tutorial, you will learn about the following:

- Dgraph Cloud basics
- Schema design with GraphQL
- GraphQL queries and mutations
- Building a React UI with GraphQL

### In this section

diff --git a/dgraph/reference/learn/developer/react/react-conclusion.mdx b/dgraph/reference/learn/developer/react/react-conclusion.mdx
new file mode 100644
index 00000000..82104649
--- /dev/null
+++ b/dgraph/reference/learn/developer/react/react-conclusion.mdx
@@ -0,0 +1,32 @@

---
title: Conclusion
description:
  This is the end of the Dgraph Learn course - Build a Message Board App in
  React. But there's still more to learn.
---

Congratulations on finishing the Dgraph Learn course: **Build a Message Board
App in React**!

You’ve now learned how to add much of the core functionality to your React app.
In the advanced course (coming soon), you’ll add login and authorization,
subscriptions, and custom logic to this app.

Playing with your app and taking your code to the next level? Be sure to share
your creations on our [discussion boards](https://discuss.dgraph.io). We love to
see what you’re working on, and feel free to ask any questions - our team of
developers typically responds within 30 minutes!

We hope you’ve enjoyed playing with [Dgraph Cloud](https://dgraph.io/cloud) for
this course. A fully-managed GraphQL backend database service, Dgraph Cloud lets
you focus on building apps, not managing infrastructure. Be sure to check out
our [example apps](/sample-apps/) - tinker with the source code and get a feel
for how it all works. Then build your own creation!

Want to learn more? Check out [another course](/courses/), or head over to our
[docs site](https://dgraph.io/docs). Want to request a tutorial?
[Drop us a comment](https://discuss.dgraph.io/t/dgraph-learn/11969)!

And if you have been reading along but haven't tried it yet,
[try Dgraph Cloud](https://cloud.dgraph.io/) to launch your first hosted GraphQL
database.

diff --git a/dgraph/reference/learn/developer/react/react-introduction.mdx b/dgraph/reference/learn/developer/react/react-introduction.mdx
new file mode 100644
index 00000000..9602ad92
--- /dev/null
+++ b/dgraph/reference/learn/developer/react/react-introduction.mdx
@@ -0,0 +1,109 @@

---
title: Introduction
description:
  Learn to deploy a GraphQL Backend, design a schema, and implement a React UI.
  This 2-hour course walks you through it
---

This tutorial walks you through building a reasonably complete message board
app. We selected this app because it's familiar and easy enough to grasp the
schema at a glance, but also extensible enough to add things like integrated
authorization and real-time updates with subscriptions.

## The App

This app is designed to manage lists of posts in different categories. A home
page lets each user view a feed of posts, as follows:

![message board app main page](/images/message-board/main-screenshot.png)

This app will use Dgraph Cloud's built-in authorization to allow public posts
that anyone can see (even without logging in) but restrict posting messages to
users who are logged in. We'll also make some categories private, hiding them
from any users who haven't been granted viewer permissions. Users who are logged
in can create new posts, and each post can have a stream of comments from other
users. A post is rendered on its own page, as follows:

![App post page](/images/message-board/post-screenshot.png)

This app will be a completely serverless app:

- Dgraph Cloud provides the "backend": a Dgraph database in a fully-managed
  environment
- Auth0 provides serverless authentication
- Netlify is used to deploy the app UI (the "frontend")

## Why use GraphQL?

You can build an app using any number of technologies, so why is GraphQL a good
choice for this app?

GraphQL is a good choice in many situations, but particularly where the app data
is inherently a _graph_ (a network of data nodes) and where GraphQL queries let
us reduce the complexity of the UI code.

In this case, both are true.
The data for the app is itself a graph; it's about
`users`, `posts` and `comments`, and the links between them. You'll naturally
want to explore that data graph as you work with the app, so GraphQL makes a
great choice. Also, in rendering the UI, GraphQL removes some complexity for
you.

If you built this app with REST APIs, for example, your clients (such as web and
mobile) would have to programmatically manage getting all the data needed to
render a page. So, to render a post using a REST API, you would probably need to
access the `/post/{id}` endpoint to get the post itself, then the
`/comment?postid={id}` endpoint to get the comments, and then (iteratively for
each comment) access the `/author/{id}` endpoint. You would have to collect the
data from those endpoints, discard extra data, and then build a data structure
to render the UI. This approach requires different code in each version of the
app, and increases both the engineering effort required and the opportunity for
bugs to occur in your app.

With GraphQL, rendering a page is much simpler. You can run a single GraphQL
query that gets all of the data for a post (its comments and the authors of
those comments) and then simply lay out the page from the returned JSON. GraphQL
gives you a query language to explore the app's data graph, instead of having to
write code to build the data structure that you need from a series of REST API
calls.

## Why Dgraph Cloud?

Dgraph Cloud lets you build a GraphQL API for your app with just a GraphQL
schema, and it gets you to a running GraphQL API faster than any other tool.

Often, a hybrid model is used where a GraphQL API is layered over a REST API or
over a document or relational database. In those cases, the GraphQL layer sits
over other data sources and issues many queries to translate the REST or
relational data into something that looks like a graph. There's a cognitive jump
there, because your app is about a graph, but you need to design a relational
schema and work out how that translates into a graph. So, you'll think about the
app in terms of the graph data model, but always have to mentally translate back
and forth between the relational and graph models. This translation presents
engineering challenges, and it costs query efficiency as well.

You don't have any of these engineering challenges with Dgraph Cloud.

Dgraph Cloud provides a fully-managed Dgraph database that stores all data
natively as a graph; it's a database of nodes and edges, with no relational
database running in the background. Compared to a hybrid model, Dgraph lets you
efficiently store, query and traverse data as a graph. Your data will get stored
just as you design it in the schema, and your app's queries are single graph
queries that fetch data in a format your app can readily consume.

With Dgraph Cloud, you design your application in GraphQL. You design a set of
GraphQL types that describes your requirements. Dgraph Cloud takes those types,
prepares graph storage for them, and generates a GraphQL API with queries and
mutations.

So, you can design a graph, store a graph and query a graph. You think and
design in terms of the graph that your app needs.

## What's next

First, you will deploy a running Dgraph Cloud backend that will host your
GraphQL API. This gives you a working backend that you can use to build out your
app.

Then you will move on to the design process - it's graph-first, in fact, it's
GraphQL-first.
After you design the GraphQL types that your app is based around,
Dgraph Cloud provides a GraphQL API for those types; then, you can move straight
to building your app around your GraphQL APIs.

diff --git a/dgraph/reference/learn/developer/react/react-provision-backend.mdx b/dgraph/reference/learn/developer/react/react-provision-backend.mdx
new file mode 100644
index 00000000..7f3cdfea
--- /dev/null
+++ b/dgraph/reference/learn/developer/react/react-provision-backend.mdx
@@ -0,0 +1,27 @@

---
title: Provision a Dgraph Cloud backend
description:
  Dgraph Learn - Build a Message Board App in React. Deploy a backend for each
  app you build with Dgraph Cloud
---

In Dgraph Cloud, an app is served by a GraphQL backend powered by Dgraph
database. You should deploy a backend for each app you build, and potentially
backends for test and development environments as well.

For this tutorial, you will just deploy one backend for development.

- Follow the instructions to [provision a backend](/provision-backend.md)

![Dgraph Cloud console](/images/cloud/dgraph-cloud-backend-live.png)

The URL listed in "GraphQL Endpoint" is the URL at which Dgraph Cloud will serve
data to your app. You'll need that for later, so note it down --- though you'll
always be able to access it from the dashboard. There's nothing at that URL yet;
first, you need to design the GraphQL schema for the app.

## Move on to schema design

Let's now move on to the design process - it's graph-first, in fact, it's
GraphQL-first. You'll design the GraphQL types that your app is based around,
learn about how graphs work and then look at some example queries and mutations.

diff --git a/dgraph/reference/learn/developer/react/react-ui/connect-to-dgraph-cloud.mdx b/dgraph/reference/learn/developer/react/react-ui/connect-to-dgraph-cloud.mdx
new file mode 100644
index 00000000..d72e15a0
--- /dev/null
+++ b/dgraph/reference/learn/developer/react/react-ui/connect-to-dgraph-cloud.mdx
@@ -0,0 +1,80 @@

---
title: Connect to Dgraph Cloud
description:
  Apollo client provides a connection to the GraphQL endpoint & a GraphQL cache
  that lets you manipulate the visual state of the app from the internal cache
---

The GraphQL and React state management library you'll be using in the app is
[Apollo Client 3.x](https://www.apollographql.com/docs/react/).

## Apollo client

For the purposes of this app, Apollo client provides a connection to a GraphQL
endpoint and a GraphQL cache that lets you manage the app's visual state from
the client side. This helps to keep the various components of the UI that rely
on the same data consistent.

Add Apollo client to the project with the following command:

```
yarn add graphql @apollo/client
```

## Create an Apollo client

After Apollo client is added to the app's dependencies, create an Apollo client
instance that is connected to your Dgraph Cloud endpoint. Edit `index.tsx` to
add a function to create the Apollo client, as follows:

```js
import { ApolloClient, createHttpLink, InMemoryCache } from "@apollo/client"

const createApolloClient = () => {
  const httpLink = createHttpLink({
    uri: "<>",
  })

  return new ApolloClient({
    link: httpLink,
    cache: new InMemoryCache(),
  })
}
```

Make sure to replace `<>` with the URL of your Dgraph
Cloud endpoint.

If you didn't note the URL when you created the Dgraph Cloud backend, don't
worry: you can always access it from the Dgraph Cloud dashboard in the
**Overview** tab.
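
Hard-coding the endpoint is fine for this tutorial. A common refinement (an
assumption on my part, not something the tutorial repo does) is to read the
endpoint from a Create React App environment variable, so that development and
production builds can point at different backends:

```js
// Hypothetical refinement: REACT_APP_* variables are inlined by
// Create React App at build time, e.g. from a .env.local file:
//   REACT_APP_GRAPHQL_ENDPOINT=https://your-backend.cloud.dgraph.io/graphql
const createApolloClient = () => {
  const httpLink = createHttpLink({
    // fall back to the placeholder so a missing variable is easy to spot
    uri: process.env.REACT_APP_GRAPHQL_ENDPOINT ?? "<>",
  })

  return new ApolloClient({
    link: httpLink,
    cache: new InMemoryCache(),
  })
}
```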

## Add Apollo client to the React component hierarchy

With an Apollo client created, you then need to pass that client into the React
component hierarchy. Other components in the hierarchy can then use the Apollo
client's React hooks to make GraphQL queries and mutations.

Set up the component hierarchy with the `ApolloProvider` component as the root
component. It takes a `client` prop and wraps the remainder of the app. Change
the root of the app in `index.tsx` to use the Apollo component as follows.

```js
ReactDOM.render(
  <ApolloProvider client={createApolloClient()}>
    <App />
  </ApolloProvider>,
  document.getElementById("root"),
)
```

## This step in GitHub

This step is also available in the
[tutorial GitHub repo](https://github.com/dgraph-io/discuss-tutorial) with the
[connect-to-slash-graphql tag](https://github.com/dgraph-io/discuss-tutorial/releases/tag/connect-to-slash-graphql)
and is
[this code diff](https://github.com/dgraph-io/discuss-tutorial/commit/56e86302d0d7e77d3861708b77124dab9aeeca61).

There won't be any visible changes from this step.

diff --git a/dgraph/reference/learn/developer/react/react-ui/index.mdx b/dgraph/reference/learn/developer/react/react-ui/index.mdx
new file mode 100644
index 00000000..49ba2474
--- /dev/null
+++ b/dgraph/reference/learn/developer/react/react-ui/index.mdx
@@ -0,0 +1,20 @@

---
title: Build a React UI
description:
  With the GraphQL backend deployed and serving the schema, you can move
  directly to building a UI using React and Typescript with the GraphQL Code
  Generator.
---

With the GraphQL backend deployed and serving the schema, you can move directly
to building a UI.

This section of the tutorial walks you through building a UI using React and
Typescript with the
[GraphQL Code Generator](https://graphql-code-generator.com/).

All of the code for building the React app for this tutorial is available on
GitHub in the
[Message Board App Tutorial repo](https://github.com/dgraph-io/discuss-tutorial).

### In this section

diff --git a/dgraph/reference/learn/developer/react/react-ui/react-app-boiler-plate.mdx b/dgraph/reference/learn/developer/react/react-ui/react-app-boiler-plate.mdx
new file mode 100644
index 00000000..8766ec1c
--- /dev/null
+++ b/dgraph/reference/learn/developer/react/react-ui/react-app-boiler-plate.mdx
@@ -0,0 +1,67 @@

---
title: React App Boiler Plate
description:
  Jump right in thanks to the Message Board App Tutorial repo on GitHub. Get
  started with your Message Board App in React with GraphQL.
---

## GitHub repo

All of the code for building the example React app shown in this tutorial is
available in GitHub in the
[Message Board App Tutorial repo](https://github.com/dgraph-io/discuss-tutorial).

Each step in the tutorial is recorded as a tag on the `learn-tutorial` branch in
that repo. That means you can complete each step in the tutorial and compare
what's described here with the corresponding code changes by looking at the git
diff.

## Boilerplate

You'd start an app like this with `npx create-react-app ...` and then
`yarn add ...` to add the dependencies listed on the previous page (such as
Tailwind CSS and Semantic UI React).

This tutorial starts with the minimal boilerplate already complete. To read
through the setup process that was used to build this tutorial, see this
[blog about setting up a Dgraph Cloud app](https://dgraph.io/blog/post/slash-graphql-app-setup/).

For this tutorial, you can start with the boilerplate React app and CSS by
checking out the setup from GitHub.
To do this, see the
[tutorial-boilerplate tag](https://github.com/dgraph-io/discuss-tutorial/releases/tag/tutorial-boilerplate).

You can do this using the `git` CLI:

```sh
git clone https://github.com/dgraph-io/discuss-tutorial
cd discuss-tutorial
git fetch --all --tags
git checkout tags/tutorial-boilerplate -b learn-tutorial
```

Alternatively, you can visit https://github.com/dgraph-io/discuss-tutorial/tags
and download the archive (**.zip** or **.tar.gz**) for the
`tutorial-boilerplate` tag.

## Running the app boilerplate

After you have the boilerplate code on your machine, you can start the app using
the following `yarn` commands:

```sh
yarn install
yarn start
```

These commands build the source and serve the app UI in development mode. The
app UI is usually served at `http://localhost:3000`, but the exact port may vary
depending on what else is running on your machine. Yarn will report the URL as
soon as it has the server up and running.

Navigate to the provided URL, and you'll see the boilerplate app running, as
seen below:

![running boiler plate app](/images/message-board/app-boilerplate.png)

At this point, you have just the CSS styling and minimal React setup. Next,
you'll move on to building the app.

diff --git a/dgraph/reference/learn/developer/react/react-ui/react-routing.mdx b/dgraph/reference/learn/developer/react/react-ui/react-routing.mdx
new file mode 100644
index 00000000..af0994eb
--- /dev/null
+++ b/dgraph/reference/learn/developer/react/react-ui/react-routing.mdx
@@ -0,0 +1,108 @@

---
title: Routing in React
description:
  Use React Router to build a message board app. A routing library in the UI
  interprets the URL path and renders an appropriate page for that path.
---

In a single-page application like this, a routing library in the UI interprets
the URL path and renders an appropriate page for that path.

## React Router

The routing library you'll be using in the app is
[React Router](https://reactrouter.com/web/guides/quick-start). It provides a
way to create routes and navigate between them. For example, the app's home URL
at `/` will render a list of posts, while `/post/0x123` will render the React
component for the post with id `0x123`.

Add dependencies to the project using the following commands:

```
yarn add react-router-dom
yarn add -D @types/react-router-dom
```

The `-D` option adds the TypeScript types `@types/react-router-dom` to the
project as a development dependency. Types are part of the development
environment of the project to help you build the app, but they are compiled away
in the production build.

## Add components

You'll need components for the app to route to the various URLs. Create a
`src/components` directory, and then components for the home page
(`components/home.tsx`) and posts (`components/post.tsx`), with the following
file content:

```js
// components/home.tsx
import React from "react"

export function Home() {
  return <div>Home</div>
}
```

```js
// components/post.tsx
import React from "react"

export function Post() {
  return <div>Post</div>
}
```

You can leave those as boilerplate for now and fill them in when you add GraphQL
queries in the next step of this tutorial.

## Add routing

With the boilerplate components in place, you are now ready to add the routing
logic to your app. Edit `App.tsx` to add routes for the `home` and `post` views,
as shown below.

Note that the base URL points to the `home` component and `/post/:id` to the
`post` component. In the post component, `id` is used to get the data for the
right post:

```js
...
import { Home } from "./components/home";
import { Post } from "./components/post";
import { BrowserRouter, Switch, Route } from "react-router-dom";

export function App() {
  return (
    <>
      ...
      <div>
        Learn about building GraphQL apps with Dgraph Cloud at
        https://dgraph.io/learn
      </div>
      <BrowserRouter>
        <Switch>
          <Route exact path="/">
            <Home />
          </Route>
          <Route path="/post/:id">
            <Post />
          </Route>
        </Switch>
      </BrowserRouter>
    </>
  );
}
```

## This Step in GitHub

This step is also available in the
[tutorial GitHub repo](https://github.com/dgraph-io/discuss-tutorial) with the
[routing-in-react tag](https://github.com/dgraph-io/discuss-tutorial/releases/tag/routing-in-react)
and is
[this code diff](https://github.com/dgraph-io/discuss-tutorial/commit/8d488e8c9bbccaa96c88fc49860021c493f1afca).

You can run the app using the `yarn start` command, and then you can navigate to
`http://localhost:3000` and `http://localhost:3000/post/0x123` to see the
various pages rendered.

diff --git a/dgraph/reference/learn/developer/react/react-ui/react-ui-graphql-mutations.mdx b/dgraph/reference/learn/developer/react/react-ui/react-ui-graphql-mutations.mdx
new file mode 100644
index 00000000..44a78513
--- /dev/null
+++ b/dgraph/reference/learn/developer/react/react-ui/react-ui-graphql-mutations.mdx
@@ -0,0 +1,453 @@

---
title: GraphQL Mutations
description:
  With a working UI for querying sample data you added, you now need a UI to add
  new posts using GraphQL Mutations in Apollo React.
---

Working through the tutorial to this point gives you a working UI that you can
use to query the sample data that you added, but doesn't give you a UI to add
new posts.

To add new posts, you'll need to generate and use GraphQL Code Generator hooks
for adding posts, and lay out the UI components so a user can enter the data.

## GraphQL fragments

In this part of the tutorial, you'll add the ability to add a post. That's an
`addPost` mutation, and a GraphQL mutation can return data, just like a query.
In this case, it makes sense to have the `addPost` mutation return the same data
as the `allPosts` query, because the UI should adjust to insert the new post
into the home page's post list. GraphQL has a nice mechanism called _fragments_
to allow this type of reuse.

In the previous section, you added the `allPosts` query like this:

```graphql
query allPosts {
  queryPost(order: { desc: datePublished }) {
    id
    title
    tags
    datePublished
    category {
      id
      name
    }
    author {
      username
      displayName
      avatarImg
    }
    commentsAggregate {
      count
    }
  }
}
```

This can be easily changed to use a fragment by defining the body of the query
as a fragment and then using that in the query. You can do this by updating the
definition of `allPosts` in the `src/components/operations.graphql` file as
follows:

```graphql
fragment postData on Post {
  id
  title
  text
  tags
  datePublished
  category {
    id
    name
  }
  author {
    username
    displayName
    avatarImg
  }
  commentsAggregate {
    count
  }
}

query allPosts {
  queryPost(order: { desc: datePublished }) {
    ...postData
  }
}
```

The syntax `...postData` says "take the `postData` fragment and use it here".

## GraphQL mutations

With a fragment set up for the return data, the mutation to add a post can use
exactly the same result data.

Add the following definition to `src/components/operations.graphql` to add the
mutation that lets users add a post:

```graphql
mutation addPost($post: AddPostInput!) {
  addPost(input: [$post]) {
    post {
      ...postData
    }
  }
}
```

This mutation expects input data in the shape of the `AddPostInput` input type.
TypeScript and GraphQL Code Generator will make sure you provide an input of the
correct type. This mutation returns data of the same shape as the `allPosts`
query; you'll see why that's important when using the Apollo cache.

Run the following command to tell the GraphQL Code Generator to generate a React
hook, `useAddPostMutation`, that extracts the component logic of this mutation
into a reusable function:

```sh
yarn run generate-types
```

The boilerplate to use a query is to run the query as part of loading the
component, as in the following example:

```js
const { data, loading, error } = useAllPostsQuery()

if (loading) {
  /* render loading indicator */
}

if (error) {
  /* handle error */
}

// layout using 'data'
```

However, mutations work differently. To use a mutation, you use the hook to
create a function that actually runs the mutation and configure that with
callback functions that execute after the mutation completes. Accordingly, the
boilerplate for a mutation is as follows:

```js
const [addPost] = useAddPostMutation({
  /* what happens after the mutation is executed */
})
```

With this syntax, calling `addPost({ variables: ... })` executes the mutation
with the passed-in post data, and after the GraphQL mutation returns, the
callback functions are executed.

## Apollo cache

As well as GraphQL support, the Apollo Client library also provides state
management, using the Apollo cache.

The flow of adding a new post is as follows: the user is on the home (post list)
page. There, they press a button to create a post, which brings up a modal UI
component (sometimes called a _modal dialog_) to enter the post data. The user
fills in the details of the post, and the mutation is submitted when they press
_Submit_. This results in a new post, but how does that new post get into the
list of posts? One option is to force a reload of the whole page, but that'll
force all components to reload and probably won't be a great user experience.
Another option is to just force reloading of the `allPosts` query, as follows:

```js
const [addPost] = useAddPostMutation({
  refetchQueries: [ { query: /* ... allPosts ... */ } ],
})
```

This would work, but still requires two round trips from the UI to the server to
complete:

1. Clicking _Submit_ on the new post sends data to the server, and the UI waits
   for that to complete (one round trip)
2. This then triggers the `allPosts` query to execute (a second round trip)

When the `allPosts` query is re-executed, it changes the `data` value of
`const { data, loading, error } = useAllPostsQuery()` in the post list
component, and React re-renders that component.

Again, this works, but it could be more efficient: the UI actually already has
all of the data it needs to render the updated UI after the first round trip,
because the only new post on the server is the one that was added by the
mutation. So, to avoid a trip to the server, you can manually update Apollo's
view of the result of the `allPosts` query and force the re-render, without
round-tripping to the server. That's done by editing the cached value, as
follows:

```js
const [addPost] = useAddPostMutation({
  update(cache, { data }) {
    const existing = cache.readQuery<AllPostsQuery>({
      query: AllPostsDocument,
    })

    cache.writeQuery({
      query: AllPostsDocument,
      data: {
        queryPost: [
          ...(data?.addPost?.post ?? []),
          ...(existing?.queryPost ?? []),
        ],
      },
    })
  },
})
```

That sets up the `addPost` function to run the `addPost` mutation, and on
completion inserts the new post into the cache.
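
One detail worth knowing (an aside, not something this tutorial's code relies
on): the generated hook also returns a result object alongside the mutate
function, which you can use to disable the form's Submit button or surface an
error while the mutation is in flight:

```js
// A sketch: the second element of the returned tuple reports mutation status.
const [addPost, { loading: addingPost, error: addPostError }] =
  useAddPostMutation({
    /* update(cache, { data }) { ... } as shown above */
  })

// e.g. <Button disabled={addingPost} onClick={submitPost}>Submit</Button>
```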

## Layout for the mutation

All the logic for adding a post will be in the app header:
`src/components/header.tsx`. This logic adds a button that shows a modal to add
the post. The visibility of the modal is controlled by React state, set up
through the `useState` hook, as follows:

```js
const [createPost, setCreatePost] = useState(false)
...
```

The state for the new post data is again controlled by React state. The modal
gives the user input options to update that data, as follows:

```js
  const [title, setTitle] = useState("")
  const [category, setCategory]: any = useState("")
  const [text, setText]: any = useState("")
  const [tags, setTags]: any = useState("")
```

Then, clicking _Submit_ in the modal closes it and calls a function that
collects the state and calls the `addPost` function, as follows:

```js
const submitPost = () => {
  setCreatePost(false)
  const post = {
    text: text,
    title: title,
    tags: tags,
    category: { id: category },
    author: { username: "TestUser" },
    datePublished: new Date().toISOString(),
    comments: [],
  }
  addPost({ variables: { post: post } })
}
```

The modal is also set up with a list of possible categories for the post, by
first querying to find the existing categories and populating a dropdown from
that. With all of these changes, the `src/components/header.tsx` file looks as
follows:

```js
import React, { useState } from "react"
import {
  Image,
  Modal,
  Form,
  Button,
  Dropdown,
  Loader,
  TextArea,
} from "semantic-ui-react"
import { Link } from "react-router-dom"
import {
  useAddPostMutation,
  AllPostsQuery,
  useCategoriesQuery,
  AllPostsDocument,
} from "./types/operations"

export function AppHeader() {
  const [createPost, setCreatePost] = useState(false)
  const [title, setTitle] = useState("")
  const [category, setCategory]: any = useState("")
  const [text, setText]: any = useState("")
  const [tags, setTags]: any = useState("")

  const {
    data: categoriesData,
    loading: categoriesLoading,
    error: categoriesError,
  } = useCategoriesQuery()

  const addPostButton = () => {
    if (categoriesLoading) {
      return <Loader active inline />
    } else if (categoriesError) {
      return <div>{`Error! ${categoriesError.message}`}</div>
    } else {
      return (
        <Button onClick={() => setCreatePost(true)}>Create Post</Button>
      )
    }
  }

  const categoriesOptions = categoriesData?.queryCategory?.map((category) => {
    return { key: category?.id, text: category?.name, value: category?.id }
  })

  const [addPost] = useAddPostMutation({
    update(cache, { data }) {
      const existing = cache.readQuery<AllPostsQuery>({
        query: AllPostsDocument,
      })

      cache.writeQuery({
        query: AllPostsDocument,
        data: {
          queryPost: [
            ...(data?.addPost?.post ?? []),
            ...(existing?.queryPost ?? []),
          ],
        },
      })
    },
  })

  const submitPost = () => {
    setCreatePost(false)
    const post = {
      text: text,
      title: title,
      tags: tags,
      category: { id: category },
      author: { username: "TestUser" },
      datePublished: new Date().toISOString(),
      comments: [],
    }
    addPost({ variables: { post: post } })
  }

  const showCreatePost = (
    <Modal
      onClose={() => setCreatePost(false)}
      onOpen={() => setCreatePost(true)}
      open={createPost}
    >
      <Modal.Header>Create Post</Modal.Header>
      <Modal.Content>
        <Form>
          <Form.Field>
            <label>Title</label>
            <input
              placeholder="Title"
              onChange={(e) => setTitle(e.target.value)}
            />
          </Form.Field>
          <Form.Field>
            <label>Category</label>
            <Dropdown
              placeholder="Select a category"
              selection
              options={categoriesOptions}
              onChange={(e, data) => setCategory(data.value)}
            />
          </Form.Field>
          <Form.Field>
            <label>Tags</label>
            <input
              placeholder="Tags"
              onChange={(e) => setTags(e.target.value)}
            />
          </Form.Field>