
[tools] Add tool to copy table data to another table
Copy table data to another table; the two tables can be in the same
cluster or in different clusters. The two tables must have the same
table schema, but may have different partition schemas. Alternatively,
the tool can create the new table using the same table and partition
schema as the source table.

This tool is useful for copying small tables, or simply for copying
table schemas in a convenient way; the Java client's Backup/Restore
features are recommended for copying large tables.
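
As a rough illustration of what such a copy involves, the sketch below implements the scan-and-insert loop against the public Kudu C++ client API rather than the internal TableScanner added by this change. The master addresses, table names, and the (key INT32, val STRING) column layout are placeholder assumptions, not part of this commit.

```cpp
// Sketch: copy all rows of a small table from one cluster to another using
// only the public C++ client API. Master addresses, table names, and the
// (key INT32, val STRING) schema are placeholders for illustration.
#include <cstdlib>
#include <iostream>

#include <kudu/client/client.h>
#include <kudu/client/scan_batch.h>

using kudu::client::KuduClient;
using kudu::client::KuduClientBuilder;
using kudu::client::KuduInsert;
using kudu::client::KuduScanBatch;
using kudu::client::KuduScanner;
using kudu::client::KuduSession;
using kudu::client::KuduTable;
using kudu::client::sp::shared_ptr;

static void CheckOk(const kudu::Status& s) {
  if (!s.ok()) {
    std::cerr << s.ToString() << std::endl;
    std::exit(1);
  }
}

int main() {
  shared_ptr<KuduClient> src_client, dst_client;
  CheckOk(KuduClientBuilder().add_master_server_addr("src-master:7051").Build(&src_client));
  CheckOk(KuduClientBuilder().add_master_server_addr("dst-master:7051").Build(&dst_client));

  shared_ptr<KuduTable> src_table, dst_table;
  CheckOk(src_client->OpenTable("source_table", &src_table));
  CheckOk(dst_client->OpenTable("dest_table", &dst_table));

  shared_ptr<KuduSession> session = dst_client->NewSession();
  CheckOk(session->SetFlushMode(KuduSession::AUTO_FLUSH_BACKGROUND));

  KuduScanner scanner(src_table.get());
  CheckOk(scanner.Open());
  KuduScanBatch batch;
  while (scanner.HasMoreRows()) {
    CheckOk(scanner.NextBatch(&batch));
    for (KuduScanBatch::RowPtr row : batch) {
      // Re-encode each scanned row as an insert against the destination table.
      KuduInsert* insert = dst_table->NewInsert();
      int32_t key;
      kudu::Slice val;
      CheckOk(row.GetInt32("key", &key));
      CheckOk(row.GetString("val", &val));
      CheckOk(insert->mutable_row()->SetInt32("key", key));
      // Copy the string so the write doesn't reference scan-batch memory
      // that is invalidated by the next NextBatch() call.
      CheckOk(insert->mutable_row()->SetStringCopy("val", val));
      CheckOk(session->Apply(insert));
    }
  }
  CheckOk(session->Flush());
  return 0;
}
```

The friend declarations for tools::TableScanner added below presumably exist so the tool can copy rows generically, without spelling out each column type as this sketch does.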

Change-Id: Ifdec51701ac9ec57739b1a6f7c18786294642a54
acelyc111 committed Mar 22, 2019
1 parent 28c7067 commit 341ce19
Showing 13 changed files with 808 additions and 72 deletions.
2 changes: 2 additions & 0 deletions src/kudu/client/scan_batch.h
@@ -42,6 +42,7 @@ class Schema;

namespace tools {
class ReplicaDumper;
class TableScanner;
} // namespace tools

namespace client {
@@ -294,6 +295,7 @@ class KUDU_EXPORT KuduScanBatch::RowPtr {

private:
friend class KuduScanBatch;
friend class tools::TableScanner;
template<typename KeyTypeWrapper> friend struct SliceKeysTestSetup;
template<typename KeyTypeWrapper> friend struct IntKeysTestSetup;

2 changes: 1 addition & 1 deletion src/kudu/client/schema.cc
@@ -759,7 +759,7 @@ string KuduSchema::ToString() const {
}

KuduSchema KuduSchema::FromSchema(const Schema& schema) {
return KuduSchema(schema);
return KuduSchema(schema.CopyWithoutColumnIds());
}

Schema KuduSchema::ToSchema(const KuduSchema& kudu_schema) {
5 changes: 5 additions & 0 deletions src/kudu/common/partial_row.h
@@ -52,6 +52,10 @@ namespace tablet {
template<typename KeyTypeWrapper> struct NumTypeRowOps; // IWYU pragma: keep
} // namespace tablet

namespace tools {
class TableScanner;
} // namespace tools

/// @endcond

class Schema;
@@ -500,6 +504,7 @@ class KUDU_EXPORT KuduPartialRow {
friend class PartitionSchema;
friend class RowOperationsPBDecoder;
friend class RowOperationsPBEncoder;
friend class tools::TableScanner;
friend class TestScanSpec;
template<typename KeyTypeWrapper> friend struct client::SliceKeysTestSetup;
template<typename KeyTypeWrapper> friend struct client::IntKeysTestSetup;
39 changes: 23 additions & 16 deletions src/kudu/common/partition.h
@@ -136,6 +136,15 @@ class Partition {
// the methods which format individual partition keys do redact.
class PartitionSchema {
public:
struct RangeSchema {
std::vector<ColumnId> column_ids;
};

struct HashBucketSchema {
std::vector<ColumnId> column_ids;
int32_t num_buckets;
uint32_t seed;
};

// Deserializes a protobuf message into a partition schema.
static Status FromPB(const PartitionSchemaPB& pb,
@@ -234,21 +243,25 @@ class PartitionSchema {
// contain unredacted row data.
Status MakeUpperBoundRangePartitionKeyExclusive(KuduPartialRow* row) const;

// Decodes a range partition key into a partial row, with variable-length
// fields stored in the arena.
Status DecodeRangeKey(Slice* encode_key,
KuduPartialRow* partial_row,
Arena* arena) const;

const RangeSchema& range_partition_schema() const {
return range_schema_;
}

const std::vector<HashBucketSchema>& hash_partition_schemas() const {
return hash_bucket_schemas_;
}

private:
friend class PartitionPruner;
FRIEND_TEST(PartitionTest, TestIncrementRangePartitionBounds);
FRIEND_TEST(PartitionTest, TestIncrementRangePartitionStringBounds);

struct RangeSchema {
std::vector<ColumnId> column_ids;
};

struct HashBucketSchema {
std::vector<ColumnId> column_ids;
int32_t num_buckets;
uint32_t seed;
};

// Returns a text description of the encoded range key suitable for debug printing.
std::string RangeKeyDebugString(Slice range_key, const Schema& schema) const;
std::string RangeKeyDebugString(const KuduPartialRow& key) const;
@@ -318,12 +331,6 @@ class PartitionSchema {
// This method is useful used for encoding splits and bounds.
Status EncodeRangeKey(const KuduPartialRow& row, const Schema& schema, std::string* key) const;

// Decodes a range partition key into a partial row, with variable-length
// fields stored in the arena.
Status DecodeRangeKey(Slice* encode_key,
KuduPartialRow* partial_row,
Arena* arena) const;

// Decodes the hash bucket component of a partition key into its buckets.
//
// This should only be called with partition keys created from a row, not with
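
The partition.h change above makes RangeSchema and HashBucketSchema public and adds accessors for them, and also moves DecodeRangeKey() into the public section, presumably so the new tool can reproduce the source table's partition design on the destination. A minimal sketch of how the new accessors could be used from inside the Kudu source tree follows; the function name here is hypothetical.

```cpp
// Sketch (hypothetical helper inside the Kudu source tree): summarize a
// table's partition design using the accessors made public in this change.
#include <iostream>

#include "kudu/common/partition.h"

void DescribePartitioning(const kudu::PartitionSchema& pschema) {
  // Each hash bucket schema carries its column IDs, bucket count, and seed.
  for (const auto& hash : pschema.hash_partition_schemas()) {
    std::cout << "hash bucketing over " << hash.column_ids.size()
              << " column(s) into " << hash.num_buckets
              << " buckets (seed " << hash.seed << ")\n";
  }
  // The range schema lists the columns that make up the range partition key.
  std::cout << "range partitioning over "
            << pschema.range_partition_schema().column_ids.size()
            << " column(s)\n";
}
```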
2 changes: 1 addition & 1 deletion src/kudu/common/schema.cc
@@ -345,7 +345,6 @@ Schema Schema::CopyWithColumnIds() const {
}

Schema Schema::CopyWithoutColumnIds() const {
CHECK(has_column_ids());
return Schema(cols_, num_key_columns_);
}

@@ -430,6 +429,7 @@ string Schema::ToString(ToStringMode mode) const {
if (cols_.empty()) return "()";

vector<string> pk_strs;
pk_strs.reserve(num_key_columns_);
for (int i = 0; i < num_key_columns_; i++) {
pk_strs.push_back(cols_[i].name());
}
2 changes: 0 additions & 2 deletions src/kudu/common/schema.h
@@ -696,8 +696,6 @@ class Schema {

// Return a new Schema which is the same as this one, but without any column
// IDs assigned.
//
// Requires that this schema has column IDs.
Schema CopyWithoutColumnIds() const;

// Create a new schema containing only the selected columns.
7 changes: 3 additions & 4 deletions src/kudu/integration-tests/test_workload.cc
@@ -93,12 +93,11 @@ void TestWorkload::set_schema(const client::KuduSchema& schema) {
CHECK_GT(schema.num_columns(), 0) << "Schema should have at least one column";
std::vector<int> key_indexes;
schema.GetPrimaryKeyColumnIndexes(&key_indexes);
CHECK_EQ(1, key_indexes.size()) << "Schema should have just one key column";
CHECK_EQ(0, key_indexes[0]) << "Schema's key column should be index 0";
CHECK_LE(1, key_indexes.size()) << "Schema should have at least one key column";
CHECK_EQ(0, key_indexes[0]) << "Schema's first key column should be index 0";
KuduColumnSchema key = schema.Column(0);
CHECK_EQ("key", key.name()) << "Schema column should be named 'key'";
CHECK_EQ(KuduColumnSchema::INT32, key.type())
<< "Schema key column should be of type INT32";
<< "Schema's first key column should be of type INT32";
schema_ = schema;
}

9 changes: 9 additions & 0 deletions src/kudu/tools/data_gen_util.cc
@@ -61,6 +61,15 @@ void WriteValueToColumn(const client::KuduSchema& schema,
case client::KuduColumnSchema::BOOL:
CHECK_OK(row->SetBool(col_idx, value));
break;
case client::KuduColumnSchema::BINARY:
CHECK_OK(row->SetBinaryCopy(col_idx, FastHex64ToBuffer(value, buf)));
break;
case client::KuduColumnSchema::UNIXTIME_MICROS:
CHECK_OK(row->SetUnixTimeMicros(col_idx, value));
break;
case client::KuduColumnSchema::DECIMAL:
CHECK_OK(row->SetUnscaledDecimal(col_idx, value));
break;
default:
LOG(FATAL) << "Unexpected data type: " << type;
}
1 change: 0 additions & 1 deletion src/kudu/tools/kudu-admin-test.cc
@@ -89,7 +89,6 @@ using kudu::client::KuduInsert;
using kudu::client::KuduSchema;
using kudu::client::KuduSchemaBuilder;
using kudu::client::KuduTable;
using kudu::client::KuduTableAlterer;
using kudu::client::KuduTableCreator;
using kudu::client::KuduValue;
using kudu::client::sp::shared_ptr;