Skip to content
This repository was archived by the owner on Jul 1, 2023. It is now read-only.

Commit ab9fb4b

Browse files
lwfacebook-github-bot
authored and committed Sep 4, 2020
Add libnop dependency and use for tensor descriptor
Summary: Include libnop as a dependency but, for now, keep protobuf too. Provide a set of helpers for libnop, primarily to perform type erasure. In order to build and validate these helpers, use them to (de)serialize the tensor descriptors produced/consumed by channels, as they are a simple and self-contained application. Wider migration from protobuf to libnop will come later. Reviewed By: heiner Differential Revision: D22763734 fbshipit-source-id: 75bf8211ba9f89a3ee62e518b9ac44017a4f707e
1 parent 08c93ae commit ab9fb4b

File tree

10 files changed

+161
-66
lines changed

10 files changed

+161
-66
lines changed
 

‎.gitmodules

+3
Original file line numberDiff line numberDiff line change
@@ -8,3 +8,6 @@
88
[submodule "third_party/googletest"]
99
path = third_party/googletest
1010
url = https://github.com/google/googletest.git
11+
[submodule "third_party/libnop"]
12+
path = third_party/libnop
13+
url = https://github.com/google/libnop.git

‎tensorpipe/CMakeLists.txt

+8-4
Original file line numberDiff line numberDiff line change
@@ -43,16 +43,14 @@ target_sources(tensorpipe PRIVATE
4343

4444
target_sources(tensorpipe PRIVATE
4545
channel/xth/channel.cc
46-
channel/xth/context.cc
47-
proto/channel/xth.proto)
46+
channel/xth/context.cc)
4847

4948
### cma
5049

5150
if(TP_ENABLE_CMA)
5251
target_sources(tensorpipe PRIVATE
5352
channel/cma/channel.cc
54-
channel/cma/context.cc
55-
proto/channel/cma.proto)
53+
channel/cma/context.cc)
5654
set(TENSORPIPE_HAS_CMA_CHANNEL 1)
5755
else()
5856
set(TENSORPIPE_HAS_CMA_CHANNEL 0)
@@ -131,6 +129,12 @@ endif()
131129
configure_file(tensorpipe.h.in tensorpipe.h)
132130

133131

132+
## Libnop
133+
134+
# Keep libnop headers private as they should not be exposed to downstream users.
135+
target_include_directories(tensorpipe PRIVATE ${PROJECT_SOURCE_DIR}/third_party/libnop/include)
136+
137+
134138
## Python bindings
135139

136140
if(TP_BUILD_PYTHON)

‎tensorpipe/channel/cma/channel.cc

+23-9
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,9 @@
1717
#include <list>
1818
#include <mutex>
1919

20+
#include <nop/serializer.h>
21+
#include <nop/structure.h>
22+
2023
#include <tensorpipe/channel/error.h>
2124
#include <tensorpipe/channel/helpers.h>
2225
#include <tensorpipe/common/callback.h>
@@ -26,12 +29,21 @@
2629
#include <tensorpipe/common/optional.h>
2730
#include <tensorpipe/common/queue.h>
2831
#include <tensorpipe/common/system.h>
29-
#include <tensorpipe/proto/channel/cma.pb.h>
3032

3133
namespace tensorpipe {
3234
namespace channel {
3335
namespace cma {
3436

37+
namespace {
38+
39+
struct Descriptor {
40+
uint32_t pid;
41+
uint64_t ptr;
42+
NOP_STRUCTURE(Descriptor, pid, ptr);
43+
};
44+
45+
} // namespace
46+
3547
class Channel::Impl : public std::enable_shared_from_this<Channel::Impl> {
3648
public:
3749
Impl(
@@ -221,11 +233,12 @@ void Channel::Impl::sendFromLoop_(
221233
callback(impl.error_);
222234
}));
223235

224-
proto::Descriptor pbDescriptor;
225-
pbDescriptor.set_pid(getpid());
226-
pbDescriptor.set_ptr(reinterpret_cast<uint64_t>(ptr));
236+
NopHolder<Descriptor> nopHolder;
237+
Descriptor& nopDescriptor = nopHolder.getObject();
238+
nopDescriptor.pid = getpid();
239+
nopDescriptor.ptr = reinterpret_cast<uint64_t>(ptr);
227240

228-
descriptorCallback(Error::kSuccess, saveDescriptor(pbDescriptor));
241+
descriptorCallback(Error::kSuccess, saveDescriptor(nopHolder));
229242
}
230243

231244
// Receive memory region from peer.
@@ -277,10 +290,11 @@ void Channel::Impl::recvFromLoop_(
277290
return;
278291
}
279292

280-
proto::Descriptor pbDescriptor;
281-
loadDescriptor(pbDescriptor, descriptor);
282-
pid_t remotePid = pbDescriptor.pid();
283-
void* remotePtr = reinterpret_cast<void*>(pbDescriptor.ptr());
293+
NopHolder<Descriptor> nopHolder;
294+
loadDescriptor(nopHolder, descriptor);
295+
Descriptor& nopDescriptor = nopHolder.getObject();
296+
pid_t remotePid = nopDescriptor.pid;
297+
void* remotePtr = reinterpret_cast<void*>(nopDescriptor.ptr);
284298

285299
TP_VLOG(6) << "Channel " << id_ << " is copying payload (#" << sequenceNumber
286300
<< ")";

‎tensorpipe/channel/helpers.cc

+17-9
Original file line numberDiff line numberDiff line change
@@ -9,22 +9,30 @@
99
#include <tensorpipe/channel/helpers.h>
1010

1111
#include <tensorpipe/common/defs.h>
12+
#include <tensorpipe/common/nop.h>
1213

1314
namespace tensorpipe {
1415
namespace channel {
1516

16-
Channel::TDescriptor saveDescriptor(const google::protobuf::MessageLite& pb) {
17-
Channel::TDescriptor out;
18-
const auto success = pb.SerializeToString(&out);
19-
TP_DCHECK(success) << "Failed to serialize protobuf message";
17+
Channel::TDescriptor saveDescriptor(const AbstractNopHolder& object) {
18+
const size_t len = object.getSize();
19+
Channel::TDescriptor out(len, '\0');
20+
nop::BufferWriter writer(reinterpret_cast<uint8_t*>(out.data()), len);
21+
22+
nop::Status<void> status = object.write(writer);
23+
TP_THROW_ASSERT_IF(status.has_error())
24+
<< "Error saving descriptor: " << status.GetErrorMessage();
25+
2026
return out;
2127
}
2228

23-
void loadDescriptor(
24-
google::protobuf::MessageLite& pb,
25-
const Channel::TDescriptor& in) {
26-
const auto success = pb.ParseFromString(in);
27-
TP_DCHECK(success) << "Failed to parse protobuf message";
29+
void loadDescriptor(AbstractNopHolder& object, const Channel::TDescriptor& in) {
30+
const size_t len = in.size();
31+
nop::BufferReader reader(reinterpret_cast<const uint8_t*>(in.data()), len);
32+
33+
nop::Status<void> status = object.read(reader);
34+
TP_THROW_ASSERT_IF(status.has_error())
35+
<< "Error loading descriptor: " << status.GetErrorMessage();
2836
}
2937

3038
} // namespace channel

‎tensorpipe/channel/helpers.h

+3-6
Original file line numberDiff line numberDiff line change
@@ -10,18 +10,15 @@
1010

1111
// Note: never include this file from headers!
1212

13-
#include <google/protobuf/message_lite.h>
14-
1513
#include <tensorpipe/channel/channel.h>
14+
#include <tensorpipe/common/nop.h>
1615

1716
namespace tensorpipe {
1817
namespace channel {
1918

20-
Channel::TDescriptor saveDescriptor(const google::protobuf::MessageLite& pb);
19+
Channel::TDescriptor saveDescriptor(const AbstractNopHolder& object);
2120

22-
void loadDescriptor(
23-
google::protobuf::MessageLite& pb,
24-
const Channel::TDescriptor& in);
21+
void loadDescriptor(AbstractNopHolder& object, const Channel::TDescriptor& in);
2522

2623
} // namespace channel
2724
} // namespace tensorpipe

‎tensorpipe/channel/xth/channel.cc

+20-7
Original file line numberDiff line numberDiff line change
@@ -8,18 +8,29 @@
88

99
#include <tensorpipe/channel/xth/channel.h>
1010

11+
#include <nop/serializer.h>
12+
#include <nop/structure.h>
13+
1114
#include <tensorpipe/channel/error.h>
1215
#include <tensorpipe/channel/helpers.h>
1316
#include <tensorpipe/common/callback.h>
1417
#include <tensorpipe/common/defs.h>
1518
#include <tensorpipe/common/error.h>
1619
#include <tensorpipe/common/error_macros.h>
17-
#include <tensorpipe/proto/channel/xth.pb.h>
1820

1921
namespace tensorpipe {
2022
namespace channel {
2123
namespace xth {
2224

25+
namespace {
26+
27+
struct Descriptor {
28+
uint64_t ptr;
29+
NOP_STRUCTURE(Descriptor, ptr);
30+
};
31+
32+
} // namespace
33+
2334
class Channel::Impl : public std::enable_shared_from_this<Channel::Impl> {
2435
public:
2536
Impl(
@@ -208,10 +219,11 @@ void Channel::Impl::sendFromLoop_(
208219
callback(impl.error_);
209220
}));
210221

211-
proto::Descriptor pbDescriptor;
212-
pbDescriptor.set_ptr(reinterpret_cast<std::uintptr_t>(ptr));
222+
NopHolder<Descriptor> nopHolder;
223+
Descriptor& nopDescriptor = nopHolder.getObject();
224+
nopDescriptor.ptr = reinterpret_cast<std::uintptr_t>(ptr);
213225

214-
descriptorCallback(Error::kSuccess, saveDescriptor(pbDescriptor));
226+
descriptorCallback(Error::kSuccess, saveDescriptor(nopHolder));
215227
}
216228

217229
// Receive memory region from peer.
@@ -262,9 +274,10 @@ void Channel::Impl::recvFromLoop_(
262274
return;
263275
}
264276

265-
proto::Descriptor pbDescriptor;
266-
loadDescriptor(pbDescriptor, descriptor);
267-
void* remotePtr = reinterpret_cast<void*>(pbDescriptor.ptr());
277+
NopHolder<Descriptor> nopHolder;
278+
loadDescriptor(nopHolder, descriptor);
279+
Descriptor& nopDescriptor = nopHolder.getObject();
280+
void* remotePtr = reinterpret_cast<void*>(nopDescriptor.ptr);
268281
TP_VLOG(6) << "Channel " << id_ << " is copying payload (#" << sequenceNumber
269282
<< ")";
270283
context_->requestCopy(

‎tensorpipe/common/nop.h

+86
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
/*
2+
* Copyright (c) Facebook, Inc. and its affiliates.
3+
* All rights reserved.
4+
*
5+
* This source code is licensed under the BSD-style license found in the
6+
* LICENSE file in the root directory of this source tree.
7+
*/
8+
9+
#pragma once
10+
11+
#include <nop/serializer.h>
12+
#include <nop/status.h>
13+
#include <nop/utility/buffer_reader.h>
14+
#include <nop/utility/buffer_writer.h>
15+
16+
namespace tensorpipe {
17+
18+
// Libnop makes heavy use of templates, whereas TensorPipe is designed around
19+
// polymorphism (abstract interfaces and concrete derived classes). The two
20+
// don't mix well: for example, one can't have virtual method templates. One
21+
// technique to get around this is type erasure, which is however tricky to get
22+
// right because the "fundamental" operation(s) of libnop, (de)serialization,
23+
// are simultaneously templated on two types: the reader/writer and the object.
24+
// Ideally we'd like for both these sets of types to be dynamically extensible,
25+
// as we want to allow transports to provide their own specialized readers and
26+
// writers, and channels could have their own custom objects that they want to
27+
// (de)serialize. New transports and channels could be implemented by third
28+
// parties and plugged in at runtime, so the sets of reader/writers and of
29+
// objects that we must support can't be known in advance.
30+
31+
// We had originally found a solution to this pickle by doing two type erasures
32+
// one after the other, first on the reader/writer, which deals with bytes and
33+
// not objects and is thus not templated, and then on objects, leveraging the
34+
// fact that there is one libnop (de)serializer that takes a *pointer* to a
35+
// reader/writer giving us a "hook" on which to do polymorphism, by hardcoding a
36+
// pointer to the base reader/writer class as template parameter, but then
37+
// passing in an instance of a concrete subclass at runtime.
38+
39+
// However it turned out that this performed poorly, apparently due to the
40+
// (de)serialization process consisting of many small calls to the reader/writer
41+
// which each had to perform a vtable lookup. So, instead, we decided to not
42+
// allow transports to utilize custom specialized readers/writers and to provide
43+
// a single global reader/writer class that is able to cover the two main usage
44+
// patterns we think are most likely to come up: reading/writing to a temporary
45+
// contiguous buffer, and reading/writing to a ringbuffer.
46+
47+
// The helpers to perform type erasure of the object type: a untemplated base
48+
// class exposing the methods we need for (de)serialization, and then templated
49+
// subclasses allowing to create a holder for each concrete libnop type.
50+
51+
class AbstractNopHolder {
52+
public:
53+
virtual size_t getSize() const = 0;
54+
virtual nop::Status<void> write(nop::BufferWriter& writer) const = 0;
55+
virtual nop::Status<void> read(nop::BufferReader& reader) = 0;
56+
virtual ~AbstractNopHolder() = default;
57+
};
58+
59+
template <typename T>
60+
class NopHolder : public AbstractNopHolder {
61+
public:
62+
T& getObject() {
63+
return object_;
64+
}
65+
66+
const T& getObject() const {
67+
return object_;
68+
}
69+
70+
size_t getSize() const override {
71+
return nop::Encoding<T>::Size(object_);
72+
}
73+
74+
nop::Status<void> write(nop::BufferWriter& writer) const override {
75+
return nop::Encoding<T>::Write(object_, &writer);
76+
}
77+
78+
nop::Status<void> read(nop::BufferReader& reader) override {
79+
return nop::Encoding<T>::Read(&object_, &reader);
80+
}
81+
82+
private:
83+
T object_;
84+
};
85+
86+
} // namespace tensorpipe

‎tensorpipe/proto/channel/cma.proto

-16
This file was deleted.

‎tensorpipe/proto/channel/xth.proto

-15
This file was deleted.

‎third_party/libnop

Submodule libnop added at aa95422

0 commit comments

Comments
 (0)
This repository has been archived.