Commit 8a67137

Separation and generalization of ChannelwiseAffineLayer into BiasLayer
and ScaleLayer. The behavior of ChannelwiseAffineLayer can be reproduced by a ScaleLayer with `scale_param { bias_term: true }`. BiasLayer and ScaleLayer each take 1 or 2 bottoms, with the output having the same shape as the first. The second input -- either another bottom or a learned parameter -- will have its axes (virtually) broadcast and tiled to have the same shape as the first, after which elementwise addition (Bias) or multiplication (Scale) is performed.
1 parent ec04197 commit 8a67137

13 files changed, +1702 −551 lines
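
To make the replacement concrete: the per-channel affine y = scale * x + bias that ChannelwiseAffineLayer computed is now expressed as a single Scale layer with a bias term. A minimal prototxt sketch; the layer and blob names are illustrative, and only `axis` and `bias_term` are taken from this commit:

layer {
  name: "affine"        # hypothetical name
  type: "Scale"
  bottom: "conv1"       # hypothetical input blob
  top: "conv1_affine"
  scale_param {
    axis: 1             # apply one scale/bias per channel
    bias_term: true     # also add a learned bias, as ChannelwiseAffine did
  }
}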

include/caffe/layers/bias_layer.hpp

+54
@@ -0,0 +1,54 @@
#ifndef CAFFE_BIAS_LAYER_HPP_
#define CAFFE_BIAS_LAYER_HPP_

#include <vector>

#include "caffe/blob.hpp"
#include "caffe/layer.hpp"
#include "caffe/proto/caffe.pb.h"

namespace caffe {

/**
 * @brief Computes a sum of two input Blobs, with the shape of the
 *        latter Blob "broadcast" to match the shape of the former.
 *        Equivalent to tiling the latter Blob, then computing the
 *        elementwise sum.
 *
 * The second input may be omitted, in which case it's learned as a parameter
 * of the layer.
 */
template <typename Dtype>
class BiasLayer : public Layer<Dtype> {
 public:
  explicit BiasLayer(const LayerParameter& param)
      : Layer<Dtype>(param) {}
  virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);

  virtual inline const char* type() const { return "Bias"; }
  virtual inline int MinBottomBlobs() const { return 1; }
  virtual inline int MaxBottomBlobs() const { return 2; }
  virtual inline int ExactNumTopBlobs() const { return 1; }

  virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
  virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);

 private:
  Blob<Dtype> bias_multiplier_;
  int outer_dim_, bias_dim_, inner_dim_, dim_;
};

}  // namespace caffe

#endif  // CAFFE_BIAS_LAYER_HPP_
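
The class above supports both modes described in the commit message: with two bottoms, the bias is read from bottom[1]; with one bottom, it is a learned parameter blob. A minimal sketch of the learned-parameter form, with hypothetical names; `axis`, `num_axes`, and `filler` are the BiasParameter fields used in bias_layer.cpp below:

layer {
  name: "add_bias"      # hypothetical name
  type: "Bias"
  bottom: "conv1"       # hypothetical input blob
  top: "conv1_bias"
  bias_param {
    axis: 1             # start broadcasting at the channel axis
    num_axes: 1         # learn one bias value per channel
    filler { type: "constant" value: 0 }  # illustrative initializer
  }
}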

include/caffe/layers/channelwise_affine_layer.hpp

-103
This file was deleted.

include/caffe/layers/scale_layer.hpp

+83
@@ -0,0 +1,83 @@
#ifndef CAFFE_SCALE_LAYER_HPP_
#define CAFFE_SCALE_LAYER_HPP_

#include <vector>

#include "caffe/blob.hpp"
#include "caffe/layer.hpp"
#include "caffe/proto/caffe.pb.h"

#include "caffe/layers/bias_layer.hpp"

namespace caffe {

/**
 * @brief Computes a product of two input Blobs, with the shape of the
 *        latter Blob "broadcast" to match the shape of the former.
 *        Equivalent to tiling the latter Blob, then computing the
 *        elementwise product.
 *
 * The second input may be omitted, in which case it's learned as a parameter
 * of the layer.
 */
template <typename Dtype>
class ScaleLayer: public Layer<Dtype> {
 public:
  explicit ScaleLayer(const LayerParameter& param)
      : Layer<Dtype>(param) {}
  virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);

  virtual inline const char* type() const { return "Scale"; }
  // Scale
  virtual inline int MinBottomBlobs() const { return 1; }
  virtual inline int MaxBottomBlobs() const { return 2; }
  virtual inline int ExactNumTopBlobs() const { return 1; }

 protected:
  /**
   * In the below shape specifications, @f$ i @f$ denotes the value of the
   * `axis` field given by `this->layer_param_.scale_param().axis()`, after
   * canonicalization (i.e., conversion from negative to positive index,
   * if applicable).
   *
   * @param bottom input Blob vector (length 2)
   *   -# @f$ (d_0 \times ... \times
   *           d_i \times ... \times d_j \times ... \times d_n) @f$
   *      the first factor @f$ x @f$
   *   -# @f$ (d_i \times ... \times d_j) @f$
   *      the second factor @f$ y @f$
   * @param top output Blob vector (length 1)
   *   -# @f$ (d_0 \times ... \times
   *           d_i \times ... \times d_j \times ... \times d_n) @f$
   *      the product @f$ z = x y @f$ computed after "broadcasting" y.
   *      Equivalent to tiling @f$ y @f$ to have the same shape as @f$ x @f$,
   *      then computing the elementwise product.
   */
  virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
  virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);

  shared_ptr<Layer<Dtype> > bias_layer_;
  vector<Blob<Dtype>*> bias_bottom_vec_;
  vector<bool> bias_propagate_down_;
  int bias_param_id_;

  Blob<Dtype> sum_multiplier_;
  Blob<Dtype> sum_result_;
  Blob<Dtype> temp_;
  int axis_;
  int outer_dim_, scale_dim_, inner_dim_;
};

}  // namespace caffe

#endif  // CAFFE_SCALE_LAYER_HPP_
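
Following the shape specification in the doc comment above, a two-bottom sketch in which the second factor is (virtually) tiled to match the first; blob names and shapes are illustrative:

# bottom[0] "x": shape 64 x 3 x 32 x 32   (d_0 x d_1 x d_2 x d_3)
# bottom[1] "y": shape 3 x 32             (d_1 x d_2, so axis = 1)
layer {
  name: "scale_xy"      # hypothetical name
  type: "Scale"
  bottom: "x"
  bottom: "y"
  top: "z"              # shape 64 x 3 x 32 x 32; z = x * tile(y)
  scale_param { axis: 1 }
}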

src/caffe/layers/bias_layer.cpp

+121
@@ -0,0 +1,121 @@
#include <vector>

#include "caffe/filler.hpp"
#include "caffe/layers/bias_layer.hpp"
#include "caffe/util/math_functions.hpp"

namespace caffe {

template <typename Dtype>
void BiasLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {
  if (bottom.size() == 1 && this->blobs_.size() > 0) {
    LOG(INFO) << "Skipping parameter initialization";
  } else if (bottom.size() == 1) {
    // bias is a learned parameter; initialize it
    const BiasParameter& param = this->layer_param_.bias_param();
    const int axis = bottom[0]->CanonicalAxisIndex(param.axis());
    const int num_axes = param.num_axes();
    CHECK_GE(num_axes, -1) << "num_axes must be non-negative, "
                           << "or -1 to extend to the end of bottom[0]";
    if (num_axes >= 0) {
      CHECK_GE(bottom[0]->num_axes(), axis + num_axes)
          << "bias blob's shape extends past bottom[0]'s shape when applied "
          << "starting with bottom[0] axis = " << axis;
    }
    this->blobs_.resize(1);
    const vector<int>::const_iterator& shape_start =
        bottom[0]->shape().begin() + axis;
    const vector<int>::const_iterator& shape_end =
        (num_axes == -1) ? bottom[0]->shape().end() : (shape_start + num_axes);
    vector<int> bias_shape(shape_start, shape_end);
    this->blobs_[0].reset(new Blob<Dtype>(bias_shape));
    shared_ptr<Filler<Dtype> > filler(GetFiller<Dtype>(param.filler()));
    filler->Fill(this->blobs_[0].get());
  }
  this->param_propagate_down_.resize(this->blobs_.size(), true);
}

template <typename Dtype>
void BiasLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {
  const BiasParameter& param = this->layer_param_.bias_param();
  Blob<Dtype>* bias = (bottom.size() > 1) ? bottom[1] : this->blobs_[0].get();
  // Always set axis == 0 in special case where bias is a scalar
  // (num_axes == 0). Mathematically equivalent for any choice of axis, so the
  // actual setting can be safely ignored; and computation is most efficient
  // with axis == 0 and (therefore) outer_dim_ == 1.
  const int axis = (bias->num_axes() == 0) ?
      0 : bottom[0]->CanonicalAxisIndex(param.axis());
  CHECK_GE(bottom[0]->num_axes(), axis + bias->num_axes())
      << "bias blob's shape extends past bottom[0]'s shape when applied "
      << "starting with bottom[0] axis = " << axis;
  for (int i = 0; i < bias->num_axes(); ++i) {
    CHECK_EQ(bottom[0]->shape(axis + i), bias->shape(i))
        << "dimension mismatch between bottom[0]->shape(" << axis + i
        << ") and bias->shape(" << i << ")";
  }
  outer_dim_ = bottom[0]->count(0, axis);
  bias_dim_ = bias->count();
  inner_dim_ = bottom[0]->count(axis + bias->num_axes());
  dim_ = bias_dim_ * inner_dim_;
  if (bottom[0] != top[0]) {
    top[0]->ReshapeLike(*bottom[0]);
  }
  bias_multiplier_.Reshape(vector<int>(1, inner_dim_));
  if (bias_multiplier_.cpu_data()[inner_dim_ - 1] != Dtype(1)) {
    caffe_set(inner_dim_, Dtype(1), bias_multiplier_.mutable_cpu_data());
  }
}

template <typename Dtype>
void BiasLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {
  const Dtype* bias_data =
      ((bottom.size() > 1) ? bottom[1] : this->blobs_[0].get())->cpu_data();
  Dtype* top_data = top[0]->mutable_cpu_data();
  if (bottom[0] != top[0]) {
    const Dtype* bottom_data = bottom[0]->cpu_data();
    caffe_copy(bottom[0]->count(), bottom_data, top_data);
  }
  for (int n = 0; n < outer_dim_; ++n) {
    caffe_cpu_gemm(CblasNoTrans, CblasNoTrans, bias_dim_,
        inner_dim_, 1, Dtype(1), bias_data,
        bias_multiplier_.cpu_data(), Dtype(1), top_data);
    top_data += dim_;
  }
}

template <typename Dtype>
void BiasLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  if (propagate_down[0] && bottom[0] != top[0]) {
    const Dtype* top_diff = top[0]->cpu_diff();
    Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();
    caffe_copy(bottom[0]->count(), top_diff, bottom_diff);
  }
  // in-place, we don't need to do anything with the data diff
  const bool bias_param = (bottom.size() == 1);
  if ((!bias_param && propagate_down[1]) ||
      (bias_param && this->param_propagate_down_[0])) {
    const Dtype* top_diff = top[0]->cpu_diff();
    Dtype* bias_diff = (bias_param ? this->blobs_[0].get() : bottom[1])
        ->mutable_cpu_diff();
    bool accum = bias_param;
    for (int n = 0; n < outer_dim_; ++n) {
      caffe_cpu_gemv(CblasNoTrans, bias_dim_, inner_dim_, Dtype(1),
          top_diff, bias_multiplier_.cpu_data(), Dtype(accum), bias_diff);
      top_diff += dim_;
      accum = true;
    }
  }
}

#ifdef CPU_ONLY
STUB_GPU(BiasLayer);
#endif

INSTANTIATE_CLASS(BiasLayer);
REGISTER_LAYER_CLASS(Bias);

}  // namespace caffe
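
Forward_cpu performs the broadcast with a rank-1 GEMM rather than an explicit tile: each of the outer_dim_ slices of top is treated as a bias_dim_ x inner_dim_ matrix, and adding bias_data (bias_dim_ x 1) times the ones vector bias_multiplier_ (1 x inner_dim_) spreads the bias across every inner position in one BLAS call. Note also that Forward and Backward skip their copies when bottom[0] == top[0], so the layer runs in place. A sketch of in-place use with a second bottom supplying the bias; blob names are illustrative:

layer {
  name: "bias_inplace"    # hypothetical name
  type: "Bias"
  bottom: "data"
  bottom: "channel_bias"  # second bottom: the bias to broadcast
  top: "data"             # same as bottom[0]: in-place, the copy is skipped
  bias_param { axis: 1 }
}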
