
Commit ec04197

ducha-aiki authored and jeffdonahue committed
Add ChannelwiseAffine for batch norm
1 parent a7ac8bc commit ec04197

File tree

5 files changed: +554 -1 lines changed

@@ -0,0 +1,103 @@
#ifndef CAFFE_CHANNELWISE_AFFINE_LAYER_HPP_
#define CAFFE_CHANNELWISE_AFFINE_LAYER_HPP_

#include <vector>
#include "caffe/blob.hpp"
#include "caffe/layer.hpp"
#include "caffe/layers/neuron_layer.hpp"
#include "caffe/proto/caffe.pb.h"

namespace caffe {
/**
 * @brief Affine non-linearity function @f$
 *        y = ax + b
 *        @f$, which can be used after a batch normalization layer.
 */
template <typename Dtype>
class ChannelwiseAffineLayer : public NeuronLayer<Dtype> {
 public:
  /**
   * @param param provides ChannelwiseAffineParameter channelwise_affine_param,
   *     with ChannelwiseAffineLayer options:
   *   - slope_filler (\b optional, FillerParameter,
   *     default {'type': 'constant', 'value': 1.0001}).
   *   - bias_filler (\b optional, FillerParameter,
   *     default {'type': 'constant', 'value': 0.0001}).
   *   - channel_shared (\b optional, default false).
   *     Whether slopes and biases are shared across channels.
   */
  explicit ChannelwiseAffineLayer(const LayerParameter& param)
      : NeuronLayer<Dtype>(param) {}
  virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual inline const char* type() const { return "ChannelwiseAffine"; }

 protected:
  /**
   * @param bottom input Blob vector (length 1)
   *   -# @f$ (N \times C \times ...) @f$
   *      the inputs @f$ x @f$
   * @param top output Blob vector (length 1)
   *   -# @f$ (N \times C \times ...) @f$
   *      the computed outputs for each channel @f$ i @f$,
   *      @f$ y_i = a_i x_i + b_i @f$.
   */
  virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  /**
   * @brief Computes the error gradient w.r.t. the ChannelwiseAffine inputs.
   *
   * @param top output Blob vector (length 1), providing the error gradient
   *     with respect to the outputs
   *   -# @f$ (N \times C \times ...) @f$
   *      containing error gradients @f$ \frac{\partial E}{\partial y} @f$
   *      with respect to computed outputs @f$ y @f$
   * @param propagate_down see Layer::Backward.
   * @param bottom input Blob vector (length 1)
   *   -# @f$ (N \times C \times ...) @f$
   *      the inputs @f$ x @f$; for each channel @f$ i @f$, Backward fills
   *      their diff with gradients
   *      @f$ \frac{\partial E}{\partial x_i} =
   *          a_i \frac{\partial E}{\partial y_i} @f$.
   *      If param_propagate_down_[0] is true, it fills the slope diff with
   *      gradients
   *      @f$ \frac{\partial E}{\partial a_i} =
   *          \sum_{x_i} x_i \frac{\partial E}{\partial y_i} @f$.
   *      If param_propagate_down_[1] is true, it fills the bias diff with
   *      gradients
   *      @f$ \frac{\partial E}{\partial b_i} =
   *          \sum_{x_i} \frac{\partial E}{\partial y_i} @f$.
   */
  virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down,
      const vector<Blob<Dtype>*>& bottom);
  virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down,
      const vector<Blob<Dtype>*>& bottom);
  bool channel_shared_;
  Blob<Dtype> multiplier_;
  // dot multiplier for backward computation of params
  Blob<Dtype> bias_multiplier_;
  Blob<Dtype> backward_buff_;
  // temporary buffer for backward computation
  Blob<Dtype> bottom_memory_;
  // memory for in-place computation
};
}  // namespace caffe

#endif  // CAFFE_CHANNELWISE_AFFINE_LAYER_HPP_
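For context, a minimal sketch (not part of this commit) of how the new layer could be placed after a batch normalization layer in a network prototxt. The layer and blob names ("conv1", "bn1", "affine1") and the filler values are placeholders; the "ChannelwiseAffine" type string and the channelwise_affine_param fields come from the header above.

layer {
  name: "bn1"
  type: "BatchNorm"
  bottom: "conv1"
  top: "conv1"
}
layer {
  name: "affine1"
  type: "ChannelwiseAffine"
  bottom: "conv1"
  top: "conv1"
  channelwise_affine_param {
    channel_shared: false
    slope_filler { type: "constant" value: 1.0 }
    bias_filler { type: "constant" value: 0.0 }
  }
}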
@@ -0,0 +1,189 @@
#include <algorithm>
#include <vector>

#include "caffe/filler.hpp"
#include "caffe/layer.hpp"
#include "caffe/layers/channelwise_affine_layer.hpp"

namespace caffe {

template <typename Dtype>
void ChannelwiseAffineLayer<Dtype>::LayerSetUp(
    const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  CHECK_GE(bottom[0]->num_axes(), 2)
      << "Number of axes of bottom blob must be >=2.";
  ChannelwiseAffineParameter channelwise_affine_param =
      this->layer_param().channelwise_affine_param();
  int channels = bottom[0]->channels();
  channel_shared_ = channelwise_affine_param.channel_shared();
  if (this->blobs_.size() > 0) {
    LOG(INFO) << "Skipping parameter initialization";
  } else {
    this->blobs_.resize(2);
    if (channel_shared_) {
      this->blobs_[0].reset(new Blob<Dtype>(vector<int>(0)));
      this->blobs_[1].reset(new Blob<Dtype>(vector<int>(0)));
    } else {
      this->blobs_[0].reset(new Blob<Dtype>(vector<int>(1, channels)));
      this->blobs_[1].reset(new Blob<Dtype>(vector<int>(1, channels)));
    }
    shared_ptr<Filler<Dtype> > filler;
    if (channelwise_affine_param.has_slope_filler()) {
      filler.reset(GetFiller<Dtype>(channelwise_affine_param.slope_filler()));
    } else {
      FillerParameter filler_param;
      filler_param.set_type("constant");
      filler_param.set_value(1.0001);
      filler.reset(GetFiller<Dtype>(filler_param));
    }
    filler->Fill(this->blobs_[0].get());

    if (channelwise_affine_param.has_bias_filler()) {
      filler.reset(GetFiller<Dtype>(channelwise_affine_param.bias_filler()));
    } else {
      FillerParameter filler_param;
      filler_param.set_type("constant");
      filler_param.set_value(0.0001);
      filler.reset(GetFiller<Dtype>(filler_param));
    }
    filler->Fill(this->blobs_[1].get());
  }
  if (channel_shared_) {
    CHECK_EQ(this->blobs_[0]->count(), 1)
        << "Slope size is inconsistent with prototxt config";
  } else {
    CHECK_EQ(this->blobs_[0]->count(), channels)
        << "Slope size is inconsistent with prototxt config";
  }

  // Propagate gradients to the parameters (as directed by backward pass).
  this->param_propagate_down_.resize(this->blobs_.size(), true);
  multiplier_.Reshape(vector<int>(1, bottom[0]->count(1)));
  bias_multiplier_.Reshape(vector<int>(1, bottom[0]->count(1)));
  backward_buff_.Reshape(vector<int>(1, bottom[0]->count(1)));
  caffe_set(multiplier_.count(), Dtype(1.0),
      multiplier_.mutable_cpu_data());
  caffe_set(bias_multiplier_.count(), Dtype(1.0),
      bias_multiplier_.mutable_cpu_data());
}

template <typename Dtype>
void ChannelwiseAffineLayer<Dtype>::Reshape(
    const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  CHECK_GE(bottom[0]->num_axes(), 2)
      << "Number of axes of bottom blob must be >=2.";
  top[0]->ReshapeLike(*bottom[0]);
  if (bottom[0] == top[0]) {
    // For in-place computation
    bottom_memory_.ReshapeLike(*bottom[0]);
  }
  int height = 1;
  int width = 1;
  if (bottom[0]->num_axes() > 2) {
    height = bottom[0]->shape(2);
    width = bottom[0]->shape(3);
  }
  vector<int> bias_multiplier_shape(1, height * width);
  bias_multiplier_.Reshape(bias_multiplier_shape);
  caffe_set(bias_multiplier_.count(), Dtype(1),
      bias_multiplier_.mutable_cpu_data());
}

template <typename Dtype>
void ChannelwiseAffineLayer<Dtype>::Forward_cpu(
    const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  const Dtype* bottom_data = bottom[0]->cpu_data();
  Dtype* top_data = top[0]->mutable_cpu_data();
  const int count = bottom[0]->count();
  const int dim = bottom[0]->count(2);
  const int channels = bottom[0]->channels();
  const Dtype* slope_data = this->blobs_[0]->cpu_data();
  const Dtype* bias_data = this->blobs_[1]->cpu_data();
  // For in-place computation
  if (bottom[0] == top[0]) {
    caffe_copy(count, bottom_data, bottom_memory_.mutable_cpu_data());
  }
  // If channel_shared_, the channel index in the following computation is
  // always zero.
  const int div_factor = channel_shared_ ? channels : 1;
  for (int i = 0; i < count; ++i) {
    int c = (i / dim) % channels / div_factor;
    top_data[i] = bottom_data[i] * slope_data[c] + bias_data[c];
  }
}

template <typename Dtype>
void ChannelwiseAffineLayer<Dtype>::Backward_cpu(
    const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down,
    const vector<Blob<Dtype>*>& bottom) {
  const Dtype* bottom_data = bottom[0]->cpu_data();
  const Dtype* slope_data = this->blobs_[0]->cpu_data();

  const Dtype* top_diff = top[0]->cpu_diff();
  const int count = bottom[0]->count();
  const int dim = bottom[0]->count(2);
  const int channels = bottom[0]->shape(1);
  const int num = bottom[0]->shape(0);
  int height = 1;
  int width = 1;
  if (bottom[0]->num_axes() > 2) {
    height = bottom[0]->shape(2);
    width = bottom[0]->shape(3);
  }

  // For in-place computation
  if (top[0] == bottom[0]) {
    bottom_data = bottom_memory_.cpu_data();
  }

  // If channel_shared_, the channel index in the following computation is
  // always zero.
  const int div_factor = channel_shared_ ? channels : 1;

  // Propagate to params.
  // Since writing bottom diff will affect top diff if top and bottom blobs
  // are identical (in-place computation), we first compute the param backward
  // to keep top_diff unchanged.

  if (this->param_propagate_down_[1]) {
    Dtype* bias_diff = this->blobs_[1]->mutable_cpu_diff();
    caffe_set(this->blobs_[1]->count(), Dtype(0), bias_diff);
    for (int n = 0; n < num; ++n) {
      caffe_cpu_gemv<Dtype>(CblasNoTrans, channels, height * width, 1.,
          top_diff + top[0]->offset(n),
          bias_multiplier_.cpu_data(), 1., bias_diff);
    }
  }
  if (this->param_propagate_down_[0]) {
    Dtype* slope_diff = this->blobs_[0]->mutable_cpu_diff();
    caffe_set(this->blobs_[0]->count(), Dtype(0), slope_diff);
    for (int i = 0; i < count; ++i) {
      int c = (i / dim) % channels / div_factor;
      slope_diff[c] += top_diff[i] * bottom_data[i];
    }
  }

  // Propagate to bottom
  if (propagate_down[0]) {
    Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();
    for (int i = 0; i < count; ++i) {
      int c = (i / dim) % channels / div_factor;
      bottom_diff[i] = slope_data[c] * top_diff[i];
    }
  }
}


#ifdef CPU_ONLY
STUB_GPU(ChannelwiseAffineLayer);
#endif

INSTANTIATE_CLASS(ChannelwiseAffineLayer);
REGISTER_LAYER_CLASS(ChannelwiseAffine);

}  // namespace caffe
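As a standalone illustration of the per-channel indexing used in Forward_cpu and Backward_cpu above, here is a minimal sketch with no Caffe dependencies; the blob shape and the slope/bias values are made up for this example:

#include <cstdio>
#include <vector>

int main() {
  // Toy NCHW blob: N=1, C=2, H=2, W=2 (hypothetical sizes for illustration).
  const int num = 1, channels = 2, height = 2, width = 2;
  const int dim = height * width;          // bottom->count(2) in Caffe terms
  const int count = num * channels * dim;  // total number of elements
  const bool channel_shared = false;
  const int div_factor = channel_shared ? channels : 1;

  std::vector<float> bottom(count, 1.0f);     // all-ones input
  std::vector<float> slope = {2.0f, 3.0f};    // per-channel a_i
  std::vector<float> bias  = {0.5f, 0.25f};   // per-channel b_i
  std::vector<float> top(count);

  // Same element loop as ChannelwiseAffineLayer<Dtype>::Forward_cpu:
  // every element of channel c is scaled by slope[c] and shifted by bias[c].
  for (int i = 0; i < count; ++i) {
    int c = (i / dim) % channels / div_factor;
    top[i] = bottom[i] * slope[c] + bias[c];
  }

  // Prints 2.50 for the first four elements (channel 0)
  // and 3.25 for the last four (channel 1).
  for (int i = 0; i < count; ++i) {
    std::printf("top[%d] = %.2f\n", i, top[i]);
  }
  return 0;
}

With channel_shared set to true, div_factor equals channels, so every element maps to index 0 and a single slope/bias pair is applied to the whole blob, matching the comment in the layer code.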
