diff --git a/include/caffe/common_layers.hpp b/include/caffe/common_layers.hpp
index e6b42c14587..5d018e7389d 100644
--- a/include/caffe/common_layers.hpp
+++ b/include/caffe/common_layers.hpp
@@ -180,6 +180,69 @@ class EltwiseLayer : public Layer<Dtype> {
   bool stable_prod_grad_;
 };
 
+/**
+ * @brief Takes two or more Blobs, interprets the last Blob as a selector, and
+ * filters the remaining Blobs according to the selector data (0 means that
+ * the corresponding item is filtered out, non-zero means that the
+ * corresponding item is kept).
+ */
+template <typename Dtype>
+class FilterLayer : public Layer<Dtype> {
+ public:
+  explicit FilterLayer(const LayerParameter& param)
+      : Layer<Dtype>(param) {}
+  virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
+      const vector<Blob<Dtype>*>& top);
+  virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
+      const vector<Blob<Dtype>*>& top);
+
+  virtual inline const char* type() const { return "Filter"; }
+  virtual inline int MinBottomBlobs() const { return 2; }
+  virtual inline int MinTopBlobs() const { return 1; }
+
+ protected:
+  /**
+   * @param bottom input Blob vector (length 2+)
+   *   -# @f$ (N \times C \times H \times W) @f$
+   *      the inputs to be filtered @f$ x_1 @f$
+   *   -# ...
+   *   -# @f$ (N \times C \times H \times W) @f$
+   *      the inputs to be filtered @f$ x_K @f$
+   *   -# @f$ (N \times 1 \times 1 \times 1) @f$
+   *      the selector blob
+   * @param top output Blob vector (length 1+)
+   *   -# @f$ (S \times C \times H \times W) @f$
+   *      the filtered output @f$ x_1 @f$,
+   *      where S is the number of items
+   *      that haven't been filtered out
+   *   -# @f$ (S \times C \times H \times W) @f$
+   *      the filtered output @f$ x_K @f$,
+   *      where S is the number of items
+   *      that haven't been filtered out
+   */
+  virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
+      const vector<Blob<Dtype>*>& top);
+  virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
+      const vector<Blob<Dtype>*>& top);
+
+  /**
+   * @brief Computes the error gradient w.r.t. the forwarded inputs.
+   *
+   * @param top output Blob vector (length 1+), providing the error gradient
+   *        with respect to the outputs
+   * @param propagate_down see Layer::Backward.
+   * @param bottom input Blob vector (length 2+), into which the top error
+   *        gradient is copied
+   */
+  virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
+      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
+  virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
+      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
+
+  bool first_reshape_;
+  vector<int> indices_to_forward_;
+};
+
 /**
  * @brief Reshapes the input Blob into flat vectors.
  *
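For illustration only, not part of the patch: a self-contained sketch of the selector semantics declared above, with made-up names, shapes, and values. Items whose selector entry is non-zero are gathered, in order, into the output; the rest are dropped.

// Standalone illustration of the forward filtering rule. All names here are
// invented for the example; none of them come from the patch.
#include <cstdio>
#include <vector>

int main() {
  const int batch = 4, item_dim = 3;                 // 4 items, 3 values each
  std::vector<float> selector = {0, 1, 1, 0};        // like bottom[last]
  std::vector<float> input = {                       // like bottom[0], N x dim
      0, 1, 2,   10, 11, 12,   20, 21, 22,   30, 31, 32};

  // Equivalent of indices_to_forward_: indices of non-zero selector entries.
  std::vector<int> indices_to_forward;
  for (int n = 0; n < batch; ++n)
    if (selector[n] != 0) indices_to_forward.push_back(n);

  // Equivalent of Forward: copy only the selected items, preserving order.
  std::vector<float> output(indices_to_forward.size() * item_dim);
  for (size_t k = 0; k < indices_to_forward.size(); ++k)
    for (int d = 0; d < item_dim; ++d)
      output[k * item_dim + d] = input[indices_to_forward[k] * item_dim + d];

  // Prints items 1 and 2 of the input: "10 11 12" and "20 21 22".
  for (size_t k = 0; k < output.size(); ++k)
    std::printf("%g%c", output[k], (k + 1) % item_dim ? ' ' : '\n');
  return 0;
}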
diff --git a/src/caffe/layers/filter_layer.cpp b/src/caffe/layers/filter_layer.cpp
new file mode 100644
index 00000000000..d7fc59e0ebe
--- /dev/null
+++ b/src/caffe/layers/filter_layer.cpp
@@ -0,0 +1,128 @@
+#include <algorithm>
+#include <vector>
+
+#include "caffe/layer.hpp"
+#include "caffe/util/math_functions.hpp"
+#include "caffe/vision_layers.hpp"
+
+namespace caffe {
+
+template <typename Dtype>
+void FilterLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
+      const vector<Blob<Dtype>*>& top) {
+  CHECK_EQ(top.size(), bottom.size() - 1) <<
+      "top.size() should be equal to bottom.size() - 1";
+  first_reshape_ = true;
+}
+
+template <typename Dtype>
+void FilterLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,
+      const vector<Blob<Dtype>*>& top) {
+  // bottom[0...k-1] are the blobs to filter
+  // bottom[last] is the "selector_blob"
+  int selector_index = bottom.size() - 1;
+  for (int i = 1; i < bottom[selector_index]->num_axes(); ++i) {
+    CHECK_EQ(bottom[selector_index]->shape(i), 1)
+        << "Selector blob dimensions must be singletons (1), except the first";
+  }
+  for (int i = 0; i < bottom.size() - 1; i++) {
+    CHECK_EQ(bottom[selector_index]->shape(0), bottom[i]->shape(0)) <<
+        "Each bottom should have the same first dimension as the selector blob";
+  }
+
+  const Dtype* bottom_data_selector = bottom[selector_index]->cpu_data();
+  indices_to_forward_.clear();
+
+  // look for non-zero elements in the selector blob (bottom[last]); items of
+  // the other bottoms whose index matches a non-zero selector entry
+  // will be forwarded
+  for (int item_id = 0; item_id < bottom[selector_index]->shape(0); ++item_id) {
+    // we don't need an offset because item size == 1
+    const Dtype* tmp_data_selector = bottom_data_selector + item_id;
+    if (*tmp_data_selector) {
+      indices_to_forward_.push_back(item_id);
+    }
+  }
+  // only the selected items will be forwarded
+  int new_tops_num = indices_to_forward_.size();
+  // init
+  if (first_reshape_) {
+    new_tops_num = bottom[0]->shape(0);
+    first_reshape_ = false;
+  }
+  for (int t = 0; t < top.size(); t++) {
+    int num_axes = bottom[t]->num_axes();
+    vector<int> shape_top(num_axes);
+    shape_top[0] = new_tops_num;
+    for (int ts = 1; ts < num_axes; ts++)
+      shape_top[ts] = bottom[t]->shape(ts);
+    top[t]->Reshape(shape_top);
+  }
+}
+
+template <typename Dtype>
+void FilterLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
+      const vector<Blob<Dtype>*>& top) {
+  int new_tops_num = indices_to_forward_.size();
+  // forward all selected items for all bottoms but the selector (bottom[last])
+  for (int t = 0; t < top.size(); t++) {
+    const Dtype* bottom_data = bottom[t]->cpu_data();
+    Dtype* top_data = top[t]->mutable_cpu_data();
+    int dim = bottom[t]->count() / bottom[t]->shape(0);
+    for (int n = 0; n < new_tops_num; n++) {
+      int data_offset_top = top[t]->offset(n);
+      int data_offset_bottom = bottom[t]->offset(indices_to_forward_[n]);
+      caffe_copy(dim, bottom_data + data_offset_bottom,
+          top_data + data_offset_top);
+    }
+  }
+}
+
+template <typename Dtype>
+void FilterLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
+      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
+  if (propagate_down[bottom.size() - 1]) {
+    LOG(FATAL) << this->type()
+               << "Layer cannot backpropagate to filter index inputs";
+  }
+  for (int i = 0; i < top.size(); i++) {
+    // bottom[last] is the selector and never needs backpropagation, so we can
+    // iterate over the top vector because top.size() == bottom.size() - 1
+    if (propagate_down[i]) {
+      const int dim = top[i]->count() / top[i]->shape(0);
+      int next_to_backward_offset = 0;
+      int batch_offset = 0;
+      int data_offset_bottom = 0;
+      int data_offset_top = 0;
+      for (int n = 0; n < bottom[i]->shape(0); n++) {
+        data_offset_bottom = bottom[i]->offset(n);
+        if (next_to_backward_offset >= indices_to_forward_.size()) {
+          // we have already visited all the forwarded items, so
+          // just zero the gradient of the remaining ones
+          caffe_set(dim, Dtype(0),
+              bottom[i]->mutable_cpu_diff() + data_offset_bottom);
+        } else {
+          batch_offset = indices_to_forward_[next_to_backward_offset];
+          if (n != batch_offset) {  // this item was not forwarded
+            caffe_set(dim, Dtype(0),
+                bottom[i]->mutable_cpu_diff() + data_offset_bottom);
+          } else {  // this item was forwarded
+            data_offset_top = top[i]->offset(next_to_backward_offset);
+            next_to_backward_offset++;  // point to next forwarded item index
+            caffe_copy(dim, top[i]->mutable_cpu_diff() + data_offset_top,
+                bottom[i]->mutable_cpu_diff() + data_offset_bottom);
+          }
+        }
+      }
+    }
+  }
+}
+
+#ifdef CPU_ONLY
+STUB_GPU(FilterLayer);
+#endif
+
+INSTANTIATE_CLASS(FilterLayer);
+REGISTER_LAYER_CLASS(Filter);
+
+}  // namespace caffe
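For illustration only, not part of the patch: a self-contained sketch of the gradient routing performed by Backward_cpu above, again with made-up names and values. Forwarded items receive the matching top gradient; items that were filtered out receive zeros.

// Standalone illustration of the backward scatter. The index walk mirrors
// Backward_cpu: scan the bottom batch while advancing through the list of
// forwarded indices. All names and values are invented for the example.
#include <cstdio>
#include <vector>

int main() {
  const int batch = 4, item_dim = 2;
  std::vector<int> indices_to_forward = {1, 2};            // built in Reshape
  std::vector<float> top_diff = {0.5f, 0.6f, 0.7f, 0.8f};  // S x dim gradients
  std::vector<float> bottom_diff(batch * item_dim);

  size_t next = 0;
  for (int n = 0; n < batch; ++n) {
    if (next < indices_to_forward.size() && n == indices_to_forward[next]) {
      // this item was forwarded: copy its gradient from the top
      for (int d = 0; d < item_dim; ++d)
        bottom_diff[n * item_dim + d] = top_diff[next * item_dim + d];
      ++next;  // point to next forwarded item index
    } else {
      // this item was filtered out: its gradient is zero
      for (int d = 0; d < item_dim; ++d)
        bottom_diff[n * item_dim + d] = 0;
    }
  }

  // Expected output: 0 0 / 0.5 0.6 / 0.7 0.8 / 0 0
  for (int n = 0; n < batch; ++n)
    std::printf("%g %g\n",
        bottom_diff[n * item_dim], bottom_diff[n * item_dim + 1]);
  return 0;
}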
diff --git a/src/caffe/layers/filter_layer.cu b/src/caffe/layers/filter_layer.cu
new file mode 100644
index 00000000000..4a9e674de1a
--- /dev/null
+++ b/src/caffe/layers/filter_layer.cu
@@ -0,0 +1,70 @@
+#include <vector>
+
+#include "caffe/layer.hpp"
+#include "caffe/util/math_functions.hpp"
+#include "caffe/vision_layers.hpp"
+
+namespace caffe {
+
+template <typename Dtype>
+void FilterLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
+      const vector<Blob<Dtype>*>& top) {
+  int new_tops_num = indices_to_forward_.size();
+  // forward all selected items for all bottoms but the selector (bottom[last])
+  for (int t = 0; t < top.size(); t++) {
+    const Dtype* bottom_data = bottom[t]->gpu_data();
+    Dtype* top_data = top[t]->mutable_gpu_data();
+    int dim = bottom[t]->count() / bottom[t]->shape(0);
+    for (int n = 0; n < new_tops_num; n++) {
+      int data_offset_top = top[t]->offset(n);
+      int data_offset_bottom = bottom[t]->offset(indices_to_forward_[n]);
+      caffe_copy(dim, bottom_data + data_offset_bottom,
+          top_data + data_offset_top);
+    }
+  }
+}
+
+template <typename Dtype>
+void FilterLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
+      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
+  if (propagate_down[bottom.size() - 1]) {
+    LOG(FATAL) << this->type()
+               << "Layer cannot backpropagate to filter index inputs";
+  }
+  for (int i = 0; i < top.size(); i++) {
+    // bottom[last] is the selector and never needs backpropagation, so we can
+    // iterate over the top vector because top.size() == bottom.size() - 1
+    if (propagate_down[i]) {
+      const int dim = top[i]->count() / top[i]->shape(0);
+      int next_to_backward_offset = 0;
+      int batch_offset = 0;
+      int data_offset_bottom = 0;
+      int data_offset_top = 0;
+      for (int n = 0; n < bottom[i]->shape(0); n++) {
+        // offsets must be computed on the bottom blob: the top blob may have
+        // fewer items than the bottom one
+        data_offset_bottom = bottom[i]->offset(n);
+        if (next_to_backward_offset >= indices_to_forward_.size()) {
+          // we have already visited all the forwarded items, so
+          // just zero the gradient of the remaining ones
+          caffe_gpu_set(dim, Dtype(0),
+              bottom[i]->mutable_gpu_diff() + data_offset_bottom);
+        } else {
+          batch_offset = indices_to_forward_[next_to_backward_offset];
+          if (n != batch_offset) {  // this item was not forwarded
+            caffe_gpu_set(dim, Dtype(0),
+                bottom[i]->mutable_gpu_diff() + data_offset_bottom);
+          } else {  // this item was forwarded
+            data_offset_top = top[i]->offset(next_to_backward_offset);
+            next_to_backward_offset++;  // point to next forwarded item index
+            caffe_copy(dim, top[i]->mutable_gpu_diff() + data_offset_top,
+                bottom[i]->mutable_gpu_diff() + data_offset_bottom);
+          }
+        }
+      }
+    }
+  }
+}
+
+INSTANTIATE_LAYER_GPU_FUNCS(FilterLayer);
+
+}  // namespace caffe
diff --git a/src/caffe/test/test_filter_layer.cpp b/src/caffe/test/test_filter_layer.cpp
new file mode 100644
index 00000000000..c641b6ef6e8
--- /dev/null
+++ b/src/caffe/test/test_filter_layer.cpp
@@ -0,0 +1,128 @@
+#include <cstring>
+#include <limits>
+#include <vector>
+
+#include "gtest/gtest.h"
+
+#include "caffe/blob.hpp"
+#include "caffe/common.hpp"
+#include "caffe/filler.hpp"
+#include "caffe/vision_layers.hpp"
+
+#include "caffe/test/test_caffe_main.hpp"
+#include "caffe/test/test_gradient_check_util.hpp"
+
+namespace caffe {
+
+template <typename TypeParam>
+class FilterLayerTest : public MultiDeviceTest<TypeParam> {
+  typedef typename TypeParam::Dtype Dtype;
+
+ protected:
+  FilterLayerTest()
+      : blob_bottom_data_(new Blob<Dtype>(4, 3, 6, 4)),
+        blob_bottom_labels_(new Blob<Dtype>(4, 1, 1, 1)),
+        blob_bottom_selector_(new Blob<Dtype>(4, 1, 1, 1)),
+        blob_top_data_(new Blob<Dtype>()),
+        blob_top_labels_(new Blob<Dtype>()) {}
+  virtual void SetUp() {
+    // fill the values
+    Caffe::set_random_seed(1890);
+    FillerParameter filler_param;
+    GaussianFiller<Dtype> filler(filler_param);
+    // fill the selector blob
+    Dtype* bottom_data_selector_ = blob_bottom_selector_->mutable_cpu_data();
+    bottom_data_selector_[0] = 0;
+    bottom_data_selector_[1] = 1;
+    bottom_data_selector_[2] = 1;
+    bottom_data_selector_[3] = 0;
+    // fill the other bottom blobs
+    filler.Fill(blob_bottom_data_);
+    for (int i = 0; i < blob_bottom_labels_->count(); ++i) {
+      blob_bottom_labels_->mutable_cpu_data()[i] = caffe_rng_rand() % 5;
+    }
+    blob_bottom_vec_.push_back(blob_bottom_data_);
+    blob_bottom_vec_.push_back(blob_bottom_labels_);
+    blob_bottom_vec_.push_back(blob_bottom_selector_);
+    blob_top_vec_.push_back(blob_top_data_);
+    blob_top_vec_.push_back(blob_top_labels_);
+  }
+  virtual ~FilterLayerTest() {
+    delete blob_bottom_data_;
+    delete blob_bottom_labels_;
+    delete blob_bottom_selector_;
+    delete blob_top_data_;
+    delete blob_top_labels_;
+  }
+  Blob<Dtype>* const blob_bottom_data_;
+  Blob<Dtype>* const blob_bottom_labels_;
+  Blob<Dtype>* const blob_bottom_selector_;
+  // blobs for the top of FilterLayer
+  Blob<Dtype>* const blob_top_data_;
+  Blob<Dtype>* const blob_top_labels_;
+  vector<Blob<Dtype>*> blob_bottom_vec_;
+  vector<Blob<Dtype>*> blob_top_vec_;
+};
+
+TYPED_TEST_CASE(FilterLayerTest, TestDtypesAndDevices);
+
+TYPED_TEST(FilterLayerTest, TestReshape) {
+  typedef typename TypeParam::Dtype Dtype;
+  LayerParameter layer_param;
+  FilterLayer<Dtype> layer(layer_param);
+  layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_);
+  layer.Reshape(this->blob_bottom_vec_, this->blob_top_vec_);
+  // In this test the first and last items should have been filtered out,
+  // so we expect just 2 remaining items
+  EXPECT_EQ(this->blob_top_data_->shape(0), 2);
+  EXPECT_EQ(this->blob_top_labels_->shape(0), 2);
+  EXPECT_GT(this->blob_bottom_data_->shape(0),
+      this->blob_top_data_->shape(0));
+  EXPECT_GT(this->blob_bottom_labels_->shape(0),
+      this->blob_top_labels_->shape(0));
+  for (int i = 1; i < this->blob_bottom_labels_->num_axes(); i++) {
+    EXPECT_EQ(this->blob_bottom_labels_->shape(i),
+        this->blob_top_labels_->shape(i));
+  }
+}
+
+TYPED_TEST(FilterLayerTest, TestForward) {
+  typedef typename TypeParam::Dtype Dtype;
+  LayerParameter layer_param;
+  FilterLayer<Dtype> layer(layer_param);
+  layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_);
+  layer.Reshape(this->blob_bottom_vec_, this->blob_top_vec_);
+  layer.Forward(this->blob_bottom_vec_, this->blob_top_vec_);
+  EXPECT_EQ(this->blob_top_labels_->data_at(0, 0, 0, 0),
+      this->blob_bottom_labels_->data_at(1, 0, 0, 0));
+  EXPECT_EQ(this->blob_top_labels_->data_at(1, 0, 0, 0),
+      this->blob_bottom_labels_->data_at(2, 0, 0, 0));
+
+  int dim = this->blob_top_data_->count() /
+      this->blob_top_data_->shape(0);
+  const Dtype* top_data = this->blob_top_data_->cpu_data();
+  const Dtype* bottom_data = this->blob_bottom_data_->cpu_data();
+  // selector is 0 1 1 0, so we need to compare bottom(1,c,h,w)
+  // with top(0,c,h,w) and bottom(2,c,h,w) with top(1,c,h,w)
+  bottom_data += dim;  // bottom(1,c,h,w)
+  for (size_t n = 0; n < dim; n++)
+    EXPECT_EQ(top_data[n], bottom_data[n]);
+
+  bottom_data += dim;  // bottom(2,c,h,w)
+  top_data += dim;  // top(1,c,h,w)
+  for (size_t n = 0; n < dim; n++)
+    EXPECT_EQ(top_data[n], bottom_data[n]);
+}
+
+TYPED_TEST(FilterLayerTest, TestGradient) {
+  typedef typename TypeParam::Dtype Dtype;
+  LayerParameter layer_param;
+  FilterLayer<Dtype> layer(layer_param);
+  GradientChecker<Dtype> checker(1e-2, 1e-3);
+  // check only input 0 (data) because labels and selector
+  // don't need backpropagation
+  checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_,
+      this->blob_top_vec_, 0);
+}
+
+}  // namespace caffe
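For illustration only, not part of the patch: a minimal sketch of driving the layer directly from C++, mirroring what the tests above do. The standalone main(), shapes, and values are illustrative assumptions; it presumes a CPU build and the same headers the test already uses.

#include <vector>

#include "caffe/blob.hpp"
#include "caffe/common.hpp"
#include "caffe/vision_layers.hpp"

using namespace caffe;  // for brevity in this sketch

int main() {
  Caffe::set_mode(Caffe::CPU);

  // A batch of 4 data items plus a 4x1x1x1 selector keeping items 1 and 2.
  Blob<float> data(4, 3, 6, 4), selector(4, 1, 1, 1), filtered;
  float* sel = selector.mutable_cpu_data();
  sel[0] = 0; sel[1] = 1; sel[2] = 1; sel[3] = 0;

  std::vector<Blob<float>*> bottom, top;
  bottom.push_back(&data);
  bottom.push_back(&selector);  // the selector must be the last bottom
  top.push_back(&filtered);

  LayerParameter param;
  FilterLayer<float> layer(param);
  layer.SetUp(bottom, top);
  layer.Forward(bottom, top);
  // filtered.shape(0) is now 2: only the items with a non-zero selector entry.
  return 0;
}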