/*!
 *
 *
 * \brief       Implements a model applying a convolution to an image
 *
 *
 * \author
 * \date        2017
 *
 *
 * \par Copyright 1995-2017 Shark Development Team
 *
 * This file is part of Shark.
 *
 * Shark is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Shark is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with Shark. If not, see <http://www.gnu.org/licenses/>.
 *
 */
#ifndef SHARK_MODELS_CONV2DModel_H
#define SHARK_MODELS_CONV2DModel_H

#include <shark/Core/Images/Enums.h>   // Padding, ImageFormat
#include <shark/Core/Images/Reorder.h> // image::reorder
#include <shark/Core/Shape.h>          // Shape
#include <shark/LinAlg/BLAS/blas.h>    // blas::kernels::conv2d and remora expressions
#include <shark/Models/AbstractModel.h>

namespace shark {

///
/// \brief Convolutional Model for 2D image data.
///
/// \par
/// This model computes the result of
/// \f$ y = f(x) = g(\text{convolution}(w, x) + b) \f$, where g is an arbitrary activation function and
/// convolution is the convolution of the input image x with the filters w. b is a vector with one entry
/// per filter, which is added to every response of that filter.
///
/// The image is allowed to have several channels, which are linearized to a single vector of size
/// width * height * numChannels. This is done by interleaving channels, i.e. for each pixel all channels
/// are stored contiguously. The pixels themselves are stored in a row-major scheme.
///
/// For handling edge conditions, the Conv2D model supports two different convolution modes:
///
/// Padding::Valid:
/// The output is only computed on patches which lie fully inside the unpadded image. It is returned as a
/// linearized vector in the same format, of size (width - filter_width + 1) * (height - filter_height + 1) * numFilters.
///
/// Padding::ZeroPad:
/// The input is padded with zeros, so the output has the same spatial size as the input,
/// i.e. width * height * numFilters.
template<class VectorType, class ActivationFunction>
class Conv2DModel : public AbstractModel<VectorType, VectorType, VectorType>{
private:
	typedef AbstractModel<VectorType, VectorType, VectorType> base_type;
	typedef Conv2DModel<VectorType, ActivationFunction> self_type;
	typedef typename VectorType::value_type value_type;
	typedef typename VectorType::device_type device_type;
	static_assert(
		!std::is_same<typename VectorType::storage_type::storage_tag, blas::sparse_tag>::value,
		"Conv2D not implemented for sparse inputs"
	);
public:
	typedef typename base_type::BatchOutputType BatchOutputType;
	typedef typename base_type::BatchInputType BatchInputType;
	typedef typename base_type::ParameterVectorType ParameterVectorType;

	/// Default Constructor; use setStructure later.
	Conv2DModel(){
		base_type::m_features |= base_type::HAS_FIRST_PARAMETER_DERIVATIVE;
		base_type::m_features |= base_type::HAS_FIRST_INPUT_DERIVATIVE;
	}
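	// Construction sketch (not from the original sources; the shapes are hypothetical
	// and RectifierNeuron stands in for any Shark activation functor):
	//
	//   Conv2DModel<RealVector, RectifierNeuron> model(
	//       {32, 32, 3},      // image shape: height x width x channels
	//       {16, 5, 5},       // filter shape: numFilters x filterHeight x filterWidth
	//       Padding::ZeroPad  // output keeps the 32x32 spatial size
	//   );
	//   // With Padding::Valid the output shape would be {28, 28, 16} instead.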
	///\brief Sets the structure by setting the dimensionalities of image and filters.
	///
	/// \arg imageShape Shape of the image imHeight x imWidth x channel
	/// \arg filterShape Shape of the filter matrix numFilters x fiHeight x fiWidth
	/// \arg type Type of convolution padding to perform
	Conv2DModel(
		Shape const& imageShape, Shape const& filterShape, Padding type = Padding::ZeroPad
	){
		base_type::m_features |= base_type::HAS_FIRST_PARAMETER_DERIVATIVE;
		base_type::m_features |= base_type::HAS_FIRST_INPUT_DERIVATIVE;
		setStructure(imageShape, filterShape, type);
	}

	std::string name() const
	{ return "Conv2DModel"; }

	///\brief Returns the expected shape of the input.
	Shape inputShape() const{
		return {m_imageHeight, m_imageWidth, m_numChannels};
	}
	///\brief Returns the shape of the output.
	Shape outputShape() const{
		if(m_type != Padding::Valid){
			return {m_imageHeight, m_imageWidth, m_numFilters};
		}else{
			return {m_imageHeight - m_filterHeight + 1, m_imageWidth - m_filterWidth + 1, m_numFilters};
		}
	}

	/// \brief Returns the activation function.
	ActivationFunction const& activationFunction()const{
		return m_activation;
	}
	/// \brief Returns the activation function.
	ActivationFunction& activationFunction(){
		return m_activation;
	}

	/// \brief Obtain the parameter vector.
	ParameterVectorType parameterVector() const{
		return m_filters | m_offset;
	}

	/// \brief Set the new parameters of the model.
	void setParameterVector(ParameterVectorType const& newParameters){
		SIZE_CHECK(newParameters.size() == numberOfParameters());
		noalias(m_filters) = subrange(newParameters, 0, m_filters.size());
		noalias(m_offset) = subrange(newParameters, m_filters.size(), newParameters.size());
		updateBackpropFilters();
	}

	/// \brief Return the number of parameters.
	size_t numberOfParameters() const{
		return m_filters.size() + m_offset.size();
	}

	///\brief Sets the structure by setting the shape of image and filters.
	///
	/// \arg imageShape Shape of the image imHeight x imWidth x channel
	/// \arg filterShape Shape of the filter matrix numFilters x fiHeight x fiWidth
	/// \arg type Type of convolution padding to perform
	void setStructure(
		Shape const& imageShape, Shape const& filterShape, Padding type = Padding::ZeroPad
	){
		m_type = type;
		m_imageHeight = imageShape[0];
		m_imageWidth = imageShape[1];
		m_numChannels = imageShape[2];
		m_numFilters = filterShape[0];
		m_filterHeight = filterShape[1];
		m_filterWidth = filterShape[2];
		m_filters.resize(m_filterHeight * m_filterWidth * m_numFilters * m_numChannels);
		m_offset.resize(m_numFilters);
		updateBackpropFilters();
	}

	boost::shared_ptr<State> createState()const{
		return boost::shared_ptr<State>(new typename ActivationFunction::State());
	}
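	// Evaluation sketch (hypothetical; reuses the example model above). Each row of the
	// batch matrix is one image, linearized in the interleaved-channel, row-major format
	// described in the class documentation:
	//
	//   RealMatrix batch(10, model.inputShape().numElements()); // 10 images of 32*32*3 values
	//   // ... fill batch ...
	//   RealMatrix responses = model(batch); // 10 rows of outputShape().numElements() values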
	using base_type::eval;

	/// Evaluate the model
	void eval(BatchInputType const& inputs, BatchOutputType& outputs, State& state)const{
		SIZE_CHECK(inputs.size2() == inputShape().numElements());
		outputs.resize(inputs.size1(), outputShape().numElements());
		outputs.clear();

		//geometry for "zero pad"
		std::size_t outputsForFilter = outputShape().numElements()/m_numFilters;
		std::size_t paddingHeight = (m_type != Padding::Valid) ? m_filterHeight - 1 : 0;
		std::size_t paddingWidth = (m_type != Padding::Valid) ? m_filterWidth - 1 : 0;

		blas::kernels::conv2d(inputs, m_filters, outputs,
			m_numChannels, m_numFilters,
			m_imageHeight, m_imageWidth,
			m_filterHeight, m_filterWidth,
			paddingHeight, paddingWidth
		);
		//reshape matrix for offset
		auto reshapedOutputs = to_matrix(to_vector(outputs), outputsForFilter * inputs.size1(), m_numFilters);
		noalias(reshapedOutputs) += blas::repeat(m_offset, outputsForFilter * inputs.size1());
		m_activation.evalInPlace(outputs, state.toState<typename ActivationFunction::State>());
	}
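	// Shape bookkeeping for the filter-gradient-as-convolution trick used below (a sketch
	// with the hypothetical 32x32x3 image / 16 filters of 5x5 / Padding::ZeroPad example):
	// the batch of coefficient images (n x 32 x 32 x 16 in NHWC) is reordered to CHWN and
	// convolved over the CHWN-reordered inputs with padding 5-1 = 4, giving spatial size
	// 32 + 4 - 32 + 1 = 5, i.e. exactly one response per filter weight: 3 x 5 x 5 x 16
	// values, which are then reordered back into the NHWC filter storage.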
	///\brief Calculates the first derivative w.r.t the parameters and sums it over all inputs of the last computed batch.
	void weightedParameterDerivative(
		BatchInputType const& inputs,
		BatchOutputType const& outputs,
		BatchOutputType const& coefficients,
		State const& state,
		ParameterVectorType& gradient
	)const{
		SIZE_CHECK(coefficients.size2() == outputShape().numElements());
		SIZE_CHECK(coefficients.size1() == inputs.size1());
		std::size_t n = inputs.size1();
		auto outputHeight = outputShape()[0];
		auto outputWidth = outputShape()[1];
		BatchOutputType delta = coefficients;
		m_activation.multiplyDerivative(outputs, delta, state.toState<typename ActivationFunction::State>());

		gradient.resize(numberOfParameters());
		auto weightGradient = subrange(gradient, 0, m_filters.size());
		auto offsetGradient = subrange(gradient, m_filters.size(), gradient.size());
		std::size_t paddingHeight = (m_type != Padding::Valid) ? m_filterHeight - 1 : 0;
		std::size_t paddingWidth = (m_type != Padding::Valid) ? m_filterWidth - 1 : 0;

		//derivatives of offset parameters
		//reshape coefficient matrix into a matrix where the rows are the single output pixels
		auto delta_pixels = to_matrix(to_vector(delta), coefficients.size1() * coefficients.size2()/m_numFilters, m_numFilters);
		noalias(offsetGradient) = sum_rows(delta_pixels);

		//derivative of filters:
		//the idea is to phrase this derivative in terms of another convolution.
		//for this we swap, for coefficients and inputs, the batch size with the number of channels,
		//i.e. we transform NHWC to CHWN.
		//afterwards the derivative is just convolving the coefficients with the inputs (padding the inputs as normal).
		//after convolving, the output has the filters as channels, therefore the derivative has to be reordered back
		//from CHWN to NHWC format.
		BatchOutputType delta_CHWN(m_numFilters, outputHeight * outputWidth * n);
		BatchOutputType inputs_CHWN(m_numChannels, inputShape().numElements() / m_numChannels * n);
		image::reorder(
			to_vector(delta), to_vector(delta_CHWN),
			{n, outputHeight, outputWidth, m_numFilters},
			ImageFormat::NHWC, ImageFormat::CHWN
		);
		image::reorder(
			to_vector(inputs), to_vector(inputs_CHWN),
			{n, m_imageHeight, m_imageWidth, m_numChannels},
			ImageFormat::NHWC, ImageFormat::CHWN
		);
		BatchInputType responses_CHWN(m_numChannels, m_filters.size() / m_numChannels);
		blas::kernels::conv2d(inputs_CHWN, to_vector(delta_CHWN), responses_CHWN,
			n, m_numFilters,
			m_imageHeight, m_imageWidth,
			outputHeight, outputWidth,
			paddingHeight, paddingWidth
		);
		image::reorder(
			to_vector(responses_CHWN), weightGradient,
			{m_numChannels, m_filterHeight, m_filterWidth, m_numFilters},
			ImageFormat::CHWN, ImageFormat::NHWC
		);
	}

	///\brief Calculates the first derivative w.r.t the inputs and sums it over all inputs of the last computed batch.
	void weightedInputDerivative(
		BatchInputType const& inputs,
		BatchOutputType const& outputs,
		BatchOutputType const& coefficients,
		State const& state,
		BatchInputType& derivatives
	)const{
		SIZE_CHECK(coefficients.size2() == outputShape().numElements());
		SIZE_CHECK(coefficients.size1() == inputs.size1());

		BatchOutputType delta = coefficients;
		m_activation.multiplyDerivative(outputs, delta, state.toState<typename ActivationFunction::State>());

		Shape shape = outputShape();
		std::size_t paddingHeight = m_filterHeight - 1;
		std::size_t paddingWidth = m_filterWidth - 1;
		if(m_type == Padding::Valid){
			paddingHeight *= 2;
			paddingWidth *= 2;
		}
		derivatives.resize(inputs.size1(), inputShape().numElements());
		derivatives.clear();
		blas::kernels::conv2d(delta, m_backpropFilters, derivatives,
			m_numFilters, m_numChannels,
			shape[0], shape[1],
			m_filterHeight, m_filterWidth,
			paddingHeight, paddingWidth
		);
	}
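	// Padding arithmetic for the input derivative above (a sketch using the hypothetical
	// 32x32 image / 5x5 filter example): with Padding::Valid the forward output is 28x28;
	// convolving it with the flipped filters using padding 2*(5-1) = 8 yields
	// 28 + 8 - 5 + 1 = 32, the original input size. With Padding::ZeroPad the output is
	// already 32x32 and padding 5-1 = 4 keeps 32 + 4 - 5 + 1 = 32.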
	/// From ISerializable
	void read(InArchive& archive){
		archive >> m_filters;
		archive >> m_offset;
		archive >> m_imageHeight;
		archive >> m_imageWidth;
		archive >> m_filterHeight;
		archive >> m_filterWidth;
		archive >> m_numChannels;
		archive >> m_numFilters;
		archive >> (int&) m_type;
		updateBackpropFilters();
	}
	/// From ISerializable
	void write(OutArchive& archive) const{
		archive << m_filters;
		archive << m_offset;
		archive << m_imageHeight;
		archive << m_imageWidth;
		archive << m_filterHeight;
		archive << m_filterWidth;
		archive << m_numChannels;
		archive << m_numFilters;
		archive << (int) m_type;
	}

private:
	///\brief Converts the filters into the backprop filters.
	///
	/// For computing the derivative w.r.t. the inputs in the chain rule, we
	/// have to convolve the outer derivative with the "transposed" filters.
	/// The transposition is done by switching the order of channels and filters
	/// in the storage and flipping each filter along both spatial dimensions.
	void updateBackpropFilters(){
		m_backpropFilters.resize(m_filters.size());

		std::size_t filterImSize = m_filterWidth * m_filterHeight;
		std::size_t filterSize = m_numChannels * m_filterWidth * m_filterHeight;
		std::size_t bpFilterSize = m_numFilters * m_filterWidth * m_filterHeight;

		//Note: this looks a bit funny, but this way on a gpu only m_numChannels kernels need to be run
		for(std::size_t c = 0; c != m_numChannels; ++c){
			auto channel_mat = subrange(
				to_matrix(m_filters, m_numFilters, filterSize), //create matrix where each row is one filter
				0, m_numFilters, c * filterImSize, (c+1) * filterImSize //cut out all columns belonging to the current channel
			);
			//Todo: commented out, because we also need to flip, which is not implemented in remora
			//~ auto channel_vec = to_vector(flip(channel_mat)); //flip and linearize to vector (flip not implemented)
			//~ //cut out target area and set values
			//~ noalias(subrange(m_backpropFilters, c * bpFilterSize, (c+1) * bpFilterSize)) = channel_vec;
			//instead use this cpu-only version
			auto target_vec = subrange(m_backpropFilters, c * bpFilterSize, (c+1) * bpFilterSize);
			auto target_mat = to_matrix(target_vec, m_numFilters, m_filterWidth * m_filterHeight);
			for(std::size_t f = 0; f != m_numFilters; ++f){
				for(std::size_t i = 0; i != m_filterWidth * m_filterHeight; ++i){
					target_mat(f,i) = channel_mat(f, m_filterWidth * m_filterHeight - i - 1);
				}
			}
		}
	}

	VectorType m_filters; ///< Filters used for performing the convolution
	VectorType m_backpropFilters; ///< Same as filters, just with the storage order of filters and channels reversed
	VectorType m_offset; ///< offset applied to each filter's response
	ActivationFunction m_activation; ///< The activation function to use

	std::size_t m_imageHeight;
	std::size_t m_imageWidth;
	std::size_t m_filterHeight;
	std::size_t m_filterWidth;
	std::size_t m_numChannels;
	std::size_t m_numFilters;
	Padding m_type;
};

}
#endif