diff --git a/libnd4j/include/ops/declarable/generic/parity_ops/dynamic_stitch.cpp b/libnd4j/include/ops/declarable/generic/parity_ops/dynamic_stitch.cpp index 70310f643..e6913dc34 100644 --- a/libnd4j/include/ops/declarable/generic/parity_ops/dynamic_stitch.cpp +++ b/libnd4j/include/ops/declarable/generic/parity_ops/dynamic_stitch.cpp @@ -29,21 +29,26 @@ namespace ops { CUSTOM_OP_IMPL(dynamic_stitch, 2, 1, false, 0, 0) { int numOfData = block.width(); // int k = 0; + // checking input data size REQUIRE_TRUE(numOfData % 2 == 0, 0, "dynamic_stitch: The input params should contains" " both indeces and data lists with same length."); + // split input data list on two equal parts numOfData /= 2; + // form input lists to use with helpers - both indices and float data inputs auto output = OUTPUT_VARIABLE(0); std::vector inputs(numOfData); std::vector indices(numOfData); + for (int e = 0; e < numOfData; e++) { auto data = INPUT_VARIABLE(numOfData + e); auto index = INPUT_VARIABLE(e); + inputs[e] = data; indices[e] = index; } - + // run helper return helpers::dynamicStitchFunctor(block.launchContext(), inputs, indices, output); } @@ -59,17 +64,17 @@ namespace ops { numOfData /= 2; // only index part it's needed to review auto restShape = inputShape->at(numOfData); auto firstShape = inputShape->at(0); + // check up inputs to avoid non-int indices and calculate max value from indices to output shape length for(int i = 0; i < numOfData; i++) { auto input = INPUT_VARIABLE(i); REQUIRE_TRUE(input->isZ(), 0, "dynamic_stitch: Indices should be integer, but %d type given.", (int)input->dataType() ); - // FIXME: we have reduce::Max, cinsider using it instead auto maxV = input->reduceNumber(reduce::Max); if (maxV.e(0) > maxValue) maxValue = maxV.e(0); } - - int outRank = shape::rank(restShape) - shape::rank(firstShape) + 1; + // calculate output rank - difference between indices shape and data shape + int outRank = shape::rank(restShape) - shape::rank(firstShape) + 1; // at least 1D 
tensor std::vector outShape(outRank); - + // fill up output shape template: the first to max index, and rests - to vals from the first data input outShape[0] = maxValue + 1; for(int i = 1; i < outRank; ++i) outShape[i] = shape::sizeAt(restShape, i); diff --git a/libnd4j/include/ops/declarable/headers/BarnesHutTsne.h b/libnd4j/include/ops/declarable/headers/BarnesHutTsne.h index 89e4c385a..d3a4c042d 100644 --- a/libnd4j/include/ops/declarable/headers/BarnesHutTsne.h +++ b/libnd4j/include/ops/declarable/headers/BarnesHutTsne.h @@ -33,12 +33,13 @@ namespace nd4j { * 0: 1D row-vector (or with shape (1, m)) * 1: 1D integer vector with slice nums * 2: 1D float-point values vector with same shape as above + * 3: 2D float-point matrix with data to search * * Int args: * 0: N - number of slices * * Output: - * 0: 1D vector with edge forces for input and values + * 0: 2D matrix with the same shape and type as the 3rd argument */ #if NOT_EXCLUDED(OP_barnes_edge_forces) DECLARE_CUSTOM_OP(barnes_edge_forces, 4, 1, false, 0, 1); #endif @@ -52,9 +53,11 @@ namespace nd4j { * 0: 1D int row-vector * 1: 1D int col-vector * 2: 1D float vector with values - * + * * Output: - * 0: symmetric 2D matrix with given values on given places + * 0: 1D int result row-vector + * 1: 1D int result col-vector + * 2: a float-point tensor with shape 1xN, with values from the last input vector */ #if NOT_EXCLUDED(OP_barnes_symmetrized) DECLARE_CUSTOM_OP(barnes_symmetrized, 3, 3, false, 0, -1); diff --git a/libnd4j/include/ops/declarable/headers/list.h b/libnd4j/include/ops/declarable/headers/list.h index 01c2d225c..756895a1f 100644 --- a/libnd4j/include/ops/declarable/headers/list.h +++ b/libnd4j/include/ops/declarable/headers/list.h @@ -120,7 +120,7 @@ namespace nd4j { #endif /** - * This operation unstacks given NDArray into NDArrayList + * This operation unstacks given NDArray into NDArrayList by the first dimension */ #if NOT_EXCLUDED(OP_unstack_list) DECLARE_LIST_OP(unstack_list, 1, 1, 0, 0); diff --git 
a/libnd4j/include/ops/declarable/headers/parity_ops.h b/libnd4j/include/ops/declarable/headers/parity_ops.h index c86f28499..bb7f306bd 100644 --- a/libnd4j/include/ops/declarable/headers/parity_ops.h +++ b/libnd4j/include/ops/declarable/headers/parity_ops.h @@ -594,21 +594,46 @@ namespace nd4j { /** + * This operation rearranges data from depth into blocks of spatial data. This is the reverse transformation + * of space_to_depth op. This op output is a copy of the input tensor where values from the depth dimension + * are moved in spatial blocks to the height and width dimensions. Int attr 0 indicates the input + * block size and how the data is moved. + * Input: + * 0 - 4D tensor on given type + * Output: + * 0 - 4D tensor of given type and proper shape * - * - * + * Int arguments: + * 0 - block size + * 1 - output data format: 0 ("NHWC"): shape{ batch, height, width, channels } + * 1 ("NCHW"): shape{ batch, channels, height, width } + * 2 ("NCHW_VECT_C"): int8 shape{ batch, channels / 4, height, width, 4 } + * optional (default 0) */ #if NOT_EXCLUDED(OP_depth_to_space) - DECLARE_CUSTOM_OP(depth_to_space, 1, 1, false, 0, 2); + DECLARE_CUSTOM_OP(depth_to_space, 1, 1, false, 0, -1); #endif /** + * This operation rearranges blocks of spatial data, into depth.This op output is a copy of the input tensor + * where values from the height and width dimensions are moved to the depth dimension. Int attr 0 indicates + * the input block size. 
* + * Input: + * - 4D tensor of given type + * Output: + * - 4D tensor * + * Int arguments: + * 0 - block size + * 1 - output data format: 0 ("NHWC"): shape{ batch, height, width, channels } + * 1 ("NCHW"): shape{ batch, channels, height, width } + * 2 ("NCHW_VECT_C"): int8 shape{ batch, channels / 4, height, width, 4 } + * optional (default 0) * */ #if NOT_EXCLUDED(OP_space_to_depth) - DECLARE_CUSTOM_OP(space_to_depth, 1, 1, false, 0, 2); + DECLARE_CUSTOM_OP(space_to_depth, 1, 1, false, 0, -1); #endif /** @@ -622,13 +647,42 @@ namespace nd4j { #endif /** + * Zero-pads and then rearranges (permutes) blocks of spatial data into batch. More specifically, this op + * outputs a copy of the input tensor where values from the height and width dimensions are moved to the + * batch dimension. After the zero-padding, both height and width of the input must be divisible by the block + * size. * + * Inputs: + * 0 - input tensor + * 1 - 2D paddings tensor (shape {M, 2}) + * + * Output: + * - result tensor + * + * Int args: + * 0 - block size (M) * */ #if NOT_EXCLUDED(OP_space_to_batch) DECLARE_CUSTOM_OP(space_to_batch, 2, 1, false, 0, 1); #endif + /* + * This operation divides "spatial" dimensions [1, ..., M] of the input into a grid of blocks of shape + * block_shape, and interleaves these blocks with the "batch" dimension (0) such that in the output, + * the spatial dimensions [1, ..., M] correspond to the position within the grid, and the batch dimension + * combines both the position within a spatial block and the original batch position. Prior to division into + * blocks, the spatial dimensions of the input are optionally zero padded according to paddings. + * + * Inputs: + * 0 - input (N-D tensor) + * 1 - block_shape - int 1D tensor with M length + * 2 - paddings - int 2D tensor with shape {M, 2} + * + * Output: + * - N-D tensor with the same type as input 0. 
+ * + * */ #if NOT_EXCLUDED(OP_space_to_batch_nd) DECLARE_CUSTOM_OP(space_to_batch_nd, 3, 1, false, 0, 0); #endif @@ -973,7 +1027,7 @@ namespace nd4j { * return value: * tensor with min values according to indices sets. */ - #if NOT_EXCLUDED(OP_segment_min_bp) + #if NOT_EXCLUDED(OP_segment_min) DECLARE_CUSTOM_OP(segment_min, 2, 1, false, 0, 0); #endif #if NOT_EXCLUDED(OP_segment_min_bp) diff --git a/libnd4j/include/ops/declarable/helpers/cuda/dynamic.cu b/libnd4j/include/ops/declarable/helpers/cuda/dynamic.cu index 7d520478e..75b541b72 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/dynamic.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/dynamic.cu @@ -118,19 +118,19 @@ namespace nd4j { PointersManager pm(context, "dynamicPartition"); - if (sourceDimsLen) { + if (sourceDimsLen) { // non-linear case std::vector sourceDims(sourceDimsLen); for (int i = sourceDimsLen; i > 0; i--) sourceDims[sourceDimsLen - i] = input->rankOf() - i; - + //compute tad array for given dimensions auto packX = ConstantTadHelper::getInstance()->tadForDimensions(input->getShapeInfo(), sourceDims); std::vector outBuffers(outSize); std::vector tadShapes(outSize); std::vector tadOffsets(outSize); std::vector numTads(outSize); - + // fill up dimensions array for before kernel for (unsigned int i = 0; i < outSize; i++) { outputs[i].first = outputList[i]; std::vector outDims(outputs[i].first->rankOf() - 1); @@ -151,10 +151,10 @@ namespace nd4j { auto dOutBuffers = reinterpret_cast(pm.replicatePointer(outBuffers.data(), outBuffers.size() * sizeof(void *))); auto dOutTadShapes = reinterpret_cast(pm.replicatePointer(tadShapes.data(), tadShapes.size() * sizeof(Nd4jLong *))); auto dOutTadOffsets = reinterpret_cast(pm.replicatePointer(tadOffsets.data(), tadOffsets.size() * sizeof(Nd4jLong *))); - + // run kernel on device dynamicPartitionTadKernel<<<256, 256, 1024, *context->getCudaStream()>>>(input->getSpecialBuffer(), packX.platformShapeInfo(), packX.platformOffsets(), 
shape::length(packX.primaryShapeInfo()), indices->getSpecialBuffer(), indices->getSpecialShapeInfo(), indices->lengthOf(), dOutBuffers, dOutTadShapes, dOutTadOffsets, outSize); - } else { + } else { // linear case auto numThreads = 256; auto shmemSize = numThreads * sizeof(Y) * 2 + 1024; @@ -169,7 +169,6 @@ namespace nd4j { auto dOutBuffers = reinterpret_cast(pm.replicatePointer(outBuffers.data(), outBuffers.size() * sizeof(void *))); auto dOutShapes = reinterpret_cast(pm.replicatePointer(outShapes.data(), outShapes.size() * sizeof(Nd4jLong *))); - dynamicPartitionScalarKernel<<<256, numThreads, shmemSize, *context->getCudaStream()>>>(input->getSpecialBuffer(), input->getSpecialShapeInfo(), indices->getSpecialBuffer(), indices-> getSpecialShapeInfo(), dOutBuffers, dOutShapes, outSize); } diff --git a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests13.cpp b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests13.cpp index 87ac417be..2ef9e2309 100644 --- a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests13.cpp +++ b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests13.cpp @@ -544,8 +544,8 @@ TEST_F(DeclarableOpsTests13, adjustSaturation_1) { //////////////////////////////////////////////////////////////////// TEST_F(DeclarableOpsTests13, adjustSaturation_2) { - NDArray input('c', {2,2,3}, {0,100,56, 17,220,5, 150,97,230, 255,2,13}, nd4j::DataType::FLOAT32); - NDArray exp ('c', {2,2,3}, {0.,100.,56., 12.279087,220.,0., 91.654228,0.,230., 255.,0.,11.087015}, nd4j::DataType::FLOAT32); + NDArray input('c', {2,2,3}, {0,100,56, 17,220,5, 150,97,230, 255,2,13}, nd4j::DataType::DOUBLE); + NDArray exp ('c', {2,2,3}, {0.,100.,56., 12.279087,220.,0., 91.654228,0.,230., 255.,0.,11.087015}, nd4j::DataType::DOUBLE); nd4j::ops::adjust_saturation op; auto results = op.execute({&input}, {10}, {2}); @@ -553,7 +553,8 @@ TEST_F(DeclarableOpsTests13, adjustSaturation_2) { ASSERT_EQ(ND4J_STATUS_OK, results->status()); auto result = results->at(0); - // result->printIndexedBuffer(); +// 
result->printIndexedBuffer("Result2"); +// exp.printIndexedBuffer("Expect2"); ASSERT_TRUE(exp.isSameShape(result)); ASSERT_TRUE(exp.equalsTo(result));