diff --git a/libnd4j/include/ops/declarable/generic/parity_ops/dynamic_stitch.cpp b/libnd4j/include/ops/declarable/generic/parity_ops/dynamic_stitch.cpp index 70310f643..e6913dc34 100644 --- a/libnd4j/include/ops/declarable/generic/parity_ops/dynamic_stitch.cpp +++ b/libnd4j/include/ops/declarable/generic/parity_ops/dynamic_stitch.cpp @@ -29,21 +29,26 @@ namespace ops { CUSTOM_OP_IMPL(dynamic_stitch, 2, 1, false, 0, 0) { int numOfData = block.width(); // int k = 0; + // checking input data size REQUIRE_TRUE(numOfData % 2 == 0, 0, "dynamic_stitch: The input params should contains" " both indeces and data lists with same length."); + // split input data list on two equal parts numOfData /= 2; + // form input lists to use with helpers - both indices and float data inputs auto output = OUTPUT_VARIABLE(0); std::vector inputs(numOfData); std::vector indices(numOfData); + for (int e = 0; e < numOfData; e++) { auto data = INPUT_VARIABLE(numOfData + e); auto index = INPUT_VARIABLE(e); + inputs[e] = data; indices[e] = index; } - + // run helper return helpers::dynamicStitchFunctor(block.launchContext(), inputs, indices, output); } @@ -59,17 +64,17 @@ namespace ops { numOfData /= 2; // only index part it's needed to review auto restShape = inputShape->at(numOfData); auto firstShape = inputShape->at(0); + // check up inputs to avoid non-int indices and calculate max value from indices to output shape length for(int i = 0; i < numOfData; i++) { auto input = INPUT_VARIABLE(i); REQUIRE_TRUE(input->isZ(), 0, "dynamic_stitch: Indices should be integer, but %d type given.", (int)input->dataType() ); - // FIXME: we have reduce::Max, cinsider using it instead auto maxV = input->reduceNumber(reduce::Max); if (maxV.e(0) > maxValue) maxValue = maxV.e(0); } - - int outRank = shape::rank(restShape) - shape::rank(firstShape) + 1; + // calculate output rank - difference between indices shape and data shape + int outRank = shape::rank(restShape) - shape::rank(firstShape) + 1; // at least 1D 
tensor std::vector outShape(outRank); - + // fill up output shape template: the first to max index, and rests - to vals from the first data input outShape[0] = maxValue + 1; for(int i = 1; i < outRank; ++i) outShape[i] = shape::sizeAt(restShape, i); diff --git a/libnd4j/include/ops/declarable/headers/BarnesHutTsne.h b/libnd4j/include/ops/declarable/headers/BarnesHutTsne.h index 89e4c385a..d3a4c042d 100644 --- a/libnd4j/include/ops/declarable/headers/BarnesHutTsne.h +++ b/libnd4j/include/ops/declarable/headers/BarnesHutTsne.h @@ -33,12 +33,13 @@ namespace nd4j { * 0: 1D row-vector (or with shape (1, m)) * 1: 1D integer vector with slice nums * 2: 1D float-point values vector with same shape as above + * 3: 2D float-point matrix with data to search * * Int args: * 0: N - number of slices * * Output: - * 0: 1D vector with edge forces for input and values + * 0: 2D matrix with the same shape and type as the 3rd argument */ #if NOT_EXCLUDED(OP_barnes_edge_forces) DECLARE_CUSTOM_OP(barnes_edge_forces, 4, 1, false, 0, 1); #endif @@ -52,9 +53,11 @@ namespace nd4j { * 0: 1D int row-vector * 1: 1D int col-vector * 2: 1D float vector with values - * + * * Output: - * 0: symmetric 2D matrix with given values on given places + * 0: 1D int result row-vector + * 1: 1D int result col-vector + * 2: a float-point tensor with shape 1xN, with values from the last input vector */ #if NOT_EXCLUDED(OP_barnes_symmetrized) DECLARE_CUSTOM_OP(barnes_symmetrized, 3, 3, false, 0, -1); diff --git a/libnd4j/include/ops/declarable/headers/list.h b/libnd4j/include/ops/declarable/headers/list.h index 01c2d225c..756895a1f 100644 --- a/libnd4j/include/ops/declarable/headers/list.h +++ b/libnd4j/include/ops/declarable/headers/list.h @@ -120,7 +120,7 @@ namespace nd4j { #endif /** - * This operation unstacks given NDArray into NDArrayList + * This operation unstacks given NDArray into NDArrayList by the first dimension */ #if NOT_EXCLUDED(OP_unstack_list) DECLARE_LIST_OP(unstack_list, 1, 1, 0, 0); diff --git 
a/libnd4j/include/ops/declarable/headers/parity_ops.h b/libnd4j/include/ops/declarable/headers/parity_ops.h index c86f28499..bb7f306bd 100644 --- a/libnd4j/include/ops/declarable/headers/parity_ops.h +++ b/libnd4j/include/ops/declarable/headers/parity_ops.h @@ -594,21 +594,46 @@ namespace nd4j { /** + * This operation rearranges data from depth into blocks of spatial data. This is the reverse transformation + * of space_to_depth op. This op output is a copy of the input tensor where values from the depth dimension + * are moved in spatial blocks to the height and width dimensions. Int attr 0 indicates the input + * block size and how the data is moved. + * Input: + * 0 - 4D tensor on given type + * Output: + * 0 - 4D tensor of given type and proper shape * - * - * + * Int arguments: + * 0 - block size + * 1 - output data format: 0 ("NHWC"): shape{ batch, height, width, channels } + * 1 ("NCHW"): shape{ batch, channels, height, width } + * 2 ("NCHW_VECT_C"): int8 shape{ batch, channels / 4, height, width, 4 } + * optional (default 0) */ #if NOT_EXCLUDED(OP_depth_to_space) - DECLARE_CUSTOM_OP(depth_to_space, 1, 1, false, 0, 2); + DECLARE_CUSTOM_OP(depth_to_space, 1, 1, false, 0, -1); #endif /** + * This operation rearranges blocks of spatial data, into depth.This op output is a copy of the input tensor + * where values from the height and width dimensions are moved to the depth dimension. Int attr 0 indicates + * the input block size. 
* + * Input: + * - 4D tensor of given type + * Output: + * - 4D tensor * + * Int arguments: + * 0 - block size + * 1 - output data format: 0 ("NHWC"): shape{ batch, height, width, channels } + * 1 ("NCHW"): shape{ batch, channels, height, width } + * 2 ("NCHW_VECT_C"): int8 shape{ batch, channels / 4, height, width, 4 } + * optional (default 0) * */ #if NOT_EXCLUDED(OP_space_to_depth) - DECLARE_CUSTOM_OP(space_to_depth, 1, 1, false, 0, 2); + DECLARE_CUSTOM_OP(space_to_depth, 1, 1, false, 0, -1); #endif /** @@ -622,13 +647,42 @@ namespace nd4j { #endif /** + * Zero-pads and then rearranges (permutes) blocks of spatial data into batch. More specifically, this op + * outputs a copy of the input tensor where values from the height and width dimensions are moved to the + * batch dimension. After the zero-padding, both height and width of the input must be divisible by the block + * size. * + * Inputs: + * 0 - input tensor + * 1 - 2D paddings tensor (shape {M, 2}) + * + * Output: + * - result tensor + * + * Int args: + * 0 - block size (M) * */ #if NOT_EXCLUDED(OP_space_to_batch) DECLARE_CUSTOM_OP(space_to_batch, 2, 1, false, 0, 1); #endif + /* + * This operation divides "spatial" dimensions [1, ..., M] of the input into a grid of blocks of shape + * block_shape, and interleaves these blocks with the "batch" dimension (0) such that in the output, + * the spatial dimensions [1, ..., M] correspond to the position within the grid, and the batch dimension + * combines both the position within a spatial block and the original batch position. Prior to division into + * blocks, the spatial dimensions of the input are optionally zero padded according to paddings. + * + * Inputs: + * 0 - input (N-D tensor) + * 1 - block_shape - int 1D tensor with M length + * 2 - paddings - int 2D tensor with shape {M, 2} + * + * Output: + * - N-D tensor with the same type as input 0. 
+ * + * */ #if NOT_EXCLUDED(OP_space_to_batch_nd) DECLARE_CUSTOM_OP(space_to_batch_nd, 3, 1, false, 0, 0); #endif @@ -973,7 +1027,7 @@ namespace nd4j { * return value: * tensor with min values according to indices sets. */ - #if NOT_EXCLUDED(OP_segment_min_bp) + #if NOT_EXCLUDED(OP_segment_min) DECLARE_CUSTOM_OP(segment_min, 2, 1, false, 0, 0); #endif #if NOT_EXCLUDED(OP_segment_min_bp) diff --git a/libnd4j/include/ops/declarable/helpers/cuda/dynamic.cu b/libnd4j/include/ops/declarable/helpers/cuda/dynamic.cu index 7d520478e..75b541b72 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/dynamic.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/dynamic.cu @@ -118,19 +118,19 @@ namespace nd4j { PointersManager pm(context, "dynamicPartition"); - if (sourceDimsLen) { + if (sourceDimsLen) { // non-linear case std::vector sourceDims(sourceDimsLen); for (int i = sourceDimsLen; i > 0; i--) sourceDims[sourceDimsLen - i] = input->rankOf() - i; - + //compute tad array for given dimensions auto packX = ConstantTadHelper::getInstance()->tadForDimensions(input->getShapeInfo(), sourceDims); std::vector outBuffers(outSize); std::vector tadShapes(outSize); std::vector tadOffsets(outSize); std::vector numTads(outSize); - + // fill up dimensions array for before kernel for (unsigned int i = 0; i < outSize; i++) { outputs[i].first = outputList[i]; std::vector outDims(outputs[i].first->rankOf() - 1); @@ -151,10 +151,10 @@ namespace nd4j { auto dOutBuffers = reinterpret_cast(pm.replicatePointer(outBuffers.data(), outBuffers.size() * sizeof(void *))); auto dOutTadShapes = reinterpret_cast(pm.replicatePointer(tadShapes.data(), tadShapes.size() * sizeof(Nd4jLong *))); auto dOutTadOffsets = reinterpret_cast(pm.replicatePointer(tadOffsets.data(), tadOffsets.size() * sizeof(Nd4jLong *))); - + // run kernel on device dynamicPartitionTadKernel<<<256, 256, 1024, *context->getCudaStream()>>>(input->getSpecialBuffer(), packX.platformShapeInfo(), packX.platformOffsets(), 
shape::length(packX.primaryShapeInfo()), indices->getSpecialBuffer(), indices->getSpecialShapeInfo(), indices->lengthOf(), dOutBuffers, dOutTadShapes, dOutTadOffsets, outSize); - } else { + } else { // linear case auto numThreads = 256; auto shmemSize = numThreads * sizeof(Y) * 2 + 1024; @@ -169,7 +169,6 @@ namespace nd4j { auto dOutBuffers = reinterpret_cast(pm.replicatePointer(outBuffers.data(), outBuffers.size() * sizeof(void *))); auto dOutShapes = reinterpret_cast(pm.replicatePointer(outShapes.data(), outShapes.size() * sizeof(Nd4jLong *))); - dynamicPartitionScalarKernel<<<256, numThreads, shmemSize, *context->getCudaStream()>>>(input->getSpecialBuffer(), input->getSpecialShapeInfo(), indices->getSpecialBuffer(), indices-> getSpecialShapeInfo(), dOutBuffers, dOutShapes, outSize); } diff --git a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests13.cpp b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests13.cpp index 87ac417be..2ef9e2309 100644 --- a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests13.cpp +++ b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests13.cpp @@ -544,8 +544,8 @@ TEST_F(DeclarableOpsTests13, adjustSaturation_1) { //////////////////////////////////////////////////////////////////// TEST_F(DeclarableOpsTests13, adjustSaturation_2) { - NDArray input('c', {2,2,3}, {0,100,56, 17,220,5, 150,97,230, 255,2,13}, nd4j::DataType::FLOAT32); - NDArray exp ('c', {2,2,3}, {0.,100.,56., 12.279087,220.,0., 91.654228,0.,230., 255.,0.,11.087015}, nd4j::DataType::FLOAT32); + NDArray input('c', {2,2,3}, {0,100,56, 17,220,5, 150,97,230, 255,2,13}, nd4j::DataType::DOUBLE); + NDArray exp ('c', {2,2,3}, {0.,100.,56., 12.279087,220.,0., 91.654228,0.,230., 255.,0.,11.087015}, nd4j::DataType::DOUBLE); nd4j::ops::adjust_saturation op; auto results = op.execute({&input}, {10}, {2}); @@ -553,7 +553,8 @@ TEST_F(DeclarableOpsTests13, adjustSaturation_2) { ASSERT_EQ(ND4J_STATUS_OK, results->status()); auto result = results->at(0); - // result->printIndexedBuffer(); +// 
result->printIndexedBuffer("Result2"); +// exp.printIndexedBuffer("Expect2"); ASSERT_TRUE(exp.isSameShape(result)); ASSERT_TRUE(exp.equalsTo(result));