diff --git a/deeplearning4j/deeplearning4j-core/src/test/java/org/deeplearning4j/gradientcheck/LossFunctionGradientCheck.java b/deeplearning4j/deeplearning4j-core/src/test/java/org/deeplearning4j/gradientcheck/LossFunctionGradientCheck.java
index 632a85e22..91264f51f 100644
--- a/deeplearning4j/deeplearning4j-core/src/test/java/org/deeplearning4j/gradientcheck/LossFunctionGradientCheck.java
+++ b/deeplearning4j/deeplearning4j-core/src/test/java/org/deeplearning4j/gradientcheck/LossFunctionGradientCheck.java
@@ -19,6 +19,8 @@ package org.deeplearning4j.gradientcheck;
import lombok.extern.slf4j.Slf4j;
import org.deeplearning4j.BaseDL4JTest;
import org.deeplearning4j.TestUtils;
+import org.deeplearning4j.gradientcheck.sdlosscustom.SDLossMAE;
+import org.deeplearning4j.gradientcheck.sdlosscustom.SDLossMSE;
import org.deeplearning4j.nn.api.OptimizationAlgorithm;
import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
@@ -83,7 +85,8 @@ public class LossFunctionGradientCheck extends BaseDL4JTest {
LossMixtureDensity.builder().gaussians(2).labelWidth(3).build(),
LossMixtureDensity.builder().gaussians(2).labelWidth(3).build(),
new LossMultiLabel(), new LossWasserstein(),
- new LossSparseMCXENT()
+ new LossSparseMCXENT(),
+ new SDLossMAE(), new SDLossMAE(), new SDLossMAE(), new SDLossMSE(), new SDLossMSE(), new SDLossMSE()
};
Activation[] outputActivationFn = new Activation[] {Activation.SIGMOID, //xent
@@ -119,6 +122,12 @@ public class LossFunctionGradientCheck extends BaseDL4JTest {
Activation.TANH, // MultiLabel, doesn't require any special activation, but tanh was used in paper
Activation.IDENTITY, // Wasserstein
Activation.SOFTMAX, //sparse MCXENT
+ Activation.SOFTMAX, // SDLossMAE
+ Activation.SIGMOID, // SDLossMAE
+ Activation.TANH, // SDLossMAE
+ Activation.SOFTMAX, // SDLossMSE
+ Activation.SIGMOID, // SDLossMSE
+ Activation.TANH //SDLossMSE
};
int[] nOut = new int[] {1, //xent
@@ -154,6 +163,12 @@ public class LossFunctionGradientCheck extends BaseDL4JTest {
10, // MultiLabel
2, // Wasserstein
4, //sparse MCXENT
+ 3, // SDLossMAE
+ 3, // SDLossMAE
+ 3, // SDLossMAE
+ 3, // SDLossMSE
+ 3, // SDLossMSE
+ 3, // SDLossMSE
};
int[] minibatchSizes = new int[] {1, 3};
@@ -520,6 +535,8 @@ public class LossFunctionGradientCheck extends BaseDL4JTest {
break;
case "LossMAE":
case "LossMSE":
+ case "SDLossMAE":
+ case "SDLossMSE":
case "LossL1":
case "LossL2":
ret[1] = Nd4j.rand(labelsShape).muli(2).subi(1);
diff --git a/deeplearning4j/deeplearning4j-core/src/test/java/org/deeplearning4j/gradientcheck/sdlosscustom/SDLossMAE.java b/deeplearning4j/deeplearning4j-core/src/test/java/org/deeplearning4j/gradientcheck/sdlosscustom/SDLossMAE.java
new file mode 100644
index 000000000..dbef14bf2
--- /dev/null
+++ b/deeplearning4j/deeplearning4j-core/src/test/java/org/deeplearning4j/gradientcheck/sdlosscustom/SDLossMAE.java
@@ -0,0 +1,30 @@
+/*******************************************************************************
+ * Copyright (c) 2020 Konduit K.K.
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Apache License, Version 2.0 which is available at
+ * https://www.apache.org/licenses/LICENSE-2.0.
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations
+ * under the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ ******************************************************************************/
+package org.deeplearning4j.gradientcheck.sdlosscustom;
+
+import lombok.EqualsAndHashCode;
+import org.nd4j.autodiff.samediff.SDVariable;
+import org.nd4j.autodiff.samediff.SameDiff;
+import org.nd4j.linalg.lossfunctions.SameDiffLoss;
+
+@EqualsAndHashCode(callSuper = false)
+public class SDLossMAE extends SameDiffLoss {
+
+ @Override
+ public SDVariable defineLoss(SameDiff sd, SDVariable layerInput, SDVariable labels) {
+ return sd.math.abs(labels.sub(layerInput)).mean(1);
+ }
+}
diff --git a/deeplearning4j/deeplearning4j-core/src/test/java/org/deeplearning4j/gradientcheck/sdlosscustom/SDLossMSE.java b/deeplearning4j/deeplearning4j-core/src/test/java/org/deeplearning4j/gradientcheck/sdlosscustom/SDLossMSE.java
new file mode 100644
index 000000000..6edce7a49
--- /dev/null
+++ b/deeplearning4j/deeplearning4j-core/src/test/java/org/deeplearning4j/gradientcheck/sdlosscustom/SDLossMSE.java
@@ -0,0 +1,30 @@
+/*******************************************************************************
+ * Copyright (c) 2020 Konduit K.K.
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Apache License, Version 2.0 which is available at
+ * https://www.apache.org/licenses/LICENSE-2.0.
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations
+ * under the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ ******************************************************************************/
+package org.deeplearning4j.gradientcheck.sdlosscustom;
+
+import lombok.EqualsAndHashCode;
+import org.nd4j.autodiff.samediff.SDVariable;
+import org.nd4j.autodiff.samediff.SameDiff;
+import org.nd4j.linalg.lossfunctions.SameDiffLoss;
+
+@EqualsAndHashCode(callSuper = false)
+public class SDLossMSE extends SameDiffLoss {
+
+ @Override
+ public SDVariable defineLoss(SameDiff sd, SDVariable layerInput, SDVariable labels) {
+ return labels.squaredDifference(layerInput).mean(1);
+ }
+}
diff --git a/deeplearning4j/deeplearning4j-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest100b6.java b/deeplearning4j/deeplearning4j-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest100b6.java
index 22ac01c14..e43e4ca74 100644
--- a/deeplearning4j/deeplearning4j-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest100b6.java
+++ b/deeplearning4j/deeplearning4j-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest100b6.java
@@ -111,7 +111,7 @@ public class RegressionTest100b6 extends BaseDL4JTest {
assertEquals(dtype, net.getLayerWiseConfigurations().getDataType());
assertEquals(dtype, net.params().dataType());
boolean eq = outExp.equalsWithEps(outAct, 0.01);
- assertTrue(outExp + " vs " + outAct, eq);
+ assertTrue("Test for dtype: " + dtypeName + " - " + outExp + " vs " + outAct, eq);
}
}
diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/BaseOp.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/BaseOp.java
index 30f3d0bf5..5c45ecf50 100644
--- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/BaseOp.java
+++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/BaseOp.java
@@ -28,6 +28,7 @@ import org.nd4j.base.Preconditions;
import org.nd4j.imports.NoOpNameFoundException;
import org.nd4j.linalg.api.buffer.DataBuffer;
import org.nd4j.linalg.api.buffer.DataType;
+import org.nd4j.linalg.api.memory.MemoryWorkspace;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.api.shape.Shape;
import org.nd4j.linalg.exception.ND4JIllegalStateException;
@@ -348,7 +349,9 @@ public abstract class BaseOp extends DifferentialFunction implements Op {
if (dimensions == null || dimensions.length == 0)
dimensions = new int[]{Integer.MAX_VALUE};
- this.dimensionz = Shape.ndArrayDimFromInt(dimensions);
+ try(MemoryWorkspace ws = Nd4j.getWorkspaceManager().scopeOutOfWorkspaces()) {
+ this.dimensionz = Shape.ndArrayDimFromInt(dimensions);
+ }
}
public INDArray dimensions() {
diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/lossfunctions/SameDiffLoss.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/lossfunctions/SameDiffLoss.java
new file mode 100644
index 000000000..2a3a05663
--- /dev/null
+++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/lossfunctions/SameDiffLoss.java
@@ -0,0 +1,186 @@
+/*******************************************************************************
+ * Copyright (c) 2020 Konduit K.K.
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Apache License, Version 2.0 which is available at
+ * https://www.apache.org/licenses/LICENSE-2.0.
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations
+ * under the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ ******************************************************************************/
+package org.nd4j.linalg.lossfunctions;
+
+import org.nd4j.autodiff.samediff.SDVariable;
+import org.nd4j.autodiff.samediff.SameDiff;
+import org.nd4j.base.Preconditions;
+import org.nd4j.linalg.activations.IActivation;
+import org.nd4j.linalg.api.buffer.DataType;
+import org.nd4j.linalg.api.ndarray.INDArray;
+import org.nd4j.linalg.primitives.Pair;
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * SameDiff loss function.
+ *
+ * This class can be extended to create Deeplearning4j loss functions by defining one single method only:
+ * {@link #defineLoss(SameDiff, SDVariable, SDVariable)}. This method is used to define the loss function on a
+ * per example basis - i.e., the output should be an array with shape [minibatch].
+ *
+ * For example, the mean squared error (MSE) loss function can be defined using:
+ * {@code return labels.squaredDifference(layerInput).mean(1);}
+ *
+ */
+public abstract class SameDiffLoss implements ILossFunction {
+ protected transient SameDiff sd;
+ protected transient SDVariable scoreVariable;
+
+ protected SameDiffLoss() {
+
+ }
+
+ /**
+ * Define the loss function.
+ * NOTE: The score on a *per example* basis - should return a SDVariable with shape [minibatch], where out[i]
+ * is the score for the ith minibatch
+ *
+ * @param sd SameDiff instance to define the loss on
+ * @param layerInput Input to the SameDiff loss function
+ * @param labels Labels placeholder
+ * @return The score on a per example basis (SDVariable with shape [minibatch])
+ */
+ public abstract SDVariable defineLoss(SameDiff sd, SDVariable layerInput, SDVariable labels);
+
+ protected void createSameDiffInstance(DataType dataType){
+ sd = SameDiff.create();
+ SDVariable layerInput = sd.placeHolder("layerInput", dataType, -1);
+ SDVariable labels = sd.placeHolder("labels", dataType, -1);
+ scoreVariable = this.defineLoss(sd, layerInput, labels);
+ sd.createGradFunction("layerInput");
+ }
+
+ /**
+ * Compute the score (loss function value) for the given inputs.
+ *
+ * @param labels Label/expected preOutput
+ * @param preOutput Output of the model (neural network)
+ * @param activationFn Activation function that should be applied to preOutput
+ * @param mask Mask array; may be null
+ * @param average Whether the score should be averaged (divided by number of rows in labels/preOutput) or not; the return value is the loss function value
+ */
+ @Override
+ public double computeScore(INDArray labels, INDArray preOutput, IActivation activationFn, INDArray mask, boolean average) {
+ if(sd == null){
+ createSameDiffInstance(preOutput.dataType());
+ }
+
+ INDArray scoreArr = computeScoreArray(labels, preOutput, activationFn, mask);
+
+ double score = scoreArr.sumNumber().doubleValue();
+ if (average) {
+ score /= scoreArr.size(0);
+ }
+ return score;
+ }
+
+
+ /**
+ * Compute the score (loss function value) for each example individually.
+ * For input [numExamples,nOut] returns scores as a column vector: [numExamples,1]
+ *
+ * @param labels Labels/expected output
+ * @param preOutput Output of the model (neural network)
+ * @param activationFn Activation function that should be applied to preOutput
+ * @param mask Mask array; may be null. The return value is the loss function value for each example, as a column vector
+ */
+ @Override
+ public INDArray computeScoreArray(INDArray labels, INDArray preOutput, IActivation activationFn, INDArray mask) {
+ if(sd == null){
+ createSameDiffInstance(preOutput.dataType());
+ }
+
+ Preconditions.checkArgument((labels.size(1) == preOutput.size(1)), "Labels array numColumns (size(1) = %s) does not match output layer number of outputs (nOut = %s)", labels.size(1), preOutput.size(1));
+
+ INDArray output = activationFn.getActivation(preOutput.dup(), true);
+
+ Map<String, INDArray> m = new HashMap<>();
+ m.put("labels", labels);
+ m.put("layerInput", output);
+
+ INDArray scoreArr = sd.outputSingle(m,scoreVariable.name());
+
+ if (mask != null) {
+ LossUtil.applyMask(scoreArr, mask);
+ }
+ return scoreArr;
+ }
+
+
+ /**
+ * Compute the gradient of the loss function with respect to the inputs: dL/dOutput
+ *
+ * @param labels Label/expected output
+ * @param preOutput Output of the model (neural network), before the activation function is applied
+ * @param activationFn Activation function that should be applied to preOutput
+ * @param mask Mask array; may be null
+ * @return Gradient dL/dPreOut
+ */
+ @Override
+ public INDArray computeGradient(INDArray labels, INDArray preOutput, IActivation activationFn, INDArray mask) {
+ if(sd == null){
+ createSameDiffInstance(preOutput.dataType());
+ }
+
+
+ Map<String, INDArray> m = new HashMap<>();
+ INDArray output = activationFn.getActivation(preOutput.dup(), true);
+ m.put("labels", labels);
+ m.put("layerInput", output);
+
+ Map<String, INDArray> grads = sd.calculateGradients(m, "layerInput");
+
+ INDArray gradAtActivationOutput = grads.get("layerInput");
+ INDArray gradAtInput = activationFn.backprop(preOutput.dup(), gradAtActivationOutput).getFirst();
+
+ if (mask != null) {
+ LossUtil.applyMask(gradAtInput, mask);
+ }
+ return gradAtInput;
+ }
+
+ /**
+ * Compute both the score (loss function value) and gradient. This is equivalent to calling {@link #computeScore(INDArray, INDArray, IActivation, INDArray, boolean)}
+ * and {@link #computeGradient(INDArray, INDArray, IActivation, INDArray)} individually
+ *
+ * @param labels Label/expected output
+ * @param preOutput Output of the model (neural network)
+ * @param activationFn Activation function that should be applied to preOutput
+ * @param mask Mask array; may be null
+ * @param average Whether the score should be averaged (divided by number of rows in labels/output) or not
+ * @return The score (loss function value) and gradient
+ */
+ @Override
+ public Pair<Double, INDArray> computeGradientAndScore(INDArray labels, INDArray preOutput, IActivation activationFn,
+ INDArray mask, boolean average) {
+
+ Pair<Double, INDArray> gradientAndScore = new Pair<>();
+ gradientAndScore.setFirst(this.computeScore(labels, preOutput, activationFn, mask, average));
+ gradientAndScore.setSecond(this.computeGradient(labels, preOutput, activationFn, mask));
+
+ return gradientAndScore;
+ }
+
+ @Override
+ public String name() {
+ return getClass().getSimpleName();
+ }
+}
+
+
+
+
diff --git a/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/workspace/BasicWorkspaceTests.java b/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/workspace/BasicWorkspaceTests.java
index e9b021955..0c43ff9ca 100644
--- a/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/workspace/BasicWorkspaceTests.java
+++ b/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/workspace/BasicWorkspaceTests.java
@@ -683,7 +683,7 @@ public class BasicWorkspaceTests extends BaseNd4jTest {
workspace.initializeWorkspace();
- long reqMemory = 12 * Nd4j.sizeOfDataType(arrayCold.dataType());
+ long reqMemory = 11 * Nd4j.sizeOfDataType(arrayCold.dataType());
assertEquals(reqMemory + reqMemory % 8, workspace.getCurrentSize());