diff --git a/arbiter/arbiter-deeplearning4j/src/test/java/org/deeplearning4j/arbiter/computationgraph/TestGraphLocalExecution.java b/arbiter/arbiter-deeplearning4j/src/test/java/org/deeplearning4j/arbiter/computationgraph/TestGraphLocalExecution.java
index 9d9db6261..c64a06040 100644
--- a/arbiter/arbiter-deeplearning4j/src/test/java/org/deeplearning4j/arbiter/computationgraph/TestGraphLocalExecution.java
+++ b/arbiter/arbiter-deeplearning4j/src/test/java/org/deeplearning4j/arbiter/computationgraph/TestGraphLocalExecution.java
@@ -305,7 +305,7 @@ public class TestGraphLocalExecution {
     @Test
     public void testLocalExecutionEarlyStopping() throws Exception {
         EarlyStoppingConfiguration<ComputationGraph> esConf = new EarlyStoppingConfiguration.Builder<ComputationGraph>()
-                        .epochTerminationConditions(new MaxEpochsTerminationCondition(6))
+                        .epochTerminationConditions(new MaxEpochsTerminationCondition(4))
                         .scoreCalculator(new ScoreProvider())
                         .modelSaver(new InMemoryModelSaver()).build();
         Map<String, Object> commands = new HashMap<>();
@@ -348,7 +348,7 @@ public class TestGraphLocalExecution {
                         .dataProvider(dataProvider)
                         .scoreFunction(ScoreFunctions.testSetF1())
                         .modelSaver(new FileModelSaver(modelSavePath))
-                        .terminationConditions(new MaxTimeCondition(30, TimeUnit.SECONDS),
+                        .terminationConditions(new MaxTimeCondition(45, TimeUnit.SECONDS),
                                 new MaxCandidatesCondition(10))
                         .build();
diff --git a/arbiter/arbiter-deeplearning4j/src/test/java/org/deeplearning4j/arbiter/util/TestDataFactoryProviderMnist.java b/arbiter/arbiter-deeplearning4j/src/test/java/org/deeplearning4j/arbiter/util/TestDataFactoryProviderMnist.java
index 1e652cdbe..4416dd8cf 100644
--- a/arbiter/arbiter-deeplearning4j/src/test/java/org/deeplearning4j/arbiter/util/TestDataFactoryProviderMnist.java
+++ b/arbiter/arbiter-deeplearning4j/src/test/java/org/deeplearning4j/arbiter/util/TestDataFactoryProviderMnist.java
@@ -32,7 +32,7 @@ public class TestDataFactoryProviderMnist implements DataSetIteratorFactory {
     private int terminationIter;
 
     public TestDataFactoryProviderMnist(){
-        this(16, 10);
+        this(16, 4);
     }
 
     @Override
diff --git a/deeplearning4j/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/recurrent/TestTimeDistributed.java b/deeplearning4j/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/recurrent/TestTimeDistributed.java
new file mode 100644
index 000000000..5c456e206
--- /dev/null
+++ b/deeplearning4j/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/recurrent/TestTimeDistributed.java
@@ -0,0 +1,88 @@
+package org.deeplearning4j.nn.layers.recurrent;
+
+import org.deeplearning4j.BaseDL4JTest;
+import org.deeplearning4j.TestUtils;
+import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
+import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
+import org.deeplearning4j.nn.conf.WorkspaceMode;
+import org.deeplearning4j.nn.conf.inputs.InputType;
+import org.deeplearning4j.nn.conf.layers.DenseLayer;
+import org.deeplearning4j.nn.conf.layers.LSTM;
+import org.deeplearning4j.nn.conf.layers.RnnOutputLayer;
+import org.deeplearning4j.nn.conf.layers.recurrent.TimeDistributed;
+import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
+import org.junit.Test;
+import org.nd4j.linalg.activations.Activation;
+import org.nd4j.linalg.api.buffer.DataType;
+import org.nd4j.linalg.api.ndarray.INDArray;
+import org.nd4j.linalg.dataset.DataSet;
+import org.nd4j.linalg.factory.Nd4j;
+import org.nd4j.linalg.learning.config.Adam;
+import org.nd4j.linalg.lossfunctions.LossFunctions;
+
+import static org.junit.Assert.assertEquals;
+
+public class TestTimeDistributed extends BaseDL4JTest {
+
+    @Test
+    public void testTimeDistributed(){
+        for(WorkspaceMode wsm : new WorkspaceMode[]{WorkspaceMode.ENABLED, WorkspaceMode.NONE}) {
+
+            MultiLayerConfiguration conf1 = new NeuralNetConfiguration.Builder()
+                    .trainingWorkspaceMode(wsm)
+                    .inferenceWorkspaceMode(wsm)
+                    .seed(12345)
+                    .updater(new Adam(0.1))
+                    .list()
+                    .layer(new LSTM.Builder().nIn(3).nOut(3).build())
+                    .layer(new DenseLayer.Builder().nIn(3).nOut(3).activation(Activation.TANH).build())
+                    .layer(new RnnOutputLayer.Builder().nIn(3).nOut(3).activation(Activation.SOFTMAX)
+                            .lossFunction(LossFunctions.LossFunction.MCXENT).build())
+                    .setInputType(InputType.recurrent(3))
+                    .build();
+
+            MultiLayerConfiguration conf2 = new NeuralNetConfiguration.Builder()
+                    .trainingWorkspaceMode(wsm)
+                    .inferenceWorkspaceMode(wsm)
+                    .seed(12345)
+                    .updater(new Adam(0.1))
+                    .list()
+                    .layer(new LSTM.Builder().nIn(3).nOut(3).build())
+                    .layer(new TimeDistributed(new DenseLayer.Builder().nIn(3).nOut(3).activation(Activation.TANH).build(), 2))
+                    .layer(new RnnOutputLayer.Builder().nIn(3).nOut(3).activation(Activation.SOFTMAX)
+                            .lossFunction(LossFunctions.LossFunction.MCXENT).build())
+                    .setInputType(InputType.recurrent(3))
+                    .build();
+
+            MultiLayerNetwork net1 = new MultiLayerNetwork(conf1);
+            MultiLayerNetwork net2 = new MultiLayerNetwork(conf2);
+            net1.init();
+            net2.init();
+
+            for( int mb : new int[]{1, 5}) {
+                for(char inLabelOrder : new char[]{'c', 'f'}) {
+                    INDArray in = Nd4j.rand(DataType.FLOAT, mb, 3, 5).dup(inLabelOrder);
+
+                    INDArray out1 = net1.output(in);
+                    INDArray out2 = net2.output(in);
+
+                    assertEquals(out1, out2);
+
+                    INDArray labels = TestUtils.randomOneHotTimeSeries(mb, 3, 5).dup(inLabelOrder);
+
+                    DataSet ds = new DataSet(in, labels);
+                    net1.fit(ds);
+                    net2.fit(ds);
+
+                    assertEquals(net1.params(), net2.params());
+
+                    MultiLayerNetwork net3 = TestUtils.testModelSerialization(net2);
+                    out2 = net2.output(in);
+                    INDArray out3 = net3.output(in);
+
+                    assertEquals(out2, out3);
+                }
+            }
+        }
+    }
+}
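The equivalence this test asserts comes down to a reshape: a feed-forward layer applied time-distributed over RNN activations of shape [mb, size, seqLen] sees exactly the same rows as that layer applied once to a merged [mb * seqLen, size] matrix. A minimal ND4J sketch of that round trip (shapes and axis order as in the test; class and variable names are illustrative, not part of the change):

```java
import org.nd4j.linalg.api.buffer.DataType;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;

public class TimeDistributedReshapeSketch {
    public static void main(String[] args) {
        //RNN activations: [minibatch=2, size=3, sequenceLength=5]
        INDArray act = Nd4j.rand(DataType.FLOAT, 2, 3, 5);

        //Merge the time axis (2) into the minibatch axis: [mb, size, seqLen] -> [mb*seqLen, size]
        INDArray ffInput = act.permute(0, 2, 1).dup('c').reshape(2 * 5, 3);

        //...a feed-forward layer would be applied row-wise here...

        //Split back out: [mb*seqLen, size] -> [mb, seqLen, size] -> [mb, size, seqLen]
        INDArray restored = ffInput.reshape(2, 5, 3).permute(0, 2, 1);
        System.out.println(act.equals(restored));   //true - the round trip is lossless
    }
}
```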
diff --git a/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/recurrent/TimeDistributed.java b/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/recurrent/TimeDistributed.java
new file mode 100644
index 000000000..bd9685ef9
--- /dev/null
+++ b/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/recurrent/TimeDistributed.java
@@ -0,0 +1,81 @@
+package org.deeplearning4j.nn.conf.layers.recurrent;
+
+import lombok.Data;
+import lombok.EqualsAndHashCode;
+import lombok.NonNull;
+import org.deeplearning4j.nn.conf.InputPreProcessor;
+import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
+import org.deeplearning4j.nn.conf.inputs.InputType;
+import org.deeplearning4j.nn.conf.layers.Layer;
+import org.deeplearning4j.nn.conf.layers.wrapper.BaseWrapperLayer;
+import org.deeplearning4j.nn.layers.recurrent.TimeDistributedLayer;
+import org.deeplearning4j.optimize.api.TrainingListener;
+import org.nd4j.linalg.api.buffer.DataType;
+import org.nd4j.linalg.api.ndarray.INDArray;
+import org.nd4j.shade.jackson.annotation.JsonProperty;
+
+import java.util.Collection;
+
+/**
+ * TimeDistributed wrapper layer.<br>
+ * Note: only the "Feed forward layer time distributed in an RNN" is currently supported.
+ * For example, a time distributed dense layer.<br>
+ * Usage: {@code .layer(new TimeDistributed(new DenseLayer.Builder()....build(), timeAxis))}<br>
+ * Note that for DL4J RNNs, the time axis is always 2 - i.e., RNN activations have shape [minibatch, size, sequenceLength]
+ *
+ * @author Alex Black
+ */
+@Data
+@EqualsAndHashCode(callSuper = true)
+public class TimeDistributed extends BaseWrapperLayer {
+
+    private final int timeAxis;
+
+    /**
+     * @param underlying Underlying (internal) layer - should be a feed forward type such as DenseLayer
+     * @param timeAxis   Time axis, should be 2 for DL4J RNN activations (shape [minibatch, size, sequenceLength])
+     */
+    public TimeDistributed(@JsonProperty("underlying") @NonNull Layer underlying, @JsonProperty("timeAxis") int timeAxis) {
+        super(underlying);
+        this.timeAxis = timeAxis;
+    }
+
+
+    @Override
+    public org.deeplearning4j.nn.api.Layer instantiate(NeuralNetConfiguration conf, Collection<TrainingListener> trainingListeners,
+                    int layerIndex, INDArray layerParamsView, boolean initializeParams, DataType networkDataType) {
+        NeuralNetConfiguration conf2 = conf.clone();
+        conf2.setLayer(((TimeDistributed) conf2.getLayer()).getUnderlying());
+        return new TimeDistributedLayer(underlying.instantiate(conf2, trainingListeners, layerIndex, layerParamsView,
+                initializeParams, networkDataType), timeAxis);
+    }
+
+    @Override
+    public InputType getOutputType(int layerIndex, InputType inputType) {
+        if (inputType.getType() != InputType.Type.RNN) {
+            throw new IllegalStateException("Only RNN input type is supported as input to TimeDistributed layer (layer #" + layerIndex + ")");
+        }
+
+        InputType.InputTypeRecurrent rnn = (InputType.InputTypeRecurrent) inputType;
+        InputType ff = InputType.feedForward(rnn.getSize());
+        InputType ffOut = underlying.getOutputType(layerIndex, ff);
+        return InputType.recurrent(ffOut.arrayElementsPerExample(), rnn.getTimeSeriesLength());
+    }
+
+    @Override
+    public void setNIn(InputType inputType, boolean override) {
+        if (inputType.getType() != InputType.Type.RNN) {
+            throw new IllegalStateException("Only RNN input type is supported as input to TimeDistributed layer");
+        }
+
+        InputType.InputTypeRecurrent rnn = (InputType.InputTypeRecurrent) inputType;
+        InputType ff = InputType.feedForward(rnn.getSize());
+        underlying.setNIn(ff, override);
+    }
+
+    @Override
+    public InputPreProcessor getPreProcessorForInputType(InputType inputType) {
+        //No preprocessor - the wrapper layer operates as the preprocessor
+        return null;
+    }
+}
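A usage sketch for the wrapper above, mirroring conf2 in the new test (imports as in TestTimeDistributed; layer sizes are illustrative): a dense layer applied time-distributed between an LSTM and an RNN output layer, with time axis 2 matching DL4J's [minibatch, size, sequenceLength] activations.

```java
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
        .list()
        .layer(new LSTM.Builder().nIn(3).nOut(3).build())
        //Applies the wrapped dense layer independently at each time step
        .layer(new TimeDistributed(new DenseLayer.Builder().nIn(3).nOut(3)
                .activation(Activation.TANH).build(), 2))
        .layer(new RnnOutputLayer.Builder().nIn(3).nOut(3).activation(Activation.SOFTMAX)
                .lossFunction(LossFunctions.LossFunction.MCXENT).build())
        .setInputType(InputType.recurrent(3))
        .build();
MultiLayerNetwork net = new MultiLayerNetwork(conf);
net.init();
```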
diff --git a/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/TimeDistributedLayer.java b/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/TimeDistributedLayer.java
new file mode 100644
index 000000000..874fb136f
--- /dev/null
+++ b/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/TimeDistributedLayer.java
@@ -0,0 +1,110 @@
+package org.deeplearning4j.nn.layers.recurrent;
+
+import org.deeplearning4j.nn.api.Layer;
+import org.deeplearning4j.nn.gradient.Gradient;
+import org.deeplearning4j.nn.layers.wrapper.BaseWrapperLayer;
+import org.deeplearning4j.nn.workspace.ArrayType;
+import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr;
+import org.nd4j.linalg.api.ndarray.INDArray;
+import org.nd4j.linalg.primitives.Pair;
+import org.nd4j.linalg.util.ArrayUtil;
+
+/**
+ * TimeDistributed wrapper layer.<br>
+ * Note: only the "Feed forward layer time distributed in an RNN" is currently supported.
+ * For example, a time distributed dense layer.<br>
+ * Usage: {@code .layer(new TimeDistributed(new DenseLayer.Builder()....build(), timeAxis))}<br>
+ * Note that for DL4J RNNs, the time axis is always 2 - i.e., RNN activations have shape [minibatch, size, sequenceLength]
+ *
+ * @author Alex Black
+ */
+public class TimeDistributedLayer extends BaseWrapperLayer {
+
+    private final int timeAxis;
+
+    public TimeDistributedLayer(Layer underlying, int timeAxis) {
+        super(underlying);
+        this.timeAxis = timeAxis;
+    }
+
+
+    @Override
+    public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {
+        INDArray reshapedEps = reshape(epsilon);
+        Pair<Gradient, INDArray> p = underlying.backpropGradient(reshapedEps, workspaceMgr);
+        INDArray reverted = revertReshape(p.getSecond(), epsilon.size(0));
+        reverted = workspaceMgr.dup(ArrayType.ACTIVATION_GRAD, reverted);
+        p.setSecond(reverted);
+        return p;
+    }
+
+    @Override
+    public INDArray activate(boolean training, LayerWorkspaceMgr workspaceMgr) {
+        return activate(input(), training, workspaceMgr);
+    }
+
+    @Override
+    public INDArray activate(INDArray input, boolean training, LayerWorkspaceMgr workspaceMgr) {
+        INDArray reshaped = reshape(input);
+        INDArray out = underlying.activate(reshaped, training, workspaceMgr);
+        INDArray ret = revertReshape(out, input.size(0));
+        return workspaceMgr.dup(ArrayType.ACTIVATIONS, ret);
+    }
+
+    protected INDArray reshape(INDArray array){
+        //Reshape the time axis to the minibatch axis
+        //For example, for RNN -> FF (dense time distributed): [mb, size, seqLen] -> [mb x seqLen, size]
+        int axis = timeAxis;
+        if(axis < 0)
+            axis += array.rank();
+
+        int[] permuteAxis = permuteAxes(array.rank(), axis);
+        INDArray permute = array.permute(permuteAxis);
+
+        long[] newShape = new long[array.rank()-1];
+        newShape[0] = array.size(0) * array.size(axis);
+        int j=1;
+        for( int i=1; i<array.rank(); i++ ){
+            if(axis == i)
+                continue;
+            newShape[j++] = array.size(i);
+        }
+
+        return permute.dup('c').reshape('c', newShape);
+    }
+
+    protected int[] permuteAxes(int rank, int timeAxis){
+        //Move the time axis to be directly after the minibatch axis, keeping the other axes in order
+        int[] permuteAxis = new int[rank];
+        permuteAxis[0] = 0;
+        permuteAxis[1] = timeAxis;
+        int j = 2;
+        for( int i=1; i<rank; i++ ){
+            if(timeAxis == i)
+                continue;
+            permuteAxis[j++] = i;
+        }
+        return permuteAxis;
+    }
+
+    protected INDArray revertReshape(INDArray toRevert, long minibatch){
+        //Revert: [mb x seqLen, size] -> [mb, seqLen, size] -> [mb, size, seqLen]
+        int axis = timeAxis;
+        if(axis < 0)
+            axis += (toRevert.rank() + 1);
+
+        long[] newShape = new long[toRevert.rank()+1];
+        newShape[0] = minibatch;
+        newShape[1] = toRevert.size(0) / minibatch;
+        for( int i=1; i<toRevert.rank(); i++ ){
+            newShape[i+1] = toRevert.size(i);
+        }
+
+        INDArray reshaped = toRevert.reshape(newShape);
+        int[] revert = ArrayUtil.invertPermutation(permuteAxes(toRevert.rank() + 1, axis));
+        return reshaped.permute(revert);
+    }
+}
diff --git a/.../pom.xml b/.../pom.xml
@@ ... @@
         <artifactId>deeplearning4j-ui</artifactId>
         <version>${deeplearning4j.version}</version>
         <scope>test</scope>
+        <exclusions>
+            <exclusion>
+                <groupId>net.jpountz.lz4</groupId>
+                <artifactId>lz4</artifactId>
+            </exclusion>
+        </exclusions>
     </dependency>
diff --git a/deeplearning4j/deeplearning4j-ui-parent/deeplearning4j-vertx/pom.xml b/deeplearning4j/deeplearning4j-ui-parent/deeplearning4j-vertx/pom.xml
index 8a621b40b..5e5ae75c1 100644
--- a/deeplearning4j/deeplearning4j-ui-parent/deeplearning4j-vertx/pom.xml
+++ b/deeplearning4j/deeplearning4j-ui-parent/deeplearning4j-vertx/pom.xml
@@ -434,4 +434,13 @@
         </dependency>
     </dependencies>
 
+
+    <profiles>
+        <profile>
+            <id>test-nd4j-native</id>
+        </profile>
+        <profile>
+            <id>test-nd4j-cuda-10.1</id>
+        </profile>
+    </profiles>
 </project>
\ No newline at end of file
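On the pom.xml changes above: the exclusion keeps net.jpountz.lz4:lz4 off the test classpath that the deeplearning4j-ui test dependency would otherwise pull in transitively, and the empty test-nd4j-native / test-nd4j-cuda-10.1 profiles mirror the backend test profiles declared by the other DL4J modules, presumably so that builds invoked with either profile treat the new deeplearning4j-vertx module consistently.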