diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 4e75d7bfe..0a25d9775 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -9,7 +9,7 @@ Deeplearning4j's [open issues are here](https://github.com/eclipse/deeplearning4 Note that you will need to [build dl4j from source](https://deeplearning4j.org/docs/latest/deeplearning4j-build-from-source) -For some tips on contributing to open source, this [post is helpful](http://blog.smartbear.com/programming/14-ways-to-contribute-to-open-source-without-being-a-programming-genius-or-a-rock-star/). +For some tips on contributing to open source, this [post is helpful](https://smartbear.com/blog/test-and-monitor/14-ways-to-contribute-to-open-source-without-being/). ## Contributions diff --git a/arbiter/arbiter-core/src/assembly/bin.xml b/arbiter/arbiter-core/src/assembly/bin.xml index cc6920b24..c99d6b144 100644 --- a/arbiter/arbiter-core/src/assembly/bin.xml +++ b/arbiter/arbiter-core/src/assembly/bin.xml @@ -61,7 +61,7 @@ examples diff --git a/datavec/datavec-api/pom.xml b/datavec/datavec-api/pom.xml index 022f2e38b..b3401b431 100644 --- a/datavec/datavec-api/pom.xml +++ b/datavec/datavec-api/pom.xml @@ -52,11 +52,6 @@ joda-time ${jodatime.version} - - org.yaml - snakeyaml - ${snakeyaml.version} - org.nd4j diff --git a/datavec/datavec-arrow/pom.xml b/datavec/datavec-arrow/pom.xml index 645971a45..6134bbf27 100644 --- a/datavec/datavec-arrow/pom.xml +++ b/datavec/datavec-arrow/pom.xml @@ -29,21 +29,11 @@ datavec-arrow - - org.nd4j - nd4j-arrow - ${project.version} - org.datavec datavec-api ${project.version} - - com.carrotsearch - hppc - ${hppc.version} - org.apache.arrow arrow-vector diff --git a/datavec/datavec-data/datavec-data-nlp/pom.xml b/datavec/datavec-data/datavec-data-nlp/pom.xml index 17ad11211..12df0fb08 100644 --- a/datavec/datavec-data/datavec-data-nlp/pom.xml +++ b/datavec/datavec-data/datavec-data-nlp/pom.xml @@ -44,26 +44,6 @@ datavec-api ${project.version} - - commons-logging - commons-logging - 
${commons-logging.version} - - - org.springframework - spring-core - ${spring.version} - - - org.springframework - spring-context - ${spring.version} - - - org.springframework - spring-beans - ${spring.version} - org.cleartk cleartk-snowball diff --git a/datavec/datavec-geo/pom.xml b/datavec/datavec-geo/pom.xml index 15c22ba3b..50e843555 100644 --- a/datavec/datavec-geo/pom.xml +++ b/datavec/datavec-geo/pom.xml @@ -31,36 +31,6 @@ datavec-api ${project.version} - - com.fasterxml.jackson.core - jackson-core - ${geo.jackson.version} - - - com.fasterxml.jackson.core - jackson-databind - ${geo.jackson.version} - - - com.fasterxml.jackson.core - jackson-annotations - ${geo.jackson.version} - - - com.fasterxml.jackson.dataformat - jackson-dataformat-yaml - ${geo.jackson.version} - - - com.fasterxml.jackson.dataformat - jackson-dataformat-xml - ${geo.jackson.version} - - - com.fasterxml.jackson.datatype - jackson-datatype-joda - ${geo.jackson.version} - com.maxmind.geoip2 geoip2 diff --git a/datavec/datavec-hadoop/pom.xml b/datavec/datavec-hadoop/pom.xml index c95e6d3bc..5ec6d4c3f 100644 --- a/datavec/datavec-hadoop/pom.xml +++ b/datavec/datavec-hadoop/pom.xml @@ -35,41 +35,11 @@ ${project.version} - - com.sun.xml.bind - jaxb-core - ${jaxb.version} - - - com.sun.xml.bind - jaxb-impl - ${jaxb.version} - io.netty netty ${netty.version} - - org.apache.commons - commons-compress - ${commons-compress.version} - - - org.apache.zookeeper - zookeeper - ${zookeeper.version} - - - log4j - log4j - - - org.slf4j - slf4j-log4j12 - - - org.apache.hadoop hadoop-common diff --git a/datavec/datavec-local/pom.xml b/datavec/datavec-local/pom.xml index f286eeb95..d2b15ffed 100644 --- a/datavec/datavec-local/pom.xml +++ b/datavec/datavec-local/pom.xml @@ -73,42 +73,7 @@ - - com.fasterxml.jackson.core - jackson-core - ${geo.jackson.version} - test - - - com.fasterxml.jackson.core - jackson-databind - ${geo.jackson.version} - test - - - com.fasterxml.jackson.core - jackson-annotations - 
${geo.jackson.version} - test - - - com.fasterxml.jackson.dataformat - jackson-dataformat-yaml - ${geo.jackson.version} - test - - - com.fasterxml.jackson.dataformat - jackson-dataformat-xml - ${geo.jackson.version} - test - - - com.fasterxml.jackson.datatype - jackson-datatype-joda - ${geo.jackson.version} - test - + org.datavec datavec-python diff --git a/datavec/datavec-perf/pom.xml b/datavec/datavec-perf/pom.xml index fb4eaaa89..95f3135e5 100644 --- a/datavec/datavec-perf/pom.xml +++ b/datavec/datavec-perf/pom.xml @@ -41,11 +41,6 @@ slf4j-api ${slf4j.version} - - com.github.oshi - oshi-core - ${oshi.version} - org.datavec datavec-data-image diff --git a/datavec/datavec-spark-inference-parent/datavec-spark-inference-client/pom.xml b/datavec/datavec-spark-inference-parent/datavec-spark-inference-client/pom.xml index 076c22ab9..95f13081f 100644 --- a/datavec/datavec-spark-inference-parent/datavec-spark-inference-client/pom.xml +++ b/datavec/datavec-spark-inference-parent/datavec-spark-inference-client/pom.xml @@ -41,26 +41,6 @@ 1.0.0-SNAPSHOT test - - commons-codec - commons-codec - ${commons-codec.version} - - - org.apache.httpcomponents - httpclient - ${httpclient.version} - - - org.apache.httpcomponents - httpcore - ${httpcore.version} - - - org.apache.httpcomponents - httpmime - ${httpmime.version} - com.mashape.unirest unirest-java diff --git a/datavec/datavec-spark-inference-parent/datavec-spark-inference-server/pom.xml b/datavec/datavec-spark-inference-parent/datavec-spark-inference-server/pom.xml index 8bef216a7..77eff8758 100644 --- a/datavec/datavec-spark-inference-parent/datavec-spark-inference-server/pom.xml +++ b/datavec/datavec-spark-inference-parent/datavec-spark-inference-server/pom.xml @@ -94,12 +94,6 @@ ${scala.version} - - org.yaml - snakeyaml - ${snakeyaml.version} - - com.typesafe.play play-java_2.11 diff --git a/datavec/datavec-spark/pom.xml b/datavec/datavec-spark/pom.xml index f7143c6ea..72f0b105f 100644 --- a/datavec/datavec-spark/pom.xml 
+++ b/datavec/datavec-spark/pom.xml @@ -39,11 +39,6 @@ scala-library ${scala.version} - - org.scala-lang - scala-reflect - ${scala.version} - org.apache.spark diff --git a/deeplearning4j/deeplearning4j-common/src/main/java/org/deeplearning4j/common/resources/DL4JResources.java b/deeplearning4j/deeplearning4j-common/src/main/java/org/deeplearning4j/common/resources/DL4JResources.java index a28ad375d..fab713e8e 100644 --- a/deeplearning4j/deeplearning4j-common/src/main/java/org/deeplearning4j/common/resources/DL4JResources.java +++ b/deeplearning4j/deeplearning4j-common/src/main/java/org/deeplearning4j/common/resources/DL4JResources.java @@ -64,7 +64,7 @@ public class DL4JResources { /** * Set the base download URL for (most) DL4J datasets and models.
* This usually doesn't need to be set manually unless there is some issue with the default location - * @param baseDownloadURL Base download URL to set. For example, http://blob.deeplearning4j.org/ + * @param baseDownloadURL Base download URL to set. For example, https://dl4jdata.blob.core.windows.net/ */ public static void setBaseDownloadURL(@NonNull String baseDownloadURL){ baseURL = baseDownloadURL; @@ -79,8 +79,8 @@ public class DL4JResources { /** * Get the URL relative to the base URL.
- * For example, if baseURL is "http://blob.deeplearning4j.org/", and relativeToBase is "/datasets/iris.dat" - * this simply returns "http://blob.deeplearning4j.org/datasets/iris.dat" + * For example, if baseURL is "https://dl4jdata.blob.core.windows.net/", and relativeToBase is "/datasets/iris.dat" + * this simply returns "https://dl4jdata.blob.core.windows.net/datasets/iris.dat" * * @param relativeToBase Relative URL * @return URL @@ -92,8 +92,8 @@ public class DL4JResources { /** * Get the URL relative to the base URL as a String.
- * For example, if baseURL is "http://blob.deeplearning4j.org/", and relativeToBase is "/datasets/iris.dat" - * this simply returns "http://blob.deeplearning4j.org/datasets/iris.dat" + * For example, if baseURL is "https://dl4jdata.blob.core.windows.net/", and relativeToBase is "/datasets/iris.dat" + * this simply returns "https://dl4jdata.blob.core.windows.net/datasets/iris.dat" * * @param relativeToBase Relative URL * @return URL diff --git a/deeplearning4j/deeplearning4j-core/src/test/java/org/deeplearning4j/LayerHelperValidationUtil.java b/deeplearning4j/deeplearning4j-core/src/test/java/org/deeplearning4j/LayerHelperValidationUtil.java index 59ef8c28e..e3923c4ff 100644 --- a/deeplearning4j/deeplearning4j-core/src/test/java/org/deeplearning4j/LayerHelperValidationUtil.java +++ b/deeplearning4j/deeplearning4j-core/src/test/java/org/deeplearning4j/LayerHelperValidationUtil.java @@ -35,6 +35,7 @@ import org.nd4j.linalg.indexing.conditions.Conditions; import org.nd4j.linalg.ops.transforms.Transforms; import java.lang.reflect.Field; +import java.lang.reflect.Method; import java.util.*; import static org.junit.Assert.*; @@ -63,6 +64,30 @@ public class LayerHelperValidationUtil { private DataSetIterator data; } + public static void disableCppHelpers(){ + try { + Class c = Class.forName("org.nd4j.nativeblas.Nd4jCpu$Environment"); + Method m = c.getMethod("getInstance"); + Object instance = m.invoke(null); + Method m2 = c.getMethod("allowHelpers", boolean.class); + m2.invoke(instance, false); + } catch (Throwable t){ + throw new RuntimeException(t); + } + } + + public static void enableCppHelpers(){ + try{ + Class c = Class.forName("org.nd4j.nativeblas.Nd4jCpu$Environment"); + Method m = c.getMethod("getInstance"); + Object instance = m.invoke(null); + Method m2 = c.getMethod("allowHelpers", boolean.class); + m2.invoke(instance, true); + } catch (Throwable t){ + throw new RuntimeException(t); + } + } + public static void validateMLN(MultiLayerNetwork netOrig, TestCase 
t){ assertNotNull(t.getAllowHelpersForClasses()); assertFalse(t.getAllowHelpersForClasses().isEmpty()); @@ -95,7 +120,13 @@ public class LayerHelperValidationUtil { for (boolean train : new boolean[]{false, true}) { assertEquals(net1NoHelper.params(), net2With.params()); String s = "Feed forward test - " + t.getTestName() + " - " + (train ? "Train: " : "Test: "); - List ff1 = net1NoHelper.feedForward(t.getFeatures(), train); + List ff1; + try { + disableCppHelpers(); + ff1 = net1NoHelper.feedForward(t.getFeatures(), train); + } finally { + enableCppHelpers(); + } List ff2 = net2With.feedForward(t.getFeatures(), train); List paramKeys = new ArrayList<>(net1NoHelper.paramTable().keySet()); Collections.sort(paramKeys); @@ -131,7 +162,13 @@ public class LayerHelperValidationUtil { log.info("Forward pass, max relative error: " + layerName + " - " + maxRE); } - INDArray out1 = net1NoHelper.output(t.getFeatures(), train); + INDArray out1; + try { + disableCppHelpers(); + out1 = net1NoHelper.output(t.getFeatures(), train); + } finally { + enableCppHelpers(); + } INDArray out2 = net2With.output(t.getFeatures(), train); INDArray relError = relError(out1, out2, t.getMinAbsError()); double maxRE = relError.maxNumber().doubleValue(); @@ -148,7 +185,13 @@ public class LayerHelperValidationUtil { Preconditions.checkNotNull(t.getLabels(), "Labels are not set (null)"); log.info("Validation - checking scores"); - double s1 = net1NoHelper.score(new DataSet(t.getFeatures(), t.getLabels())); + double s1; + try { + disableCppHelpers(); + s1 = net1NoHelper.score(new DataSet(t.getFeatures(), t.getLabels())); + } finally { + enableCppHelpers(); + } double s2 = net2With.score(new DataSet(t.getFeatures(), t.getLabels())); double re = relError(s1, s2); @@ -168,7 +211,12 @@ public class LayerHelperValidationUtil { net2With.setInput(t.getFeatures()); net2With.setLabels(t.getLabels()); - net1NoHelper.computeGradientAndScore(); + try { + disableCppHelpers(); + 
net1NoHelper.computeGradientAndScore(); + } finally { + enableCppHelpers(); + } net2With.computeGradientAndScore(); List paramKeys = new ArrayList<>(net1NoHelper.paramTable().keySet()); diff --git a/deeplearning4j/deeplearning4j-core/src/test/java/org/deeplearning4j/TestBatchNormBp.java b/deeplearning4j/deeplearning4j-core/src/test/java/org/deeplearning4j/TestBatchNormBp.java deleted file mode 100644 index f34ce65f0..000000000 --- a/deeplearning4j/deeplearning4j-core/src/test/java/org/deeplearning4j/TestBatchNormBp.java +++ /dev/null @@ -1,107 +0,0 @@ -package org.deeplearning4j; - -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; -import org.deeplearning4j.nn.conf.NeuralNetConfiguration; -import org.deeplearning4j.nn.conf.WorkspaceMode; -import org.deeplearning4j.nn.conf.layers.BatchNormalization; -import org.deeplearning4j.nn.gradient.Gradient; -import org.deeplearning4j.nn.layers.mkldnn.MKLDNNBatchNormHelper; -import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; -import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; -import org.junit.Test; -import org.nd4j.linalg.api.buffer.DataType; -import org.nd4j.linalg.api.ndarray.INDArray; -import org.nd4j.linalg.api.ops.DynamicCustomOp; -import org.nd4j.linalg.factory.Nd4j; -import org.nd4j.linalg.primitives.Pair; - -import java.lang.reflect.Field; - -import static junit.framework.TestCase.*; - -public class TestBatchNormBp { - - @Test - public void test(){ - Nd4j.getRandom().setSeed(12345); -// INDArray in = Nd4j.rand(DataType.FLOAT, 1, 3, 4, 4); - INDArray in = Nd4j.rand(DataType.FLOAT, 1, 3, 15, 15); - INDArray mean = in.mean(0, 2, 3); //Nd4j.rand(DataType.FLOAT, 3); - INDArray var = in.var(0, 2, 3); //Nd4j.rand(DataType.FLOAT, 3); - INDArray eps = Nd4j.rand(DataType.FLOAT, in.shape()); -// INDArray gamma = Nd4j.ones(DataType.FLOAT, 3); -// INDArray beta = Nd4j.zeros(DataType.FLOAT, 3); - INDArray gamma = Nd4j.rand(DataType.FLOAT, 3); - INDArray beta = Nd4j.rand(DataType.FLOAT, 3); - double e = 
1e-5; - - INDArray dLdIn = in.ulike(); - INDArray dLdm = mean.ulike(); - INDArray dLdv = var.ulike(); - INDArray dLdg = gamma.ulike(); - INDArray dLdb = beta.ulike(); - - DynamicCustomOp op = DynamicCustomOp.builder("batchnorm_bp") - .addInputs(in, mean, var, eps, gamma, beta) - .addIntegerArguments( - 1, //Apply scale - 1, //Apply beta - 1) //Axis (NCHW) - .addFloatingPointArguments(e) - .addOutputs(dLdIn, dLdm, dLdv, dLdg, dLdb) - .build(); - - Nd4j.exec(op); - System.out.println(dLdIn); - } - - @Test - public void compareImpls() throws Exception { - - Nd4j.getRandom().setSeed(12345); - INDArray in = Nd4j.rand(DataType.FLOAT, 1, 3, 15, 15); - INDArray mean = in.mean(0, 2, 3).reshape(1,3); - INDArray var = in.var(0, 2, 3).reshape(1,3); - INDArray eps = Nd4j.rand(DataType.FLOAT, in.shape()); - INDArray gamma = Nd4j.rand(DataType.FLOAT, 1,3); - INDArray beta = Nd4j.rand(DataType.FLOAT, 1,3); - double e = 1e-3; - - INDArray dLdIn = in.ulike(); - INDArray dLdm = mean.ulike(); - INDArray dLdv = var.ulike(); - INDArray dLdg = gamma.ulike(); - INDArray dLdb = beta.ulike(); - - - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() - .inferenceWorkspaceMode(WorkspaceMode.NONE) - .trainingWorkspaceMode(WorkspaceMode.NONE) - .list() - .layer(new BatchNormalization.Builder().nIn(3).nOut(3).build()) - .build(); - MultiLayerNetwork net = new MultiLayerNetwork(conf); - net.init(); - org.deeplearning4j.nn.layers.normalization.BatchNormalization bn = (org.deeplearning4j.nn.layers.normalization.BatchNormalization) net.getLayer(0); - assertNotNull(bn.getHelper()); - Field f = bn.getClass().getDeclaredField("helper"); - f.setAccessible(true); - f.set(bn, null); - assertNull(bn.getHelper()); - - - MKLDNNBatchNormHelper h = new MKLDNNBatchNormHelper(DataType.FLOAT); - - net.output(in, true); - bn.setInput(in, LayerWorkspaceMgr.noWorkspaces()); - Pair p = net.backpropGradient(eps, LayerWorkspaceMgr.noWorkspaces()); - - h.preOutput(in, true, new long[]{1,3}, gamma, beta, 
mean, var, 0.5, e, LayerWorkspaceMgr.noWorkspaces()); - Pair pmkl = h.backpropGradient(in, eps, new long[]{1,3}, gamma, beta, dLdg, dLdb, e, LayerWorkspaceMgr.noWorkspaces()); - - INDArray dldin_dl4j = p.getSecond(); - - System.out.println("dl4j == mkldnn: " + p.getSecond().equals(pmkl.getSecond())); - } - -} diff --git a/deeplearning4j/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/graph/TestComputationGraphNetwork.java b/deeplearning4j/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/graph/TestComputationGraphNetwork.java index daf657d0a..3e330d248 100644 --- a/deeplearning4j/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/graph/TestComputationGraphNetwork.java +++ b/deeplearning4j/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/graph/TestComputationGraphNetwork.java @@ -2143,4 +2143,23 @@ public class TestComputationGraphNetwork extends BaseDL4JTest { INDArray in = Nd4j.create(DataType.FLOAT, 1, 3, 16, 16, 16); INDArray out = cg.outputSingle(in); } + + @Test + public void testDualEmbedding(){ + ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder() + .graphBuilder() + .addInputs("in") + .addLayer("e1", new EmbeddingLayer.Builder().nIn(10).nOut(5).build(), "in") + .addLayer("e2", new EmbeddingLayer.Builder().nIn(10).nOut(5).build(), "in") + .addLayer("out", new OutputLayer.Builder().nIn(10).nOut(2).activation(Activation.SOFTMAX).lossFunction(LossFunctions.LossFunction.MCXENT).build(), "e1", "e2") + .setOutputs("out") + .build(); + + ComputationGraph cg = new ComputationGraph(conf); + cg.init(); + + INDArray in = Nd4j.createFromArray(3).reshape(1, 1); + INDArray label = Nd4j.createFromArray(1, 0).reshape(1, 2); + cg.fit(new DataSet(in, label)); + } } diff --git a/deeplearning4j/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/samediff/testlayers/MinimalSameDiffDense.java b/deeplearning4j/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/samediff/testlayers/MinimalSameDiffDense.java index 
1b8e7ded9..9cbbccaa7 100644 --- a/deeplearning4j/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/samediff/testlayers/MinimalSameDiffDense.java +++ b/deeplearning4j/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/samediff/testlayers/MinimalSameDiffDense.java @@ -70,8 +70,19 @@ public class MinimalSameDiffDense extends SameDiffLayer { @Override public void initializeParameters(Map params) { - params.get(DefaultParamInitializer.BIAS_KEY).assign(0); - initWeights(nIn, nOut, weightInit, params.get(DefaultParamInitializer.WEIGHT_KEY)); + String b = DefaultParamInitializer.BIAS_KEY; + if(paramWeightInit != null && paramWeightInit.containsKey(b)){ + paramWeightInit.get(b).init(nIn, nOut, params.get(b).shape(), 'c', params.get(b)); + } else { + params.get(DefaultParamInitializer.BIAS_KEY).assign(0); + } + + String w = DefaultParamInitializer.WEIGHT_KEY; + if(paramWeightInit != null && paramWeightInit.containsKey(w)){ + paramWeightInit.get(w).init(nIn, nOut, params.get(w).shape(), 'c', params.get(w)); + } else { + initWeights(nIn, nOut, weightInit, params.get(DefaultParamInitializer.WEIGHT_KEY)); + } } //OPTIONAL methods: diff --git a/deeplearning4j/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/samediff/testlayers/SameDiffConv.java b/deeplearning4j/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/samediff/testlayers/SameDiffConv.java index 778b95dc7..1be09182c 100644 --- a/deeplearning4j/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/samediff/testlayers/SameDiffConv.java +++ b/deeplearning4j/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/samediff/testlayers/SameDiffConv.java @@ -109,13 +109,17 @@ public class SameDiffConv extends SameDiffLayer { @Override public void initializeParameters(Map params) { try(MemoryWorkspace ws = Nd4j.getWorkspaceManager().scopeOutOfWorkspaces()) { + double fanIn = nIn * kernel[0] * kernel[1]; + double fanOut = nOut * kernel[0] * kernel[1] / ((double) 
stride[0] * stride[1]); for (Map.Entry e : params.entrySet()) { - if (ConvolutionParamInitializer.BIAS_KEY.equals(e.getKey())) { - e.getValue().assign(0); + if(paramWeightInit != null && paramWeightInit.containsKey(e.getKey())){ + paramWeightInit.get(e.getKey()).init(fanIn, fanOut, e.getValue().shape(), 'c', e.getValue()); } else { - double fanIn = nIn * kernel[0] * kernel[1]; - double fanOut = nOut * kernel[0] * kernel[1] / ((double) stride[0] * stride[1]); - WeightInitUtil.initWeights(fanIn, fanOut, e.getValue().shape(), weightInit, null, 'c', e.getValue()); + if (ConvolutionParamInitializer.BIAS_KEY.equals(e.getKey())) { + e.getValue().assign(0); + } else { + WeightInitUtil.initWeights(fanIn, fanOut, e.getValue().shape(), weightInit, null, 'c', e.getValue()); + } } } } diff --git a/deeplearning4j/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/samediff/testlayers/SameDiffDense.java b/deeplearning4j/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/samediff/testlayers/SameDiffDense.java index 3da6e8f1c..630b6059c 100644 --- a/deeplearning4j/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/samediff/testlayers/SameDiffDense.java +++ b/deeplearning4j/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/layers/samediff/testlayers/SameDiffDense.java @@ -88,11 +88,15 @@ public class SameDiffDense extends SameDiffLayer { @Override public void initializeParameters(Map params){ for(Map.Entry e : params.entrySet()){ - if(DefaultParamInitializer.BIAS_KEY.equals(e.getKey())){ - e.getValue().assign(0.0); + if(paramWeightInit != null && paramWeightInit.containsKey(e.getKey())){ + paramWeightInit.get(e.getKey()).init(nIn, nOut, e.getValue().shape(), 'c', e.getValue()); } else { - //Normally use 'c' order, but use 'f' for direct comparison to DL4J DenseLayer - WeightInitUtil.initWeights(nIn, nOut, new long[]{nIn, nOut}, weightInit, null, 'f', e.getValue()); + if(DefaultParamInitializer.BIAS_KEY.equals(e.getKey())){ + 
e.getValue().assign(0.0); + } else { + //Normally use 'c' order, but use 'f' for direct comparison to DL4J DenseLayer + WeightInitUtil.initWeights(nIn, nOut, new long[]{nIn, nOut}, weightInit, null, 'f', e.getValue()); + } } } } diff --git a/deeplearning4j/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/mkldnn/ValidateMKLDNN.java b/deeplearning4j/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/mkldnn/ValidateMKLDNN.java index 7e3ae6720..7013311ba 100644 --- a/deeplearning4j/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/mkldnn/ValidateMKLDNN.java +++ b/deeplearning4j/deeplearning4j-core/src/test/java/org/deeplearning4j/nn/mkldnn/ValidateMKLDNN.java @@ -50,6 +50,7 @@ import static org.junit.Assume.assumeTrue; public class ValidateMKLDNN extends BaseDL4JTest { + @Test public void validateConvSubsampling() throws Exception { //Only run test if using nd4j-native backend @@ -138,52 +139,55 @@ public class ValidateMKLDNN extends BaseDL4JTest { ConvolutionMode cm = ConvolutionMode.Truncate; for (int minibatch : new int[]{1, 3}) { + for (boolean b : new boolean[]{true, false}) { - inputSize[0] = minibatch; - INDArray f = Nd4j.rand(Nd4j.defaultFloatingPointType(), inputSize); - INDArray l = TestUtils.randomOneHot(minibatch, 10); + inputSize[0] = minibatch; + INDArray f = Nd4j.rand(Nd4j.defaultFloatingPointType(), inputSize); + INDArray l = TestUtils.randomOneHot(minibatch, 10); - MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() - .updater(new Adam(0.01)) - .convolutionMode(cm) - .seed(12345) - .list() - .layer(new ConvolutionLayer.Builder().activation(Activation.TANH) - .kernelSize(kernel) - .stride(stride) - .padding(0, 0) - .nOut(3) - .build()) - .layer(new BatchNormalization.Builder().helperAllowFallback(false)/*.eps(0)*/.build()) - .layer(new ConvolutionLayer.Builder().activation(Activation.TANH) - .kernelSize(kernel) - .stride(stride) - .padding(0, 0) - .nOut(3) - .build()) - .layer(new 
OutputLayer.Builder().nOut(10).activation(Activation.SOFTMAX).lossFunction(LossFunctions.LossFunction.MCXENT).build()) - .setInputType(InputType.convolutional(inputSize[2], inputSize[3], inputSize[1])) - .build(); + MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() + .dataType(DataType.FLOAT) + .updater(new Adam(0.01)) + .convolutionMode(cm) + .seed(12345) + .list() + .layer(new ConvolutionLayer.Builder().activation(Activation.TANH) + .kernelSize(kernel) + .stride(stride) + .padding(0, 0) + .nOut(3) + .build()) + .layer(new BatchNormalization.Builder().useLogStd(b).helperAllowFallback(false)/*.eps(0)*/.build()) + .layer(new ConvolutionLayer.Builder().activation(Activation.TANH) + .kernelSize(kernel) + .stride(stride) + .padding(0, 0) + .nOut(3) + .build()) + .layer(new OutputLayer.Builder().nOut(10).activation(Activation.SOFTMAX).lossFunction(LossFunctions.LossFunction.MCXENT).build()) + .setInputType(InputType.convolutional(inputSize[2], inputSize[3], inputSize[1])) + .build(); - MultiLayerNetwork netWith = new MultiLayerNetwork(conf.clone()); - netWith.init(); + MultiLayerNetwork netWith = new MultiLayerNetwork(conf.clone()); + netWith.init(); - MultiLayerNetwork netWithout = new MultiLayerNetwork(conf.clone()); - netWithout.init(); + MultiLayerNetwork netWithout = new MultiLayerNetwork(conf.clone()); + netWithout.init(); - LayerHelperValidationUtil.TestCase tc = LayerHelperValidationUtil.TestCase.builder() - .allowHelpersForClasses(Collections.>singletonList(org.deeplearning4j.nn.layers.normalization.BatchNormalization.class)) - .testForward(true) - .testScore(true) - .testBackward(true) - .testTraining(true) - .features(f) - .labels(l) - .data(new SingletonDataSetIterator(new DataSet(f, l))) - .maxRelError(1e-4) - .build(); + LayerHelperValidationUtil.TestCase tc = LayerHelperValidationUtil.TestCase.builder() + .allowHelpersForClasses(Collections.>singletonList(org.deeplearning4j.nn.layers.normalization.BatchNormalization.class)) + 
.testForward(true) + .testScore(true) + .testBackward(true) + .testTraining(true) + .features(f) + .labels(l) + .data(new SingletonDataSetIterator(new DataSet(f, l))) + .maxRelError(1e-4) + .build(); - LayerHelperValidationUtil.validateMLN(netWith, tc); + LayerHelperValidationUtil.validateMLN(netWith, tc); + } } } @@ -265,6 +269,7 @@ public class ValidateMKLDNN extends BaseDL4JTest { @Test public void compareBatchNormBackward() throws Exception { + assumeTrue(Nd4j.getBackend().getClass().getName().toLowerCase().contains("native")); Nd4j.getRandom().setSeed(12345); INDArray in = Nd4j.rand(DataType.FLOAT, 1, 3, 15, 15); diff --git a/deeplearning4j/deeplearning4j-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest100b4.java b/deeplearning4j/deeplearning4j-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest100b4.java index d1112899f..a4883ea07 100644 --- a/deeplearning4j/deeplearning4j-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest100b4.java +++ b/deeplearning4j/deeplearning4j-core/src/test/java/org/deeplearning4j/regressiontest/RegressionTest100b4.java @@ -339,7 +339,13 @@ public class RegressionTest100b4 extends BaseDL4JTest { INDArray outAct = net.output(in); - assertEquals(outExp, outAct); + //19 layers - CPU vs. 
GPU difference accumulates notably, but appears to be correct + if(Nd4j.getBackend().getClass().getName().toLowerCase().contains("native")){ + assertEquals(outExp, outAct); + } else { + boolean eq = outExp.equalsWithEps(outAct, 0.1); + assertTrue(eq); + } } @Test diff --git a/deeplearning4j/deeplearning4j-data/deeplearning4j-utility-iterators/src/main/java/org/deeplearning4j/datasets/iterator/SamplingDataSetIterator.java b/deeplearning4j/deeplearning4j-data/deeplearning4j-utility-iterators/src/main/java/org/deeplearning4j/datasets/iterator/SamplingDataSetIterator.java index 62ee85407..32e4c61d3 100755 --- a/deeplearning4j/deeplearning4j-data/deeplearning4j-utility-iterators/src/main/java/org/deeplearning4j/datasets/iterator/SamplingDataSetIterator.java +++ b/deeplearning4j/deeplearning4j-data/deeplearning4j-utility-iterators/src/main/java/org/deeplearning4j/datasets/iterator/SamplingDataSetIterator.java @@ -24,101 +24,11 @@ import org.nd4j.linalg.dataset.api.iterator.DataSetIterator; import java.util.List; /** - * A wrapper for a dataset to sample from. - * This will randomly sample from the given dataset. 
- * @author Adam GIbson + * @deprecated Use {@link org.nd4j.linalg.dataset.api.iterator.SamplingDataSetIterator} */ -public class SamplingDataSetIterator implements DataSetIterator { - - /** - * - */ - private static final long serialVersionUID = -2700563801361726914L; - private DataSet sampleFrom; - private int batchSize; - private int totalNumberSamples; - private int numTimesSampled; - @Getter - private DataSetPreProcessor preProcessor; - - /** - * - * @param sampleFrom the dataset to sample from - * @param batchSize the batch size to sample - * @param totalNumberSamples the sample size - */ +@Deprecated +public class SamplingDataSetIterator extends org.nd4j.linalg.dataset.api.iterator.SamplingDataSetIterator { public SamplingDataSetIterator(DataSet sampleFrom, int batchSize, int totalNumberSamples) { - super(); - this.sampleFrom = sampleFrom; - this.batchSize = batchSize; - this.totalNumberSamples = totalNumberSamples; + super(sampleFrom, batchSize, totalNumberSamples); } - - @Override - public boolean hasNext() { - return numTimesSampled < totalNumberSamples; - } - - @Override - public DataSet next() { - DataSet ret = sampleFrom.sample(batchSize); - numTimesSampled += batchSize; - return ret; - } - - @Override - public void remove() { - throw new UnsupportedOperationException(); - } - - @Override - public int inputColumns() { - return sampleFrom.numInputs(); - } - - @Override - public int totalOutcomes() { - return sampleFrom.numOutcomes(); - } - - @Override - public boolean resetSupported() { - return true; - } - - @Override - public boolean asyncSupported() { - return true; - } - - @Override - public void reset() { - numTimesSampled = 0; - } - - @Override - public int batch() { - return batchSize; - } - - @Override - public void setPreProcessor(DataSetPreProcessor preProcessor) { - this.preProcessor = preProcessor; - } - - @Override - public List getLabels() { - return null; - } - - - @Override - public DataSet next(int num) { - DataSet ret = 
sampleFrom.sample(num); - numTimesSampled++; - return ret; - } - - - } diff --git a/deeplearning4j/deeplearning4j-graph/src/main/java/org/deeplearning4j/graph/models/deepwalk/DeepWalk.java b/deeplearning4j/deeplearning4j-graph/src/main/java/org/deeplearning4j/graph/models/deepwalk/DeepWalk.java index 0bc633895..0ba9217ec 100644 --- a/deeplearning4j/deeplearning4j-graph/src/main/java/org/deeplearning4j/graph/models/deepwalk/DeepWalk.java +++ b/deeplearning4j/deeplearning4j-graph/src/main/java/org/deeplearning4j/graph/models/deepwalk/DeepWalk.java @@ -38,7 +38,7 @@ import java.util.concurrent.atomic.AtomicLong; /**Implementation of the DeepWalk graph vectorization model, based on the paper * DeepWalk: Online Learning of Social Representations by Perozzi, Al-Rfou & Skiena (2014), - * http://arxiv.org/abs/1403.6652
+ * https://arxiv.org/abs/1403.6652
* Similar to word2vec in nature, DeepWalk is an unsupervised learning algorithm that learns a vector representation * of each vertex in a graph. Vector representations are learned using walks (usually random walks) on the vertices in * the graph.
diff --git a/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/Hdf5Archive.java b/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/Hdf5Archive.java index 83d138d5c..a5ea8efca 100644 --- a/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/Hdf5Archive.java +++ b/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/Hdf5Archive.java @@ -17,6 +17,7 @@ package org.deeplearning4j.nn.modelimport.keras; import lombok.extern.slf4j.Slf4j; +import org.bytedeco.hdf5.*; import org.bytedeco.javacpp.BytePointer; import org.bytedeco.javacpp.FloatPointer; import org.bytedeco.javacpp.Loader; @@ -32,7 +33,6 @@ import java.lang.Exception; import java.util.ArrayList; import java.util.List; -import org.bytedeco.hdf5.*; import static org.bytedeco.hdf5.global.hdf5.*; /** diff --git a/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/KerasSequentialModel.java b/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/KerasSequentialModel.java index 529cf729c..d163c0776 100644 --- a/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/KerasSequentialModel.java +++ b/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/KerasSequentialModel.java @@ -17,7 +17,6 @@ package org.deeplearning4j.nn.modelimport.keras; import lombok.extern.slf4j.Slf4j; -import org.deeplearning4j.nn.api.layers.IOutputLayer; import org.deeplearning4j.nn.conf.BackpropType; import org.deeplearning4j.nn.conf.InputPreProcessor; import org.deeplearning4j.nn.conf.MultiLayerConfiguration; diff --git a/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/exceptions/InvalidKerasConfigurationException.java 
b/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/exceptions/InvalidKerasConfigurationException.java index bea7fa2ad..db51cb499 100644 --- a/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/exceptions/InvalidKerasConfigurationException.java +++ b/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/exceptions/InvalidKerasConfigurationException.java @@ -40,6 +40,6 @@ public class InvalidKerasConfigurationException extends Exception { } private static String appendDocumentationURL(String message) { - return message + ". For more information, see http://deeplearning4j.org/model-import-keras."; + return message + ". For more information, see http://deeplearning4j.org/docs/latest/keras-import-overview"; } } diff --git a/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/exceptions/UnsupportedKerasConfigurationException.java b/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/exceptions/UnsupportedKerasConfigurationException.java index c540bcd64..6244cf1e8 100644 --- a/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/exceptions/UnsupportedKerasConfigurationException.java +++ b/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/exceptions/UnsupportedKerasConfigurationException.java @@ -22,7 +22,7 @@ package org.deeplearning4j.nn.modelimport.keras.exceptions; * is not currently supported. * * See https://deeplearning4j.org/docs/latest/keras-import-overview - * for more information and file an issue at http://github.com/deeplearning4j/deeplearning4j/issues. + * for more information and file an issue at https://github.com/eclipse/deeplearning4j/issues. 
* * @author dave@skymind.io */ @@ -41,6 +41,6 @@ public class UnsupportedKerasConfigurationException extends Exception { } private static String appendDocumentationURL(String message) { - return message + ". Please file an issue at http://github.com/deeplearning4j/deeplearning4j/issues."; + return message + ". Please file an issue at https://github.com/eclipse/deeplearning4j/issues."; } } diff --git a/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/advanced/activations/KerasPReLU.java b/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/advanced/activations/KerasPReLU.java index 15de6fc53..8877d8b5a 100644 --- a/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/advanced/activations/KerasPReLU.java +++ b/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/advanced/activations/KerasPReLU.java @@ -18,7 +18,6 @@ package org.deeplearning4j.nn.modelimport.keras.layers.advanced.activations; import lombok.extern.slf4j.Slf4j; import org.deeplearning4j.nn.api.layers.LayerConstraint; -import org.deeplearning4j.nn.conf.distribution.Distribution; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.PReLULayer; import org.deeplearning4j.nn.modelimport.keras.KerasLayer; @@ -27,9 +26,8 @@ import org.deeplearning4j.nn.modelimport.keras.exceptions.UnsupportedKerasConfig import org.deeplearning4j.nn.modelimport.keras.utils.KerasConstraintUtils; import org.deeplearning4j.nn.modelimport.keras.utils.KerasLayerUtils; import org.deeplearning4j.nn.params.PReLUParamInitializer; -import org.deeplearning4j.nn.weights.WeightInit; +import org.deeplearning4j.nn.weights.IWeightInit; import org.nd4j.linalg.api.ndarray.INDArray; -import org.nd4j.linalg.primitives.Pair; import org.nd4j.linalg.util.ArrayUtil; import java.util.HashMap; @@ -79,14 +77,12 @@ 
public class KerasPReLU extends KerasLayer { LayerConstraint weightConstraint = KerasConstraintUtils.getConstraintsFromConfig( layerConfig, ALPHA_CONSTRAINT, conf, kerasMajorVersion); - Pair init = getWeightInitFromConfig(layerConfig, ALPHA_INIT, + IWeightInit init = getWeightInitFromConfig(layerConfig, ALPHA_INIT, enforceTrainingConfig, conf, kerasMajorVersion); - WeightInit weightInit = init.getFirst(); - Distribution distribution = init.getSecond(); long[] axes = getSharedAxes(layerConfig); PReLULayer.Builder builder = new PReLULayer.Builder().sharedAxes(axes) - .weightInit(weightInit.getWeightInitFunction(distribution)).name(layerName); + .weightInit(init).name(layerName); if (weightConstraint != null){ builder.constrainWeights(weightConstraint); } diff --git a/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasAtrousConvolution1D.java b/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasAtrousConvolution1D.java index b7fa269f7..d7a4ab699 100644 --- a/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasAtrousConvolution1D.java +++ b/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasAtrousConvolution1D.java @@ -17,14 +17,12 @@ package org.deeplearning4j.nn.modelimport.keras.layers.convolutional; import org.deeplearning4j.nn.api.layers.LayerConstraint; -import org.deeplearning4j.nn.conf.distribution.Distribution; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.Convolution1DLayer; import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurationException; import org.deeplearning4j.nn.modelimport.keras.exceptions.UnsupportedKerasConfigurationException; import 
org.deeplearning4j.nn.modelimport.keras.utils.KerasConstraintUtils; -import org.deeplearning4j.nn.weights.WeightInit; -import org.nd4j.linalg.primitives.Pair; +import org.deeplearning4j.nn.weights.IWeightInit; import java.util.Map; @@ -83,15 +81,13 @@ public class KerasAtrousConvolution1D extends KerasConvolution { LayerConstraint weightConstraint = KerasConstraintUtils.getConstraintsFromConfig( layerConfig, conf.getLAYER_FIELD_W_CONSTRAINT(), conf, kerasMajorVersion); - Pair init = getWeightInitFromConfig(layerConfig, conf.getLAYER_FIELD_INIT(), + IWeightInit init = getWeightInitFromConfig(layerConfig, conf.getLAYER_FIELD_INIT(), enforceTrainingConfig, conf, kerasMajorVersion); - WeightInit weightInit = init.getFirst(); - Distribution distribution = init.getSecond(); Convolution1DLayer.Builder builder = new Convolution1DLayer.Builder().name(this.layerName) .nOut(getNOutFromConfig(layerConfig, conf)).dropOut(this.dropout) .activation(getIActivationFromConfig(layerConfig, conf)) - .weightInit(weightInit.getWeightInitFunction(distribution)) + .weightInit(init) .dilation(getDilationRate(layerConfig, 1, conf, true)[0]) .l1(this.weightL1Regularization).l2(this.weightL2Regularization) .convolutionMode(getConvolutionModeFromConfig(layerConfig, conf)) diff --git a/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasAtrousConvolution2D.java b/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasAtrousConvolution2D.java index aa602bb3c..dd374992a 100644 --- a/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasAtrousConvolution2D.java +++ b/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasAtrousConvolution2D.java @@ -17,14 +17,12 @@ package 
org.deeplearning4j.nn.modelimport.keras.layers.convolutional; import org.deeplearning4j.nn.api.layers.LayerConstraint; -import org.deeplearning4j.nn.conf.distribution.Distribution; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.ConvolutionLayer; import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurationException; import org.deeplearning4j.nn.modelimport.keras.exceptions.UnsupportedKerasConfigurationException; import org.deeplearning4j.nn.modelimport.keras.utils.KerasConstraintUtils; -import org.deeplearning4j.nn.weights.WeightInit; -import org.nd4j.linalg.primitives.Pair; +import org.deeplearning4j.nn.weights.IWeightInit; import java.util.Map; @@ -84,14 +82,13 @@ public class KerasAtrousConvolution2D extends KerasConvolution { LayerConstraint weightConstraint = KerasConstraintUtils.getConstraintsFromConfig( layerConfig, conf.getLAYER_FIELD_W_CONSTRAINT(), conf, kerasMajorVersion); - Pair init = getWeightInitFromConfig(layerConfig, conf.getLAYER_FIELD_INIT(), + IWeightInit init = getWeightInitFromConfig(layerConfig, conf.getLAYER_FIELD_INIT(), enforceTrainingConfig, conf, kerasMajorVersion); - WeightInit weightInit = init.getFirst(); ConvolutionLayer.Builder builder = new ConvolutionLayer.Builder().name(this.layerName) .nOut(getNOutFromConfig(layerConfig, conf)).dropOut(this.dropout) .activation(getIActivationFromConfig(layerConfig, conf)) - .weightInit(weightInit.getWeightInitFunction()) + .weightInit(init) .dilation(getDilationRate(layerConfig, 2, conf, true)) .l1(this.weightL1Regularization).l2(this.weightL2Regularization) .convolutionMode(getConvolutionModeFromConfig(layerConfig, conf)) diff --git a/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasConvolution.java b/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasConvolution.java index 
c4e66f6ef..f1d2f0210 100644 --- a/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasConvolution.java +++ b/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasConvolution.java @@ -21,7 +21,6 @@ import lombok.EqualsAndHashCode; import lombok.extern.slf4j.Slf4j; import org.apache.commons.lang3.ArrayUtils; import org.deeplearning4j.nn.modelimport.keras.KerasLayer; -import org.deeplearning4j.nn.modelimport.keras.config.KerasLayerConfiguration; import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurationException; import org.deeplearning4j.nn.modelimport.keras.exceptions.UnsupportedKerasConfigurationException; import org.deeplearning4j.nn.params.ConvolutionParamInitializer; @@ -30,7 +29,6 @@ import org.nd4j.linalg.factory.Nd4j; import java.util.HashMap; import java.util.Map; -import java.util.Set; import static org.deeplearning4j.nn.modelimport.keras.utils.KerasLayerUtils.removeDefaultWeights; diff --git a/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasConvolution1D.java b/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasConvolution1D.java index 33512eb33..3da88d3b1 100644 --- a/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasConvolution1D.java +++ b/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasConvolution1D.java @@ -22,7 +22,6 @@ import lombok.extern.slf4j.Slf4j; import org.apache.commons.lang3.ArrayUtils; import org.deeplearning4j.nn.api.layers.LayerConstraint; import org.deeplearning4j.nn.conf.InputPreProcessor; -import org.deeplearning4j.nn.conf.distribution.Distribution; import 
org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.Convolution1DLayer; import org.deeplearning4j.nn.conf.layers.InputTypeUtil; @@ -30,10 +29,9 @@ import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurat import org.deeplearning4j.nn.modelimport.keras.exceptions.UnsupportedKerasConfigurationException; import org.deeplearning4j.nn.modelimport.keras.utils.KerasConstraintUtils; import org.deeplearning4j.nn.params.ConvolutionParamInitializer; -import org.deeplearning4j.nn.weights.WeightInit; +import org.deeplearning4j.nn.weights.IWeightInit; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.factory.Nd4j; -import org.nd4j.linalg.primitives.Pair; import java.util.HashMap; import java.util.Map; @@ -94,15 +92,13 @@ public class KerasConvolution1D extends KerasConvolution { LayerConstraint weightConstraint = KerasConstraintUtils.getConstraintsFromConfig( layerConfig, conf.getLAYER_FIELD_W_CONSTRAINT(), conf, kerasMajorVersion); - Pair init = getWeightInitFromConfig(layerConfig, conf.getLAYER_FIELD_INIT(), + IWeightInit init = getWeightInitFromConfig(layerConfig, conf.getLAYER_FIELD_INIT(), enforceTrainingConfig, conf, kerasMajorVersion); - WeightInit weightInit = init.getFirst(); - Distribution distribution = init.getSecond(); Convolution1DLayer.Builder builder = new Convolution1DLayer.Builder().name(this.layerName) .nOut(getNOutFromConfig(layerConfig, conf)).dropOut(this.dropout) .activation(getIActivationFromConfig(layerConfig, conf)) - .weightInit(weightInit.getWeightInitFunction(distribution)) + .weightInit(init) .l1(this.weightL1Regularization).l2(this.weightL2Regularization) .convolutionMode(getConvolutionModeFromConfig(layerConfig, conf)) .kernelSize(getKernelSizeFromConfig(layerConfig, 1, conf, kerasMajorVersion)[0]) diff --git a/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasConvolution2D.java 
b/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasConvolution2D.java index 3c1d9f7d2..e9c74e78c 100644 --- a/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasConvolution2D.java +++ b/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasConvolution2D.java @@ -21,14 +21,12 @@ import lombok.EqualsAndHashCode; import lombok.extern.slf4j.Slf4j; import org.deeplearning4j.nn.api.layers.LayerConstraint; import org.deeplearning4j.nn.conf.InputPreProcessor; -import org.deeplearning4j.nn.conf.distribution.Distribution; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.ConvolutionLayer; import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurationException; import org.deeplearning4j.nn.modelimport.keras.exceptions.UnsupportedKerasConfigurationException; import org.deeplearning4j.nn.modelimport.keras.utils.KerasConstraintUtils; -import org.deeplearning4j.nn.weights.WeightInit; -import org.nd4j.linalg.primitives.Pair; +import org.deeplearning4j.nn.weights.IWeightInit; import java.util.Map; @@ -87,10 +85,8 @@ public class KerasConvolution2D extends KerasConvolution { numTrainableParams = hasBias ? 
2 : 1; int[] dilationRate = getDilationRate(layerConfig, 2, conf, false); - Pair init = getWeightInitFromConfig(layerConfig, conf.getLAYER_FIELD_INIT(), + IWeightInit init = getWeightInitFromConfig(layerConfig, conf.getLAYER_FIELD_INIT(), enforceTrainingConfig, conf, kerasMajorVersion); - WeightInit weightInit = init.getFirst(); - Distribution distribution = init.getSecond(); LayerConstraint biasConstraint = KerasConstraintUtils.getConstraintsFromConfig( layerConfig, conf.getLAYER_FIELD_B_CONSTRAINT(), conf, kerasMajorVersion); @@ -100,7 +96,7 @@ public class KerasConvolution2D extends KerasConvolution { ConvolutionLayer.Builder builder = new ConvolutionLayer.Builder().name(this.layerName) .nOut(getNOutFromConfig(layerConfig, conf)).dropOut(this.dropout) .activation(getIActivationFromConfig(layerConfig, conf)) - .weightInit(weightInit.getWeightInitFunction(distribution)) + .weightInit(init) .l1(this.weightL1Regularization).l2(this.weightL2Regularization) .convolutionMode(getConvolutionModeFromConfig(layerConfig, conf)) .kernelSize(getKernelSizeFromConfig(layerConfig, 2, conf, kerasMajorVersion)) diff --git a/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasConvolution3D.java b/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasConvolution3D.java index 8da12a726..ccd776306 100644 --- a/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasConvolution3D.java +++ b/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasConvolution3D.java @@ -21,15 +21,13 @@ import lombok.EqualsAndHashCode; import lombok.extern.slf4j.Slf4j; import org.deeplearning4j.nn.api.layers.LayerConstraint; import org.deeplearning4j.nn.conf.InputPreProcessor; -import org.deeplearning4j.nn.conf.distribution.Distribution; 
import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.Convolution3D; import org.deeplearning4j.nn.conf.layers.ConvolutionLayer; import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurationException; import org.deeplearning4j.nn.modelimport.keras.exceptions.UnsupportedKerasConfigurationException; import org.deeplearning4j.nn.modelimport.keras.utils.KerasConstraintUtils; -import org.deeplearning4j.nn.weights.WeightInit; -import org.nd4j.linalg.primitives.Pair; +import org.deeplearning4j.nn.weights.IWeightInit; import java.util.Map; @@ -88,10 +86,8 @@ public class KerasConvolution3D extends KerasConvolution { numTrainableParams = hasBias ? 2 : 1; int[] dilationRate = getDilationRate(layerConfig, 3, conf, false); - Pair init = getWeightInitFromConfig(layerConfig, conf.getLAYER_FIELD_INIT(), + IWeightInit init = getWeightInitFromConfig(layerConfig, conf.getLAYER_FIELD_INIT(), enforceTrainingConfig, conf, kerasMajorVersion); - WeightInit weightInit = init.getFirst(); - Distribution distribution = init.getSecond(); LayerConstraint biasConstraint = KerasConstraintUtils.getConstraintsFromConfig( layerConfig, conf.getLAYER_FIELD_B_CONSTRAINT(), conf, kerasMajorVersion); @@ -101,7 +97,7 @@ public class KerasConvolution3D extends KerasConvolution { Convolution3D.Builder builder = new Convolution3D.Builder().name(this.layerName) .nOut(getNOutFromConfig(layerConfig, conf)).dropOut(this.dropout) .activation(getIActivationFromConfig(layerConfig, conf)) - .weightInit(weightInit.getWeightInitFunction(distribution)) + .weightInit(init) .l1(this.weightL1Regularization).l2(this.weightL2Regularization) .convolutionMode(getConvolutionModeFromConfig(layerConfig, conf)) .kernelSize(getKernelSizeFromConfig(layerConfig, 3, conf, kerasMajorVersion)) diff --git a/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasDeconvolution2D.java 
b/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasDeconvolution2D.java index 33e02ae6f..92d9f3af8 100644 --- a/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasDeconvolution2D.java +++ b/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasDeconvolution2D.java @@ -20,14 +20,12 @@ import lombok.Data; import lombok.EqualsAndHashCode; import lombok.extern.slf4j.Slf4j; import org.deeplearning4j.nn.api.layers.LayerConstraint; -import org.deeplearning4j.nn.conf.distribution.Distribution; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.Deconvolution2D; import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurationException; import org.deeplearning4j.nn.modelimport.keras.exceptions.UnsupportedKerasConfigurationException; import org.deeplearning4j.nn.modelimport.keras.utils.KerasConstraintUtils; -import org.deeplearning4j.nn.weights.WeightInit; -import org.nd4j.linalg.primitives.Pair; +import org.deeplearning4j.nn.weights.IWeightInit; import java.util.Map; @@ -86,10 +84,8 @@ public class KerasDeconvolution2D extends KerasConvolution { numTrainableParams = hasBias ? 
2 : 1; int[] dilationRate = getDilationRate(layerConfig, 2, conf, false); - Pair init = getWeightInitFromConfig(layerConfig, conf.getLAYER_FIELD_INIT(), + IWeightInit init = getWeightInitFromConfig(layerConfig, conf.getLAYER_FIELD_INIT(), enforceTrainingConfig, conf, kerasMajorVersion); - WeightInit weightInit = init.getFirst(); - Distribution distribution = init.getSecond(); LayerConstraint biasConstraint = KerasConstraintUtils.getConstraintsFromConfig( layerConfig, conf.getLAYER_FIELD_B_CONSTRAINT(), conf, kerasMajorVersion); @@ -99,7 +95,7 @@ public class KerasDeconvolution2D extends KerasConvolution { Deconvolution2D.Builder builder = new Deconvolution2D.Builder().name(this.layerName) .nOut(getNOutFromConfig(layerConfig, conf)).dropOut(this.dropout) .activation(getIActivationFromConfig(layerConfig, conf)) - .weightInit(weightInit.getWeightInitFunction(distribution)) + .weightInit(init) .l1(this.weightL1Regularization).l2(this.weightL2Regularization) .convolutionMode(getConvolutionModeFromConfig(layerConfig, conf)) .kernelSize(getKernelSizeFromConfig(layerConfig, 2, conf, kerasMajorVersion)) diff --git a/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasDepthwiseConvolution2D.java b/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasDepthwiseConvolution2D.java index f27d3ff08..c72de75a6 100644 --- a/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasDepthwiseConvolution2D.java +++ b/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasDepthwiseConvolution2D.java @@ -21,7 +21,6 @@ import lombok.EqualsAndHashCode; import lombok.extern.slf4j.Slf4j; import lombok.val; import org.deeplearning4j.nn.api.layers.LayerConstraint; -import org.deeplearning4j.nn.conf.distribution.Distribution; 
import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.DepthwiseConvolution2D; import org.deeplearning4j.nn.modelimport.keras.KerasLayer; @@ -30,9 +29,8 @@ import org.deeplearning4j.nn.modelimport.keras.exceptions.UnsupportedKerasConfig import org.deeplearning4j.nn.modelimport.keras.utils.KerasConstraintUtils; import org.deeplearning4j.nn.modelimport.keras.utils.KerasRegularizerUtils; import org.deeplearning4j.nn.params.SeparableConvolutionParamInitializer; -import org.deeplearning4j.nn.weights.WeightInit; +import org.deeplearning4j.nn.weights.IWeightInit; import org.nd4j.linalg.api.ndarray.INDArray; -import org.nd4j.linalg.primitives.Pair; import java.util.Collections; import java.util.HashMap; @@ -126,10 +124,8 @@ public class KerasDepthwiseConvolution2D extends KerasConvolution { numTrainableParams = hasBias ? 2 : 1; int[] dilationRate = getDilationRate(layerConfig, 2, conf, false); - Pair depthWiseInit = getWeightInitFromConfig(layerConfig, + IWeightInit depthWiseInit = getWeightInitFromConfig(layerConfig, conf.getLAYER_FIELD_DEPTH_WISE_INIT(), enforceTrainingConfig, conf, kerasMajorVersion); - WeightInit depthWeightInit = depthWiseInit.getFirst(); - Distribution depthDistribution = depthWiseInit.getSecond(); val nIn = getNInFromConfig(previousLayers); @@ -152,7 +148,7 @@ public class KerasDepthwiseConvolution2D extends KerasConvolution { .nIn(nIn) .nOut(nIn * depthMultiplier) .activation(getIActivationFromConfig(layerConfig, conf)) - .weightInit(depthWeightInit.getWeightInitFunction(depthDistribution)) + .weightInit(depthWiseInit) .depthMultiplier(depthMultiplier) .l1(this.weightL1Regularization).l2(this.weightL2Regularization) .convolutionMode(getConvolutionModeFromConfig(layerConfig, conf)) diff --git a/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasSeparableConvolution2D.java 
b/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasSeparableConvolution2D.java index 67eba9bf1..cd052bbb7 100644 --- a/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasSeparableConvolution2D.java +++ b/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasSeparableConvolution2D.java @@ -20,7 +20,6 @@ import lombok.Data; import lombok.EqualsAndHashCode; import lombok.extern.slf4j.Slf4j; import org.deeplearning4j.nn.api.layers.LayerConstraint; -import org.deeplearning4j.nn.conf.distribution.Distribution; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.SeparableConvolution2D; import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurationException; @@ -28,9 +27,8 @@ import org.deeplearning4j.nn.modelimport.keras.exceptions.UnsupportedKerasConfig import org.deeplearning4j.nn.modelimport.keras.utils.KerasConstraintUtils; import org.deeplearning4j.nn.modelimport.keras.utils.KerasRegularizerUtils; import org.deeplearning4j.nn.params.SeparableConvolutionParamInitializer; -import org.deeplearning4j.nn.weights.WeightInit; +import org.deeplearning4j.nn.weights.IWeightInit; import org.nd4j.linalg.api.ndarray.INDArray; -import org.nd4j.linalg.primitives.Pair; import java.util.HashMap; import java.util.Map; @@ -93,17 +91,13 @@ public class KerasSeparableConvolution2D extends KerasConvolution { int depthMultiplier = getDepthMultiplier(layerConfig, conf); - Pair depthWiseInit = getWeightInitFromConfig(layerConfig, + IWeightInit depthWiseInit = getWeightInitFromConfig(layerConfig, conf.getLAYER_FIELD_DEPTH_WISE_INIT(), enforceTrainingConfig, conf, kerasMajorVersion); - WeightInit depthWeightInit = depthWiseInit.getFirst(); - Distribution depthDistribution = depthWiseInit.getSecond(); - Pair pointWiseInit = 
getWeightInitFromConfig(layerConfig, + IWeightInit pointWiseInit = getWeightInitFromConfig(layerConfig, conf.getLAYER_FIELD_POINT_WISE_INIT(), enforceTrainingConfig, conf, kerasMajorVersion); - WeightInit pointWeightInit = pointWiseInit.getFirst(); - Distribution pointDistribution = pointWiseInit.getSecond(); - if (depthWeightInit != pointWeightInit || depthDistribution != pointDistribution) + if ( !depthWiseInit.getClass().equals(pointWiseInit.getClass()) ) if (enforceTrainingConfig) throw new UnsupportedKerasConfigurationException( "Specifying different initialization for depth- and point-wise weights not supported."); @@ -126,7 +120,7 @@ public class KerasSeparableConvolution2D extends KerasConvolution { SeparableConvolution2D.Builder builder = new SeparableConvolution2D.Builder().name(this.layerName) .nOut(getNOutFromConfig(layerConfig, conf)).dropOut(this.dropout) .activation(getIActivationFromConfig(layerConfig, conf)) - .weightInit(depthWeightInit.getWeightInitFunction(depthDistribution)) + .weightInit(depthWiseInit) .depthMultiplier(depthMultiplier) .l1(this.weightL1Regularization).l2(this.weightL2Regularization) .convolutionMode(getConvolutionModeFromConfig(layerConfig, conf)) diff --git a/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasUpsampling3D.java b/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasUpsampling3D.java index a9c1054f1..98aabb3ee 100644 --- a/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasUpsampling3D.java +++ b/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasUpsampling3D.java @@ -17,7 +17,6 @@ package org.deeplearning4j.nn.modelimport.keras.layers.convolutional; import org.deeplearning4j.nn.conf.inputs.InputType; -import 
org.deeplearning4j.nn.conf.layers.Upsampling2D; import org.deeplearning4j.nn.conf.layers.Upsampling3D; import org.deeplearning4j.nn.modelimport.keras.KerasLayer; import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurationException; diff --git a/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasZeroPadding3D.java b/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasZeroPadding3D.java index 387b826f5..7c840d301 100644 --- a/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasZeroPadding3D.java +++ b/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/convolutional/KerasZeroPadding3D.java @@ -21,7 +21,6 @@ import lombok.EqualsAndHashCode; import lombok.extern.slf4j.Slf4j; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.ZeroPadding3DLayer; -import org.deeplearning4j.nn.conf.layers.ZeroPaddingLayer; import org.deeplearning4j.nn.modelimport.keras.KerasLayer; import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurationException; import org.deeplearning4j.nn.modelimport.keras.exceptions.UnsupportedKerasConfigurationException; diff --git a/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasDense.java b/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasDense.java index d840370d8..296b5dabf 100644 --- a/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasDense.java +++ b/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasDense.java @@ -21,7 +21,6 @@ import lombok.EqualsAndHashCode; 
import lombok.extern.slf4j.Slf4j; import org.deeplearning4j.nn.api.layers.LayerConstraint; import org.deeplearning4j.nn.conf.InputPreProcessor; -import org.deeplearning4j.nn.conf.distribution.Distribution; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.DenseLayer; import org.deeplearning4j.nn.modelimport.keras.KerasLayer; @@ -29,9 +28,8 @@ import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurat import org.deeplearning4j.nn.modelimport.keras.exceptions.UnsupportedKerasConfigurationException; import org.deeplearning4j.nn.modelimport.keras.utils.KerasConstraintUtils; import org.deeplearning4j.nn.params.DefaultParamInitializer; -import org.deeplearning4j.nn.weights.WeightInit; +import org.deeplearning4j.nn.weights.IWeightInit; import org.nd4j.linalg.api.ndarray.INDArray; -import org.nd4j.linalg.primitives.Pair; import java.util.HashMap; import java.util.Map; @@ -95,15 +93,13 @@ public class KerasDense extends KerasLayer { LayerConstraint weightConstraint = KerasConstraintUtils.getConstraintsFromConfig( layerConfig, conf.getLAYER_FIELD_W_CONSTRAINT(), conf, kerasMajorVersion); - Pair init = getWeightInitFromConfig(layerConfig, conf.getLAYER_FIELD_INIT(), + IWeightInit init = getWeightInitFromConfig(layerConfig, conf.getLAYER_FIELD_INIT(), enforceTrainingConfig, conf, kerasMajorVersion); - WeightInit weightInit = init.getFirst(); - Distribution distribution = init.getSecond(); DenseLayer.Builder builder = new DenseLayer.Builder().name(this.layerName) .nOut(getNOutFromConfig(layerConfig, conf)) .dropOut(this.dropout).activation(getIActivationFromConfig(layerConfig, conf)) - .weightInit(weightInit.getWeightInitFunction(distribution)) + .weightInit(init) .biasInit(0.0) .l1(this.weightL1Regularization).l2(this.weightL2Regularization) .hasBias(hasBias); diff --git a/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasFlatten.java 
b/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasFlatten.java index d2aeb75c3..e0a6628a2 100644 --- a/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasFlatten.java +++ b/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasFlatten.java @@ -22,7 +22,6 @@ import org.deeplearning4j.nn.conf.InputPreProcessor; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.inputs.InputType.InputTypeConvolutional; import org.deeplearning4j.nn.conf.preprocessor.CnnToFeedForwardPreProcessor; -import org.deeplearning4j.nn.conf.preprocessor.RnnToFeedForwardPreProcessor; import org.deeplearning4j.nn.modelimport.keras.KerasLayer; import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurationException; import org.deeplearning4j.nn.modelimport.keras.exceptions.UnsupportedKerasConfigurationException; diff --git a/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasRepeatVector.java b/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasRepeatVector.java index 45f9ddadd..41254e221 100644 --- a/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasRepeatVector.java +++ b/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasRepeatVector.java @@ -18,7 +18,6 @@ package org.deeplearning4j.nn.modelimport.keras.layers.core; import lombok.extern.slf4j.Slf4j; import org.deeplearning4j.nn.conf.inputs.InputType; -import org.deeplearning4j.nn.conf.layers.DropoutLayer; import org.deeplearning4j.nn.conf.layers.misc.RepeatVector; import org.deeplearning4j.nn.modelimport.keras.KerasLayer; import 
org.deeplearning4j.nn.modelimport.keras.config.KerasLayerConfiguration; diff --git a/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasReshape.java b/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasReshape.java index 1275cf5a9..6a5e1ff2a 100644 --- a/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasReshape.java +++ b/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasReshape.java @@ -18,7 +18,6 @@ package org.deeplearning4j.nn.modelimport.keras.layers.core; import lombok.val; -import org.apache.commons.lang3.ArrayUtils; import org.deeplearning4j.nn.conf.InputPreProcessor; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.modelimport.keras.KerasLayer; @@ -26,7 +25,6 @@ import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurat import org.deeplearning4j.nn.modelimport.keras.exceptions.UnsupportedKerasConfigurationException; import org.deeplearning4j.nn.modelimport.keras.preprocessors.ReshapePreprocessor; import org.deeplearning4j.nn.modelimport.keras.utils.KerasLayerUtils; -import org.nd4j.linalg.util.ArrayUtil; import java.util.List; import java.util.Map; diff --git a/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/embeddings/KerasEmbedding.java b/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/embeddings/KerasEmbedding.java index 6bc1c4129..1ee13c0b0 100644 --- a/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/embeddings/KerasEmbedding.java +++ b/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/embeddings/KerasEmbedding.java @@ -21,7 +21,6 @@ import 
lombok.EqualsAndHashCode; import lombok.extern.slf4j.Slf4j; import org.deeplearning4j.nn.api.layers.LayerConstraint; import org.deeplearning4j.nn.conf.InputPreProcessor; -import org.deeplearning4j.nn.conf.distribution.Distribution; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.EmbeddingSequenceLayer; import org.deeplearning4j.nn.modelimport.keras.KerasLayer; @@ -30,11 +29,10 @@ import org.deeplearning4j.nn.modelimport.keras.exceptions.UnsupportedKerasConfig import org.deeplearning4j.nn.modelimport.keras.utils.KerasConstraintUtils; import org.deeplearning4j.nn.modelimport.keras.utils.KerasLayerUtils; import org.deeplearning4j.nn.params.DefaultParamInitializer; -import org.deeplearning4j.nn.weights.WeightInit; +import org.deeplearning4j.nn.weights.IWeightInit; import org.nd4j.linalg.activations.Activation; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.factory.Nd4j; -import org.nd4j.linalg.primitives.Pair; import java.util.HashMap; import java.util.Map; @@ -104,12 +102,10 @@ public class KerasEmbedding extends KerasLayer { "on Embedding layers. Zero Masking for the Embedding layer only works with unidirectional LSTM for now." + " If you want to have this behaviour for your imported model " + "in DL4J, apply masking as a pre-processing step to your input." 
+ - "See https://deeplearning4j.org/usingrnns#masking for more on this."); + "See http://deeplearning4j.org/docs/latest/deeplearning4j-nn-recurrent#masking for more on this."); - Pair init = getWeightInitFromConfig(layerConfig, conf.getLAYER_FIELD_EMBEDDING_INIT(), + IWeightInit init = getWeightInitFromConfig(layerConfig, conf.getLAYER_FIELD_EMBEDDING_INIT(), enforceTrainingConfig, conf, kerasMajorVersion); - WeightInit weightInit = init.getFirst(); - Distribution distribution = init.getSecond(); LayerConstraint embeddingConstraint = KerasConstraintUtils.getConstraintsFromConfig( layerConfig, conf.getLAYER_FIELD_EMBEDDINGS_CONSTRAINT(), conf, kerasMajorVersion); @@ -121,7 +117,7 @@ public class KerasEmbedding extends KerasLayer { .inferInputLength(inferInputLength) .nOut(getNOutFromConfig(layerConfig, conf)) .dropOut(this.dropout).activation(Activation.IDENTITY) - .weightInit(weightInit.getWeightInitFunction(distribution)) + .weightInit(init) .biasInit(0.0) .l1(this.weightL1Regularization) .l2(this.weightL2Regularization) diff --git a/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/local/KerasLocallyConnected1D.java b/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/local/KerasLocallyConnected1D.java index f08e462ca..d6fed55fe 100644 --- a/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/local/KerasLocallyConnected1D.java +++ b/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/local/KerasLocallyConnected1D.java @@ -21,7 +21,6 @@ import lombok.EqualsAndHashCode; import lombok.extern.slf4j.Slf4j; import org.deeplearning4j.nn.api.layers.LayerConstraint; import org.deeplearning4j.nn.conf.InputPreProcessor; -import org.deeplearning4j.nn.conf.distribution.Distribution; import org.deeplearning4j.nn.conf.inputs.InputType; import 
org.deeplearning4j.nn.conf.layers.LocallyConnected1D; import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurationException; @@ -29,9 +28,8 @@ import org.deeplearning4j.nn.modelimport.keras.exceptions.UnsupportedKerasConfig import org.deeplearning4j.nn.modelimport.keras.layers.convolutional.KerasConvolution; import org.deeplearning4j.nn.modelimport.keras.utils.KerasConstraintUtils; import org.deeplearning4j.nn.params.ConvolutionParamInitializer; -import org.deeplearning4j.nn.weights.WeightInit; +import org.deeplearning4j.nn.weights.IWeightInit; import org.nd4j.linalg.api.ndarray.INDArray; -import org.nd4j.linalg.primitives.Pair; import java.util.HashMap; import java.util.Map; @@ -90,11 +88,8 @@ public class KerasLocallyConnected1D extends KerasConvolution { numTrainableParams = hasBias ? 2 : 1; int[] dilationRate = getDilationRate(layerConfig, 1, conf, false); - Pair init = getWeightInitFromConfig(layerConfig, conf.getLAYER_FIELD_INIT(), + IWeightInit init = getWeightInitFromConfig(layerConfig, conf.getLAYER_FIELD_INIT(), enforceTrainingConfig, conf, kerasMajorVersion); - WeightInit weightInit = init.getFirst(); - // TODO: take care of distribution and bias init - //Distribution distribution = init.getSecond(); LayerConstraint biasConstraint = KerasConstraintUtils.getConstraintsFromConfig( layerConfig, conf.getLAYER_FIELD_B_CONSTRAINT(), conf, kerasMajorVersion); @@ -104,7 +99,7 @@ public class KerasLocallyConnected1D extends KerasConvolution { LocallyConnected1D.Builder builder = new LocallyConnected1D.Builder().name(this.layerName) .nOut(getNOutFromConfig(layerConfig, conf)).dropOut(this.dropout) .activation(getActivationFromConfig(layerConfig, conf)) - .weightInit(weightInit) + .weightInit(conf.getKERAS_PARAM_NAME_W(), init) .l1(this.weightL1Regularization).l2(this.weightL2Regularization) .convolutionMode(getConvolutionModeFromConfig(layerConfig, conf)) .kernelSize(getKernelSizeFromConfig(layerConfig, 1, conf, kerasMajorVersion)[0]) diff 
--git a/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/local/KerasLocallyConnected2D.java b/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/local/KerasLocallyConnected2D.java index 5c2ab641b..550c20d01 100644 --- a/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/local/KerasLocallyConnected2D.java +++ b/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/local/KerasLocallyConnected2D.java @@ -21,7 +21,6 @@ import lombok.EqualsAndHashCode; import lombok.extern.slf4j.Slf4j; import org.deeplearning4j.nn.api.layers.LayerConstraint; import org.deeplearning4j.nn.conf.InputPreProcessor; -import org.deeplearning4j.nn.conf.distribution.Distribution; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.LocallyConnected2D; import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurationException; @@ -29,9 +28,8 @@ import org.deeplearning4j.nn.modelimport.keras.exceptions.UnsupportedKerasConfig import org.deeplearning4j.nn.modelimport.keras.layers.convolutional.KerasConvolution; import org.deeplearning4j.nn.modelimport.keras.utils.KerasConstraintUtils; import org.deeplearning4j.nn.params.ConvolutionParamInitializer; -import org.deeplearning4j.nn.weights.WeightInit; +import org.deeplearning4j.nn.weights.IWeightInit; import org.nd4j.linalg.api.ndarray.INDArray; -import org.nd4j.linalg.primitives.Pair; import java.util.HashMap; import java.util.Map; @@ -39,9 +37,7 @@ import java.util.Map; import static org.deeplearning4j.nn.modelimport.keras.layers.convolutional.KerasConvolutionUtils.*; import static org.deeplearning4j.nn.modelimport.keras.utils.KerasActivationUtils.getActivationFromConfig; import static org.deeplearning4j.nn.modelimport.keras.utils.KerasInitilizationUtils.getWeightInitFromConfig; -import 
static org.deeplearning4j.nn.modelimport.keras.utils.KerasLayerUtils.getHasBiasFromConfig; -import static org.deeplearning4j.nn.modelimport.keras.utils.KerasLayerUtils.getNOutFromConfig; -import static org.deeplearning4j.nn.modelimport.keras.utils.KerasLayerUtils.removeDefaultWeights; +import static org.deeplearning4j.nn.modelimport.keras.utils.KerasLayerUtils.*; /** @@ -92,11 +88,9 @@ public class KerasLocallyConnected2D extends KerasConvolution { numTrainableParams = hasBias ? 2 : 1; int[] dilationRate = getDilationRate(layerConfig, 2, conf, false); - Pair init = getWeightInitFromConfig(layerConfig, conf.getLAYER_FIELD_INIT(), + IWeightInit init = getWeightInitFromConfig(layerConfig, conf.getLAYER_FIELD_INIT(), enforceTrainingConfig, conf, kerasMajorVersion); - WeightInit weightInit = init.getFirst(); - // TODO: take care of distribution and bias init - //Distribution distribution = init.getSecond(); + // TODO: take care of bias init LayerConstraint biasConstraint = KerasConstraintUtils.getConstraintsFromConfig( layerConfig, conf.getLAYER_FIELD_B_CONSTRAINT(), conf, kerasMajorVersion); @@ -106,7 +100,7 @@ public class KerasLocallyConnected2D extends KerasConvolution { LocallyConnected2D.Builder builder = new LocallyConnected2D.Builder().name(this.layerName) .nOut(getNOutFromConfig(layerConfig, conf)).dropOut(this.dropout) .activation(getActivationFromConfig(layerConfig, conf)) - .weightInit(weightInit) + .weightInit(conf.getKERAS_PARAM_NAME_W(), init) .l1(this.weightL1Regularization).l2(this.weightL2Regularization) .convolutionMode(getConvolutionModeFromConfig(layerConfig, conf)) .kernelSize(getKernelSizeFromConfig(layerConfig, 2, conf, kerasMajorVersion)) diff --git a/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/normalization/KerasBatchNormalization.java b/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/normalization/KerasBatchNormalization.java index 
ff8d4d91f..7f7d8dc4c 100644 --- a/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/normalization/KerasBatchNormalization.java +++ b/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/normalization/KerasBatchNormalization.java @@ -31,7 +31,6 @@ import org.deeplearning4j.nn.params.BatchNormalizationParamInitializer; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.factory.Nd4j; -import java.util.Arrays; import java.util.HashMap; import java.util.Map; import java.util.Set; diff --git a/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/recurrent/KerasLSTM.java b/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/recurrent/KerasLSTM.java index f04752936..7d5603261 100644 --- a/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/recurrent/KerasLSTM.java +++ b/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/recurrent/KerasLSTM.java @@ -22,7 +22,6 @@ import lombok.extern.slf4j.Slf4j; import lombok.val; import org.deeplearning4j.nn.api.layers.LayerConstraint; import org.deeplearning4j.nn.conf.InputPreProcessor; -import org.deeplearning4j.nn.conf.distribution.Distribution; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.InputTypeUtil; import org.deeplearning4j.nn.conf.layers.LSTM; @@ -35,7 +34,7 @@ import org.deeplearning4j.nn.modelimport.keras.exceptions.UnsupportedKerasConfig import org.deeplearning4j.nn.modelimport.keras.utils.KerasConstraintUtils; import org.deeplearning4j.nn.modelimport.keras.utils.KerasLayerUtils; import org.deeplearning4j.nn.params.LSTMParamInitializer; -import org.deeplearning4j.nn.weights.WeightInit; +import org.deeplearning4j.nn.weights.IWeightInit; import 
org.nd4j.linalg.activations.IActivation; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.factory.Nd4j; @@ -151,15 +150,11 @@ public class KerasLSTM extends KerasLayer { throws InvalidKerasConfigurationException, UnsupportedKerasConfigurationException { super(layerConfig, enforceTrainingConfig); - Pair init = getWeightInitFromConfig(layerConfig, conf.getLAYER_FIELD_INIT(), + IWeightInit init = getWeightInitFromConfig(layerConfig, conf.getLAYER_FIELD_INIT(), enforceTrainingConfig, conf, kerasMajorVersion); - WeightInit weightInit = init.getFirst(); - Distribution distribution = init.getSecond(); - Pair recurrentInit = getWeightInitFromConfig(layerConfig, conf.getLAYER_FIELD_INNER_INIT(), + IWeightInit recurrentInit = getWeightInitFromConfig(layerConfig, conf.getLAYER_FIELD_INNER_INIT(), enforceTrainingConfig, conf, kerasMajorVersion); - WeightInit recurrentWeightInit = recurrentInit.getFirst(); - Distribution recurrentDistribution = recurrentInit.getSecond(); boolean hasBias = getHasBiasFromConfig(layerConfig, conf); @@ -186,8 +181,8 @@ public class KerasLSTM extends KerasLayer { .nOut(getNOutFromConfig(layerConfig, conf)) .dropOut(this.dropout) .activation(getIActivationFromConfig(layerConfig, conf)) - .weightInit(weightInit.getWeightInitFunction(distribution)) - .weightInitRecurrent(recurrentWeightInit.getWeightInitFunction(recurrentDistribution)) + .weightInit(init) + .weightInitRecurrent(recurrentInit) .biasInit(0.0) // TODO: this is incorrect .l1(this.weightL1Regularization) .l2(this.weightL2Regularization); diff --git a/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/recurrent/KerasSimpleRnn.java b/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/recurrent/KerasSimpleRnn.java index 615405fae..6f5edf597 100644 --- 
a/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/recurrent/KerasSimpleRnn.java +++ b/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/layers/recurrent/KerasSimpleRnn.java @@ -21,7 +21,6 @@ import lombok.EqualsAndHashCode; import lombok.extern.slf4j.Slf4j; import org.deeplearning4j.nn.api.layers.LayerConstraint; import org.deeplearning4j.nn.conf.InputPreProcessor; -import org.deeplearning4j.nn.conf.distribution.Distribution; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.InputTypeUtil; import org.deeplearning4j.nn.conf.layers.Layer; @@ -34,7 +33,7 @@ import org.deeplearning4j.nn.modelimport.keras.exceptions.UnsupportedKerasConfig import org.deeplearning4j.nn.modelimport.keras.utils.KerasConstraintUtils; import org.deeplearning4j.nn.modelimport.keras.utils.KerasLayerUtils; import org.deeplearning4j.nn.params.SimpleRnnParamInitializer; -import org.deeplearning4j.nn.weights.WeightInit; +import org.deeplearning4j.nn.weights.IWeightInit; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.primitives.Pair; @@ -124,15 +123,11 @@ public class KerasSimpleRnn extends KerasLayer { throws InvalidKerasConfigurationException, UnsupportedKerasConfigurationException { super(layerConfig, enforceTrainingConfig); - Pair init = getWeightInitFromConfig(layerConfig, conf.getLAYER_FIELD_INIT(), + IWeightInit init = getWeightInitFromConfig(layerConfig, conf.getLAYER_FIELD_INIT(), enforceTrainingConfig, conf, kerasMajorVersion); - WeightInit weightInit = init.getFirst(); - Distribution distribution = init.getSecond(); - Pair recurrentInit = getWeightInitFromConfig(layerConfig, conf.getLAYER_FIELD_INNER_INIT(), + IWeightInit recurrentInit = getWeightInitFromConfig(layerConfig, conf.getLAYER_FIELD_INNER_INIT(), enforceTrainingConfig, conf, kerasMajorVersion); - WeightInit recurrentWeightInit = recurrentInit.getFirst(); - 
Distribution recurrentDistribution = recurrentInit.getSecond(); Map innerConfig = KerasLayerUtils.getInnerLayerConfigFromConfig(layerConfig, conf); this.returnSequences = (Boolean) innerConfig.get(conf.getLAYER_FIELD_RETURN_SEQUENCES()); @@ -154,8 +149,8 @@ public class KerasSimpleRnn extends KerasLayer { .nOut(getNOutFromConfig(layerConfig, conf)) .dropOut(this.dropout) .activation(getIActivationFromConfig(layerConfig, conf)) - .weightInit(weightInit.getWeightInitFunction(distribution)) - .weightInitRecurrent(recurrentWeightInit.getWeightInitFunction(recurrentDistribution)) + .weightInit(init) + .weightInitRecurrent(recurrentInit) .biasInit(0.0) .l1(this.weightL1Regularization) .l2(this.weightL2Regularization); diff --git a/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/preprocessing/sequence/TimeSeriesGenerator.java b/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/preprocessing/sequence/TimeSeriesGenerator.java index 94498b976..2a81886e0 100644 --- a/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/preprocessing/sequence/TimeSeriesGenerator.java +++ b/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/preprocessing/sequence/TimeSeriesGenerator.java @@ -20,9 +20,7 @@ import com.google.gson.Gson; import com.google.gson.reflect.TypeToken; import lombok.Data; import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurationException; -import org.deeplearning4j.nn.modelimport.keras.preprocessing.text.KerasTokenizer; import org.nd4j.linalg.api.ndarray.INDArray; -import org.nd4j.linalg.api.ops.DynamicCustomOp; import org.nd4j.linalg.factory.Nd4j; import org.nd4j.linalg.indexing.INDArrayIndex; import org.nd4j.linalg.indexing.NDArrayIndex; @@ -31,7 +29,6 @@ import org.nd4j.linalg.primitives.Pair; import java.io.IOException; import java.nio.file.Files; import 
java.nio.file.Paths; -import java.util.HashMap; import java.util.List; import java.util.Map; diff --git a/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/preprocessors/KerasFlattenRnnPreprocessor.java b/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/preprocessors/KerasFlattenRnnPreprocessor.java index 3e18ebe3e..25aa73a06 100644 --- a/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/preprocessors/KerasFlattenRnnPreprocessor.java +++ b/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/preprocessors/KerasFlattenRnnPreprocessor.java @@ -22,9 +22,8 @@ import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.inputs.InvalidInputTypeException; import org.deeplearning4j.nn.conf.preprocessor.BaseInputPreProcessor; import org.deeplearning4j.nn.workspace.ArrayType; -import org.nd4j.linalg.api.ndarray.INDArray; import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; -import org.nd4j.linalg.api.shape.Shape; +import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.shade.jackson.annotation.JsonProperty; /** diff --git a/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/preprocessors/ReshapePreprocessor.java b/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/preprocessors/ReshapePreprocessor.java index f94adf713..77c6369c5 100644 --- a/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/preprocessors/ReshapePreprocessor.java +++ b/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/preprocessors/ReshapePreprocessor.java @@ -19,17 +19,15 @@ package org.deeplearning4j.nn.modelimport.keras.preprocessors; import lombok.Data; import lombok.EqualsAndHashCode; import 
lombok.extern.slf4j.Slf4j; - import lombok.val; import org.apache.commons.lang3.ArrayUtils; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.inputs.InvalidInputTypeException; import org.deeplearning4j.nn.conf.preprocessor.BaseInputPreProcessor; import org.deeplearning4j.nn.workspace.ArrayType; -import org.nd4j.linalg.api.ndarray.INDArray; import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; +import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.api.shape.Shape; -import org.nd4j.linalg.util.ArrayUtil; import org.nd4j.shade.jackson.annotation.JsonIgnoreProperties; import org.nd4j.shade.jackson.annotation.JsonProperty; diff --git a/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/preprocessors/TensorFlowCnnToFeedForwardPreProcessor.java b/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/preprocessors/TensorFlowCnnToFeedForwardPreProcessor.java index f80863a03..db7d2e990 100644 --- a/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/preprocessors/TensorFlowCnnToFeedForwardPreProcessor.java +++ b/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/preprocessors/TensorFlowCnnToFeedForwardPreProcessor.java @@ -20,9 +20,9 @@ import lombok.extern.slf4j.Slf4j; import lombok.val; import org.deeplearning4j.nn.conf.preprocessor.CnnToFeedForwardPreProcessor; import org.deeplearning4j.nn.workspace.ArrayType; +import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.api.shape.Shape; -import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; import org.nd4j.shade.jackson.annotation.JsonCreator; import org.nd4j.shade.jackson.annotation.JsonProperty; diff --git 
a/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/utils/DL4JKerasModelValidator.java b/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/utils/DL4JKerasModelValidator.java index cd4461082..2ace14aa3 100644 --- a/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/utils/DL4JKerasModelValidator.java +++ b/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/utils/DL4JKerasModelValidator.java @@ -1,28 +1,15 @@ package org.deeplearning4j.nn.modelimport.keras.utils; import lombok.NonNull; -import org.apache.commons.io.IOUtils; -import org.deeplearning4j.nn.api.Model; -import org.deeplearning4j.nn.conf.MultiLayerConfiguration; import org.deeplearning4j.nn.graph.ComputationGraph; import org.deeplearning4j.nn.modelimport.keras.Hdf5Archive; -import org.deeplearning4j.nn.modelimport.keras.KerasModel; import org.deeplearning4j.nn.modelimport.keras.config.KerasModelConfiguration; import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; -import org.deeplearning4j.util.ModelSerializer; import org.nd4j.validation.Nd4jCommonValidator; import org.nd4j.validation.ValidationResult; -import java.io.BufferedReader; import java.io.File; -import java.io.IOException; -import java.io.InputStreamReader; -import java.nio.charset.StandardCharsets; -import java.util.Arrays; import java.util.Collections; -import java.util.List; -import java.util.zip.ZipEntry; -import java.util.zip.ZipFile; /** * A utility for validating serialized Keras sequential and functional models for import into DL4J diff --git a/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/utils/KerasActivationUtils.java b/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/utils/KerasActivationUtils.java index bb2bb1ca0..f0ddfd912 100644 --- 
a/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/utils/KerasActivationUtils.java +++ b/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/utils/KerasActivationUtils.java @@ -21,7 +21,6 @@ import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurat import org.deeplearning4j.nn.modelimport.keras.exceptions.UnsupportedKerasConfigurationException; import org.nd4j.linalg.activations.Activation; import org.nd4j.linalg.activations.IActivation; -import org.nd4j.linalg.activations.impl.*; import java.util.Map; diff --git a/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/utils/KerasInitilizationUtils.java b/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/utils/KerasInitilizationUtils.java index b86b83be1..b4b5e6564 100644 --- a/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/utils/KerasInitilizationUtils.java +++ b/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/utils/KerasInitilizationUtils.java @@ -21,8 +21,7 @@ import org.deeplearning4j.nn.conf.distribution.*; import org.deeplearning4j.nn.modelimport.keras.config.KerasLayerConfiguration; import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurationException; import org.deeplearning4j.nn.modelimport.keras.exceptions.UnsupportedKerasConfigurationException; -import org.deeplearning4j.nn.weights.WeightInit; -import org.nd4j.linalg.primitives.Pair; +import org.deeplearning4j.nn.weights.*; import java.util.HashMap; import java.util.Map; @@ -42,76 +41,71 @@ public class KerasInitilizationUtils { * @return DL4J weight initialization enum * @see WeightInit */ - public static Pair mapWeightInitialization(String kerasInit, - KerasLayerConfiguration conf, - Map initConfig, - int kerasMajorVersion) + public 
static IWeightInit mapWeightInitialization(String kerasInit, + KerasLayerConfiguration conf, + Map initConfig, + int kerasMajorVersion) throws UnsupportedKerasConfigurationException, InvalidKerasConfigurationException { // TODO: Identity and VarianceScaling need "scale" factor - WeightInit init = null; - Distribution dist = null; if (kerasInit != null) { if (kerasInit.equals(conf.getINIT_GLOROT_NORMAL()) || kerasInit.equals(conf.getINIT_GLOROT_NORMAL_ALIAS())) { - init = WeightInit.XAVIER; + return WeightInit.XAVIER.getWeightInitFunction(); } else if (kerasInit.equals(conf.getINIT_GLOROT_UNIFORM()) || kerasInit.equals(conf.getINIT_GLOROT_UNIFORM_ALIAS())) { - init = WeightInit.XAVIER_UNIFORM; + return WeightInit.XAVIER_UNIFORM.getWeightInitFunction(); } else if (kerasInit.equals(conf.getINIT_LECUN_NORMAL()) || kerasInit.equals(conf.getINIT_LECUN_NORMAL_ALIAS())) { - init = WeightInit.LECUN_NORMAL; + return WeightInit.LECUN_NORMAL.getWeightInitFunction(); } else if (kerasInit.equals(conf.getINIT_LECUN_UNIFORM()) || kerasInit.equals(conf.getINIT_LECUN_UNIFORM_ALIAS())) { - init = WeightInit.LECUN_UNIFORM; + return WeightInit.LECUN_UNIFORM.getWeightInitFunction(); } else if (kerasInit.equals(conf.getINIT_HE_NORMAL()) || kerasInit.equals(conf.getINIT_HE_NORMAL_ALIAS())) { - init = WeightInit.RELU; + return WeightInit.RELU.getWeightInitFunction(); } else if (kerasInit.equals(conf.getINIT_HE_UNIFORM()) || kerasInit.equals(conf.getINIT_HE_UNIFORM_ALIAS())) { - init = WeightInit.RELU_UNIFORM; + return WeightInit.RELU_UNIFORM.getWeightInitFunction(); } else if (kerasInit.equals(conf.getINIT_ONE()) || kerasInit.equals(conf.getINIT_ONES()) || kerasInit.equals(conf.getINIT_ONES_ALIAS())) { - init = WeightInit.ONES; + return WeightInit.ONES.getWeightInitFunction(); } else if (kerasInit.equals(conf.getINIT_ZERO()) || kerasInit.equals(conf.getINIT_ZEROS()) || kerasInit.equals(conf.getINIT_ZEROS_ALIAS())) { - init = WeightInit.ZERO; + return 
WeightInit.ZERO.getWeightInitFunction(); } else if (kerasInit.equals(conf.getINIT_UNIFORM()) || kerasInit.equals(conf.getINIT_RANDOM_UNIFORM()) || kerasInit.equals(conf.getINIT_RANDOM_UNIFORM_ALIAS())) { if (kerasMajorVersion == 2) { double minVal = (double) initConfig.get(conf.getLAYER_FIELD_INIT_MINVAL()); double maxVal = (double) initConfig.get(conf.getLAYER_FIELD_INIT_MAXVAL()); - dist = new UniformDistribution(minVal, maxVal); + return new WeightInitDistribution(new UniformDistribution(minVal, maxVal)); } else { double scale = 0.05; if (initConfig.containsKey(conf.getLAYER_FIELD_INIT_SCALE())) scale = (double) initConfig.get(conf.getLAYER_FIELD_INIT_SCALE()); - dist = new UniformDistribution(-scale, scale); + return new WeightInitDistribution(new UniformDistribution(-scale, scale)); } - init = WeightInit.DISTRIBUTION; } else if (kerasInit.equals(conf.getINIT_NORMAL()) || kerasInit.equals(conf.getINIT_RANDOM_NORMAL()) || kerasInit.equals(conf.getINIT_RANDOM_NORMAL_ALIAS())) { if (kerasMajorVersion == 2) { double mean = (double) initConfig.get(conf.getLAYER_FIELD_INIT_MEAN()); double stdDev = (double) initConfig.get(conf.getLAYER_FIELD_INIT_STDDEV()); - dist = new NormalDistribution(mean, stdDev); + return new WeightInitDistribution(new NormalDistribution(mean, stdDev)); } else { double scale = 0.05; if (initConfig.containsKey(conf.getLAYER_FIELD_INIT_SCALE())) scale = (double) initConfig.get(conf.getLAYER_FIELD_INIT_SCALE()); - dist = new NormalDistribution(0, scale); + return new WeightInitDistribution(new NormalDistribution(0, scale)); } - init = WeightInit.DISTRIBUTION; } else if (kerasInit.equals(conf.getINIT_CONSTANT()) || kerasInit.equals(conf.getINIT_CONSTANT_ALIAS())) { double value = (double) initConfig.get(conf.getLAYER_FIELD_INIT_VALUE()); - dist = new ConstantDistribution(value); - init = WeightInit.DISTRIBUTION; + return new WeightInitDistribution(new ConstantDistribution(value)); } else if (kerasInit.equals(conf.getINIT_ORTHOGONAL()) || 
kerasInit.equals(conf.getINIT_ORTHOGONAL_ALIAS())) { if (kerasMajorVersion == 2) { @@ -121,34 +115,38 @@ public class KerasInitilizationUtils { } catch (Exception e) { gain = (int) initConfig.get(conf.getLAYER_FIELD_INIT_GAIN()); } - dist = new OrthogonalDistribution(gain); + return new WeightInitDistribution(new OrthogonalDistribution(gain)); } else { double scale = 1.1; if (initConfig.containsKey(conf.getLAYER_FIELD_INIT_SCALE())) scale = (double) initConfig.get(conf.getLAYER_FIELD_INIT_SCALE()); - dist = new OrthogonalDistribution(scale); + return new WeightInitDistribution(new OrthogonalDistribution(scale)); } - init = WeightInit.DISTRIBUTION; } else if (kerasInit.equals(conf.getINIT_TRUNCATED_NORMAL()) || kerasInit.equals(conf.getINIT_TRUNCATED_NORMAL_ALIAS())) { double mean = (double) initConfig.get(conf.getLAYER_FIELD_INIT_MEAN()); double stdDev = (double) initConfig.get(conf.getLAYER_FIELD_INIT_STDDEV()); - dist = new TruncatedNormalDistribution(mean, stdDev); - init = WeightInit.DISTRIBUTION; + return new WeightInitDistribution(new TruncatedNormalDistribution(mean, stdDev)); } else if (kerasInit.equals(conf.getINIT_IDENTITY()) || kerasInit.equals(conf.getINIT_IDENTITY_ALIAS())) { if (kerasMajorVersion == 2) { double gain = (double) initConfig.get(conf.getLAYER_FIELD_INIT_GAIN()); - if (gain != 1.) - log.warn("Scaled identity weight init not supported, setting gain=1"); + // Scaled identity when gain != 1.0, plain identity otherwise + if (gain != 1.0) { + return new WeightInitIdentity(gain); + } else { + return new WeightInitIdentity(); + } } else { double scale = 1.; if (initConfig.containsKey(conf.getLAYER_FIELD_INIT_SCALE())) scale = (double) initConfig.get(conf.getLAYER_FIELD_INIT_SCALE()); - if (scale != 1.) 
- log.warn("Scaled identity weight init not supported, setting scale=1"); + if (scale != 1.0) { + return new WeightInitIdentity(scale); + } else { + return new WeightInitIdentity(); + } } - init = WeightInit.IDENTITY; } else if (kerasInit.equals(conf.getINIT_VARIANCE_SCALING())) { double scale; try { @@ -156,32 +154,27 @@ public class KerasInitilizationUtils { } catch (Exception e) { scale = (int) initConfig.get(conf.getLAYER_FIELD_INIT_SCALE()); } - if (scale != 1.) - log.warn("Scaled identity weight init not supported, setting scale=1"); String mode = (String) initConfig.get(conf.getLAYER_FIELD_INIT_MODE()); String distribution = (String) initConfig.get(conf.getLAYER_FIELD_INIT_DISTRIBUTION()); switch (mode) { case "fan_in": if (distribution.equals("normal")) { - init = WeightInit.VAR_SCALING_NORMAL_FAN_IN; + return new WeightInitVarScalingNormalFanIn(scale); } else { - init = WeightInit.VAR_SCALING_UNIFORM_FAN_IN; + return new WeightInitVarScalingUniformFanIn(scale); } - break; case "fan_out": if (distribution.equals("normal")) { - init = WeightInit.VAR_SCALING_NORMAL_FAN_OUT; + return new WeightInitVarScalingNormalFanOut(scale); } else { - init = WeightInit.VAR_SCALING_UNIFORM_FAN_OUT; + return new WeightInitVarScalingUniformFanOut(scale); } - break; case "fan_avg": if (distribution.equals("normal")) { - init = WeightInit.VAR_SCALING_NORMAL_FAN_AVG; + return new WeightInitVarScalingNormalFanAvg(scale); } else { - init = WeightInit.VAR_SCALING_UNIFORM_FAN_AVG; + return new WeightInitVarScalingUniformFanAvg(scale); } - break; default: throw new InvalidKerasConfigurationException("Initialization argument 'mode' has to be either " + "fan_in, fan_out or fan_avg"); @@ -190,7 +183,7 @@ public class KerasInitilizationUtils { throw new UnsupportedKerasConfigurationException("Unknown keras weight initializer " + kerasInit); } } - return new Pair<>(init, dist); + throw new IllegalStateException("Error getting Keras weight initialization"); } /** @@ -202,7 +195,7 @@ public 
class KerasInitilizationUtils { * @throws InvalidKerasConfigurationException Invalid Keras config * @throws UnsupportedKerasConfigurationException Unsupported Keras config */ - public static Pair getWeightInitFromConfig(Map layerConfig, String initField, + public static IWeightInit getWeightInitFromConfig(Map layerConfig, String initField, boolean enforceTrainingConfig, KerasLayerConfiguration conf, int kerasMajorVersion) @@ -225,14 +218,14 @@ public class KerasInitilizationUtils { throw new UnsupportedKerasConfigurationException("Incomplete initialization class"); } } - Pair init; + IWeightInit init; try { init = mapWeightInitialization(kerasInit, conf, initMap, kerasMajorVersion); } catch (UnsupportedKerasConfigurationException e) { if (enforceTrainingConfig) throw e; else { - init = new Pair<>(WeightInit.XAVIER, null); + init = new WeightInitXavier(); log.warn("Unknown weight initializer " + kerasInit + " (Using XAVIER instead)."); } } diff --git a/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/utils/KerasModelUtils.java b/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/utils/KerasModelUtils.java index f752b5b03..b33fda9f4 100644 --- a/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/utils/KerasModelUtils.java +++ b/deeplearning4j/deeplearning4j-modelimport/src/main/java/org/deeplearning4j/nn/modelimport/keras/utils/KerasModelUtils.java @@ -21,7 +21,6 @@ import lombok.extern.slf4j.Slf4j; import org.apache.commons.lang3.StringUtils; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.Model; -import org.deeplearning4j.nn.conf.layers.wrapper.BaseWrapperLayer; import org.deeplearning4j.nn.graph.ComputationGraph; import org.deeplearning4j.nn.modelimport.keras.Hdf5Archive; import org.deeplearning4j.nn.modelimport.keras.KerasLayer; diff --git 
a/deeplearning4j/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/KerasTestUtils.java b/deeplearning4j/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/KerasTestUtils.java index bd6561d37..27aa340e8 100644 --- a/deeplearning4j/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/KerasTestUtils.java +++ b/deeplearning4j/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/KerasTestUtils.java @@ -16,7 +16,6 @@ package org.deeplearning4j.nn.modelimport.keras; -import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.conf.layers.BaseLayer; import org.deeplearning4j.nn.conf.layers.samediff.AbstractSameDiffLayer; import org.nd4j.linalg.learning.regularization.L1Regularization; @@ -25,7 +24,6 @@ import org.nd4j.linalg.learning.regularization.Regularization; import java.util.List; -import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNotNull; public class KerasTestUtils { diff --git a/deeplearning4j/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/MiscTests.java b/deeplearning4j/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/MiscTests.java index 5c288b21c..dcfd53518 100644 --- a/deeplearning4j/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/MiscTests.java +++ b/deeplearning4j/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/MiscTests.java @@ -22,8 +22,6 @@ import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; import org.junit.Rule; import org.junit.Test; import org.junit.rules.TemporaryFolder; -import org.nd4j.linalg.io.ClassPathResource; -import org.nd4j.linalg.util.Nd4jValidator; import org.nd4j.resources.Resources; import org.nd4j.validation.ValidationResult; diff --git 
a/deeplearning4j/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/configurations/FullModelComparisons.java b/deeplearning4j/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/configurations/FullModelComparisons.java index f0dfb3694..6043d7d48 100644 --- a/deeplearning4j/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/configurations/FullModelComparisons.java +++ b/deeplearning4j/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/configurations/FullModelComparisons.java @@ -21,7 +21,6 @@ import org.datavec.api.records.reader.SequenceRecordReader; import org.datavec.api.records.reader.impl.csv.CSVSequenceRecordReader; import org.datavec.api.split.NumberedFileInputSplit; import org.deeplearning4j.datasets.datavec.SequenceRecordReaderDataSetIterator; - import org.deeplearning4j.nn.layers.recurrent.LSTM; import org.deeplearning4j.nn.layers.recurrent.LastTimeStepLayer; import org.deeplearning4j.nn.modelimport.keras.BaseDL4JTest; @@ -30,7 +29,6 @@ import org.deeplearning4j.nn.modelimport.keras.KerasSequentialModel; import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurationException; import org.deeplearning4j.nn.modelimport.keras.exceptions.UnsupportedKerasConfigurationException; import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; -import org.junit.Assert; import org.junit.Ignore; import org.junit.Rule; import org.junit.Test; diff --git a/deeplearning4j/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/configurations/Keras1ModelConfigurationTest.java b/deeplearning4j/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/configurations/Keras1ModelConfigurationTest.java index 6dce1b714..554a2c2d1 100644 --- a/deeplearning4j/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/configurations/Keras1ModelConfigurationTest.java +++ 
b/deeplearning4j/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/configurations/Keras1ModelConfigurationTest.java @@ -24,7 +24,6 @@ import org.deeplearning4j.nn.modelimport.keras.BaseDL4JTest; import org.deeplearning4j.nn.modelimport.keras.KerasModel; import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; import org.junit.Test; -import org.nd4j.linalg.io.ClassPathResource; import org.nd4j.resources.Resources; import java.io.InputStream; diff --git a/deeplearning4j/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/configurations/Keras2ModelConfigurationTest.java b/deeplearning4j/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/configurations/Keras2ModelConfigurationTest.java index 162dc235a..81103d315 100644 --- a/deeplearning4j/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/configurations/Keras2ModelConfigurationTest.java +++ b/deeplearning4j/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/configurations/Keras2ModelConfigurationTest.java @@ -30,11 +30,9 @@ import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; import org.junit.Test; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.factory.Nd4j; -import org.nd4j.linalg.io.ClassPathResource; import org.nd4j.resources.Resources; import java.io.File; -import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStream; import java.util.Arrays; diff --git a/deeplearning4j/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/configurations/KerasInitilizationTest.java b/deeplearning4j/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/configurations/KerasInitilizationTest.java index 7072f1956..8ac231e12 100644 --- a/deeplearning4j/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/configurations/KerasInitilizationTest.java 
+++ b/deeplearning4j/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/configurations/KerasInitilizationTest.java @@ -25,6 +25,8 @@ import org.deeplearning4j.nn.modelimport.keras.config.KerasLayerConfiguration; import org.deeplearning4j.nn.modelimport.keras.layers.core.KerasDense; import org.deeplearning4j.nn.weights.IWeightInit; import org.deeplearning4j.nn.weights.WeightInit; +import org.deeplearning4j.nn.weights.WeightInitIdentity; +import org.deeplearning4j.nn.weights.WeightInitVarScalingNormalFanIn; import org.junit.Test; import java.util.HashMap; @@ -94,11 +96,11 @@ public class KerasInitilizationTest extends BaseDL4JTest { WeightInit.RELU_UNIFORM.getWeightInitFunction(), WeightInit.ONES.getWeightInitFunction(), WeightInit.ZERO.getWeightInitFunction(), - WeightInit.IDENTITY.getWeightInitFunction(), + new WeightInitIdentity(0.2), WeightInit.DISTRIBUTION.getWeightInitFunction(new NormalDistribution(mean, stdDev)), WeightInit.DISTRIBUTION.getWeightInitFunction(new OrthogonalDistribution(gain)), WeightInit.DISTRIBUTION.getWeightInitFunction(new ConstantDistribution(value)), - WeightInit.VAR_SCALING_NORMAL_FAN_IN.getWeightInitFunction()}; + new WeightInitVarScalingNormalFanIn(0.2)}; } private Distribution[] dl4jDistributions() { diff --git a/deeplearning4j/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/configurations/KerasModelImportTest.java b/deeplearning4j/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/configurations/KerasModelImportTest.java index a015dc24f..b5d3c9ab6 100644 --- a/deeplearning4j/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/configurations/KerasModelImportTest.java +++ b/deeplearning4j/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/configurations/KerasModelImportTest.java @@ -17,22 +17,16 @@ package org.deeplearning4j.nn.modelimport.keras.configurations; import 
lombok.extern.slf4j.Slf4j; -import lombok.val; import org.deeplearning4j.nn.modelimport.keras.BaseDL4JTest; import org.deeplearning4j.nn.modelimport.keras.KerasModelImport; import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurationException; import org.deeplearning4j.nn.modelimport.keras.exceptions.UnsupportedKerasConfigurationException; import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; import org.junit.Test; -import org.nd4j.linalg.api.ndarray.INDArray; -import org.nd4j.linalg.factory.Nd4j; -import org.nd4j.linalg.io.ClassPathResource; import org.nd4j.resources.Resources; -import java.io.File; import java.io.IOException; -import static org.junit.Assert.assertArrayEquals; import static org.junit.Assert.assertNotNull; /** diff --git a/deeplearning4j/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/e2e/KerasLambdaTest.java b/deeplearning4j/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/e2e/KerasLambdaTest.java index 31611283f..97ae4318f 100644 --- a/deeplearning4j/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/e2e/KerasLambdaTest.java +++ b/deeplearning4j/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/e2e/KerasLambdaTest.java @@ -31,7 +31,6 @@ import org.nd4j.autodiff.samediff.SDVariable; import org.nd4j.autodiff.samediff.SameDiff; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.factory.Nd4j; -import org.nd4j.linalg.io.ClassPathResource; import org.nd4j.resources.Resources; import java.io.File; diff --git a/deeplearning4j/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/e2e/KerasModelEndToEndTest.java b/deeplearning4j/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/e2e/KerasModelEndToEndTest.java index 874931262..b33ff8d1f 100644 --- 
a/deeplearning4j/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/e2e/KerasModelEndToEndTest.java +++ b/deeplearning4j/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/e2e/KerasModelEndToEndTest.java @@ -24,22 +24,19 @@ import org.deeplearning4j.eval.ROCMultiClass; import org.deeplearning4j.gradientcheck.GradientCheckUtil; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.layers.IOutputLayer; -import org.deeplearning4j.nn.conf.layers.CnnLossLayer; import org.deeplearning4j.nn.conf.layers.FeedForwardLayer; import org.deeplearning4j.nn.conf.layers.LossLayer; import org.deeplearning4j.nn.conf.layers.RnnOutputLayer; import org.deeplearning4j.nn.graph.ComputationGraph; -import org.deeplearning4j.nn.layers.recurrent.LSTM; -import org.deeplearning4j.nn.layers.recurrent.LastTimeStepLayer; -import org.deeplearning4j.nn.layers.wrapper.BaseWrapperLayer; -import org.deeplearning4j.nn.modelimport.keras.*; +import org.deeplearning4j.nn.modelimport.keras.BaseDL4JTest; +import org.deeplearning4j.nn.modelimport.keras.Hdf5Archive; +import org.deeplearning4j.nn.modelimport.keras.KerasModel; +import org.deeplearning4j.nn.modelimport.keras.KerasSequentialModel; import org.deeplearning4j.nn.modelimport.keras.utils.KerasModelBuilder; import org.deeplearning4j.nn.modelimport.keras.utils.KerasModelUtils; import org.deeplearning4j.nn.multilayer.MultiLayerNetwork; import org.deeplearning4j.nn.transferlearning.FineTuneConfiguration; import org.deeplearning4j.nn.transferlearning.TransferLearning; -import org.deeplearning4j.nn.workspace.ArrayType; -import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; import org.junit.Ignore; import org.junit.Rule; import org.junit.Test; @@ -47,27 +44,25 @@ import org.junit.rules.TemporaryFolder; import org.nd4j.linalg.activations.Activation; import org.nd4j.linalg.activations.IActivation; import org.nd4j.linalg.activations.impl.*; -import 
org.nd4j.linalg.api.buffer.DataBuffer; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.factory.Nd4j; -import org.nd4j.linalg.io.ClassPathResource; import org.nd4j.linalg.learning.config.NoOp; import org.nd4j.linalg.lossfunctions.LossFunctions; import org.nd4j.resources.Resources; import java.io.File; -import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStream; import java.net.URL; import java.nio.file.Files; import java.nio.file.StandardCopyOption; -import java.util.*; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Random; -import static org.junit.Assert.assertArrayEquals; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; +import static org.junit.Assert.*; /** * Unit tests for end-to-end Keras model import. diff --git a/deeplearning4j/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/e2e/KerasYolo9000PredictTest.java b/deeplearning4j/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/e2e/KerasYolo9000PredictTest.java index f1fcc3ded..8bd6e779d 100644 --- a/deeplearning4j/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/e2e/KerasYolo9000PredictTest.java +++ b/deeplearning4j/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/e2e/KerasYolo9000PredictTest.java @@ -21,7 +21,6 @@ import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.graph.ComputationGraph; import org.deeplearning4j.nn.modelimport.keras.BaseDL4JTest; import org.deeplearning4j.nn.modelimport.keras.KerasLayer; -import org.deeplearning4j.nn.modelimport.keras.KerasModel; import org.deeplearning4j.nn.modelimport.keras.KerasModelImport; import org.deeplearning4j.nn.modelimport.keras.layers.convolutional.KerasSpaceToDepth; import org.deeplearning4j.nn.transferlearning.TransferLearning; 
@@ -31,11 +30,8 @@ import org.junit.Test; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.dataset.api.preprocessor.ImagePreProcessingScaler; import org.nd4j.linalg.factory.Nd4j; -import org.nd4j.linalg.io.ClassPathResource; import java.io.File; -import java.nio.file.Files; -import java.nio.file.StandardCopyOption; /** * Import previously stored YOLO9000 Keras net from https://github.com/allanzelener/YAD2K. diff --git a/deeplearning4j/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/e2e/KerasYolo9000Test.java b/deeplearning4j/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/e2e/KerasYolo9000Test.java index 403610c10..dcfe7bfda 100644 --- a/deeplearning4j/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/e2e/KerasYolo9000Test.java +++ b/deeplearning4j/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/e2e/KerasYolo9000Test.java @@ -26,7 +26,6 @@ import org.junit.Ignore; import org.junit.Rule; import org.junit.Test; import org.junit.rules.TemporaryFolder; -import org.nd4j.linalg.io.ClassPathResource; import org.nd4j.resources.Resources; import java.io.File; diff --git a/deeplearning4j/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasAtrousConvolution1DTest.java b/deeplearning4j/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasAtrousConvolution1DTest.java index 0a408ac83..eccaeb536 100644 --- a/deeplearning4j/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasAtrousConvolution1DTest.java +++ b/deeplearning4j/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasAtrousConvolution1DTest.java @@ -27,16 +27,11 @@ import org.deeplearning4j.nn.modelimport.keras.layers.convolutional.KerasAtrousC import 
org.deeplearning4j.nn.weights.IWeightInit; import org.deeplearning4j.nn.weights.WeightInitXavier; import org.junit.Test; -import org.nd4j.linalg.learning.regularization.L1Regularization; -import org.nd4j.linalg.learning.regularization.L2Regularization; -import org.nd4j.linalg.learning.regularization.Regularization; import java.util.HashMap; -import java.util.List; import java.util.Map; import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertNotNull; /** * @author Max Pumperla diff --git a/deeplearning4j/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasConvolution3DTest.java b/deeplearning4j/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasConvolution3DTest.java index 4737ec128..ff0ba8f3d 100644 --- a/deeplearning4j/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasConvolution3DTest.java +++ b/deeplearning4j/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasConvolution3DTest.java @@ -28,9 +28,6 @@ import org.deeplearning4j.nn.modelimport.keras.layers.convolutional.KerasConvolu import org.deeplearning4j.nn.weights.IWeightInit; import org.deeplearning4j.nn.weights.WeightInitXavier; import org.junit.Test; -import org.nd4j.linalg.learning.regularization.L1Regularization; -import org.nd4j.linalg.learning.regularization.L2Regularization; -import org.nd4j.linalg.learning.regularization.Regularization; import java.util.ArrayList; import java.util.HashMap; @@ -39,7 +36,6 @@ import java.util.Map; import static org.junit.Assert.assertArrayEquals; import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertNotNull; /** * @author Max Pumperla diff --git a/deeplearning4j/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasCropping1DTest.java 
b/deeplearning4j/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasCropping1DTest.java index f356f674f..1676f6136 100644 --- a/deeplearning4j/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasCropping1DTest.java +++ b/deeplearning4j/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasCropping1DTest.java @@ -24,7 +24,6 @@ import org.deeplearning4j.nn.modelimport.keras.config.KerasLayerConfiguration; import org.deeplearning4j.nn.modelimport.keras.layers.convolutional.KerasCropping1D; import org.junit.Test; -import java.util.ArrayList; import java.util.HashMap; import java.util.Map; diff --git a/deeplearning4j/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasCropping3DTest.java b/deeplearning4j/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasCropping3DTest.java index 1a6f564b4..6ae3065b6 100644 --- a/deeplearning4j/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasCropping3DTest.java +++ b/deeplearning4j/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasCropping3DTest.java @@ -16,13 +16,11 @@ package org.deeplearning4j.nn.modelimport.keras.layers.convolution; -import org.deeplearning4j.nn.conf.layers.convolutional.Cropping2D; import org.deeplearning4j.nn.conf.layers.convolutional.Cropping3D; import org.deeplearning4j.nn.modelimport.keras.BaseDL4JTest; import org.deeplearning4j.nn.modelimport.keras.config.Keras1LayerConfiguration; import org.deeplearning4j.nn.modelimport.keras.config.Keras2LayerConfiguration; import org.deeplearning4j.nn.modelimport.keras.config.KerasLayerConfiguration; -import org.deeplearning4j.nn.modelimport.keras.layers.convolutional.KerasCropping2D; import 
org.deeplearning4j.nn.modelimport.keras.layers.convolutional.KerasCropping3D; import org.junit.Test; diff --git a/deeplearning4j/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasDepthwiseConvolution2DTest.java b/deeplearning4j/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasDepthwiseConvolution2DTest.java index a79fab8da..364c50e72 100644 --- a/deeplearning4j/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasDepthwiseConvolution2DTest.java +++ b/deeplearning4j/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasDepthwiseConvolution2DTest.java @@ -30,15 +30,11 @@ import org.deeplearning4j.nn.weights.IWeightInit; import org.deeplearning4j.nn.weights.WeightInitXavier; import org.junit.Test; import org.nd4j.base.Preconditions; -import org.nd4j.linalg.learning.regularization.L1Regularization; -import org.nd4j.linalg.learning.regularization.L2Regularization; -import org.nd4j.linalg.learning.regularization.Regularization; import java.util.*; import static org.junit.Assert.assertArrayEquals; import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertNotNull; /** * @author Max Pumperla diff --git a/deeplearning4j/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasUpsampling1DTest.java b/deeplearning4j/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasUpsampling1DTest.java index 182054900..aec4278e2 100644 --- a/deeplearning4j/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasUpsampling1DTest.java +++ b/deeplearning4j/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasUpsampling1DTest.java @@ -17,18 +17,14 @@ package 
org.deeplearning4j.nn.modelimport.keras.layers.convolution; import org.deeplearning4j.nn.conf.layers.Upsampling1D; -import org.deeplearning4j.nn.conf.layers.Upsampling2D; import org.deeplearning4j.nn.modelimport.keras.BaseDL4JTest; import org.deeplearning4j.nn.modelimport.keras.config.Keras1LayerConfiguration; import org.deeplearning4j.nn.modelimport.keras.config.Keras2LayerConfiguration; import org.deeplearning4j.nn.modelimport.keras.config.KerasLayerConfiguration; import org.deeplearning4j.nn.modelimport.keras.layers.convolutional.KerasUpsampling1D; -import org.deeplearning4j.nn.modelimport.keras.layers.convolutional.KerasUpsampling2D; import org.junit.Test; -import java.util.ArrayList; import java.util.HashMap; -import java.util.List; import java.util.Map; import static org.junit.Assert.assertEquals; diff --git a/deeplearning4j/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasUpsampling2DTest.java b/deeplearning4j/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasUpsampling2DTest.java index 3c7b30b57..cea117f8f 100644 --- a/deeplearning4j/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasUpsampling2DTest.java +++ b/deeplearning4j/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasUpsampling2DTest.java @@ -17,13 +17,11 @@ package org.deeplearning4j.nn.modelimport.keras.layers.convolution; import org.deeplearning4j.nn.conf.layers.Upsampling2D; -import org.deeplearning4j.nn.conf.layers.ZeroPadding1DLayer; import org.deeplearning4j.nn.modelimport.keras.BaseDL4JTest; import org.deeplearning4j.nn.modelimport.keras.config.Keras1LayerConfiguration; import org.deeplearning4j.nn.modelimport.keras.config.Keras2LayerConfiguration; import org.deeplearning4j.nn.modelimport.keras.config.KerasLayerConfiguration; import 
org.deeplearning4j.nn.modelimport.keras.layers.convolutional.KerasUpsampling2D; -import org.deeplearning4j.nn.modelimport.keras.layers.convolutional.KerasZeroPadding1D; import org.junit.Test; import java.util.ArrayList; diff --git a/deeplearning4j/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasZeroPadding3DTest.java b/deeplearning4j/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasZeroPadding3DTest.java index 779d9ce51..c0a60defd 100644 --- a/deeplearning4j/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasZeroPadding3DTest.java +++ b/deeplearning4j/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/convolution/KerasZeroPadding3DTest.java @@ -17,12 +17,10 @@ package org.deeplearning4j.nn.modelimport.keras.layers.convolution; import org.deeplearning4j.nn.conf.layers.ZeroPadding3DLayer; -import org.deeplearning4j.nn.conf.layers.ZeroPaddingLayer; import org.deeplearning4j.nn.modelimport.keras.BaseDL4JTest; import org.deeplearning4j.nn.modelimport.keras.config.Keras1LayerConfiguration; import org.deeplearning4j.nn.modelimport.keras.config.Keras2LayerConfiguration; import org.deeplearning4j.nn.modelimport.keras.config.KerasLayerConfiguration; -import org.deeplearning4j.nn.modelimport.keras.layers.convolutional.KerasZeroPadding2D; import org.deeplearning4j.nn.modelimport.keras.layers.convolutional.KerasZeroPadding3D; import org.junit.Test; diff --git a/deeplearning4j/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasDenseTest.java b/deeplearning4j/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasDenseTest.java index 334ab96d3..cca2515a8 100644 --- 
a/deeplearning4j/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasDenseTest.java +++ b/deeplearning4j/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasDenseTest.java @@ -26,16 +26,11 @@ import org.deeplearning4j.nn.modelimport.keras.config.KerasLayerConfiguration; import org.deeplearning4j.nn.weights.IWeightInit; import org.deeplearning4j.nn.weights.WeightInitXavier; import org.junit.Test; -import org.nd4j.linalg.learning.regularization.L1Regularization; -import org.nd4j.linalg.learning.regularization.L2Regularization; -import org.nd4j.linalg.learning.regularization.Regularization; import java.util.HashMap; -import java.util.List; import java.util.Map; import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertNotNull; /** * @author Max Pumperla diff --git a/deeplearning4j/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasPermuteTest.java b/deeplearning4j/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasPermuteTest.java index 42cb79cfb..1f2400426 100644 --- a/deeplearning4j/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasPermuteTest.java +++ b/deeplearning4j/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasPermuteTest.java @@ -24,10 +24,12 @@ import org.deeplearning4j.nn.modelimport.keras.config.KerasLayerConfiguration; import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurationException; import org.deeplearning4j.nn.modelimport.keras.exceptions.UnsupportedKerasConfigurationException; import org.deeplearning4j.nn.modelimport.keras.preprocessors.PermutePreprocessor; -import org.deeplearning4j.nn.modelimport.keras.preprocessors.ReshapePreprocessor; import org.junit.Test; -import java.util.*; +import java.util.ArrayList; +import 
java.util.HashMap; +import java.util.List; +import java.util.Map; import static org.junit.Assert.assertEquals; diff --git a/deeplearning4j/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasReshapeTest.java b/deeplearning4j/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasReshapeTest.java index dafafea1d..19d5ce623 100644 --- a/deeplearning4j/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasReshapeTest.java +++ b/deeplearning4j/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/core/KerasReshapeTest.java @@ -24,11 +24,11 @@ import org.deeplearning4j.nn.modelimport.keras.config.KerasLayerConfiguration; import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurationException; import org.deeplearning4j.nn.modelimport.keras.exceptions.UnsupportedKerasConfigurationException; import org.deeplearning4j.nn.modelimport.keras.preprocessors.ReshapePreprocessor; +import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; import org.junit.Assert; import org.junit.Test; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.factory.Nd4j; -import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; import java.util.*; diff --git a/deeplearning4j/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/embeddings/KerasEmbeddingTest.java b/deeplearning4j/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/embeddings/KerasEmbeddingTest.java index abeba3da7..b171e063f 100644 --- a/deeplearning4j/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/embeddings/KerasEmbeddingTest.java +++ b/deeplearning4j/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/embeddings/KerasEmbeddingTest.java @@ -26,11 +26,7 @@ import org.junit.Test; import 
org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.factory.Nd4j; -import java.util.ArrayList; -import java.util.Collections; -import java.util.HashMap; -import java.util.List; -import java.util.Map; +import java.util.*; import static org.junit.Assert.assertEquals; diff --git a/deeplearning4j/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/local/KerasLocallyConnected1DTest.java b/deeplearning4j/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/local/KerasLocallyConnected1DTest.java index f8dc975ea..428d5d99e 100644 --- a/deeplearning4j/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/local/KerasLocallyConnected1DTest.java +++ b/deeplearning4j/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/local/KerasLocallyConnected1DTest.java @@ -20,7 +20,6 @@ import org.deeplearning4j.nn.conf.ConvolutionMode; import org.deeplearning4j.nn.conf.dropout.Dropout; import org.deeplearning4j.nn.conf.inputs.InputType; import org.deeplearning4j.nn.conf.layers.LocallyConnected1D; -import org.deeplearning4j.nn.conf.layers.LocallyConnected2D; import org.deeplearning4j.nn.modelimport.keras.BaseDL4JTest; import org.deeplearning4j.nn.modelimport.keras.KerasTestUtils; import org.deeplearning4j.nn.modelimport.keras.config.Keras1LayerConfiguration; @@ -31,10 +30,8 @@ import org.junit.Test; import java.util.ArrayList; import java.util.HashMap; -import java.util.List; import java.util.Map; -import static org.junit.Assert.assertArrayEquals; import static org.junit.Assert.assertEquals; /** diff --git a/deeplearning4j/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/local/KerasLocallyConnected2DTest.java b/deeplearning4j/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/local/KerasLocallyConnected2DTest.java index b38b8f783..1ea69e06a 100644 --- 
a/deeplearning4j/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/local/KerasLocallyConnected2DTest.java +++ b/deeplearning4j/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/local/KerasLocallyConnected2DTest.java @@ -27,15 +27,14 @@ import org.deeplearning4j.nn.modelimport.keras.config.Keras2LayerConfiguration; import org.deeplearning4j.nn.modelimport.keras.config.KerasLayerConfiguration; import org.deeplearning4j.nn.weights.WeightInit; import org.junit.Test; -import org.nd4j.linalg.learning.regularization.L1Regularization; -import org.nd4j.linalg.learning.regularization.L2Regularization; -import org.nd4j.linalg.learning.regularization.Regularization; -import java.util.*; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; import static org.junit.Assert.assertArrayEquals; import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertNotNull; /** * @author Max Pumperla diff --git a/deeplearning4j/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/pooling/KerasPooling3DTest.java b/deeplearning4j/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/pooling/KerasPooling3DTest.java index cb6a66155..9026c7308 100644 --- a/deeplearning4j/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/pooling/KerasPooling3DTest.java +++ b/deeplearning4j/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/pooling/KerasPooling3DTest.java @@ -19,7 +19,6 @@ package org.deeplearning4j.nn.modelimport.keras.layers.pooling; import org.deeplearning4j.nn.conf.ConvolutionMode; import org.deeplearning4j.nn.conf.layers.PoolingType; import org.deeplearning4j.nn.conf.layers.Subsampling3DLayer; -import org.deeplearning4j.nn.conf.layers.SubsamplingLayer; import 
org.deeplearning4j.nn.modelimport.keras.BaseDL4JTest; import org.deeplearning4j.nn.modelimport.keras.config.Keras1LayerConfiguration; import org.deeplearning4j.nn.modelimport.keras.config.Keras2LayerConfiguration; diff --git a/deeplearning4j/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/recurrent/KerasLSTMTest.java b/deeplearning4j/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/recurrent/KerasLSTMTest.java index e2d0b7a03..3b82f14ae 100644 --- a/deeplearning4j/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/recurrent/KerasLSTMTest.java +++ b/deeplearning4j/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/layers/recurrent/KerasLSTMTest.java @@ -33,14 +33,13 @@ import org.deeplearning4j.nn.weights.IWeightInit; import org.deeplearning4j.nn.weights.WeightInitXavier; import org.junit.Assert; import org.junit.Test; -import org.nd4j.linalg.learning.regularization.L1Regularization; -import org.nd4j.linalg.learning.regularization.L2Regularization; -import org.nd4j.linalg.learning.regularization.Regularization; -import java.util.*; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertNotNull; /** * @author Max Pumperla diff --git a/deeplearning4j/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/optimizers/OptimizerImport.java b/deeplearning4j/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/optimizers/OptimizerImport.java index 8819ca9b9..f2a693d9a 100644 --- a/deeplearning4j/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/optimizers/OptimizerImport.java +++ b/deeplearning4j/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/optimizers/OptimizerImport.java @@ 
-16,15 +16,12 @@ package org.deeplearning4j.nn.modelimport.keras.optimizers; -import org.deeplearning4j.config.DL4JSystemProperties; import org.deeplearning4j.nn.modelimport.keras.BaseDL4JTest; import org.deeplearning4j.nn.modelimport.keras.KerasModel; import org.deeplearning4j.nn.modelimport.keras.KerasSequentialModel; -import org.deeplearning4j.nn.modelimport.keras.e2e.KerasModelEndToEndTest; import org.deeplearning4j.nn.modelimport.keras.utils.KerasModelBuilder; import org.deeplearning4j.util.DL4JFileUtils; import org.junit.Test; -import org.nd4j.linalg.io.ClassPathResource; import org.nd4j.resources.Resources; import java.io.File; @@ -32,8 +29,6 @@ import java.io.InputStream; import java.nio.file.Files; import java.nio.file.StandardCopyOption; -import static java.io.File.createTempFile; - public class OptimizerImport extends BaseDL4JTest { @Test diff --git a/deeplearning4j/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/preprocessing/sequence/TimeSeriesGeneratorImportTest.java b/deeplearning4j/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/preprocessing/sequence/TimeSeriesGeneratorImportTest.java index 8753f772c..577e089f9 100644 --- a/deeplearning4j/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/preprocessing/sequence/TimeSeriesGeneratorImportTest.java +++ b/deeplearning4j/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/preprocessing/sequence/TimeSeriesGeneratorImportTest.java @@ -18,9 +18,7 @@ package org.deeplearning4j.nn.modelimport.keras.preprocessing.sequence; import org.deeplearning4j.nn.modelimport.keras.BaseDL4JTest; import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurationException; -import org.deeplearning4j.nn.modelimport.keras.preprocessing.text.KerasTokenizer; import org.junit.Test; -import org.nd4j.linalg.io.ClassPathResource; import org.nd4j.resources.Resources; import 
java.io.IOException; diff --git a/deeplearning4j/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/preprocessing/text/TokenizerImportTest.java b/deeplearning4j/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/preprocessing/text/TokenizerImportTest.java index f229ec813..45114685b 100644 --- a/deeplearning4j/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/preprocessing/text/TokenizerImportTest.java +++ b/deeplearning4j/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/preprocessing/text/TokenizerImportTest.java @@ -19,15 +19,11 @@ package org.deeplearning4j.nn.modelimport.keras.preprocessing.text; import org.deeplearning4j.nn.modelimport.keras.BaseDL4JTest; import org.deeplearning4j.nn.modelimport.keras.exceptions.InvalidKerasConfigurationException; import org.junit.Test; -import org.nd4j.linalg.io.ClassPathResource; import org.nd4j.resources.Resources; import java.io.IOException; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertNull; -import static org.junit.Assert.assertTrue; +import static org.junit.Assert.*; /** * Import Keras Tokenizer diff --git a/deeplearning4j/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/preprocessing/text/TokenizerTest.java b/deeplearning4j/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/preprocessing/text/TokenizerTest.java index bbcd00372..a4fb6994b 100644 --- a/deeplearning4j/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/preprocessing/text/TokenizerTest.java +++ b/deeplearning4j/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/preprocessing/text/TokenizerTest.java @@ -20,7 +20,6 @@ import org.deeplearning4j.nn.modelimport.keras.BaseDL4JTest; import org.junit.Test; import 
org.nd4j.linalg.api.ndarray.INDArray; -import java.util.Arrays; import java.util.HashMap; import java.util.Map; diff --git a/deeplearning4j/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/weights/KerasWeightSettingTests.java b/deeplearning4j/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/weights/KerasWeightSettingTests.java index 18cf3305d..7791e3417 100644 --- a/deeplearning4j/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/weights/KerasWeightSettingTests.java +++ b/deeplearning4j/deeplearning4j-modelimport/src/test/java/org/deeplearning4j/nn/modelimport/keras/weights/KerasWeightSettingTests.java @@ -29,7 +29,6 @@ import org.junit.Test; import org.junit.rules.TemporaryFolder; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.factory.Nd4j; -import org.nd4j.linalg.io.ClassPathResource; import org.nd4j.resources.Resources; import java.io.File; diff --git a/deeplearning4j/deeplearning4j-nearestneighbors-parent/deeplearning4j-nearestneighbor-server/pom.xml b/deeplearning4j/deeplearning4j-nearestneighbors-parent/deeplearning4j-nearestneighbor-server/pom.xml index 7477c7794..38ee4204c 100644 --- a/deeplearning4j/deeplearning4j-nearestneighbors-parent/deeplearning4j-nearestneighbor-server/pom.xml +++ b/deeplearning4j/deeplearning4j-nearestneighbors-parent/deeplearning4j-nearestneighbor-server/pom.xml @@ -77,71 +77,6 @@ ${project.version}
- - com.google.protobuf - protobuf-java - ${google.protobuf.version} - - - joda-time - joda-time - ${jodatime.version} - - - org.apache.commons - commons-lang3 - ${commons-lang3.version} - - - org.hibernate - hibernate-validator - ${hibernate.version} - - - org.scala-lang - scala-library - ${scala.version} - - - org.scala-lang - scala-reflect - ${scala.version} - - - org.yaml - snakeyaml - ${snakeyaml.version} - - - com.fasterxml.jackson.core - jackson-core - ${jackson.version} - - - com.fasterxml.jackson.core - jackson-databind - ${jackson.version} - - - com.fasterxml.jackson.core - jackson-annotations - ${jackson.version} - - - com.fasterxml.jackson.datatype - jackson-datatype-jdk8 - ${jackson.version} - - - com.fasterxml.jackson.datatype - jackson-datatype-jsr310 - ${jackson.version} - - - com.typesafe - config - ${typesafe.config.version} - com.typesafe.play play-java_2.11 diff --git a/deeplearning4j/deeplearning4j-nearestneighbors-parent/deeplearning4j-nearestneighbors-client/pom.xml b/deeplearning4j/deeplearning4j-nearestneighbors-parent/deeplearning4j-nearestneighbors-client/pom.xml index 57248c559..d6b64b025 100644 --- a/deeplearning4j/deeplearning4j-nearestneighbors-parent/deeplearning4j-nearestneighbors-client/pom.xml +++ b/deeplearning4j/deeplearning4j-nearestneighbors-parent/deeplearning4j-nearestneighbors-client/pom.xml @@ -31,21 +31,6 @@ - - org.apache.httpcomponents - httpclient - ${httpclient.version} - - - org.apache.httpcomponents - httpcore - ${httpcore.version} - - - org.apache.httpcomponents - httpmime - ${httpmime.version} - com.mashape.unirest unirest-java diff --git a/deeplearning4j/deeplearning4j-nearestneighbors-parent/nearestneighbor-core/src/main/java/org/deeplearning4j/clustering/quadtree/QuadTree.java b/deeplearning4j/deeplearning4j-nearestneighbors-parent/nearestneighbor-core/src/main/java/org/deeplearning4j/clustering/quadtree/QuadTree.java index f1cc2e304..0fbf8afec 100644 --- 
a/deeplearning4j/deeplearning4j-nearestneighbors-parent/nearestneighbor-core/src/main/java/org/deeplearning4j/clustering/quadtree/QuadTree.java +++ b/deeplearning4j/deeplearning4j-nearestneighbors-parent/nearestneighbor-core/src/main/java/org/deeplearning4j/clustering/quadtree/QuadTree.java @@ -29,7 +29,7 @@ import static java.lang.Math.max; * QuadTree: http://en.wikipedia.org/wiki/Quadtree * * Reference impl based on the paper by: - * http://arxiv.org/pdf/1301.3342v2.pdf + * https://arxiv.org/pdf/1301.3342v2.pdf * * Primarily focused on 2 dimensions, may expand later if there's a reason. * diff --git a/deeplearning4j/deeplearning4j-nearestneighbors-parent/nearestneighbor-core/src/main/java/org/deeplearning4j/clustering/util/MathUtils.java b/deeplearning4j/deeplearning4j-nearestneighbors-parent/nearestneighbor-core/src/main/java/org/deeplearning4j/clustering/util/MathUtils.java index ce6ddcff7..792231c7e 100755 --- a/deeplearning4j/deeplearning4j-nearestneighbors-parent/nearestneighbor-core/src/main/java/org/deeplearning4j/clustering/util/MathUtils.java +++ b/deeplearning4j/deeplearning4j-nearestneighbors-parent/nearestneighbor-core/src/main/java/org/deeplearning4j/clustering/util/MathUtils.java @@ -86,7 +86,7 @@ public class MathUtils { /** - * See: http://stackoverflow.com/questions/466204/rounding-off-to-nearest-power-of-2 + * See: https://stackoverflow.com/questions/466204/rounding-off-to-nearest-power-of-2 * @param v the number to getFromOrigin the next power of 2 for * @return the next power of 2 for the passed in value */ diff --git a/deeplearning4j/deeplearning4j-nlp-parent/deeplearning4j-nlp-chinese/pom.xml b/deeplearning4j/deeplearning4j-nlp-parent/deeplearning4j-nlp-chinese/pom.xml index b72cb721d..35eb2903d 100644 --- a/deeplearning4j/deeplearning4j-nlp-parent/deeplearning4j-nlp-chinese/pom.xml +++ b/deeplearning4j/deeplearning4j-nlp-parent/deeplearning4j-nlp-chinese/pom.xml @@ -52,12 +52,6 @@ deeplearning4j-nlp ${project.version} - - org.nutz - nutz - 
1.r.58 - provided - org.nlpcn nlp-lang diff --git a/deeplearning4j/deeplearning4j-nlp-parent/deeplearning4j-nlp-uima/pom.xml b/deeplearning4j/deeplearning4j-nlp-parent/deeplearning4j-nlp-uima/pom.xml index d27625e9f..44fbbcf9d 100644 --- a/deeplearning4j/deeplearning4j-nlp-parent/deeplearning4j-nlp-uima/pom.xml +++ b/deeplearning4j/deeplearning4j-nlp-parent/deeplearning4j-nlp-uima/pom.xml @@ -33,26 +33,6 @@ - - commons-logging - commons-logging - ${commons-logging.version} - - - org.springframework - spring-core - ${spring.version} - - - org.springframework - spring-context - ${spring.version} - - - org.springframework - spring-beans - ${spring.version} - org.cleartk cleartk-snowball diff --git a/deeplearning4j/deeplearning4j-nlp-parent/deeplearning4j-nlp/pom.xml b/deeplearning4j/deeplearning4j-nlp-parent/deeplearning4j-nlp/pom.xml index 62c0c73f9..3f367689c 100644 --- a/deeplearning4j/deeplearning4j-nlp-parent/deeplearning4j-nlp/pom.xml +++ b/deeplearning4j/deeplearning4j-nlp-parent/deeplearning4j-nlp/pom.xml @@ -54,11 +54,6 @@ test - - org.objenesis - objenesis - ${objenesis.version} - org.mockito mockito-core @@ -66,16 +61,6 @@ test - - - - - - org.nd4j - nd4j-jackson - ${nd4j.version} - - ch.qos.logback logback-classic diff --git a/deeplearning4j/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/models/sequencevectors/graph/walkers/impl/PopularityWalker.java b/deeplearning4j/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/models/sequencevectors/graph/walkers/impl/PopularityWalker.java index b48ddb2ea..05d69e94c 100644 --- a/deeplearning4j/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/models/sequencevectors/graph/walkers/impl/PopularityWalker.java +++ b/deeplearning4j/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/models/sequencevectors/graph/walkers/impl/PopularityWalker.java @@ -42,7 +42,7 @@ import java.util.*; * Instead of rand walks, this 
walker produces walks based on number of edges coming into each node. * This allows you to build walks filtering too rare nodes, or too popular nodes, depending on your demands. * - * Original DeepWalk paper: http://arxiv.org/pdf/1403.6652v2 + * Original DeepWalk paper: https://arxiv.org/pdf/1403.6652v2 * @author raver119@gmail.com */ public class PopularityWalker extends RandomWalker implements GraphWalker { diff --git a/deeplearning4j/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/models/sequencevectors/graph/walkers/impl/RandomWalker.java b/deeplearning4j/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/models/sequencevectors/graph/walkers/impl/RandomWalker.java index 922dbbe27..b422a52d1 100644 --- a/deeplearning4j/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/models/sequencevectors/graph/walkers/impl/RandomWalker.java +++ b/deeplearning4j/deeplearning4j-nlp-parent/deeplearning4j-nlp/src/main/java/org/deeplearning4j/models/sequencevectors/graph/walkers/impl/RandomWalker.java @@ -37,7 +37,7 @@ import java.util.concurrent.atomic.AtomicInteger; /** * This is Random-based walker for SequenceVectors-based DeepWalk implementation * - * Original DeepWalk paper: http://arxiv.org/pdf/1403.6652v2 + * Original DeepWalk paper: https://arxiv.org/pdf/1403.6652v2 * * @author AlexDBlack * @author raver119@gmail.com diff --git a/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/GradientNormalization.java b/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/GradientNormalization.java index 01bd9cf3d..05b1c6638 100644 --- a/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/GradientNormalization.java +++ b/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/GradientNormalization.java @@ -52,7 +52,7 @@ package org.deeplearning4j.nn.conf; * * Thus, the l2 norm of the scaled gradients will not exceed 
the specified threshold, though may be smaller than it
* See: Pascanu, Mikolov, Bengio (2012), On the difficulty of training Recurrent Neural Networks, - * http://arxiv.org/abs/1211.5063
+ * https://arxiv.org/abs/1211.5063
* Threshold for clipping can be set in Layer configuration, using gradientNormalizationThreshold(double threshold) *

* diff --git a/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/distribution/OrthogonalDistribution.java b/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/distribution/OrthogonalDistribution.java index 8959c2349..dbe7143d4 100644 --- a/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/distribution/OrthogonalDistribution.java +++ b/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/distribution/OrthogonalDistribution.java @@ -23,7 +23,7 @@ import org.nd4j.shade.jackson.annotation.JsonProperty; /** * Orthogonal distribution, with gain parameter.
- * See http://arxiv.org/abs/1312.6120 for details + * See https://arxiv.org/abs/1312.6120 for details * */ @EqualsAndHashCode(callSuper = false) diff --git a/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/BatchNormalization.java b/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/BatchNormalization.java index 4c470fec5..f95421585 100644 --- a/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/BatchNormalization.java +++ b/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/BatchNormalization.java @@ -236,7 +236,7 @@ public class BatchNormalization extends FeedForwardLayer { /** * Epsilon value for batch normalization; small floating point value added to variance (algorithm 1 in http://arxiv.org/pdf/1502.03167v3.pdf) to reduce/avoid + * href="https://arxiv.org/pdf/1502.03167v3.pdf">https://arxiv.org/pdf/1502.03167v3.pdf) to reduce/avoid * underflow issues.
Default: 1e-5 */ protected double eps = 1e-5; @@ -365,7 +365,7 @@ public class BatchNormalization extends FeedForwardLayer { /** * Epsilon value for batch normalization; small floating point value added to variance (algorithm 1 in http://arxiv.org/pdf/1502.03167v3.pdf) to reduce/avoid + * href="https://arxiv.org/pdf/1502.03167v3.pdf">https://arxiv.org/pdf/1502.03167v3.pdf) to reduce/avoid * underflow issues.
Default: 1e-5 * * @param eps Epsilon values to use diff --git a/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/SameDiffLayer.java b/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/SameDiffLayer.java index d9655a58f..e17535acc 100644 --- a/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/SameDiffLayer.java +++ b/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/layers/samediff/SameDiffLayer.java @@ -16,13 +16,11 @@ package org.deeplearning4j.nn.conf.layers.samediff; -import lombok.Data; -import lombok.EqualsAndHashCode; -import lombok.Getter; -import lombok.Setter; +import lombok.*; import org.deeplearning4j.nn.api.Layer; import org.deeplearning4j.nn.api.MaskState; import org.deeplearning4j.nn.conf.NeuralNetConfiguration; +import org.deeplearning4j.nn.weights.IWeightInit; import org.deeplearning4j.nn.weights.WeightInit; import org.deeplearning4j.optimize.api.TrainingListener; import org.nd4j.autodiff.samediff.SDVariable; @@ -32,6 +30,7 @@ import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.primitives.Pair; import java.util.Collection; +import java.util.HashMap; import java.util.Map; /** @@ -58,10 +57,12 @@ import java.util.Map; public abstract class SameDiffLayer extends AbstractSameDiffLayer { protected WeightInit weightInit; + protected Map paramWeightInit; protected SameDiffLayer(Builder builder) { super(builder); this.weightInit = builder.weightInit; + this.paramWeightInit = builder.paramWeightInit; } protected SameDiffLayer() { @@ -115,6 +116,7 @@ public abstract class SameDiffLayer extends AbstractSameDiffLayer { public static abstract class Builder> extends AbstractSameDiffLayer.Builder { protected WeightInit weightInit = WeightInit.XAVIER; + protected Map paramWeightInit; /** * @param weightInit Weight initialization to use for the layer @@ -123,5 +125,12 @@ public abstract class SameDiffLayer 
extends AbstractSameDiffLayer { this.setWeightInit(weightInit); return (T) this; } + + public T weightInit(@NonNull String param, @NonNull IWeightInit weightInit){ + if(paramWeightInit == null) + paramWeightInit = new HashMap<>(); + paramWeightInit.put(param, weightInit); + return (T) this; + } } } diff --git a/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/serde/BaseNetConfigDeserializer.java b/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/serde/BaseNetConfigDeserializer.java index d32488363..a90218946 100644 --- a/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/serde/BaseNetConfigDeserializer.java +++ b/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/conf/serde/BaseNetConfigDeserializer.java @@ -53,8 +53,8 @@ import java.util.Map; * We deserialize the config using the default deserializer, then handle the new IUpdater (which will be null for * 0.8.0 and earlier configs) if necessary * - * Overall design: - * http://stackoverflow.com/questions/18313323/how-do-i-call-the-default-deserializer-from-a-custom-deserializer-in-jackson + * Overall design: + * https://stackoverflow.com/questions/18313323/how-do-i-call-the-default-deserializer-from-a-custom-deserializer-in-jackson * * @author Alex Black */ diff --git a/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/ComputationGraph.java b/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/ComputationGraph.java index 32d7bfb73..1be13ddf3 100755 --- a/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/ComputationGraph.java +++ b/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/graph/ComputationGraph.java @@ -2734,7 +2734,12 @@ public class ComputationGraph implements Serializable, Model, NeuralNetwork { if (setVertexEpsilon[gv.getVertexIndex()]) { //This vertex: must output to multiple vertices... 
we want to add the epsilons here INDArray currentEps = gv.getEpsilon(); - gv.setEpsilon(currentEps.addi(epsilons[j++])); //TODO is this always safe? + if(currentEps == null){ + //Edge case: this can be null for dual embedding layer case - in -> e1, in -> e2 + gv.setEpsilon(currentEps); + } else { + gv.setEpsilon(currentEps.addi(epsilons[j++])); //TODO is this always safe? + } } else { gv.setEpsilon(epsilons[j++]); } diff --git a/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/mkldnn/MKLDNNBatchNormHelper.java b/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/mkldnn/MKLDNNBatchNormHelper.java index 0d9ae18e7..2e8c04aa3 100644 --- a/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/mkldnn/MKLDNNBatchNormHelper.java +++ b/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/mkldnn/MKLDNNBatchNormHelper.java @@ -67,17 +67,17 @@ public class MKLDNNBatchNormHelper implements BatchNormalizationHelper { INDArray beta, INDArray dGammaView, INDArray dBetaView, double eps, LayerWorkspaceMgr workspaceMgr) { if(input.dataType() != DataType.FLOAT) return null; //MKL-DNN only supports float - /* + //TODO FIXME - AB 2019/11/01 - https://github.com/eclipse/deeplearning4j/issues/8335 List args = new ArrayList<>(); args.add(input); args.add(meanCache); args.add(varCache); - args.add(epsilon); if(gamma != null) args.add(gamma.reshape(gamma.length())); if(beta != null) args.add(beta.reshape(beta.length())); + args.add(epsilon); DynamicCustomOp op = DynamicCustomOp.builder("batchnorm_bp") @@ -110,8 +110,6 @@ public class MKLDNNBatchNormHelper implements BatchNormalizationHelper { g.setGradientFor(BatchNormalizationParamInitializer.BETA, dBetaView); return new Pair<>(g, epsAtInput); - */ - return null; } @Override diff --git a/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/normalization/BatchNormalization.java 
b/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/normalization/BatchNormalization.java index 8c8f329ea..cd070185c 100644 --- a/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/normalization/BatchNormalization.java +++ b/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/normalization/BatchNormalization.java @@ -49,8 +49,8 @@ import java.util.*; /** * Batch normalization layer.
* Rerences:
- * http://arxiv.org/pdf/1502.03167v3.pdf
- * http://arxiv.org/pdf/1410.7455v8.pdf
+ * https://arxiv.org/pdf/1502.03167v3.pdf
+ * https://arxiv.org/pdf/1410.7455v8.pdf
* * https://kratzert.github.io/2016/02/12/understanding-the-gradient-flow-through-the-batch-normalization-layer.html * @@ -327,7 +327,7 @@ public class BatchNormalization extends BaseLayerhttp://www.cs.toronto.edu/~graves/phd.pdf * See also for full/vectorized equations (and a comparison to other LSTM variants): * Greff et al. 2015, "LSTM: A Search Space Odyssey", pg11. This is the "vanilla" variant in said paper - * http://arxiv.org/pdf/1503.04069.pdf + * https://arxiv.org/pdf/1503.04069.pdf * * A high level description of bidirectional LSTM can be found from * "Hybrid Speech Recognition with Deep Bidirectional LSTM" diff --git a/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/GravesLSTM.java b/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/GravesLSTM.java index 13f30b8bb..b112672f9 100644 --- a/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/GravesLSTM.java +++ b/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/GravesLSTM.java @@ -34,7 +34,7 @@ import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr; * http://www.cs.toronto.edu/~graves/phd.pdf * See also for full/vectorized equations (and a comparison to other LSTM variants): * Greff et al. 2015, "LSTM: A Search Space Odyssey", pg11. 
This is the "vanilla" variant in said paper - * http://arxiv.org/pdf/1503.04069.pdf + * https://arxiv.org/pdf/1503.04069.pdf * * @author Alex Black * @see LSTM LSTM class, for the version without peephole connections diff --git a/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/LSTM.java b/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/LSTM.java index 692713f6e..a55a19e46 100644 --- a/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/LSTM.java +++ b/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/LSTM.java @@ -38,7 +38,7 @@ import org.nd4j.util.OneTimeLogger; * * See also for full/vectorized equations (and a comparison to other LSTM variants): * Greff et al. 2015, "LSTM: A Search Space Odyssey", pg11. This is the "no peephole" variant in said paper - * http://arxiv.org/pdf/1503.04069.pdf + * https://arxiv.org/pdf/1503.04069.pdf * * @author Alex Black * @see GravesLSTM GravesLSTM class, for the version with peephole connections diff --git a/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/LSTMHelpers.java b/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/LSTMHelpers.java index 86079aead..c733ef6c2 100644 --- a/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/LSTMHelpers.java +++ b/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/layers/recurrent/LSTMHelpers.java @@ -68,7 +68,7 @@ import static org.nd4j.linalg.indexing.NDArrayIndex.*; *

* When 'hasPeepholeConnections' is true, this is the "vanilla" variant in said paper
* When 'hasPeepholeConnections' is false, this is the "no peephole" variant
- * http://arxiv.org/pdf/1503.04069.pdf + * https://arxiv.org/pdf/1503.04069.pdf * * * @author Alex Black (LSTM implementations) diff --git a/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/weights/WeightInitIdentity.java b/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/weights/WeightInitIdentity.java index 076fa2ac8..b25121cd3 100644 --- a/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/weights/WeightInitIdentity.java +++ b/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/weights/WeightInitIdentity.java @@ -16,11 +16,14 @@ package org.deeplearning4j.nn.weights; +import lombok.Data; import lombok.EqualsAndHashCode; +import lombok.NoArgsConstructor; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.factory.Nd4j; import org.nd4j.linalg.indexing.INDArrayIndex; import org.nd4j.linalg.indexing.NDArrayIndex; +import org.nd4j.shade.jackson.annotation.JsonProperty; import java.util.Arrays; @@ -32,9 +35,17 @@ import java.util.Arrays; * * @author Adam Gibson */ -@EqualsAndHashCode +@Data +@NoArgsConstructor public class WeightInitIdentity implements IWeightInit { + private Double scale; + + public WeightInitIdentity(@JsonProperty("scale") Double scale){ + this.scale = scale; + } + + @Override public INDArray init(double fanIn, double fanOut, long[] shape, char order, INDArray paramView) { if (shape[0] != shape[1]) { @@ -59,6 +70,11 @@ public class WeightInitIdentity implements IWeightInit { } else { ret = Nd4j.createUninitialized(shape, order).assign(Nd4j.eye(shape[0])); } + + if(scale != null){ + ret.muli(scale); + } + INDArray flat = Nd4j.toFlattened(order, ret); paramView.assign(flat); return paramView.reshape(order, shape); @@ -82,13 +98,16 @@ public class WeightInitIdentity implements IWeightInit { indArrayIndices[i] = NDArrayIndex.point(shape[i] / 2); } - paramView.assign(Nd4j.zeros(paramView.shape())); + paramView.assign(0); final INDArray params 
=paramView.reshape(order, shape); for (int i = 0; i < shape[0]; i++) { indArrayIndices[0] = NDArrayIndex.point(i); indArrayIndices[1] = NDArrayIndex.point(i); params.put(indArrayIndices, Nd4j.ones(1)); } + if(scale != null){ + params.muli(scale); + } return params; } } diff --git a/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/weights/WeightInitUtil.java b/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/weights/WeightInitUtil.java index b110bc5a0..17034d408 100755 --- a/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/weights/WeightInitUtil.java +++ b/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/weights/WeightInitUtil.java @@ -19,6 +19,7 @@ package org.deeplearning4j.nn.weights; import org.apache.commons.math3.util.FastMath; import org.nd4j.linalg.api.ndarray.INDArray; +import org.nd4j.linalg.api.ops.random.impl.TruncatedNormalDistribution; import org.nd4j.linalg.api.rng.distribution.Distribution; import org.nd4j.linalg.api.rng.distribution.impl.OrthogonalDistribution; import org.nd4j.linalg.factory.Nd4j; @@ -146,14 +147,13 @@ public class WeightInitUtil { paramView.assign(flat); break; case VAR_SCALING_NORMAL_FAN_IN: - // TODO: needs to be truncated normal to match keras. 
- Nd4j.randn(paramView).divi(FastMath.sqrt(fanIn)); + Nd4j.exec(new TruncatedNormalDistribution(paramView, 0.0, Math.sqrt(1.0 / fanIn))); break; case VAR_SCALING_NORMAL_FAN_OUT: - Nd4j.randn(paramView).divi(FastMath.sqrt(fanOut)); + Nd4j.exec(new TruncatedNormalDistribution(paramView, 0.0, Math.sqrt(1.0 / fanOut))); break; case VAR_SCALING_NORMAL_FAN_AVG: - Nd4j.randn(paramView).divi(FastMath.sqrt((fanIn + fanOut) / 2)); + Nd4j.exec(new TruncatedNormalDistribution(paramView, 0.0, Math.sqrt(2.0 / (fanIn + fanOut)))); break; case VAR_SCALING_UNIFORM_FAN_IN: double scalingFanIn = 3.0 / Math.sqrt(fanIn); diff --git a/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/weights/WeightInitVarScalingNormalFanAvg.java b/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/weights/WeightInitVarScalingNormalFanAvg.java index 0be5af0e9..3b9698f10 100644 --- a/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/weights/WeightInitVarScalingNormalFanAvg.java +++ b/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/weights/WeightInitVarScalingNormalFanAvg.java @@ -16,22 +16,39 @@ package org.deeplearning4j.nn.weights; +import lombok.Data; import lombok.EqualsAndHashCode; +import lombok.NoArgsConstructor; import org.apache.commons.math3.util.FastMath; import org.nd4j.linalg.api.ndarray.INDArray; +import org.nd4j.linalg.api.ops.random.impl.TruncatedNormalDistribution; import org.nd4j.linalg.factory.Nd4j; /** - * Gaussian distribution with mean 0, variance 1.0/((fanIn + fanOut)/2) + * Truncated aussian distribution with mean 0, variance 1.0/((fanIn + fanOut)/2) * * @author Adam Gibson */ -@EqualsAndHashCode +@Data +@NoArgsConstructor public class WeightInitVarScalingNormalFanAvg implements IWeightInit { + private Double scale; + + public WeightInitVarScalingNormalFanAvg(Double scale){ + this.scale = scale; + } + @Override public INDArray init(double fanIn, double fanOut, long[] shape, char order, INDArray paramView) 
{ - Nd4j.randn(paramView).divi(FastMath.sqrt((fanIn + fanOut) / 2)); + double std; + if(scale == null){ + std = Math.sqrt(2.0 / (fanIn + fanOut)); + } else { + std = Math.sqrt(2.0 * scale / (fanIn + fanOut)); + } + + Nd4j.exec(new TruncatedNormalDistribution(paramView, 0.0, std)); return paramView.reshape(order, shape); } } diff --git a/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/weights/WeightInitVarScalingNormalFanIn.java b/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/weights/WeightInitVarScalingNormalFanIn.java index 3f89ff015..dca457de3 100644 --- a/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/weights/WeightInitVarScalingNormalFanIn.java +++ b/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/weights/WeightInitVarScalingNormalFanIn.java @@ -16,23 +16,38 @@ package org.deeplearning4j.nn.weights; -import lombok.EqualsAndHashCode; -import org.apache.commons.math3.util.FastMath; +import lombok.Data; +import lombok.NoArgsConstructor; import org.nd4j.linalg.api.ndarray.INDArray; +import org.nd4j.linalg.api.ops.random.impl.TruncatedNormalDistribution; import org.nd4j.linalg.factory.Nd4j; /** - * Gaussian distribution with mean 0, variance 1.0/(fanIn) + * Gaussian distribution with mean 0, variance {@code 1.0/(fanIn)}
+ * If a scale is provided, use variance {@code scale/(fanIn)} instead * * @author Adam Gibson */ -@EqualsAndHashCode +@Data +@NoArgsConstructor public class WeightInitVarScalingNormalFanIn implements IWeightInit { + private Double scale; + + public WeightInitVarScalingNormalFanIn(Double scale){ + this.scale = scale; + } + @Override public INDArray init(double fanIn, double fanOut, long[] shape, char order, INDArray paramView) { - // TODO: needs to be truncated normal to match keras. - Nd4j.randn(paramView).divi(FastMath.sqrt(fanIn)); + double std; + if(scale == null){ + std = Math.sqrt(1.0 / fanIn); + } else { + std = Math.sqrt(scale / fanIn); + } + + Nd4j.exec(new TruncatedNormalDistribution(paramView, 0.0, std)); return paramView.reshape(order, shape); } } diff --git a/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/weights/WeightInitVarScalingNormalFanOut.java b/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/weights/WeightInitVarScalingNormalFanOut.java index 6369a19c6..0af43ac88 100644 --- a/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/weights/WeightInitVarScalingNormalFanOut.java +++ b/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/weights/WeightInitVarScalingNormalFanOut.java @@ -16,22 +16,40 @@ package org.deeplearning4j.nn.weights; +import lombok.Data; import lombok.EqualsAndHashCode; +import lombok.NoArgsConstructor; import org.apache.commons.math3.util.FastMath; import org.nd4j.linalg.api.ndarray.INDArray; +import org.nd4j.linalg.api.ops.random.impl.TruncatedNormalDistribution; import org.nd4j.linalg.factory.Nd4j; /** - * Gaussian distribution with mean 0, variance 1.0/(fanOut) + * Truncated normal distribution with mean 0, variance 1.0/(fanOut)
+ * If a scale is provided, variance is scale / fanOut * * @author Adam Gibson */ -@EqualsAndHashCode +@Data +@NoArgsConstructor public class WeightInitVarScalingNormalFanOut implements IWeightInit { + private Double scale; + + public WeightInitVarScalingNormalFanOut(Double scale){ + this.scale = scale; + } + @Override public INDArray init(double fanIn, double fanOut, long[] shape, char order, INDArray paramView) { - Nd4j.randn(paramView).divi(FastMath.sqrt(fanOut)); + double std; + if(scale == null){ + std = Math.sqrt(1.0 / fanOut); + } else { + std = Math.sqrt(scale / fanOut); + } + + Nd4j.exec(new TruncatedNormalDistribution(paramView, 0.0, std)); return paramView.reshape(order, shape); } } diff --git a/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/weights/WeightInitVarScalingUniformFanAvg.java b/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/weights/WeightInitVarScalingUniformFanAvg.java index afb1a1dc8..f2e050e6e 100644 --- a/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/weights/WeightInitVarScalingUniformFanAvg.java +++ b/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/weights/WeightInitVarScalingUniformFanAvg.java @@ -16,7 +16,9 @@ package org.deeplearning4j.nn.weights; +import lombok.Data; import lombok.EqualsAndHashCode; +import lombok.NoArgsConstructor; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.factory.Nd4j; @@ -25,12 +27,22 @@ import org.nd4j.linalg.factory.Nd4j; * * @author Adam Gibson */ -@EqualsAndHashCode +@Data +@NoArgsConstructor public class WeightInitVarScalingUniformFanAvg implements IWeightInit { + private Double scale; + + public WeightInitVarScalingUniformFanAvg(Double scale){ + this.scale = scale; + } + @Override public INDArray init(double fanIn, double fanOut, long[] shape, char order, INDArray paramView) { double scalingFanAvg = 3.0 / Math.sqrt((fanIn + fanOut) / 2); + if(scale != null) + scalingFanAvg *= scale; + 
Nd4j.rand(paramView, Nd4j.getDistributions().createUniform(-scalingFanAvg, scalingFanAvg)); return paramView.reshape(order, shape); } diff --git a/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/weights/WeightInitVarScalingUniformFanIn.java b/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/weights/WeightInitVarScalingUniformFanIn.java index 0cf26ecc6..7135394a7 100644 --- a/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/weights/WeightInitVarScalingUniformFanIn.java +++ b/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/weights/WeightInitVarScalingUniformFanIn.java @@ -16,21 +16,34 @@ package org.deeplearning4j.nn.weights; +import lombok.Data; import lombok.EqualsAndHashCode; +import lombok.NoArgsConstructor; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.factory.Nd4j; /** - * Uniform U[-a,a] with a=3.0/(fanIn) + * Uniform U[-a,a] with a=3.0/(fanIn)
+ * If a scale is provided, a = 3.0 * scale / (fanIn) * * @author Adam Gibson */ -@EqualsAndHashCode +@NoArgsConstructor +@Data public class WeightInitVarScalingUniformFanIn implements IWeightInit { + private Double scale; + + public WeightInitVarScalingUniformFanIn(Double scale){ + this.scale = scale; + } + @Override public INDArray init(double fanIn, double fanOut, long[] shape, char order, INDArray paramView) { double scalingFanIn = 3.0 / Math.sqrt(fanIn); + if(scale != null) + scalingFanIn *= scale; + Nd4j.rand(paramView, Nd4j.getDistributions().createUniform(-scalingFanIn, scalingFanIn)); return paramView.reshape(order, shape); } diff --git a/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/weights/WeightInitVarScalingUniformFanOut.java b/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/weights/WeightInitVarScalingUniformFanOut.java index 2d3b116fc..09bf2053d 100644 --- a/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/weights/WeightInitVarScalingUniformFanOut.java +++ b/deeplearning4j/deeplearning4j-nn/src/main/java/org/deeplearning4j/nn/weights/WeightInitVarScalingUniformFanOut.java @@ -16,21 +16,33 @@ package org.deeplearning4j.nn.weights; +import lombok.Data; import lombok.EqualsAndHashCode; +import lombok.NoArgsConstructor; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.factory.Nd4j; /** - * Uniform U[-a,a] with a=3.0/(fanOut) + * Uniform U[-a,a] with a=3.0/(fanOut)
+ * If a scale is provided, a = 3.0 * scale / fanOut * * @author Adam Gibson */ -@EqualsAndHashCode +@Data +@NoArgsConstructor public class WeightInitVarScalingUniformFanOut implements IWeightInit { + private Double scale; + + public WeightInitVarScalingUniformFanOut(Double scale){ + this.scale = scale; + } + @Override public INDArray init(double fanIn, double fanOut, long[] shape, char order, INDArray paramView) { double scalingFanOut = 3.0 / Math.sqrt(fanOut); + if(scale != null) + scalingFanOut *= scale; Nd4j.rand(paramView, Nd4j.getDistributions().createUniform(-scalingFanOut, scalingFanOut)); return paramView.reshape(order, shape); } diff --git a/deeplearning4j/deeplearning4j-scaleout/deeplearning4j-aws/pom.xml b/deeplearning4j/deeplearning4j-scaleout/deeplearning4j-aws/pom.xml index 7c9967ef8..94f66b405 100644 --- a/deeplearning4j/deeplearning4j-scaleout/deeplearning4j-aws/pom.xml +++ b/deeplearning4j/deeplearning4j-scaleout/deeplearning4j-aws/pom.xml @@ -44,184 +44,48 @@ - - org.scala-lang - scala-library - ${scala.version} - - - org.scala-lang - scala-reflect - ${scala.version} - - - - commons-logging - commons-logging - ${commons-logging.version} - - - joda-time - joda-time - ${jodatime.version} - - - org.apache.httpcomponents - httpclient - ${httpclient.version} - - - org.apache.httpcomponents - httpcore - ${httpcore.version} - com.amazonaws aws-java-sdk 1.11.24 - - org.deeplearning4j - deeplearning4j-core - ${project.parent.version} - args4j args4j 2.32 + + org.slf4j + slf4j-api + + + org.nd4j + nd4j-api + ${nd4j.version} + + + org.deeplearning4j + deeplearning4j-util + ${project.version} + + com.jcraft jsch ${jsch.version} + - com.google.inject - guice - ${guice.version} - - - com.google.protobuf - protobuf-java - ${google.protobuf.version} - - - commons-codec - commons-codec - ${commons-codec.version} - - - commons-collections - commons-collections - ${commons-collections.version} - - - commons-io - commons-io - ${commons-io.version} - - - commons-lang 
- commons-lang - ${commons-lang.version} - - - commons-net - commons-net - ${commons-net.version} - - - com.sun.xml.bind - jaxb-core - ${jaxb.version} - - - com.sun.xml.bind - jaxb-impl - ${jaxb.version} - - - io.netty - netty - ${netty.version} - - - com.fasterxml.jackson.core - jackson-core - ${jackson.version} - - - com.fasterxml.jackson.core - jackson-databind - ${jackson.version} - - - com.fasterxml.jackson.core - jackson-annotations - ${jackson.version} - - - javax.servlet - javax.servlet-api - ${servlet.version} - - - org.apache.commons - commons-compress - ${commons-compress.version} + org.threadly + threadly + ${threadly.version} + org.apache.commons commons-lang3 ${commons-lang3.version} - - org.apache.commons - commons-math3 - ${commons-math3.version} - - - org.apache.curator - curator-recipes - ${curator.version} - - - com.typesafe - config - ${typesafe.config.version} - - - org.apache.spark - spark-core_2.11 - ${spark.version} - - - com.google.code.findbugs - jsr305 - - - org.slf4j - jul-to-slf4j - - - org.slf4j - jcl-over-slf4j - - - org.slf4j - slf4j-log4j12 - - - log4j - log4j - - - - - org.threadly - threadly - ${threadly.version} - diff --git a/deeplearning4j/deeplearning4j-scaleout/deeplearning4j-aws/src/main/java/org/deeplearning4j/aws/emr/SparkEMRClient.java b/deeplearning4j/deeplearning4j-scaleout/deeplearning4j-aws/src/main/java/org/deeplearning4j/aws/emr/SparkEMRClient.java index b1476fa3b..d179cca09 100644 --- a/deeplearning4j/deeplearning4j-scaleout/deeplearning4j-aws/src/main/java/org/deeplearning4j/aws/emr/SparkEMRClient.java +++ b/deeplearning4j/deeplearning4j-scaleout/deeplearning4j-aws/src/main/java/org/deeplearning4j/aws/emr/SparkEMRClient.java @@ -27,8 +27,8 @@ import lombok.AllArgsConstructor; import lombok.Data; import lombok.NoArgsConstructor; import lombok.extern.slf4j.Slf4j; -import org.apache.commons.lang.RandomStringUtils; -import org.apache.spark.api.java.function.Function; +import org.apache.commons.lang3.RandomStringUtils; 
+import org.nd4j.linalg.function.Function; import java.io.File; import java.util.*; @@ -157,7 +157,7 @@ public class SparkEMRClient { private void submitJob(AmazonElasticMapReduce emr, String mainClass, List args, Map sparkConfs, File uberJar) throws Exception { AmazonS3URI s3Jar = new AmazonS3URI(sparkS3JarFolder + "/" + uberJar.getName()); log.info(String.format("Placing uberJar %s to %s", uberJar.getPath(), s3Jar.toString())); - PutObjectRequest putRequest = sparkS3PutObjectDecorator.call( + PutObjectRequest putRequest = sparkS3PutObjectDecorator.apply( new PutObjectRequest(s3Jar.getBucket(), s3Jar.getKey(), uberJar) ); sparkS3ClientBuilder.build().putObject(putRequest); @@ -289,7 +289,7 @@ public class SparkEMRClient { // This should allow the user to decorate the put call to add metadata to the jar put command, such as security groups, protected Function sparkS3PutObjectDecorator = new Function() { @Override - public PutObjectRequest call(PutObjectRequest putObjectRequest) throws Exception { + public PutObjectRequest apply(PutObjectRequest putObjectRequest) { return putObjectRequest; } }; diff --git a/deeplearning4j/deeplearning4j-scaleout/spark/pom.xml b/deeplearning4j/deeplearning4j-scaleout/spark/pom.xml index 579e042ab..a24676022 100644 --- a/deeplearning4j/deeplearning4j-scaleout/spark/pom.xml +++ b/deeplearning4j/deeplearning4j-scaleout/spark/pom.xml @@ -116,7 +116,6 @@ - org.nd4j @@ -139,82 +138,6 @@ scala-reflect ${scala.version} - - - com.google.inject - guice - ${guice.version} - - - com.google.protobuf - protobuf-java - ${google.protobuf.version} - - - commons-codec - commons-codec - ${commons-codec.version} - - - commons-collections - commons-collections - ${commons-collections.version} - - - commons-io - commons-io - ${commons-io.version} - - - commons-lang - commons-lang - ${commons-lang.version} - - - commons-net - commons-net - ${commons-net.version} - - - com.sun.xml.bind - jaxb-core - ${jaxb.version} - - - com.sun.xml.bind - jaxb-impl - 
${jaxb.version} - - - io.netty - netty - ${netty.version} - - - javax.servlet - javax.servlet-api - ${servlet.version} - - - org.apache.commons - commons-compress - ${commons-compress.version} - - - org.apache.commons - commons-lang3 - ${commons-lang3.version} - - - org.apache.commons - commons-math3 - ${commons-math3.version} - - - org.apache.curator - curator-recipes - ${curator.version} - com.typesafe config @@ -250,9 +173,7 @@ log4j - - diff --git a/deeplearning4j/deeplearning4j-ui-parent/deeplearning4j-play/pom.xml b/deeplearning4j/deeplearning4j-ui-parent/deeplearning4j-play/pom.xml index 1b4f33c1e..fa18ad91d 100644 --- a/deeplearning4j/deeplearning4j-ui-parent/deeplearning4j-play/pom.xml +++ b/deeplearning4j/deeplearning4j-ui-parent/deeplearning4j-play/pom.xml @@ -129,32 +129,11 @@ deeplearning4j-ui-model ${project.version} - - - com.google.protobuf - protobuf-java - ${google.protobuf.version} - javax.ws.rs javax.ws.rs-api ${ws.rs.version} - - joda-time - joda-time - ${jodatime.version} - - - org.apache.commons - commons-lang3 - ${commons-lang3.version} - - - org.hibernate - hibernate-validator - ${hibernate.version} - org.scala-lang scala-library @@ -165,11 +144,6 @@ scala-reflect ${scala.version} - - org.yaml - snakeyaml - ${snakeyaml.version} - com.typesafe.play play-java_2.11 diff --git a/docs/deeplearning4j-nlp/templates/word2vec.md b/docs/deeplearning4j-nlp/templates/word2vec.md index e941060f2..df188dc2f 100644 --- a/docs/deeplearning4j-nlp/templates/word2vec.md +++ b/docs/deeplearning4j-nlp/templates/word2vec.md @@ -447,7 +447,7 @@ Marketers might seek to establish relationships among products to build a recomm ### Google's Word2vec Patent -Word2vec is [a method of computing vector representations of words](http://arxiv.org/pdf/1301.3781.pdf) introduced by a team of researchers at Google led by Tomas Mikolov. Google [hosts an open-source version of Word2vec](https://code.google.com/p/word2vec/) released under an Apache 2.0 license. 
In 2014, Mikolov left Google for Facebook, and in May 2015, [Google was granted a patent for the method](http://patft.uspto.gov/netacgi/nph-Parser?Sect1=PTO2&Sect2=HITOFF&p=1&u=%2Fnetahtml%2FPTO%2Fsearch-bool.html&r=1&f=G&l=50&co1=AND&d=PTXT&s1=9037464&OS=9037464&RS=9037464), which does not abrogate the Apache license under which it has been released. +Word2vec is [a method of computing vector representations of words](https://arxiv.org/pdf/1301.3781.pdf) introduced by a team of researchers at Google led by Tomas Mikolov. Google [hosts an open-source version of Word2vec](https://code.google.com/p/word2vec/) released under an Apache 2.0 license. In 2014, Mikolov left Google for Facebook, and in May 2015, [Google was granted a patent for the method](http://patft.uspto.gov/netacgi/nph-Parser?Sect1=PTO2&Sect2=HITOFF&p=1&u=%2Fnetahtml%2FPTO%2Fsearch-bool.html&r=1&f=G&l=50&co1=AND&d=PTXT&s1=9037464&OS=9037464&RS=9037464), which does not abrogate the Apache license under which it has been released. 
### Foreign Languages @@ -485,7 +485,7 @@ Deeplearning4j has a class called [SequenceVectors](https://github.com/eclipse/d * [Quora: What Are Some Interesting Word2Vec Results?](http://www.quora.com/Word2vec/What-are-some-interesting-Word2Vec-results/answer/Omer-Levy) * [Word2Vec: an introduction](http://www.folgertkarsdorp.nl/word2vec-an-introduction/); Folgert Karsdorp * [Mikolov's Original Word2vec Code @Google](https://code.google.com/p/word2vec/) -* [word2vec Explained: Deriving Mikolov et al.’s Negative-Sampling Word-Embedding Method](http://arxiv.org/pdf/1402.3722v1.pdf); Yoav Goldberg and Omer Levy +* [word2vec Explained: Deriving Mikolov et al.’s Negative-Sampling Word-Embedding Method](https://arxiv.org/pdf/1402.3722v1.pdf); Yoav Goldberg and Omer Levy * [Advances in Pre-Training Distributed Word Representations - by Mikolov et al](https://arxiv.org/abs/1712.09405) diff --git a/docs/deeplearning4j-nn/templates/computationgraph.md b/docs/deeplearning4j-nn/templates/computationgraph.md index a5ced0ceb..f4ff7f03d 100644 --- a/docs/deeplearning4j-nn/templates/computationgraph.md +++ b/docs/deeplearning4j-nn/templates/computationgraph.md @@ -51,10 +51,10 @@ Examples of some architectures that can be built using ComputationGraph include: - Multi-task learning architectures - Recurrent neural networks with skip connections -- [GoogLeNet](http://arxiv.org/abs/1409.4842), a complex type of convolutional netural network for image classification -- [Image caption generation](http://arxiv.org/abs/1411.4555) +- [GoogLeNet](https://arxiv.org/abs/1409.4842), a complex type of convolutional netural network for image classification +- [Image caption generation](https://arxiv.org/abs/1411.4555) - [Convolutional networks for sentence classification](https://github.com/eclipse/deeplearning4j-examples/blob/master/dl4j-examples/src/main/java/org/deeplearning4j/examples/convolution/sentenceclassification/CnnSentenceClassificationExample.java) -- [Residual learning convolutional 
neural networks](http://arxiv.org/abs/1512.03385) +- [Residual learning convolutional neural networks](https://arxiv.org/abs/1512.03385) ## Configuring a Computation Graph diff --git a/docs/deeplearning4j-nn/templates/model-persistence.md b/docs/deeplearning4j-nn/templates/model-persistence.md index ef4d593e9..82f87f1ff 100644 --- a/docs/deeplearning4j-nn/templates/model-persistence.md +++ b/docs/deeplearning4j-nn/templates/model-persistence.md @@ -8,7 +8,7 @@ weight: 10 ## Saving and Loading a Neural Network -The `ModelSerializer` is a class which handles loading and saving models. There are two methods for saving models shown in the examples through the link. The first example saves a normal multilayer network, the second one saves a [computation graph](https://deeplearning4j.org/compgraph). +The `ModelSerializer` is a class which handles loading and saving models. There are two methods for saving models shown in the examples through the link. The first example saves a normal multilayer network, the second one saves a [computation graph](https://deeplearning4j.org/docs/latest/deeplearning4j-nn-computationgraph). Here is a [basic example](https://github.com/eclipse/deeplearning4j-examples/tree/master/dl4j-examples/src/main/java/org/deeplearning4j/examples/misc/modelsaving) with code to save a computation graph using the `ModelSerializer` class, as well as an example of using ModelSerializer to save a neural net built using MultiLayer configuration. 
diff --git a/docs/deeplearning4j-nn/templates/recurrent.md b/docs/deeplearning4j-nn/templates/recurrent.md index 0b33981e7..fe07ebddb 100644 --- a/docs/deeplearning4j-nn/templates/recurrent.md +++ b/docs/deeplearning4j-nn/templates/recurrent.md @@ -29,7 +29,7 @@ DL4J currently supports the following types of recurrent neural network * BaseRecurrent Java documentation for each is available, [GravesLSTM](https://deeplearning4j.org/api/{{page.version}}/org/deeplearning4j/nn/conf/layers/GravesLSTM.html), - [BidirectionalGravesLSTM](https://deeplearning4j.org/api/{{page.version}}/org/deeplearning4j/nn/conf/layers/GravesBidirectionalLSTM.html), [BaseRecurrent](https://deeplearning4j.org/doc/org/deeplearning4j/nn/conf/layers/BaseRecurrentLayer.html) + [BidirectionalGravesLSTM](https://deeplearning4j.org/api/{{page.version}}/org/deeplearning4j/nn/conf/layers/GravesBidirectionalLSTM.html), [BaseRecurrent](https://deeplearning4j.org/api/latest/org/deeplearning4j/nn/conf/layers/BaseRecurrentLayer.html) #### Data for RNNs Consider for the moment a standard feed-forward network (a multi-layer perceptron or 'DenseLayer' in DL4J). These networks expect input and output data that is two-dimensional: that is, data with "shape" [numExamples,inputSize]. This means that the data into a feed-forward network has ‘numExamples’ rows/examples, where each row consists of ‘inputSize’ columns. A single example would have shape [1,inputSize], though in practice we generally use multiple examples for computational and optimization efficiency. Similarly, output data for a standard feed-forward network is also two dimensional, with shape [numExamples,outputSize]. 
diff --git a/docs/deeplearning4j-nn/templates/tsne-visualization.md b/docs/deeplearning4j-nn/templates/tsne-visualization.md index 83ab3a3ce..9a55b1a74 100644 --- a/docs/deeplearning4j-nn/templates/tsne-visualization.md +++ b/docs/deeplearning4j-nn/templates/tsne-visualization.md @@ -8,7 +8,7 @@ weight: 10 ## t-SNE's Data Visualization -[t-Distributed Stochastic Neighbor Embedding](http://homepage.tudelft.nl/19j49/t-SNE.html) (t-SNE) is a data-visualization tool created by Laurens van der Maaten at Delft University of Technology. +[t-Distributed Stochastic Neighbor Embedding](https://en.wikipedia.org/wiki/T-distributed_stochastic_neighbor_embedding) (t-SNE) is a data-visualization tool created by Laurens van der Maaten at Delft University of Technology. While it can be used for any data, t-SNE (pronounced Tee-Snee) is only really meaningful with labeled data, which clarify how the input is clustering. Below, you can see the kind of graphic you can generate in DL4J with t-SNE working on MNIST data. diff --git a/docs/deeplearning4j-scaleout/templates/howto.md b/docs/deeplearning4j-scaleout/templates/howto.md index 500b1a241..af55969c6 100644 --- a/docs/deeplearning4j-scaleout/templates/howto.md +++ b/docs/deeplearning4j-scaleout/templates/howto.md @@ -627,7 +627,7 @@ To use the system clock time source, add the following to Spark submit: ## Failed training on Ubuntu 16.04 (Ubuntu bug that may affect DL4J users) -When running a Spark on YARN cluster on Ubuntu 16.04 machines, chances are that after finishing a job, all processes owned by the user running Hadoop/YARN are killed. This is related to a bug in Ubuntu, which is documented at https://bugs.launchpad.net/ubuntu/+source/procps/+bug/1610499. There's also a Stackoverflow discussion about it at http://stackoverflow.com/questions/38419078/logouts-while-running-hadoop-under-ubuntu-16-04. 
+When running a Spark on YARN cluster on Ubuntu 16.04 machines, chances are that after finishing a job, all processes owned by the user running Hadoop/YARN are killed. This is related to a bug in Ubuntu, which is documented at https://bugs.launchpad.net/ubuntu/+source/procps/+bug/1610499. There's also a Stackoverflow discussion about it at https://stackoverflow.com/questions/38419078/logouts-while-running-hadoop-under-ubuntu-16-04. Some workarounds are suggested. @@ -695,7 +695,7 @@ To use the system clock time source, add the following to Spark submit: ## Failed training on Ubuntu 16.04 (Ubuntu bug that may affect DL4J users) -When running a Spark on YARN cluster on Ubuntu 16.04 machines, chances are that after finishing a job, all processes owned by the user running Hadoop/YARN are killed. This is related to a bug in Ubuntu, which is documented at https://bugs.launchpad.net/ubuntu/+source/procps/+bug/1610499. There's also a Stackoverflow discussion about it at http://stackoverflow.com/questions/38419078/logouts-while-running-hadoop-under-ubuntu-16-04. +When running a Spark on YARN cluster on Ubuntu 16.04 machines, chances are that after finishing a job, all processes owned by the user running Hadoop/YARN are killed. This is related to a bug in Ubuntu, which is documented at https://bugs.launchpad.net/ubuntu/+source/procps/+bug/1610499. There's also a Stackoverflow discussion about it at https://stackoverflow.com/questions/38419078/logouts-while-running-hadoop-under-ubuntu-16-04. Some workarounds are suggested. diff --git a/docs/deeplearning4j/templates/beginners.md b/docs/deeplearning4j/templates/beginners.md index f7740516d..3ca4d82f1 100644 --- a/docs/deeplearning4j/templates/beginners.md +++ b/docs/deeplearning4j/templates/beginners.md @@ -99,4 +99,4 @@ You can also download a [free version of the Skymind Intelligence Layer](https:/ Most of what we know about deep learning is contained in academic papers. 
You can find some of the major research groups [here](https://skymind.ai/wiki/machine-learning-research-groups-labs). -While individual courses have limits on what they can teach, the Internet does not. Most math and programming questions can be answered by Googling and searching sites like [Stackoverflow](http://stackoverflow.com) and [Math Stackexchange](https://math.stackexchange.com/). +While individual courses have limits on what they can teach, the Internet does not. Most math and programming questions can be answered by Googling and searching sites like [Stackoverflow](https://stackoverflow.com) and [Math Stackexchange](https://math.stackexchange.com/). diff --git a/docs/deeplearning4j/templates/cheat-sheet.md b/docs/deeplearning4j/templates/cheat-sheet.md index 3437ffa0f..f4b4157af 100644 --- a/docs/deeplearning4j/templates/cheat-sheet.md +++ b/docs/deeplearning4j/templates/cheat-sheet.md @@ -220,7 +220,7 @@ List of supported activation functions: * **LEAKYRELU** - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/activations/impl/ActivationLReLU.java)) - leaky rectified linear unit. ```f(x) = max(0, x) + alpha * min(0, x)``` with ```alpha=0.01``` by default. * **RATIONALTANH** - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/activations/impl/ActivationRationalTanh.java)) - ```tanh(y) ~ sgn(y) * { 1 - 1/(1+|y|+y^2+1.41645*y^4)}``` which approximates ```f(x) = 1.7159 * tanh(2x/3)```, but should be faster to execute. 
([Reference](https://arxiv.org/abs/1508.01292)) * **RELU** - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/activations/impl/ActivationReLU.java)) - standard rectified linear unit: ```f(x) = x``` if ```x>0``` or ```f(x) = 0``` otherwise -* **RRELU** - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/activations/impl/ActivationRReLU.java)) - randomized rectified linear unit. Deterministic during test time. ([Reference](http://arxiv.org/abs/1505.00853)) +* **RRELU** - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/activations/impl/ActivationRReLU.java)) - randomized rectified linear unit. Deterministic during test time. ([Reference](https://arxiv.org/abs/1505.00853)) * **SIGMOID** - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/activations/impl/ActivationSigmoid.java)) - standard sigmoid activation function, ```f(x) = 1 / (1 + exp(-x))``` * **SOFTMAX** - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/activations/impl/ActivationSoftmax.java)) - standard softmax activation function * **SOFTPLUS** - ([Source](https://github.com/eclipse/deeplearning4j/blob/master/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/activations/impl/ActivationSoftPlus.java)) - ```f(x) = log(1+e^x)``` - shape is similar to a smooth version of the RELU activation function @@ -269,7 +269,7 @@ The [CS231n course notes](http://cs231n.github.io/neural-networks-3/#ada) have a Supported updaters in Deeplearning4j: * **AdaDelta** - 
([Source](https://github.com/eclipse/deeplearning4j/tree/master/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/learning/config/AdaDelta.java)) - [Reference](https://arxiv.org/abs/1212.5701) * **AdaGrad** - ([Source](https://github.com/eclipse/deeplearning4j/tree/master/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/learning/config/AdaGrad.java)) - [Reference](http://jmlr.org/papers/v12/duchi11a.html) -* **AdaMax** - ([Source](https://github.com/eclipse/deeplearning4j/tree/master/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/learning/config/AdaMax.java)) - A variant of the Adam updater - [Reference](http://arxiv.org/abs/1412.6980) +* **AdaMax** - ([Source](https://github.com/eclipse/deeplearning4j/tree/master/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/learning/config/AdaMax.java)) - A variant of the Adam updater - [Reference](https://arxiv.org/abs/1412.6980) * **Adam** - ([Source](https://github.com/eclipse/deeplearning4j/tree/master/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/learning/config/Adam.java)) * **Nadam** - ([Source](https://github.com/eclipse/deeplearning4j/tree/master/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/learning/config/Nadam.java)) - A variant of the Adam updater, using the Nesterov mementum update rule - [Reference](https://arxiv.org/abs/1609.04747) * **Nesterovs** - ([Source](https://github.com/eclipse/deeplearning4j/tree/master/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/learning/config/Nesterovs.java)) - Nesterov momentum updater diff --git a/docs/deeplearning4j/templates/config-performance-debugging.md b/docs/deeplearning4j/templates/config-performance-debugging.md index 6dafd13b7..04b92ba23 100644 --- a/docs/deeplearning4j/templates/config-performance-debugging.md +++ b/docs/deeplearning4j/templates/config-performance-debugging.md @@ -84,7 
+84,7 @@ Not all DL4J layer types are supported in cuDNN. DL4J layers with cuDNN support To check if cuDNN is being used, the simplest approach is to look at the log output when running inference or training: If cuDNN is NOT available when you are using a layer that supports it, you will see a message such as: ``` -o.d.n.l.c.ConvolutionLayer - cuDNN not found: use cuDNN for better GPU performance by including the deeplearning4j-cuda module. For more information, please refer to: https://deeplearning4j.org/cudnn +o.d.n.l.c.ConvolutionLayer - cuDNN not found: use cuDNN for better GPU performance by including the deeplearning4j-cuda module. For more information, please refer to: https://deeplearning4j.org/docs/latest/deeplearning4j-config-cudnn java.lang.ClassNotFoundException: org.deeplearning4j.nn.layers.convolution.CudnnConvolutionHelper at java.net.URLClassLoader.findClass(URLClassLoader.java:381) at java.lang.ClassLoader.loadClass(ClassLoader.java:424) diff --git a/docs/deeplearning4j/templates/examples-tour.md b/docs/deeplearning4j/templates/examples-tour.md index 2aa5dd29b..ee6c049ab 100644 --- a/docs/deeplearning4j/templates/examples-tour.md +++ b/docs/deeplearning4j/templates/examples-tour.md @@ -18,31 +18,31 @@ Most of the examples make use of DataVec, a toolkit for preprocessing and clearn This example takes the canonical Iris dataset of the flower species of the same name, whose relevant measurements are sepal length, sepal width, petal length and petal width. It builds a Spark RDD from the relatively small dataset and runs an analysis against it. -[Show me the code](http://github.com/eclipse/deeplearning4j-examples/blob/master/datavec-examples/src/main/java/org/datavec/transform/analysis/IrisAnalysis.java) +[Show me the code](https://github.com/eclipse/deeplearning4j-examples/blob/master/datavec-examples/src/main/java/org/datavec/transform/analysis/IrisAnalysis.java) ### BasicDataVecExample.java This example loads data into a Spark RDD. 
All DataVec transform operations use Spark RDDs. Here, we use DataVec to filter data, apply time transformations and remove columns. -[Show me the code](http://github.com/eclipse/deeplearning4j-examples/blob/master/datavec-examples/src/main/java/org/datavec/transform/basic/BasicDataVecExample.java) +[Show me the code](https://github.com/eclipse/deeplearning4j-examples/blob/master/datavec-examples/src/main/java/org/datavec/transform/basic/BasicDataVecExample.java) ### PrintSchemasAtEachStep.java This example shows the print Schema tools that are useful to visualize and to ensure that the code for the transform is behaving as expected. -[Show me the code](http://github.com/eclipse/deeplearning4j-examples/blob/master/datavec-examples/src/main/java/org/datavec/transform/debugging/PrintSchemasAtEachStep.java) +[Show me the code](https://github.com/eclipse/deeplearning4j-examples/blob/master/datavec-examples/src/main/java/org/datavec/transform/debugging/PrintSchemasAtEachStep.java) ### JoinExample.java You may need to join datasets before passing to a neural network. You can do that in DataVec, and this example shows you how. -[Show me the code](http://github.com/eclipse/deeplearning4j-examples/blob/master/datavec-examples/src/main/java/org/datavec/transform/join/JoinExample.java) +[Show me the code](https://github.com/eclipse/deeplearning4j-examples/blob/master/datavec-examples/src/main/java/org/datavec/transform/join/JoinExample.java) ### LogDataExample.java This is an example of parsing log data using DataVec. The obvious use cases are cybersecurity and customer relationship management. 
-[Show me the code](http://github.com/eclipse/deeplearning4j-examples/blob/master/datavec-examples/src/main/java/org/datavec/transform/logdata/LogDataExample.java) +[Show me the code](https://github.com/eclipse/deeplearning4j-examples/blob/master/datavec-examples/src/main/java/org/datavec/transform/logdata/LogDataExample.java) ### MnistImagePipelineExample.java @@ -50,7 +50,7 @@ This example is from the video below, which demonstrates the ParentPathLabelGene -[Show me the code](http://github.com/eclipse/deeplearning4j-examples/blob/master/dl4j-examples/src/main/java/org/deeplearning4j/examples/dataExamples/MnistImagePipelineExample.java) +[Show me the code](https://github.com/eclipse/deeplearning4j-examples/blob/master/dl4j-examples/src/main/java/org/deeplearning4j/examples/dataExamples/MnistImagePipelineExample.java) ### PreprocessNormalizerExample.java @@ -78,13 +78,13 @@ MNIST is the "Hello World" of deep learning. Simple, straightforward, and focuss This is a Single Layer Perceptron for recognizing digits. Note that this pulls the images from a binary package containing the dataset, a rather special case for data ingestion. -[Show me the code](http://github.com/eclipse/deeplearning4j-examples/blob/master/dl4j-examples/src/main/java/org/deeplearning4j/examples/feedforward/mnist/MLPMnistSingleLayerExample.java) +[Show me the code](https://github.com/eclipse/deeplearning4j-examples/blob/master/dl4j-examples/src/main/java/org/deeplearning4j/examples/feedforward/mnist/MLPMnistSingleLayerExample.java) ### MLPMnistTwoLayerExample.java A two-layer perceptron for MNIST, showing there is more than one useful network for a given dataset. 
-[Show me the code](http://github.com/eclipse/deeplearning4j-examples/blob/master/dl4j-examples/src/main/java/org/deeplearning4j/examples/feedforward/mnist/MLPMnistTwoLayerExample.java) +[Show me the code](https://github.com/eclipse/deeplearning4j-examples/blob/master/dl4j-examples/src/main/java/org/deeplearning4j/examples/feedforward/mnist/MLPMnistTwoLayerExample.java) ### Feedforward Examples @@ -92,7 +92,7 @@ Data flows through feed-forward neural networks in a single pass from input via These networks can be used for a wide range of tasks depending on they are configured. Along with image classification over MNIST data, this directory has examples demonstrating regression, classification, and anomoly detection. -[Show me the code](http://github.com/eclipse/deeplearning4j-examples/tree/master/dl4j-examples/src/main/java/org/deeplearning4j/examples/feedforward) +[Show me the code](https://github.com/eclipse/deeplearning4j-examples/tree/master/dl4j-examples/src/main/java/org/deeplearning4j/examples/feedforward) ### Convolutional Neural Networks @@ -102,7 +102,7 @@ Convolutional Neural Networks are mainly used for image recognition, although th This example can be run using either LeNet or AlexNet. -[Show me the code](http://github.com/eclipse/deeplearning4j-examples/blob/master/dl4j-examples/src/main/java/org/deeplearning4j/examples/convolution/AnimalsClassification.java) +[Show me the code](https://github.com/eclipse/deeplearning4j-examples/blob/master/dl4j-examples/src/main/java/org/deeplearning4j/examples/convolution/AnimalsClassification.java) --- @@ -115,7 +115,7 @@ load the model for later training or inference. This demonstrates saving and loading a network build using the class ComputationGraph. 
-[Show me the code](http://github.com/eclipse/deeplearning4j-examples/blob/master/dl4j-examples/src/main/java/org/deeplearning4j/examples/misc/modelsaving/SaveLoadComputationGraph.java) +[Show me the code](https://github.com/eclipse/deeplearning4j-examples/blob/master/dl4j-examples/src/main/java/org/deeplearning4j/examples/misc/modelsaving/SaveLoadComputationGraph.java) ### SaveLoadMultiLayerNetwork.java @@ -135,11 +135,11 @@ Do you need to add a Loss Function that is not available or prebuilt yet? Check ### CustomLossExample.java -[Show me the code](http://github.com/eclipse/deeplearning4j-examples/blob/master/dl4j-examples/src/main/java/org/deeplearning4j/examples/misc/lossfunctions/CustomLossExample.java) +[Show me the code](https://github.com/eclipse/deeplearning4j-examples/blob/master/dl4j-examples/src/main/java/org/deeplearning4j/examples/misc/lossfunctions/CustomLossExample.java) ### CustomLossL1L2.java -[Show me the code](http://github.com/eclipse/deeplearning4j-examples/blob/master/dl4j-examples/src/main/java/org/deeplearning4j/examples/misc/lossfunctions/CustomLossL1L2.java) +[Show me the code](https://github.com/eclipse/deeplearning4j-examples/blob/master/dl4j-examples/src/main/java/org/deeplearning4j/examples/misc/lossfunctions/CustomLossL1L2.java) ### Custom Layer @@ -147,7 +147,7 @@ Do you need to add a layer with features that aren't available in DeepLearning4J ### CustomLayerExample.java -[Show me the code](http://github.com/eclipse/deeplearning4j-examples/blob/master/dl4j-examples/src/main/java/org/deeplearning4j/examples/misc/customlayers/CustomLayerExample.java) +[Show me the code](https://github.com/eclipse/deeplearning4j-examples/blob/master/dl4j-examples/src/main/java/org/deeplearning4j/examples/misc/customlayers/CustomLayerExample.java) --- @@ -159,25 +159,25 @@ Neural Networks for NLP? We have those, too. Global Vectors for Word Representation are useful for detecting relationships between words. 
-[Show me the code](http://github.com/eclipse/deeplearning4j-examples/blob/master/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/glove/GloVeExample.java) +[Show me the code](https://github.com/eclipse/deeplearning4j-examples/blob/master/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/glove/GloVeExample.java) ### Paragraph Vectors A vectorized representation of words. Described [here](https://cs.stanford.edu/~quocle/paragraph_vector.pdf) -[Show me the code](http://github.com/eclipse/deeplearning4j-examples/blob/master/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/paragraphvectors/ParagraphVectorsClassifierExample.java) +[Show me the code](https://github.com/eclipse/deeplearning4j-examples/blob/master/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/paragraphvectors/ParagraphVectorsClassifierExample.java) ### Sequence Vectors One way to represent sentences is as a sequence of words. -[Show me the code](http://github.com/eclipse/deeplearning4j-examples/blob/master/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/sequencevectors/SequenceVectorsTextExample.java) +[Show me the code](https://github.com/eclipse/deeplearning4j-examples/blob/master/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/sequencevectors/SequenceVectorsTextExample.java) ### Word2Vec Described [here](https://deeplearning4j.org/word2vec.html) -[Show me the code](http://github.com/eclipse/deeplearning4j-examples/blob/master/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/word2vec/Word2VecRawTextExample.java) +[Show me the code](https://github.com/eclipse/deeplearning4j-examples/blob/master/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/word2vec/Word2VecRawTextExample.java) --- @@ -185,7 +185,7 @@ Described [here](https://deeplearning4j.org/word2vec.html) t-Distributed Stochastic Neighbor Embedding (t-SNE) is useful for data visualization. 
We include an example in the NLP section since word similarity visualization is a common use. -[Show me the code](http://github.com/eclipse/deeplearning4j-examples/blob/master/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/tsne/TSNEStandardExample.java) +[Show me the code](https://github.com/eclipse/deeplearning4j-examples/blob/master/dl4j-examples/src/main/java/org/deeplearning4j/examples/nlp/tsne/TSNEStandardExample.java) --- @@ -199,19 +199,19 @@ The examples folder for Recurrent Neural Networks has the following: An RNN learns a string of characters. -[Show me the code](http://github.com/eclipse/deeplearning4j-examples/blob/master/dl4j-examples/src/main/java/org/deeplearning4j/examples/recurrent/basic/BasicRNNExample.java) +[Show me the code](https://github.com/eclipse/deeplearning4j-examples/blob/master/dl4j-examples/src/main/java/org/deeplearning4j/examples/recurrent/basic/BasicRNNExample.java) ### GravesLSTMCharModellingExample.java Takes the complete works of Shakespeare as a sequence of characters and Trains a Neural Net to generate "Shakespeare" one character at a time. -[Show me the code](http://github.com/eclipse/deeplearning4j-examples/blob/master/dl4j-examples/src/main/java/org/deeplearning4j/examples/recurrent/character/GravesLSTMCharModellingExample.java) +[Show me the code](https://github.com/eclipse/deeplearning4j-examples/blob/master/dl4j-examples/src/main/java/org/deeplearning4j/examples/recurrent/character/GravesLSTMCharModellingExample.java) ### SingleTimestepRegressionExample.java Regression with an LSTM (Long Short Term Memory) Recurrent Neural Network. 
-[Show me the code](http://github.com/eclipse/deeplearning4j-examples/blob/master/dl4j-examples/src/main/java/org/deeplearning4j/examples/recurrent/regression/SingleTimestepRegressionExample.java) +[Show me the code](https://github.com/eclipse/deeplearning4j-examples/blob/master/dl4j-examples/src/main/java/org/deeplearning4j/examples/recurrent/regression/SingleTimestepRegressionExample.java) ### AdditionRNN.java @@ -254,13 +254,13 @@ DeepLearning4j supports using a Spark Cluster for network training. Here are the ### MnistMLPExample.java This is an example of a Multi-Layer Perceptron training on the Mnist data set of handwritten digits. -[Show me the code](http://github.com/eclipse/deeplearning4j-examples/blob/master/dl4j-spark-examples/dl4j-spark/src/main/java/org/deeplearning4j/mlp/MnistMLPExample.java) +[Show me the code](https://github.com/eclipse/deeplearning4j-examples/blob/master/dl4j-spark-examples/dl4j-spark/src/main/java/org/deeplearning4j/mlp/MnistMLPExample.java) ### SparkLSTMCharacterExample.java An LSTM recurrent Network in Spark. -[Show me the code](http://github.com/eclipse/deeplearning4j-examples/blob/master/dl4j-spark-examples/dl4j-spark/src/main/java/org/deeplearning4j/rnn/SparkLSTMCharacterExample.java) +[Show me the code](https://github.com/eclipse/deeplearning4j-examples/blob/master/dl4j-spark-examples/dl4j-spark/src/main/java/org/deeplearning4j/rnn/SparkLSTMCharacterExample.java) --- @@ -274,7 +274,7 @@ The learning algorithms and loss functions are executed as ND4J operations. This is a directory with examples for creating and manipulating NDArrays. -[Show me the code](http://github.com/eclipse/deeplearning4j-examples/tree/master/nd4j-examples/src/main/java/org/nd4j/examples) +[Show me the code](https://github.com/eclipse/deeplearning4j-examples/tree/master/nd4j-examples/src/main/java/org/nd4j/examples) --- @@ -282,4 +282,4 @@ This is a directory with examples for creating and manipulating NDArrays. 
Deep learning algorithms have learned to play Space Invaders and Doom using reinforcement learning. DeepLearning4J/RL4J examples of Reinforcement Learning are available here: -[Show me the code](http://github.com/eclipse/deeplearning4j-examples/tree/master/rl4j-examples) \ No newline at end of file +[Show me the code](https://github.com/eclipse/deeplearning4j-examples/tree/master/rl4j-examples) \ No newline at end of file diff --git a/docs/deeplearning4j/templates/quickstart.md b/docs/deeplearning4j/templates/quickstart.md index bcc042f07..25f4216ff 100644 --- a/docs/deeplearning4j/templates/quickstart.md +++ b/docs/deeplearning4j/templates/quickstart.md @@ -179,7 +179,7 @@ Congratulations! You just trained your first neural network with Deeplearning4j. **Q:** **SPARK ISSUES** I am running the examples and having issues with the Spark based examples such as distributed training or datavec transform options. -**A:** You may be missing some dependencies that Spark requires. See this [Stack Overflow discussion](http://stackoverflow.com/a/38735202/3892515) for a discussion of potential dependency issues. Windows users may need the winutils.exe from Hadoop. +**A:** You may be missing some dependencies that Spark requires. See this [Stack Overflow discussion](https://stackoverflow.com/a/38735202/3892515) for a discussion of potential dependency issues. Windows users may need the winutils.exe from Hadoop. 
Download winutils.exe from https://github.com/steveloughran/winutils and put it into the null/bin/winutils.exe (or create a hadoop folder and add that to HADOOP_HOME) diff --git a/libnd4j/CMakeLists.txt b/libnd4j/CMakeLists.txt index c563eda27..9ce9b46a3 100755 --- a/libnd4j/CMakeLists.txt +++ b/libnd4j/CMakeLists.txt @@ -25,7 +25,7 @@ elseif (APPLE) elseif(WIN32) set(X86_BUILD true) if (CUDA_BLAS) - set(CMAKE_CXX_FLAGS_RELEASE " /O2 -D_RELEASE=true /wd4804") + set(CMAKE_CXX_FLAGS_RELEASE "-D_RELEASE=true /wd4804") set(CMAKE_CXX_FLAGS_DEBUG " /FS /EHsc /wd4661 /wd4804 /wd4267 /wd4244 /wd4251 /wd4305") else() set(CMAKE_CXX_FLAGS_RELEASE "-O3 -fPIC -std=c++11 -fmax-errors=2 -D_RELEASE=true") diff --git a/libnd4j/blas/Environment.cpp b/libnd4j/blas/Environment.cpp index 90c391cf1..de0ac925b 100644 --- a/libnd4j/blas/Environment.cpp +++ b/libnd4j/blas/Environment.cpp @@ -43,7 +43,7 @@ namespace nd4j { nd4j::Environment::Environment() { - _tadThreshold.store(8); + _tadThreshold.store(1); _elementThreshold.store(1024); _verbose.store(false); _debug.store(false); @@ -52,6 +52,7 @@ namespace nd4j { _leaks.store(false); _dataType.store(nd4j::DataType::FLOAT32); _maxThreads = std::thread::hardware_concurrency(); + _maxMasterThreads = _maxThreads.load(); #ifndef ANDROID const char* omp_threads = std::getenv("OMP_NUM_THREADS"); @@ -66,6 +67,94 @@ namespace nd4j { // still do nothing } } + + /** + * Defines size of thread pool used for parallelism + */ + const char* max_threads = std::getenv("SD_MAX_THREADS"); + if (max_threads != nullptr) { + try { + std::string t(max_threads); + int val = std::stoi(t); + _maxThreads.store(val); + } catch (std::invalid_argument &e) { + // just do nothing + } catch (std::out_of_range &e) { + // still do nothing + } + } + + /** + * Defines max number of threads usable at once + */ + const char* max_master_threads = std::getenv("SD_MASTER_THREADS"); + if (max_master_threads != nullptr) { + try { + std::string t(max_master_threads); + int val = 
std::stoi(t); + _maxMasterThreads.store(val); + } catch (std::invalid_argument &e) { + // just do nothing + } catch (std::out_of_range &e) { + // still do nothing + } + } + + /** + * If this env var is defined - we'll disallow use of platform-specific helpers (mkldnn, cudnn, etc) + */ + const char* forbid_helpers = std::getenv("SD_FORBID_HELPERS"); + if (max_master_threads != nullptr) { + _allowHelpers = false; + } + + /** + * This var defines max amount of host memory library can allocate + */ + const char* max_primary_memory = std::getenv("SD_MAX_PRIMARY_BYTES"); + if (max_primary_memory != nullptr) { + try { + std::string t(max_primary_memory); + auto val = std::stol(t); + _maxTotalPrimaryMemory.store(val); + } catch (std::invalid_argument &e) { + // just do nothing + } catch (std::out_of_range &e) { + // still do nothing + } + } + + /** + * This var defines max amount of special (i.e. device) memory library can allocate on all devices combined + */ + const char* max_special_memory = std::getenv("SD_MAX_SPECIAL_BYTES"); + if (max_special_memory != nullptr) { + try { + std::string t(max_special_memory); + auto val = std::stol(t); + _maxTotalSpecialMemory.store(val); + } catch (std::invalid_argument &e) { + // just do nothing + } catch (std::out_of_range &e) { + // still do nothing + } + } + + /** + * This var defines max amount of special (i.e. 
device) memory library can allocate on all devices combined + */ + const char* max_device_memory = std::getenv("SD_MAX_DEVICE_BYTES"); + if (max_device_memory != nullptr) { + try { + std::string t(max_device_memory); + auto val = std::stol(t); + _maxDeviceMemory.store(val); + } catch (std::invalid_argument &e) { + // just do nothing + } catch (std::out_of_range &e) { + // still do nothing + } + } #endif #ifdef __CUDABLAS__ @@ -97,6 +186,18 @@ namespace nd4j { // } + void Environment::setMaxPrimaryMemory(uint64_t maxBytes) { + _maxTotalPrimaryMemory = maxBytes; + } + + void Environment::setMaxSpecialyMemory(uint64_t maxBytes) { + _maxTotalSpecialMemory; + } + + void Environment::setMaxDeviceMemory(uint64_t maxBytes) { + _maxDeviceMemory = maxBytes; + } + Environment *Environment::getInstance() { if (_instance == 0) _instance = new Environment(); @@ -179,8 +280,16 @@ namespace nd4j { return _maxThreads.load(); } + int Environment::maxMasterThreads() { + return _maxMasterThreads.load(); + } + void Environment::setMaxThreads(int max) { - _maxThreads.store(max); + //_maxThreads.store(max); + } + + void Environment::setMaxMasterThreads(int max) { + //_maxMasterThreads = max; } bool Environment::precisionBoostAllowed() { @@ -211,6 +320,14 @@ namespace nd4j { return _blasPatchVersion; } + bool Environment::helpersAllowed() { + return _allowHelpers.load(); + } + + void Environment::allowHelpers(bool reallyAllow) { + _allowHelpers.store(reallyAllow); + } + nd4j::Environment *nd4j::Environment::_instance = 0; } diff --git a/libnd4j/blas/Environment.h b/libnd4j/blas/Environment.h index a303d27d0..54982471f 100644 --- a/libnd4j/blas/Environment.h +++ b/libnd4j/blas/Environment.h @@ -37,10 +37,18 @@ namespace nd4j{ std::atomic _debug; std::atomic _leaks; std::atomic _profile; - std::atomic _maxThreads; std::atomic _dataType; std::atomic _precBoost; std::atomic _useMKLDNN{true}; + std::atomic _allowHelpers{true}; + + std::atomic _maxThreads; + std::atomic _maxMasterThreads; + + 
// these fields hold defaults + std::atomic _maxTotalPrimaryMemory{-1}; + std::atomic _maxTotalSpecialMemory{-1}; + std::atomic _maxDeviceMemory{-1}; #ifdef __ND4J_EXPERIMENTAL__ const bool _experimental = true; @@ -74,6 +82,8 @@ namespace nd4j{ void setDebug(bool reallyDebug); void setProfiling(bool reallyProfile); void setLeaksDetector(bool reallyDetect); + bool helpersAllowed(); + void allowHelpers(bool reallyAllow); int tadThreshold(); void setTadThreshold(int threshold); @@ -84,6 +94,13 @@ namespace nd4j{ int maxThreads(); void setMaxThreads(int max); + int maxMasterThreads(); + void setMaxMasterThreads(int max); + + void setMaxPrimaryMemory(uint64_t maxBytes); + void setMaxSpecialyMemory(uint64_t maxBytes); + void setMaxDeviceMemory(uint64_t maxBytes); + bool isUseMKLDNN() { return _useMKLDNN.load(); } void setUseMKLDNN(bool useMKLDNN) { _useMKLDNN.store(useMKLDNN); } diff --git a/libnd4j/blas/NativeOps.h b/libnd4j/blas/NativeOps.h index b10b3807a..ff368d7c8 100755 --- a/libnd4j/blas/NativeOps.h +++ b/libnd4j/blas/NativeOps.h @@ -1732,6 +1732,7 @@ typedef nd4j::graph::RandomGenerator OpaqueRandomGenerator; ND4J_EXPORT OpaqueContext* createGraphContext(int nodeId); ND4J_EXPORT OpaqueRandomGenerator* getGraphContextRandomGenerator(OpaqueContext* ptr); +ND4J_EXPORT void ctxAllowHelpers(OpaqueContext* ptr, bool reallyAllow); ND4J_EXPORT void markGraphContextInplace(OpaqueContext* ptr, bool reallyInplace); ND4J_EXPORT void setGraphContextCudaContext(OpaqueContext* ptr, void *stream, void *reductionPointer, void *allocationPointer); ND4J_EXPORT void setGraphContextInputArray(OpaqueContext* ptr, int index, void *buffer, void *shapeInfo, void *specialBuffer, void *specialShapeInfo); diff --git a/libnd4j/blas/cpu/NativeOps.cpp b/libnd4j/blas/cpu/NativeOps.cpp index 151f5c883..df6ccc240 100644 --- a/libnd4j/blas/cpu/NativeOps.cpp +++ b/libnd4j/blas/cpu/NativeOps.cpp @@ -2874,6 +2874,9 @@ void deleteGraphContext(nd4j::graph::Context* ptr) { delete ptr; } +void 
ctxAllowHelpers(OpaqueContext* ptr, bool reallyAllow) { + ptr->allowHelpers(reallyAllow); +} nd4j::graph::RandomGenerator* createRandomGenerator(Nd4jLong rootSeed, Nd4jLong nodeSeed) { return new nd4j::graph::RandomGenerator(rootSeed, nodeSeed); diff --git a/libnd4j/blas/cuda/NativeOps.cu b/libnd4j/blas/cuda/NativeOps.cu index 2af0e3783..cda6acbad 100755 --- a/libnd4j/blas/cuda/NativeOps.cu +++ b/libnd4j/blas/cuda/NativeOps.cu @@ -3558,4 +3558,8 @@ bool isMinimalRequirementsMet() { bool isOptimalRequirementsMet() { return true; +} + +void ctxAllowHelpers(OpaqueContext* ptr, bool reallyAllow) { + ptr->allowHelpers(reallyAllow); } \ No newline at end of file diff --git a/libnd4j/include/execution/Threads.h b/libnd4j/include/execution/Threads.h index 683220b61..be12a311a 100644 --- a/libnd4j/include/execution/Threads.h +++ b/libnd4j/include/execution/Threads.h @@ -107,11 +107,22 @@ namespace samediff { * @param increment * @return */ - static int parallel_for(FUNC_1D function, int64_t start, int64_t stop, int64_t increment = 1, uint32_t numThreads = nd4j::Environment::getInstance()->maxThreads()); - - static int parallel_tad(FUNC_1D function, int64_t start, int64_t stop, int64_t increment = 1, uint32_t numThreads = nd4j::Environment::getInstance()->maxThreads()); + static int parallel_for(FUNC_1D function, int64_t start, int64_t stop, int64_t increment = 1, uint32_t numThreads = nd4j::Environment::getInstance()->maxMasterThreads()); /** + * This function executes 1 dimensional loop for a given number of threads + * + * @param function + * @param start + * @param stop + * @param increment + * @param numThreads + * @return + */ + static int parallel_tad(FUNC_1D function, int64_t start, int64_t stop, int64_t increment = 1, uint32_t numThreads = nd4j::Environment::getInstance()->maxMasterThreads()); + + /** + * This method will execute function splitting 2 nested loops space with multiple threads * * @param function * @param numThreads @@ -123,9 +134,10 @@ namespace 
samediff { * @param inc_y * @return */ - static int parallel_for(FUNC_2D function, int64_t start_x, int64_t stop_x, int64_t inc_x, int64_t start_y, int64_t stop_y, int64_t inc_y, uint64_t numThreads = nd4j::Environment::getInstance()->maxThreads(), bool debug = false); + static int parallel_for(FUNC_2D function, int64_t start_x, int64_t stop_x, int64_t inc_x, int64_t start_y, int64_t stop_y, int64_t inc_y, uint64_t numThreads = nd4j::Environment::getInstance()->maxMasterThreads(), bool debug = false); /** + * This method will execute function splitting 3 nested loops space with multiple threads * * @param function * @param numThreads @@ -140,7 +152,7 @@ namespace samediff { * @param inc_z * @return */ - static int parallel_for(FUNC_3D function, int64_t start_x, int64_t stop_x, int64_t inc_x, int64_t start_y, int64_t stop_y, int64_t inc_y, int64_t start_z, int64_t stop_z, int64_t inc_z, uint64_t numThreads = nd4j::Environment::getInstance()->maxThreads()); + static int parallel_for(FUNC_3D function, int64_t start_x, int64_t stop_x, int64_t inc_x, int64_t start_y, int64_t stop_y, int64_t inc_y, int64_t start_z, int64_t stop_z, int64_t inc_z, uint64_t numThreads = nd4j::Environment::getInstance()->maxMasterThreads()); /** * @@ -148,11 +160,11 @@ namespace samediff { * @param numThreads * @return */ - static int parallel_do(FUNC_DO function, uint64_t numThreads = nd4j::Environment::getInstance()->maxThreads()); + static int parallel_do(FUNC_DO function, uint64_t numThreads = nd4j::Environment::getInstance()->maxMasterThreads()); - static int64_t parallel_long(FUNC_RL function, FUNC_AL aggregator, int64_t start, int64_t stop, int64_t increment = 1, uint64_t numThreads = nd4j::Environment::getInstance()->maxThreads()); + static int64_t parallel_long(FUNC_RL function, FUNC_AL aggregator, int64_t start, int64_t stop, int64_t increment = 1, uint64_t numThreads = nd4j::Environment::getInstance()->maxMasterThreads()); - static double parallel_double(FUNC_RD function, FUNC_AD 
aggregator, int64_t start, int64_t stop, int64_t increment = 1, uint64_t numThreads = nd4j::Environment::getInstance()->maxThreads()); + static double parallel_double(FUNC_RD function, FUNC_AD aggregator, int64_t start, int64_t stop, int64_t increment = 1, uint64_t numThreads = nd4j::Environment::getInstance()->maxMasterThreads()); }; } diff --git a/libnd4j/include/graph/Context.h b/libnd4j/include/graph/Context.h index f397d46f3..d5b85b543 100644 --- a/libnd4j/include/graph/Context.h +++ b/libnd4j/include/graph/Context.h @@ -58,9 +58,12 @@ namespace nd4j { std::vector _dataTypes; + // fields for fast execution (out-of-graph ops use) std::vector _fastpath_in; std::vector _fastpath_out; std::vector _handles; + + bool _helpersAllowed = true; public: Context(ContextPrototype* prototype, VariableSpace* variableSpace); @@ -187,7 +190,15 @@ namespace nd4j { void setIArguments(Nd4jLong *arguments, int numberOfArguments); void setBArguments(bool *arguments, int numberOfArguments); + void setTArguments(const std::vector &tArgs); + void setIArguments(const std::vector &tArgs); + void setBArguments(const std::vector &tArgs); + void setCudaContext(Nd4jPointer cudaStream, Nd4jPointer reductionPointer, Nd4jPointer allocationPointer); + + + void allowHelpers(bool reallyAllow); + bool helpersAllowed(); }; } } diff --git a/libnd4j/include/graph/impl/Context.cpp b/libnd4j/include/graph/impl/Context.cpp index 085fa969e..146e66067 100644 --- a/libnd4j/include/graph/impl/Context.cpp +++ b/libnd4j/include/graph/impl/Context.cpp @@ -461,6 +461,29 @@ namespace nd4j { v->setContext(_context); #endif } + + void Context::allowHelpers(bool reallyAllow) { + _helpersAllowed = reallyAllow; + } + + bool Context::helpersAllowed() { + return _helpersAllowed; + } + + void Context::setTArguments(const std::vector &tArgs) { + for (auto t:tArgs) + _tArgs.emplace_back(t); + } + + void Context::setIArguments(const std::vector &iArgs) { + for (auto i:iArgs) + _iArgs.emplace_back(i); + } + + void 
Context::setBArguments(const std::vector &bArgs) { + for (auto b:bArgs) + _bArgs.emplace_back(b); + } } } diff --git a/libnd4j/include/helpers/files.h b/libnd4j/include/helpers/files.h index fa87d4e3e..c49cedbb7 100644 --- a/libnd4j/include/helpers/files.h +++ b/libnd4j/include/helpers/files.h @@ -16,7 +16,7 @@ // // Methods to lookup files in $PATH -// adopted from http://stackoverflow.com/questions/2718915/check-if-file-exists-including-on-path +// adopted from https://stackoverflow.com/questions/2718915/check-if-file-exists-including-on-path // #ifndef LIBND4J_FILES_H diff --git a/libnd4j/include/ops/declarable/generic/parity_ops/adjust_contrast.cpp b/libnd4j/include/ops/declarable/generic/parity_ops/adjust_contrast.cpp index 538214b14..cc11eedca 100644 --- a/libnd4j/include/ops/declarable/generic/parity_ops/adjust_contrast.cpp +++ b/libnd4j/include/ops/declarable/generic/parity_ops/adjust_contrast.cpp @@ -27,12 +27,14 @@ namespace nd4j { namespace ops { -CONFIGURABLE_OP_IMPL(adjust_contrast, 1, 1, true, 1, 0) { +CONFIGURABLE_OP_IMPL(adjust_contrast, 1, 1, true, -2, 0) { auto input = INPUT_VARIABLE(0); auto output = OUTPUT_VARIABLE(0); - const double factor = T_ARG(0); + REQUIRE_TRUE(block.numT() > 0 || block.width() > 1, 0, "ADJUST_CONTRAST: Scale factor required"); + + const double factor = block.width() > 1 ? 
INPUT_VARIABLE(1)->e(0) : T_ARG(0); REQUIRE_TRUE(input->rankOf() > 2, 0, "ADJUST_CONTRAST: op expects rank of input array to be >= 3, but got %i instead", input->rankOf()); REQUIRE_TRUE(input->sizeAt(-1) == 3, 0, "ADJUST_CONTRAST: operation expects image with 3 channels (R, G, B), but got %i instead", input->sizeAt(-1)); @@ -59,15 +61,17 @@ DECLARE_TYPES(adjust_contrast) { } - CONFIGURABLE_OP_IMPL(adjust_contrast_v2, 1, 1, true, 1, 0) { + CONFIGURABLE_OP_IMPL(adjust_contrast_v2, 1, 1, true, -2, 0) { auto input = INPUT_VARIABLE(0); auto output = OUTPUT_VARIABLE(0); - const double factor = T_ARG(0); + REQUIRE_TRUE(block.numT() > 0 || block.width() > 1, 0, "ADJUST_CONTRAST_V2: Scale factor required"); - REQUIRE_TRUE(input->rankOf() > 2, 0, "ADJUST_CONTRAST: op expects rank of input array to be >= 3, but got %i instead", input->rankOf()); - REQUIRE_TRUE(input->sizeAt(-1) == 3, 0, "ADJUST_CONTRAST: operation expects image with 3 channels (R, G, B), but got %i instead", input->sizeAt(-1)); + const double factor = block.width() > 1 ? 
INPUT_VARIABLE(1)->e(0) : T_ARG(0); + + REQUIRE_TRUE(input->rankOf() > 2, 0, "ADJUST_CONTRAST_V2: op expects rank of input array to be >= 3, but got %i instead", input->rankOf()); + REQUIRE_TRUE(input->sizeAt(-1) == 3, 0, "ADJUST_CONTRAST_V2: operation expects image with 3 channels (R, G, B), but got %i instead", input->sizeAt(-1)); // compute mean before std::vector axes(input->rankOf() - 1); @@ -78,10 +82,10 @@ DECLARE_TYPES(adjust_contrast) { auto mean = input->reduceAlongDims(reduce::Mean, axes); // result as (x - mean) * factor + mean - std::unique_ptr temp(input->dup()); - input->applyTrueBroadcast(BroadcastOpsTuple::Subtract(), &mean, temp.get()); - temp->applyScalar(scalar::Multiply, factor); - temp->applyTrueBroadcast(BroadcastOpsTuple::Add(), &mean, output); + auto temp = input->ulike(); + input->applyTrueBroadcast(BroadcastOpsTuple::Subtract(), &mean, &temp); + temp.applyScalar(scalar::Multiply, factor); + temp.applyTrueBroadcast(BroadcastOpsTuple::Add(), &mean, output); return Status::OK(); } diff --git a/libnd4j/include/ops/declarable/headers/nn.h b/libnd4j/include/ops/declarable/headers/nn.h index 9f9b0e40a..810733680 100644 --- a/libnd4j/include/ops/declarable/headers/nn.h +++ b/libnd4j/include/ops/declarable/headers/nn.h @@ -137,7 +137,7 @@ namespace nd4j { #endif /** - * This operation performs batch normalization of layer, it is based on following article http://arxiv.org/abs/1502.03167. + * This operation performs batch normalization of layer, it is based on following article https://arxiv.org/abs/1502.03167. 
* Expected arguments: * x: input 4D array of shape [bS,iH,iW,iD] (data format = NHWC) or [bS,iD,iH,iW] (data format = NCHW), where * bS - batch size diff --git a/libnd4j/include/ops/declarable/headers/parity_ops.h b/libnd4j/include/ops/declarable/headers/parity_ops.h index 3660ee229..590d99308 100644 --- a/libnd4j/include/ops/declarable/headers/parity_ops.h +++ b/libnd4j/include/ops/declarable/headers/parity_ops.h @@ -610,8 +610,8 @@ namespace nd4j { * */ #if NOT_EXCLUDED(OP_adjust_contrast) - DECLARE_CONFIGURABLE_OP(adjust_contrast, 1, 1, true, 1, 0); - DECLARE_CONFIGURABLE_OP(adjust_contrast_v2, 1, 1, true, 1, 0); + DECLARE_CONFIGURABLE_OP(adjust_contrast, 1, 1, true, -2, 0); + DECLARE_CONFIGURABLE_OP(adjust_contrast_v2, 1, 1, true, -2, 0); #endif diff --git a/libnd4j/include/ops/declarable/helpers/convolutions.h b/libnd4j/include/ops/declarable/helpers/convolutions.h index fc7c41034..68cfc8d05 100644 --- a/libnd4j/include/ops/declarable/helpers/convolutions.h +++ b/libnd4j/include/ops/declarable/helpers/convolutions.h @@ -194,6 +194,54 @@ namespace nd4j { } + static inline void calcPaddingAndDilationForConv2DMKL(const int iH, const int iW, const int oH, const int oW, const int kH, const int kW, const int sH, const int sW, const int isSameMode, int& pH, int& pW, int& dH, int& dW) { + + if(kH != 1) { + if(isSameMode) { + pH = (oH - 1) * sH - iH + kH - pH; + dH = dH - 1; + } + else + dH = (iH + 2*pH - (oH - 1) * sH - kH) / (kH - 1); + } + if(kW != 1) { + if(isSameMode) { + pW = (oW - 1) * sW - iW + kW - pW; + dW = dW - 1; + } + else + dW = (iW + 2*pW - (oW - 1) * sW - kW) / (kW - 1); + } + } + + static inline void calcPaddingAndDilationForConv3DMKL(const int iD, const int iH, const int iW, const int oD, const int oH, const int oW, const int kD, const int kH, const int kW, const int sD, const int sH, const int sW, const int isSameMode, int& pD, int& pH, int& pW, int& dD, int& dH, int& dW) { + + if(kD != 1) { + if(isSameMode) { + pD = (oD - 1) * sD - iD + kD - pD; + 
dD = dD - 1; + } + else + dD = (iD + 2*pD - (oD - 1) * sD - kD) / (kD - 1); + } + if(kH != 1) { + if(isSameMode) { + pH = (oH - 1) * sH - iH + kH - pH; + dH = dH - 1; + } + else + dH = (iH + 2*pH - (oH - 1) * sH - kH) / (kH - 1); + } + if(kW != 1) { + if(isSameMode) { + pW = (oW - 1) * sW - iW + kW - pW; + dW = dW - 1; + } + else + dW = (iW + 2*pW - (oW - 1) * sW - kW) / (kW - 1); + } + } + static void conv2d(nd4j::graph::Context &context, const NDArray* input, const NDArray* weights, const NDArray* bias, NDArray* output, const int kH, const int kW, const int sH, const int sW, int pH, int pW, const int dH, const int dW, const int isSameMode, const int isNCHW); // static void conv2d(nd4j::graph::Context & block, const std::vector& inArrs, NDArray* output, const std::vector& intArgs); diff --git a/libnd4j/include/ops/declarable/helpers/cpu/gru.cpp b/libnd4j/include/ops/declarable/helpers/cpu/gru.cpp index 9799e609d..579ab2612 100644 --- a/libnd4j/include/ops/declarable/helpers/cpu/gru.cpp +++ b/libnd4j/include/ops/declarable/helpers/cpu/gru.cpp @@ -19,7 +19,7 @@ // // implementation of gated Recurrent Unit cell -// (cf. http://arxiv.org/abs/1406.1078). +// (cf. https://arxiv.org/abs/1406.1078). // Kyunghyun Cho, Bart van Merrienboer, Caglar Gulcehre, Dzmitry Bahdanau, Fethi Bougares, Holger Schwenk, Yoshua Bengio // "Learning Phrase Representations using RNN Encoder-Decoder for Statistical Machine Translation" diff --git a/libnd4j/include/ops/declarable/helpers/cuda/gru.cu b/libnd4j/include/ops/declarable/helpers/cuda/gru.cu index 8e7b62a91..cbbdf1439 100644 --- a/libnd4j/include/ops/declarable/helpers/cuda/gru.cu +++ b/libnd4j/include/ops/declarable/helpers/cuda/gru.cu @@ -19,7 +19,7 @@ // // implementation of gated Recurrent Unit cell -// (cf. http://arxiv.org/abs/1406.1078). +// (cf. https://arxiv.org/abs/1406.1078). 
// Kyunghyun Cho, Bart van Merrienboer, Caglar Gulcehre, Dzmitry Bahdanau, Fethi Bougares, Holger Schwenk, Yoshua Bengio // "Learning Phrase Representations using RNN Encoder-Decoder for Statistical Machine Translation" diff --git a/libnd4j/include/ops/declarable/impl/DeclarableOp.cpp b/libnd4j/include/ops/declarable/impl/DeclarableOp.cpp index fe1574ea1..5ee19b007 100644 --- a/libnd4j/include/ops/declarable/impl/DeclarableOp.cpp +++ b/libnd4j/include/ops/declarable/impl/DeclarableOp.cpp @@ -506,12 +506,15 @@ namespace nd4j { Nd4jStatus status; bool hasHelper = false; - // if we have platform-specific helper for this op - invoke it - if (OpRegistrator::getInstance()->hasHelper(this->getOpHash())) { - auto helper = OpRegistrator::getInstance()->getPlatformHelper(this->getOpHash()); - if (helper->isUsable(*block)) { - status = helper->invokeHelper(*block); - hasHelper = true; + // platform helpers use might be forbidden for various reasons, so we'll check it out first + if (block->helpersAllowed() && nd4j::Environment::getInstance()->helpersAllowed()) { + // if we have platform-specific helper for this op - invoke it + if (OpRegistrator::getInstance()->hasHelper(this->getOpHash())) { + auto helper = OpRegistrator::getInstance()->getPlatformHelper(this->getOpHash()); + if (helper->isUsable(*block)) { + status = helper->invokeHelper(*block); + hasHelper = true; + } } } diff --git a/libnd4j/include/ops/declarable/platform/mkldnn/deconv2d.cpp b/libnd4j/include/ops/declarable/platform/mkldnn/deconv2d.cpp index cfd1620b0..239e243ca 100644 --- a/libnd4j/include/ops/declarable/platform/mkldnn/deconv2d.cpp +++ b/libnd4j/include/ops/declarable/platform/mkldnn/deconv2d.cpp @@ -46,10 +46,13 @@ static void deconv2dMKLDNN(const NDArray* input, const NDArray* weights, const N int indIOioC, indIiH, indWoC, indWiC, indWkH, indOoH; // corresponding indexes ConvolutionUtils::getSizesAndIndexesConv2d(true, *input, *output, bS, iC, iH, iW, oC, oH, oW, indIOioC, indIiH, indWoC, indWiC, 
indWkH, indOoH); + int dHmkl(dH), dWmkl(dW), pHmkl(pH), pWmkl(pW); + ConvolutionUtils::calcPaddingAndDilationForConv2DMKL(oH, oW, iH, iW, kH, kW, sH, sW, isSameMode, pHmkl, pWmkl, dHmkl, dWmkl); + mkldnn::memory::dims strides = { sH, sW }; - mkldnn::memory::dims dilation = { dH - 1, dW - 1}; mkldnn::memory::dims padding = { pH, pW }; - mkldnn::memory::dims padding_r = { (iH - 1) * sH - oH + kH - pH, (iW - 1) * sW - oW + kW - pW }; + mkldnn::memory::dims padding_r = { pHmkl, pWmkl }; + mkldnn::memory::dims dilation = { dHmkl, dWmkl }; // input type mkldnn::memory::data_type xType; @@ -190,11 +193,13 @@ static void deconv2dBackPropMKLDNN(const NDArray* input, const NDArray* weights, int indIOioC, indIiH, indWoC, indWiC, indWkH, indOoH; // corresponding indexes ConvolutionUtils::getSizesAndIndexesConv2d(true, *input, *gradO, bS, iC, iH, iW, oC, oH, oW, indIOioC, indIiH, indWoC, indWiC, indWkH, indOoH); - mkldnn::memory::dims strides = { sH, sW }; - mkldnn::memory::dims dilation = { dH - 1, dW - 1 }; - mkldnn::memory::dims padding = { pH, pW }; - mkldnn::memory::dims padding_r = { (iH - 1) * sH - oH + kH - pH, (iW - 1) * sW - oW + kW - pW }; + int dHmkl(dH), dWmkl(dW), pHmkl(pH), pWmkl(pW); + ConvolutionUtils::calcPaddingAndDilationForConv2DMKL(oH, oW, iH, iW, kH, kW, sH, sW, isSameMode, pHmkl, pWmkl, dHmkl, dWmkl); + mkldnn::memory::dims strides = { sH, sW }; + mkldnn::memory::dims padding = { pH, pW }; + mkldnn::memory::dims padding_r = { pHmkl, pWmkl }; + mkldnn::memory::dims dilation = { dHmkl, dWmkl }; // input type mkldnn::memory::data_type xType = input->dataType() == DataType::FLOAT32 ? 
mkldnn::memory::data_type::f32 : mkldnn::memory::data_type::bf16; // weights type @@ -425,7 +430,6 @@ PLATFORM_CHECK(deconv2d) { return block.isUseMKLDNN() && ( (xType==DataType::FLOAT32 && wType==DataType::FLOAT32 && bType==DataType::FLOAT32 && zType==DataType::FLOAT32) || - (xType==DataType::HALF && wType==DataType::HALF && bType==DataType::HALF && zType==DataType::HALF ) || ((xType==DataType::UINT8 || xType==DataType::INT8) && wType==DataType::INT8 && (zType==DataType::UINT8 || zType==DataType::INT8 || zType==DataType::INT32 || zType==DataType::FLOAT32) && bType == zType) ); } diff --git a/libnd4j/include/ops/declarable/platform/mkldnn/deconv3d.cpp b/libnd4j/include/ops/declarable/platform/mkldnn/deconv3d.cpp index aab4a723a..d1d7ca87f 100644 --- a/libnd4j/include/ops/declarable/platform/mkldnn/deconv3d.cpp +++ b/libnd4j/include/ops/declarable/platform/mkldnn/deconv3d.cpp @@ -47,10 +47,13 @@ static void deconv3dMKLDNN(const NDArray* input, const NDArray* weights, const N int indIOioC, indIOioD, indWoC, indWiC, indWkD; // corresponding indexes ConvolutionUtils::getSizesAndIndexesConv3d(true, *input, *output, bS, iC, iD, iH, iW, oC, oD, oH, oW, indIOioC, indIOioD, indWoC, indWiC, indWkD); + int dDmkl(dD), dHmkl(dH), dWmkl(dW), pDmkl(pD), pHmkl(pH), pWmkl(pW); + ConvolutionUtils::calcPaddingAndDilationForConv3DMKL(oD, oH, oW, iD, iH, iW, kD, kH, kW, sD, sH, sW, isSameMode, pDmkl, pHmkl, pWmkl, dDmkl, dHmkl, dWmkl); + mkldnn::memory::dims strides = { sD, sH, sW }; - mkldnn::memory::dims dilation = { dD - 1, dH - 1, dW - 1}; mkldnn::memory::dims padding = { pD, pH, pW }; - mkldnn::memory::dims padding_r = {(iD - 1) * sD - oD + kD - pD, (iH - 1) * sH - oH + kH - pH, (iW - 1) * sW - oW + kW - pW }; + mkldnn::memory::dims padding_r = { pDmkl, pHmkl, pWmkl }; + mkldnn::memory::dims dilation = { dDmkl, dHmkl, dWmkl }; // input type mkldnn::memory::data_type xType; @@ -194,10 +197,13 @@ static void deconv3dBackPropMKLDNN(const NDArray* input, const NDArray* weights, int 
indIOioC, indIOioD, indWoC, indWiC, indWkD; // corresponding indexes ConvolutionUtils::getSizesAndIndexesConv3d(true, *input, *gradO, bS, iC, iD, iH, iW, oC, oD, oH, oW, indIOioC, indIOioD, indWoC, indWiC, indWkD); + int dDmkl(dD), dHmkl(dH), dWmkl(dW), pDmkl(pD), pHmkl(pH), pWmkl(pW); + ConvolutionUtils::calcPaddingAndDilationForConv3DMKL(oD, oH, oW, iD, iH, iW, kD, kH, kW, sD, sH, sW, isSameMode, pDmkl, pHmkl, pWmkl, dDmkl, dHmkl, dWmkl); + mkldnn::memory::dims strides = { sD, sH, sW }; - mkldnn::memory::dims dilation = { dD - 1, dH - 1, dW - 1 }; mkldnn::memory::dims padding = { pD, pH, pW }; - mkldnn::memory::dims padding_r = {(iD - 1) * sD - oD + kD - pD, (iH - 1) * sH - oH + kH - pH, (iW - 1) * sW - oW + kW - pW }; + mkldnn::memory::dims padding_r = { pDmkl, pHmkl, pWmkl }; + mkldnn::memory::dims dilation = { dDmkl, dHmkl, dWmkl }; // input type mkldnn::memory::data_type xType = input->dataType() == DataType::FLOAT32 ? mkldnn::memory::data_type::f32 : mkldnn::memory::data_type::bf16; @@ -438,7 +444,6 @@ PLATFORM_CHECK(deconv3d) { return block.isUseMKLDNN() && ( (xType==DataType::FLOAT32 && wType==DataType::FLOAT32 && bType==DataType::FLOAT32 && zType==DataType::FLOAT32) || - (xType==DataType::HALF && wType==DataType::HALF && bType==DataType::HALF && zType==DataType::HALF ) || ((xType==DataType::UINT8 || xType==DataType::INT8) && wType==DataType::INT8 && (zType==DataType::UINT8 || zType==DataType::INT8 || zType==DataType::INT32 || zType==DataType::FLOAT32) && bType == zType) ); } diff --git a/libnd4j/include/ops/declarable/platform/mkldnn/mkldnnUtils.cpp b/libnd4j/include/ops/declarable/platform/mkldnn/mkldnnUtils.cpp index 8a1afdedb..084fb760b 100644 --- a/libnd4j/include/ops/declarable/platform/mkldnn/mkldnnUtils.cpp +++ b/libnd4j/include/ops/declarable/platform/mkldnn/mkldnnUtils.cpp @@ -20,6 +20,7 @@ #include #include "mkldnnUtils.h" +#include using namespace mkldnn; @@ -154,6 +155,14 @@ namespace nd4j { mkldnn::memory::dims conv_bias_tz = { oC }; 
mkldnn::memory::dims conv_dst_tz = { bS, oC, oH, oW }; + int dHmkl(dH), dWmkl(dW), pHmkl(pH), pWmkl(pW); + nd4j::ops::ConvolutionUtils::calcPaddingAndDilationForConv2DMKL(iH, iW, oH, oW, kH, kW, sH, sW, isSameMode, pHmkl, pWmkl, dHmkl, dWmkl); + + conv_strides = { sH, sW }; + conv_padding = { pH, pW }; + conv_padding_r = { pHmkl, pWmkl }; + conv_dilation = { dHmkl, dWmkl }; + conv_strides = { sH, sW }; conv_padding = { pH, pW }; conv_dilation = { dH-1, dW-1}; @@ -234,12 +243,13 @@ namespace nd4j { mkldnn::memory::dims conv_bias_tz = { oC }; mkldnn::memory::dims conv_dst_tz = { bS, oC, oD, oH, oW }; + int dDmkl(dD), dHmkl(dH), dWmkl(dW), pDmkl(pD), pHmkl(pH), pWmkl(pW); + nd4j::ops::ConvolutionUtils::calcPaddingAndDilationForConv3DMKL(iD, iH, iW, oD, oH, oW, kD, kH, kW, sD, sH, sW, isSameMode, pDmkl, pHmkl, pWmkl, dDmkl, dHmkl, dWmkl); + conv_strides = { sD, sH, sW }; - conv_dilation = { dD-1, dH-1, dW-1}; conv_padding = { pD, pH, pW }; - conv_padding_r = { (oD - 1) * sD - iD + kD - pD, - (oH - 1) * sH - iH + kH - pH, - (oW - 1) * sW - iW + kW - pW }; + conv_padding_r = { pDmkl, pHmkl, pWmkl }; + conv_dilation = { dDmkl, dHmkl, dWmkl }; auto type = mkldnn::memory::data_type::f32; auto format = isNCDHW ? 
mkldnn::memory::format_tag::ncdhw : mkldnn::memory::format_tag::ndhwc; diff --git a/libnd4j/tests_cpu/layers_tests/ConvolutionTests1.cpp b/libnd4j/tests_cpu/layers_tests/ConvolutionTests1.cpp index 353e51ad3..23208ce1f 100644 --- a/libnd4j/tests_cpu/layers_tests/ConvolutionTests1.cpp +++ b/libnd4j/tests_cpu/layers_tests/ConvolutionTests1.cpp @@ -2137,9 +2137,9 @@ TEST_F(ConvolutionTests1, deconv2d_test1) { int paddingMode = 0; // 1-SAME, 0-VALID; int dataFormat = 1; // 1-NHWC, 0-NCHW - auto input = NDArrayFactory::create('c', {bS, iH, iW, iC}); - auto weights = NDArrayFactory::create('c', {kH, kW, oC, iC}); - auto exp = NDArrayFactory::create('c', {bS, oH, oW, oC}, { 2.75, 7.75, 12.75, 17.75, 22.75, 30.5 , 40.5 , 50.5 , 60.5 , 70.5 , 30.5 , 40.5 , 50.5 , 60.5 , 70.5 , 27.75, 32.75, 37.75, 42.75, 47.75, + auto input = NDArrayFactory::create('c', {bS, iH, iW, iC}); + auto weights = NDArrayFactory::create('c', {kH, kW, oC, iC}); + auto exp = NDArrayFactory::create('c', {bS, oH, oW, oC}, { 2.75, 7.75, 12.75, 17.75, 22.75, 30.5 , 40.5 , 50.5 , 60.5 , 70.5 , 30.5 , 40.5 , 50.5 , 60.5 , 70.5 , 27.75, 32.75, 37.75, 42.75, 47.75, 55.5 , 65.5 , 75.5 , 85.5 , 95.5 ,161. , 181. , 201. , 221. , 241. ,161. , 181. , 201. , 221. , 241. ,105.5 , 115.5 , 125.5 , 135.5 , 145.5 , 55.5 , 65.5 , 75.5 , 85.5 , 95.5 ,161. , 181. , 201. , 221. , 241. ,161. , 181. , 201. , 221. , 241. 
,105.5 , 115.5 , 125.5 , 135.5 , 145.5 , 52.75, 57.75, 62.75, 67.75, 72.75,130.5 , 140.5 , 150.5 , 160.5 , 170.5 ,130.5 , 140.5 , 150.5 , 160.5 , 170.5 , 77.75, 82.75, 87.75, 92.75, 97.75, @@ -2170,9 +2170,9 @@ TEST_F(ConvolutionTests1, deconv2d_test2) { int paddingMode = 1; // 1-SAME, 0-VALID; int dataFormat = 1; // 1-NHWC, 0-NCHW - auto input = NDArrayFactory::create('c', {bS, oH, oW, oC}); - auto weights = NDArrayFactory::create('c', {kH, kW, iC, oC}); - auto exp = NDArrayFactory::create('c', {bS, iH, iW, iC}, {2.75, 7.75, 12.75, 17.75, 22.75, 30.5 , 40.5 , 50.5 , 60.5 , 70.5 , 30.5 , 40.5 , 50.5 , 60.5 , 70.5 , 30.5 , 40.5 , 50.5 , 60.5 , 70.5 , + auto input = NDArrayFactory::create('c', {bS, oH, oW, oC}); + auto weights = NDArrayFactory::create('c', {kH, kW, iC, oC}); + auto exp = NDArrayFactory::create('c', {bS, iH, iW, iC}, {2.75, 7.75, 12.75, 17.75, 22.75, 30.5 , 40.5 , 50.5 , 60.5 , 70.5 , 30.5 , 40.5 , 50.5 , 60.5 , 70.5 , 30.5 , 40.5 , 50.5 , 60.5 , 70.5 , 55.5 , 65.5 , 75.5 , 85.5 , 95.5 ,161. , 181. , 201. , 221. , 241. ,161. , 181. , 201. , 221. , 241. ,161. , 181. , 201. , 221. , 241. , 55.5 , 65.5 , 75.5 , 85.5 , 95.5 ,161. , 181. , 201. , 221. , 241. ,161. , 181. , 201. , 221. , 241. ,161. , 181. , 201. , 221. , 241. , 55.5 , 65.5 , 75.5 , 85.5 , 95.5 ,161. , 181. , 201. , 221. , 241. ,161. , 181. , 201. , 221. , 241. ,161. , 181. , 201. , 221. , 241. 
, @@ -2194,6 +2194,39 @@ TEST_F(ConvolutionTests1, deconv2d_test2) { delete results; } +////////////////////////////////////////////////////////////////////// +TEST_F(ConvolutionTests1, deconv2d_test3) { + + int bS=1, oH=5,oW=5, oC=3,iC=2, kH=2,kW=2, sH=1,sW=1, pH=0,pW=0, dH=2,dW=2; + int iH=3,iW=3; + int paddingMode = 0; // 1-SAME, 0-VALID; + int dataFormat = 1; // 1-NHWC, 0-NCHW + + auto input = NDArrayFactory::create('c', {bS, iH, iW, iC}); + auto weights = NDArrayFactory::create('c', {kH, kW, oC, iC}); + auto bias = NDArrayFactory::create('c', {oC}); + + auto exp = NDArrayFactory::create('c', {bS, oH, oW, oC}, {-2.9, -6.8, -10.7, -2.6, -6.1, -9.6, -16.9, -23.9, -30.9, -13.1, -16.6, -20.1, -11.6, -14.7, -17.8, -2.0, -4.7, -7.4, -1.7, -4.0, -6.3, -11.5, -16.1, + -20.7, -8.6, -10.9, -13.2, -7.1, -9.0, -10.9, -27.4, -32.8, -38.2, -24.4, -29.0, -33.6, -65.0, -74.2, -83.4, -38.2, -42.8, -47.4, + -32.8, -36.6, -40.4, -18.2, -20.9, -23.6, -15.5, -17.8, -20.1, -39.1, -43.7, -48.3, -22.4, -24.7, -27.0, -18.5, -20.4, -22.3, -10.1, -11.6, -13.1, + -7.4, -8.5, -9.6, -19.3, -21.5, -23.7, -10.7, -11.8, -12.9, -6.8, -7.5, -8.2}); + + input.linspace(-10, 0.5); + weights.linspace(0.1, 0.1); + bias = 0.2; + + nd4j::ops::deconv2d op; + auto results = op.execute({&input, &weights}, {}, {kH,kW, sH,sW, pH,pW, dH,dW, paddingMode, dataFormat}); + ASSERT_EQ(Status::OK(), results->status()); + + auto output = results->at(0); + + ASSERT_TRUE(exp.isSameShape(output)); + ASSERT_TRUE(exp.equalsTo(output)); + + delete results; +} + ////////////////////////////////////////////////////////////////////// TYPED_TEST(TypedConvolutionTests1, deconv2d_tf_test1) { diff --git a/libnd4j/tests_cpu/layers_tests/ConvolutionTests2.cpp b/libnd4j/tests_cpu/layers_tests/ConvolutionTests2.cpp index c20271dd0..836ad123b 100644 --- a/libnd4j/tests_cpu/layers_tests/ConvolutionTests2.cpp +++ b/libnd4j/tests_cpu/layers_tests/ConvolutionTests2.cpp @@ -567,6 +567,52 @@ TEST_F(ConvolutionTests2, deconv3d_test4) { 
delete results; } +////////////////////////////////////////////////////////////////////// +TEST_F(ConvolutionTests2, deconv3d_test5) { + + int bS=1, oD=5,oH=5,oW=5, oC=3,iC=2, kD=2,kH=2,kW=2, sD=1,sH=1,sW=1, pD=0,pH=0,pW=0, dD=2,dH=2,dW=2; + int iD=3,iH=3,iW=3; + int paddingMode = 0; // 1-SAME, 0-VALID; + int dataFormat = 1; // 1-NHWC, 0-NCHW + + auto input = NDArrayFactory::create('c', {bS, iD, iH, iW, iC}); + auto weights = NDArrayFactory::create('c', {kD, kH, kW, oC, iC}); + auto bias = NDArrayFactory::create('c', {oC}); + + auto exp = NDArrayFactory::create('c', {bS, oD, oH, oW, oC}, {-2.9, -6.8, -10.7, -2.6, -6.1, -9.6, -16.9, -23.9, -30.9, -13.1, -16.6, -20.1, -11.6, -14.7, -17.8, -2.0, -4.7, -7.4, -1.7, -4.0, -6.3, -11.5, + -16.1, -20.7, -8.6, -10.9, -13.2, -7.1, -9.0, -10.9, -27.4, -32.8, -38.2, -24.4, -29.0, -33.6, -65.0, -74.2, -83.4, -38.2, -42.8, -47.4, -32.8, + -36.6, -40.4, -18.2, -20.9, -23.6, -15.5, -17.8, -20.1, -39.1, -43.7, -48.3, -22.4, -24.7, -27.0, -18.5, -20.4, -22.3, -10.1, -11.6, -13.1, -7.4, + -8.5, -9.6, -19.3, -21.5, -23.7, -10.7, -11.8, -12.9, -6.8, -7.5, -8.2, -0.2, -0.5, -0.8, 0.1, 0.2, 0.3, -0.7, -0.5, -0.3, 0.4, 0.5, 0.6, 1.9, 2.4, + 2.9, 0.7, 1.6, 2.5, 1.0, 2.3, 3.6, 4.7, 7.3, 9.9, 4.9, 6.2, 7.5, 6.4, 8.1, 9.8, -0.4, 1.4, 3.2, 2.6, 5.2, 7.8, 10.6, 15.8, 21.0, 10.4, 13.0, 15.6, + 15.8, 19.2, 22.6, 6.1, 7.0, 7.9, 8.8, 10.1, 11.4, 20.3, 22.9, 25.5, 12.7, 14.0, 15.3, 16.6, 18.3, 20.0, 14.2, 16.3, 18.4, 16.9, 19.4, 21.9, 40.1, + 45.1, 50.1, 24.4, 26.9, 29.4, 28.3, 31.2, 34.1, -47.2, -47.8, -48.4, -41.8, -41.6, -41.4, -85.4, -85., -84.6, -41.2, -41.0, -40.8, -33.4, -32.4, -31.4, + -31., -29.2, -27.4, -25.6, -23.0, -20.4, -45.8, -40.6, -35.4, -17.8, -15.2, -12.6, -10.0, -6.6, -3.2, -65.6, -62.0, -58.4, -50.0, -44.8, -39.6, -89.2, + -78.8, -68.4, -34.4, -29.2, -24., -14.0, -7.2, -0.4, -20.2, -18.4, -16.6, -10., -7.4, -4.8, -14.6, -9.4, -4.2, -2.2, 0.4, 3.0, 10.4, 13.8, 17.2, 10.4, + 14.6, 18.8, 20.6, 25.6, 30.6, 53.8, 63.8, 73.8, 35.6, 
40.6, 45.6, 48.2, 54.0, 59.8, -3.8, -4.1, -4.4, 1.3, 1.4, 1.5, 1.7, 1.9, 2.1, 1.6, 1.7, 1.8, 7.9, + 8.4, 8.9, 11.5, 12.4, 13.3, 16.6, 17.9, 19.2, 35.9, 38.5, 41.1, 20.5, 21.8, 23.1, 26.8, 28.5, 30.2, 21.2, 23.0, 24.8, 33.8, 36.4, 39.0, 73.0, 78.2, + 83.4, 41.6, 44.2, 46.8, 56.6, 60.0, 63.4, 16.9, 17.8, 18.7, 24.4, 25.7, 27., 51.5, 54.1, 56.7, 28.3, 29.6, 30.9, 37.0, 38.7, 40.4, 39.4, 41.5, + 43.6, 46.9, 49.4, 51.9, 100.1, 105.1, 110.1, 54.4, 56.9, 59.4, 63.1, 66.0, 68.9, 42.1, 45.4, 48.7, 47.2, 50.9, 54.6, 104.3, 111.7, + 119.1, 58.3, 62.0, 65.7, 64.6, 68.7, 72.8, 57.4, 61.9, 66.4, 62.5, 67.4, 72.3, 138.5, 148.3, 158.1, 77.2, 82.1, 87.0, 83.5, 88.8, 94.1, + 134.6, 143.6, 152.6, 147.2, 157.0, 166.8, 321.4, 341.0, 360.6, 176.6, 186.4, 196.2, 191.6, 202.2, 212.8, 84.4, 88.9, + 93.4, 91.9, 96.8, 101.7, 197.3, 207.1, 216.9, 106.6, 111.5, 116.4, 115.3, 120.6, 125.9, 106.9, 112.6, 118.3, 114.4, 120.5, 126.6, 245.9, 258.1, 270.3, 132.7, 138.8, 144.9, 141.4, 147.9, 154.4}); + + input.linspace(-10, 0.5); + weights.linspace(0.1, 0.1); + bias = 0.2; + + nd4j::ops::deconv3d op; + auto results = op.execute({&input, &weights}, {}, {kD,kH,kW, sD,sH,sW, pD,pH,pW, dD,dH,dW, paddingMode, dataFormat}); + ASSERT_EQ(Status::OK(), results->status()); + + auto output = results->at(0); + // output->printBuffer(); + + ASSERT_TRUE(exp.isSameShape(output)); + ASSERT_TRUE(exp.equalsTo(output)); + + delete results; +} + ////////////////////////////////////////////////////////////////////// TEST_F(ConvolutionTests2, deconv3d_bp_test1) { diff --git a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests16.cpp b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests16.cpp index 1a459a012..d29d1f0e1 100644 --- a/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests16.cpp +++ b/libnd4j/tests_cpu/layers_tests/DeclarableOpsTests16.cpp @@ -161,4 +161,29 @@ TEST_F(DeclarableOpsTests16, test_empty_cast_1) { ASSERT_EQ(e, *result->at(0)); delete result; +} + +TEST_F(DeclarableOpsTests16, test_range_1) { + 
nd4j::ops::range op; + auto z = NDArrayFactory::create('c', {200}); + + Context ctx(1); + ctx.setTArguments({-1.0, 1.0, 0.01}); + ctx.setOutputArray(0, &z); + + auto status = op.execute(&ctx); + ASSERT_EQ(Status::OK(), status); +} + +TEST_F(DeclarableOpsTests16, test_range_2) { + nd4j::ops::range op; + auto z = NDArrayFactory::create('c', {200}); + + double tArgs[] = {-1.0, 1.0, 0.01}; + + auto shapes = ::calculateOutputShapes2(nullptr, op.getOpHash(), nullptr, nullptr, 0, tArgs, 3, nullptr, 0, nullptr, 0); + shape::printShapeInfoLinear("Result", shapes->at(0)); + ASSERT_TRUE(shape::shapeEquals(z.shapeInfo(), shapes->at(0))); + + delete shapes; } \ No newline at end of file diff --git a/libnd4j/tests_cpu/layers_tests/testinclude.h b/libnd4j/tests_cpu/layers_tests/testinclude.h index f27623cff..79607cdc9 100644 --- a/libnd4j/tests_cpu/layers_tests/testinclude.h +++ b/libnd4j/tests_cpu/layers_tests/testinclude.h @@ -24,7 +24,7 @@ #include #include -//http://stackoverflow.com/questions/228005/alternative-to-itoa-for-converting-integer-to-string-c +//https://stackoverflow.com/questions/228005/alternative-to-itoa-for-converting-integer-to-string-c FORCEINLINE std::string int_array_to_string(Nd4jLong int_array[], Nd4jLong size_of_array) { std::string returnstring = "["; for (int temp = 0; temp < size_of_array; temp++) { diff --git a/nd4j/README.md b/nd4j/README.md index f26adea75..1d41d4403 100644 --- a/nd4j/README.md +++ b/nd4j/README.md @@ -41,12 +41,12 @@ To install ND4J, there are a couple of approaches, and more information can be f #### Install from Maven Central -1. Search for nd4j in the [Maven Central Repository](http://mvnrepository.com/search?q=nd4j) to find the available nd4j jars. +1. Search for nd4j in the [Maven Central Repository](https://search.maven.org/search?q=nd4j) to find the available nd4j jars. 2. Include the appropriate dependency in your pom.xml. 
#### Clone from the GitHub Repo -https://deeplearning4j.org/buildinglocally +https://deeplearning4j.org/docs/latest/deeplearning4j-build-from-source ## Contribute 1. Check for open issues, or open a new issue to start a discussion around a feature idea or a bug. diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/pom.xml b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/pom.xml index 21924f80a..b4a374baf 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/pom.xml +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/pom.xml @@ -192,12 +192,6 @@ - - org.objenesis - objenesis - ${objenesis.version} - - @@ -206,22 +200,6 @@ ${oshi.version} - - junit - junit - test - - - ch.qos.logback - logback-classic - test - - - ch.qos.logback - logback-core - test - - org.slf4j slf4j-api diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/autodiff/samediff/ops/SDNN.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/autodiff/samediff/ops/SDNN.java index 668a7a4a9..7b1cc5768 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/autodiff/samediff/ops/SDNN.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/autodiff/samediff/ops/SDNN.java @@ -69,7 +69,7 @@ public class SDNN extends SDOps { /** * Neural network batch normalization operation.
- * For details, see http://arxiv.org/abs/1502.03167 + * For details, see https://arxiv.org/abs/1502.03167 * * @param name Name of the output variable * @param input Input variable. @@ -139,7 +139,7 @@ public class SDNN extends SDOps { * out = a * (exp(x) - 1) if x <= 0
* with constant a = 1.0 *

- * See: http://arxiv.org/abs/1511.07289 + * See: https://arxiv.org/abs/1511.07289 * * @param x Input variable * @return Output variable @@ -154,7 +154,7 @@ public class SDNN extends SDOps { * out = a * (exp(x) - 1) if x <= 0
* with constant a = 1.0 *

- * See: http://arxiv.org/abs/1511.07289 + * See: https://arxiv.org/abs/1511.07289 * * @param name Output variable name * @param x Input variable diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/activations/impl/ActivationRReLU.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/activations/impl/ActivationRReLU.java index 478305e76..9221d601c 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/activations/impl/ActivationRReLU.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/activations/impl/ActivationRReLU.java @@ -34,7 +34,7 @@ import org.nd4j.shade.jackson.annotation.JsonIgnoreProperties; * alpha is drawn from uniform(l,u) during training and is set to l+u/2 during test * l and u default to 1/8 and 1/3 respectively * - * + * * Empirical Evaluation of Rectified Activations in Convolutional Network */ @EqualsAndHashCode(callSuper = false) diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/OpContext.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/OpContext.java index cd74a60a0..e66d52f91 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/OpContext.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/OpContext.java @@ -128,4 +128,12 @@ public interface OpContext extends AutoCloseable { * @param reallyInplace */ void markInplace(boolean reallyInplace); + + /** + * This method allows to enable/disable use of platform helpers within ops. I.e. mkldnn or cuDNN. 
+ * PLEASE NOTE: default value is True + * + * @param reallyAllow + */ + void allowHelpers(boolean reallyAllow); } diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/custom/BitCast.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/custom/BitCast.java index ee0adfb94..43bff11e6 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/custom/BitCast.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/custom/BitCast.java @@ -1,25 +1,54 @@ package org.nd4j.linalg.api.ops.custom; +import lombok.val; import org.nd4j.autodiff.samediff.SDVariable; import org.nd4j.autodiff.samediff.SameDiff; +import org.nd4j.imports.graphmapper.tf.TFGraphMapper; import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.api.ops.DynamicCustomOp; +import org.nd4j.linalg.api.shape.options.ArrayOptionsHelper; import org.nd4j.linalg.factory.Nd4j; +import org.tensorflow.framework.AttrValue; +import org.tensorflow.framework.GraphDef; +import org.tensorflow.framework.NodeDef; + +import java.util.Map; public class BitCast extends DynamicCustomOp { public BitCast() {} + public BitCast(INDArray in, DataType dataType, INDArray out) { + this(in, dataType.toInt(), out); + } + public BitCast(INDArray in, int dataType, INDArray out) { inputArguments.add(in); outputArguments.add(out); iArguments.add(Long.valueOf(dataType)); } + public BitCast(INDArray in, DataType dataType) { + this(in, dataType.toInt()); + } + + public BitCast(INDArray in, int dataType) { + inputArguments.add(in); + iArguments.add(Long.valueOf(dataType)); + } + public BitCast(SameDiff sameDiff, SDVariable in, SDVariable dataType) { super("", sameDiff, new SDVariable[]{in, dataType}); } + @Override + public void initFromTensorFlow(NodeDef nodeDef, SameDiff initWith, Map attributesForNode, GraphDef graph) { + 
TFGraphMapper.initFunctionFromProperties(nodeDef.getOp(), this, attributesForNode, nodeDef, graph); + val t = nodeDef.getAttrOrDefault("type", null); + val type = ArrayOptionsHelper.convertToDataType(t.getType()); + addIArgument(type.toInt()); + } + @Override public String opName() { return "bitcast"; diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/scalar/LeakyReLU.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/scalar/LeakyReLU.java index fe70de288..b9a98dc6e 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/scalar/LeakyReLU.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/scalar/LeakyReLU.java @@ -34,7 +34,7 @@ import org.tensorflow.framework.NodeDef; * Out(x) = x if x >= 0
* Leaky ReLU may avoid zero gradient "dying ReLU" problem by having non-zero * gradient below 0.
- * See for example http://arxiv.org/abs/1505.00853 for a comparison of + * See for example https://arxiv.org/abs/1505.00853 for a comparison of * ReLU variants. * * @author Alex Black diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/transforms/MaxOut.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/transforms/MaxOut.java index 05993cd7f..939ed854b 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/transforms/MaxOut.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/transforms/MaxOut.java @@ -33,7 +33,7 @@ import java.util.List; /** * Max out activation: - * http://arxiv.org/pdf/1302.4389.pdf + * https://arxiv.org/pdf/1302.4389.pdf * * @author Adam Gibson */ diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/transforms/strict/ELU.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/transforms/strict/ELU.java index 6923639fd..c4fc245b7 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/transforms/strict/ELU.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/impl/transforms/strict/ELU.java @@ -32,7 +32,7 @@ import java.util.List; * Introduced in paper:
* Fast and Accurate Deep Network Learning by Exponential Linear Units (ELUs)
* Djork-Arné Clevert, Thomas Unterthiner, Sepp Hochreiter (2015)
- * http://arxiv.org/abs/1511.07289 + * https://arxiv.org/abs/1511.07289 * * @author Alex Black */ diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/random/custom/DistributionUniform.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/random/custom/DistributionUniform.java index 0744533ba..ecc76a1b2 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/random/custom/DistributionUniform.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/ops/random/custom/DistributionUniform.java @@ -74,6 +74,7 @@ public class DistributionUniform extends DynamicCustomOp { AttrValue v = attributesForNode.get("dtype"); dataType = TFGraphMapper.convertType(v.getType()); addIArgument(dataType.toInt()); + addTArgument(0.0, 1.0); //TF version is hardcoded 0 to 1 } protected void addArgs() { diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/shape/Shape.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/shape/Shape.java index 51711b3d2..44298ffa2 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/shape/Shape.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/api/shape/Shape.java @@ -3607,6 +3607,13 @@ public class Shape { return ArrayUtil.prodLong(shape); } + public static long lengthOf(int[] shape) { + if (shape.length == 0) + return 1L; + else + return ArrayUtil.prodLong(shape); + } + /** * Calculate the length of the buffer required to store the given shape with the given strides * diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/dataset/api/iterator/SamplingDataSetIterator.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/dataset/api/iterator/SamplingDataSetIterator.java index c33b37565..cc6fba068 100644 --- 
a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/dataset/api/iterator/SamplingDataSetIterator.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/dataset/api/iterator/SamplingDataSetIterator.java @@ -28,11 +28,6 @@ import java.util.List; * @author Adam Gibson */ public class SamplingDataSetIterator implements DataSetIterator { - - /** - * - */ - private static final long serialVersionUID = -2700563801361726914L; private DataSet sampleFrom; private int batchSize; private int totalNumberSamples; @@ -145,6 +140,4 @@ public class SamplingDataSetIterator implements DataSetIterator { numTimesSampled++; return ret; } - - } diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/factory/Nd4j.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/factory/Nd4j.java index 25960a8a8..c95dc5ef2 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/factory/Nd4j.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/factory/Nd4j.java @@ -1164,26 +1164,15 @@ public class Nd4j { * @param type the opType to create * @return the created buffer */ - public static DataBuffer createBuffer(int[] shape, DataType type) { - long length = ArrayUtil.prodLong(shape); - - if (type == DataType.INT) - return Nd4j.getMemoryManager().getCurrentWorkspace() == null ? DATA_BUFFER_FACTORY_INSTANCE.createInt(length, true) : DATA_BUFFER_FACTORY_INSTANCE.createInt(length, true, Nd4j.getMemoryManager().getCurrentWorkspace()); - else if (type == DataType.LONG) - return Nd4j.getMemoryManager().getCurrentWorkspace() == null ? DATA_BUFFER_FACTORY_INSTANCE.createLong(length, true) : DATA_BUFFER_FACTORY_INSTANCE.createLong(length, true, Nd4j.getMemoryManager().getCurrentWorkspace()); - else if (type == DataType.HALF) - return Nd4j.getMemoryManager().getCurrentWorkspace() == null ? 
DATA_BUFFER_FACTORY_INSTANCE.createHalf(length, true) : DATA_BUFFER_FACTORY_INSTANCE.createHalf(length, true, Nd4j.getMemoryManager().getCurrentWorkspace()); - else if (type == DataType.DOUBLE) - return Nd4j.getMemoryManager().getCurrentWorkspace() == null ? DATA_BUFFER_FACTORY_INSTANCE.createDouble(length, true) : DATA_BUFFER_FACTORY_INSTANCE.createDouble(length, true, Nd4j.getMemoryManager().getCurrentWorkspace()); - else - return Nd4j.getMemoryManager().getCurrentWorkspace() == null ? DATA_BUFFER_FACTORY_INSTANCE.createFloat(length, true) : DATA_BUFFER_FACTORY_INSTANCE.createFloat(length, true, Nd4j.getMemoryManager().getCurrentWorkspace()); + public static DataBuffer createBuffer(@NonNull int[] shape, @NonNull DataType type) { + return createBuffer(ArrayUtil.toLongArray(shape), type); } /** * See {@link #createBuffer(int[], DataType)} */ - public static DataBuffer createBuffer(long[] shape, DataType type) { - long length = ArrayUtil.prodLong(shape); + public static DataBuffer createBuffer(@NonNull long[] shape, @NonNull DataType type) { + long length = Shape.lengthOf(shape); switch (type) { case BOOL: @@ -1229,14 +1218,14 @@ public class Nd4j { * @return the created buffer. 
*/ public static DataBuffer createBufferDetached(int[] shape, DataType type) { - return createBufferDetachedImpl( ArrayUtil.prodLong(shape), type); + return createBufferDetachedImpl( Shape.lengthOf(shape), type); } /** * See {@link #createBufferDetached(int[], DataType)} */ public static DataBuffer createBufferDetached(long[] shape, DataType type) { - return createBufferDetachedImpl( ArrayUtil.prodLong(shape), type); + return createBufferDetachedImpl( Shape.lengthOf(shape), type); } // used by createBufferDetached(long[] DataType) and createBufferDetached(int[] , DataType) diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/learning/AdaMaxUpdater.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/learning/AdaMaxUpdater.java index c398dad72..20a908f1e 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/learning/AdaMaxUpdater.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/learning/AdaMaxUpdater.java @@ -32,7 +32,7 @@ import java.util.Map; /** * The AdaMax updater, a variant of Adam. - * http://arxiv.org/abs/1412.6980 + * https://arxiv.org/abs/1412.6980 * * @author Justin Long */ diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/learning/AdamUpdater.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/learning/AdamUpdater.java index 8d7709873..e68af09f7 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/learning/AdamUpdater.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/learning/AdamUpdater.java @@ -30,7 +30,7 @@ import java.util.Map; /** * The Adam updater. 
- * http://arxiv.org/abs/1412.6980 + * https://arxiv.org/abs/1412.6980 * * @author Adam Gibson */ diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/learning/config/AdaMax.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/learning/config/AdaMax.java index 00956589a..848bb3408 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/learning/config/AdaMax.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/learning/config/AdaMax.java @@ -28,7 +28,7 @@ import java.util.Map; /** * The AdaMax updater, a variant of Adam. - * http://arxiv.org/abs/1412.6980 + * https://arxiv.org/abs/1412.6980 * * @author Justin Long */ diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/learning/config/Adam.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/learning/config/Adam.java index 22ebe06f3..6901af59c 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/learning/config/Adam.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/learning/config/Adam.java @@ -29,7 +29,7 @@ import java.util.Map; /** * The Adam updater. 
- * http://arxiv.org/abs/1412.6980 + * https://arxiv.org/abs/1412.6980 * * @author Adam Gibson */ diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/string/NDArrayStrings.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/string/NDArrayStrings.java index c28f35151..f5c0d5c9e 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/string/NDArrayStrings.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-api/src/main/java/org/nd4j/linalg/string/NDArrayStrings.java @@ -46,6 +46,9 @@ public class NDArrayStrings { public static final String EMPTY_ARRAY_STR = "[]"; + private static final String[] OPEN_BRACKETS = new String[]{"", "[", "[[", "[[[", "[[[[", "[[[[[", "[[[[[[", "[[[[[[[", "[[[[[[[["}; + private static final String[] CLOSE_BRACKETS = new String[]{"", "]", "]]", "]]]", "]]]]", "]]]]]", "]]]]]]", "]]]]]]]", "]]]]]]]]"}; + /** * The default number of elements for printing INDArrays (via NDArrayStrings or INDArray.toString) */ @@ -190,29 +193,29 @@ public class NDArrayStrings { private String format(INDArray arr, int offset, boolean summarize) { int rank = arr.rank(); - if (arr.isScalar()) { + if (arr.isScalar() || arr.length() == 1) { + int fRank = Math.min(rank, OPEN_BRACKETS.length-1); if (arr.isR()) { - //true scalar i.e shape = [] not legacy which is [1,1] double arrElement = arr.getDouble(0); if (!dontOverrideFormat && ((Math.abs(arrElement) < this.minToPrintWithoutSwitching && arrElement != 0) || (Math.abs(arrElement) >= this.maxToPrintWithoutSwitching))) { //switch to scientific notation String asString = localeIndifferentDecimalFormat(scientificFormat).format(arrElement); //from E to small e asString = asString.replace('E', 'e'); - return asString; + return OPEN_BRACKETS[fRank] + asString + CLOSE_BRACKETS[fRank]; } else { - if (arr.getDouble(0) == 0) return "0"; - return decimalFormat.format(arr.getDouble(0)); + if (arr.getDouble(0) == 0) return 
OPEN_BRACKETS[fRank] + "0" + CLOSE_BRACKETS[fRank]; + return OPEN_BRACKETS[fRank] + decimalFormat.format(arr.getDouble(0)) + CLOSE_BRACKETS[fRank]; } } else if (arr.isZ()) { long arrElement = arr.getLong(0); - return String.valueOf(arrElement); + return OPEN_BRACKETS[fRank] + arrElement + CLOSE_BRACKETS[fRank]; } else if (arr.isB()) { long arrElement = arr.getLong(0); - return arrElement == 0 ? "false" : "true"; + return OPEN_BRACKETS[fRank] + (arrElement == 0 ? "false" : "true") + CLOSE_BRACKETS[fRank]; } else if (arr.dataType() == DataType.UTF8){ String s = arr.getString(0); - return "\"" + s.replaceAll("\n","\\n") + "\""; + return OPEN_BRACKETS[fRank] + "\"" + s.replaceAll("\n","\\n") + "\"" + CLOSE_BRACKETS[fRank]; } else throw new ND4JIllegalStateException(); } else if (rank == 1) { @@ -246,9 +249,10 @@ public class NDArrayStrings { //hack fix for slice issue with 'f' order if (arr.ordering() == 'f' && arr.rank() > 2 && arr.size(arr.rank() - 1) == 1) { sb.append(format(arr.dup('c').slice(i), offset, summarize)); - } else if(arr.rank() <= 1 || arr.length() == 1) { - sb.append(format(Nd4j.scalar(arr.getDouble(0)),offset,summarize)); } +// else if(arr.rank() <= 1 || arr.length() == 1) { +// sb.append(format(Nd4j.scalar(arr.getDouble(0)),offset,summarize)); +// } else { sb.append(format(arr.slice(i), offset, summarize)); } diff --git a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-native-api/src/main/java/org/nd4j/nativeblas/NativeOps.java b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-native-api/src/main/java/org/nd4j/nativeblas/NativeOps.java index 8f621668b..d4a7b8f8b 100644 --- a/nd4j/nd4j-backends/nd4j-api-parent/nd4j-native-api/src/main/java/org/nd4j/nativeblas/NativeOps.java +++ b/nd4j/nd4j-backends/nd4j-api-parent/nd4j-native-api/src/main/java/org/nd4j/nativeblas/NativeOps.java @@ -1123,6 +1123,7 @@ public interface NativeOps { void setGraphContextTArguments(OpaqueContext ptr, DoublePointer arguments, int numberOfArguments); void 
setGraphContextIArguments(OpaqueContext ptr, LongPointer arguments, int numberOfArguments); void setGraphContextBArguments(OpaqueContext ptr, BooleanPointer arguments, int numberOfArguments); + void ctxAllowHelpers(OpaqueContext ptr, boolean reallyAllow); void deleteGraphContext(OpaqueContext ptr); OpaqueRandomGenerator createRandomGenerator(long rootSeed, long nodeSeed); diff --git a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/linalg/jcublas/ops/executioner/CudaExecutioner.java b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/linalg/jcublas/ops/executioner/CudaExecutioner.java index 8fe744b38..20f2b5f22 100644 --- a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/linalg/jcublas/ops/executioner/CudaExecutioner.java +++ b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/linalg/jcublas/ops/executioner/CudaExecutioner.java @@ -2226,7 +2226,7 @@ public class CudaExecutioner extends DefaultOpExecutioner { cnt = 0; for (val t: op.tArgs()) - tArgs.put(cnt++, (float) t); + tArgs.put(cnt++, t); OpaqueShapeList ptrptr = nativeOps.calculateOutputShapes2(null, hash, inputBuffers, inputShapes, op.inputArguments().length, tArgs, op.tArgs().length, iArgs, op.iArgs().length, bArgs, op.numBArguments()); diff --git a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/linalg/jcublas/ops/executioner/CudaOpContext.java b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/linalg/jcublas/ops/executioner/CudaOpContext.java index 32f1b0a10..b75f688fe 100644 --- a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/linalg/jcublas/ops/executioner/CudaOpContext.java +++ b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/linalg/jcublas/ops/executioner/CudaOpContext.java @@ -136,4 +136,9 @@ public class CudaOpContext extends BaseOpContext implements OpContext { public void markInplace(boolean reallyInplace) { 
nativeOps.markGraphContextInplace(context, reallyInplace); } + + @Override + public void allowHelpers(boolean reallyAllow) { + nativeOps.ctxAllowHelpers(context, reallyAllow); + } } diff --git a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/nativeblas/Nd4jCuda.java b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/nativeblas/Nd4jCuda.java index fecb64012..e8b5e15c9 100644 --- a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/nativeblas/Nd4jCuda.java +++ b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-cuda/src/main/java/org/nd4j/nativeblas/Nd4jCuda.java @@ -1,4 +1,4 @@ -// Targeted by JavaCPP version 1.5.1-1: DO NOT EDIT THIS FILE +// Targeted by JavaCPP version 1.5.2: DO NOT EDIT THIS FILE package org.nd4j.nativeblas; @@ -575,6 +575,8 @@ public class Nd4jCuda extends org.nd4j.nativeblas.Nd4jCudaHelper { public native void setDebug(@Cast("bool") boolean reallyDebug); public native void setProfiling(@Cast("bool") boolean reallyProfile); public native void setLeaksDetector(@Cast("bool") boolean reallyDetect); + public native @Cast("bool") boolean helpersAllowed(); + public native void allowHelpers(@Cast("bool") boolean reallyAllow); public native int tadThreshold(); public native void setTadThreshold(int threshold); @@ -585,6 +587,13 @@ public class Nd4jCuda extends org.nd4j.nativeblas.Nd4jCudaHelper { public native int maxThreads(); public native void setMaxThreads(int max); + public native int maxMasterThreads(); + public native void setMaxMasterThreads(int max); + + public native void setMaxPrimaryMemory(@Cast("uint64_t") long maxBytes); + public native void setMaxSpecialyMemory(@Cast("uint64_t") long maxBytes); + public native void setMaxDeviceMemory(@Cast("uint64_t") long maxBytes); + public native @Cast("bool") boolean isUseMKLDNN(); public native void setUseMKLDNN(@Cast("bool") boolean useMKLDNN); @@ -3087,6 +3096,7 @@ public native void deleteShapeBuffer(OpaqueConstantDataBuffer ptr); public 
native OpaqueContext createGraphContext(int nodeId); public native OpaqueRandomGenerator getGraphContextRandomGenerator(OpaqueContext ptr); +public native void ctxAllowHelpers(OpaqueContext ptr, @Cast("bool") boolean reallyAllow); public native void markGraphContextInplace(OpaqueContext ptr, @Cast("bool") boolean reallyInplace); public native void setGraphContextCudaContext(OpaqueContext ptr, Pointer stream, Pointer reductionPointer, Pointer allocationPointer); public native void setGraphContextInputArray(OpaqueContext ptr, int index, Pointer buffer, Pointer shapeInfo, Pointer specialBuffer, Pointer specialShapeInfo); @@ -5454,6 +5464,10 @@ NDArray& NDArray::operator()(const Nd4jLong* idx) { + + + + @@ -6740,7 +6754,20 @@ NDArray& NDArray::operator()(const Nd4jLong* idx) { public native void setBArguments(@Cast("bool*") BooleanPointer arguments, int numberOfArguments); public native void setBArguments(@Cast("bool*") boolean[] arguments, int numberOfArguments); + public native void setTArguments(@StdVector DoublePointer tArgs); + public native void setTArguments(@StdVector DoubleBuffer tArgs); + public native void setTArguments(@StdVector double[] tArgs); + public native void setIArguments(@Cast("Nd4jLong*") @StdVector LongPointer tArgs); + public native void setIArguments(@Cast("Nd4jLong*") @StdVector LongBuffer tArgs); + public native void setIArguments(@Cast("Nd4jLong*") @StdVector long[] tArgs); + public native void setBArguments(@Cast("bool*") @StdVector BooleanPointer tArgs); + public native void setBArguments(@Cast("bool*") @StdVector boolean[] tArgs); + public native void setCudaContext(@Cast("Nd4jPointer") Pointer cudaStream, @Cast("Nd4jPointer") Pointer reductionPointer, @Cast("Nd4jPointer") Pointer allocationPointer); + + + public native void allowHelpers(@Cast("bool") boolean reallyAllow); + public native @Cast("bool") boolean helpersAllowed(); } diff --git a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-native/pom.xml 
b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-native/pom.xml index c6017e3a7..33e54bd4a 100644 --- a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-native/pom.xml +++ b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-native/pom.xml @@ -34,13 +34,6 @@ ${dependency.classifier} - - org.springframework - spring-core - 5.0.2.RELEASE - test - - org.bytedeco javacpp @@ -87,73 +80,10 @@ nd4j-api ${project.version} - - junit - junit - - - org.nd4j - nd4j-jackson - ${project.version} - test - - - ch.qos.logback - logback-classic - ${logback.version} - test - - - - org.nd4j - nd4j-tensorflow - ${project.version} - test - - - - - org.reflections - reflections - ${reflections.version} - test - - - com.google.code.findbugs - * - - - - - org.apache.maven.plugins - maven-surefire-plugin - - - ${env.LD_LIBRARY_PATH}:${user.dir}:${libnd4jhome}/blasbuild/cpu/blas/ - - - *.java - **/*.java - **/Test*.java - **/*Test.java - **/*TestCase.java - - junit:junit - - org.nd4j.linalg.cpu.nativecpu.CpuBackend - org.nd4j.linalg.cpu.nativecpu.CpuBackend - - - -Ddtype=float -Xmx8g - - maven-compiler-plugin diff --git a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-native/src/main/java/org/nd4j/linalg/cpu/nativecpu/ops/CpuOpContext.java b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-native/src/main/java/org/nd4j/linalg/cpu/nativecpu/ops/CpuOpContext.java index 9431a3453..6700f9019 100644 --- a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-native/src/main/java/org/nd4j/linalg/cpu/nativecpu/ops/CpuOpContext.java +++ b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-native/src/main/java/org/nd4j/linalg/cpu/nativecpu/ops/CpuOpContext.java @@ -105,4 +105,9 @@ public class CpuOpContext extends BaseOpContext implements OpContext { public void markInplace(boolean reallyInplace) { nativeOps.markGraphContextInplace(context, reallyInplace); } + + @Override + public void allowHelpers(boolean reallyAllow) { + nativeOps.ctxAllowHelpers(context, reallyAllow); + } } diff --git 
a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-native/src/main/java/org/nd4j/nativeblas/Nd4jCpu.java b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-native/src/main/java/org/nd4j/nativeblas/Nd4jCpu.java index 06c061fad..e2e9b0c2f 100644 --- a/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-native/src/main/java/org/nd4j/nativeblas/Nd4jCpu.java +++ b/nd4j/nd4j-backends/nd4j-backend-impls/nd4j-native/src/main/java/org/nd4j/nativeblas/Nd4jCpu.java @@ -575,6 +575,8 @@ public class Nd4jCpu extends org.nd4j.nativeblas.Nd4jCpuHelper { public native void setDebug(@Cast("bool") boolean reallyDebug); public native void setProfiling(@Cast("bool") boolean reallyProfile); public native void setLeaksDetector(@Cast("bool") boolean reallyDetect); + public native @Cast("bool") boolean helpersAllowed(); + public native void allowHelpers(@Cast("bool") boolean reallyAllow); public native int tadThreshold(); public native void setTadThreshold(int threshold); @@ -585,6 +587,13 @@ public class Nd4jCpu extends org.nd4j.nativeblas.Nd4jCpuHelper { public native int maxThreads(); public native void setMaxThreads(int max); + public native int maxMasterThreads(); + public native void setMaxMasterThreads(int max); + + public native void setMaxPrimaryMemory(@Cast("uint64_t") long maxBytes); + public native void setMaxSpecialyMemory(@Cast("uint64_t") long maxBytes); + public native void setMaxDeviceMemory(@Cast("uint64_t") long maxBytes); + public native @Cast("bool") boolean isUseMKLDNN(); public native void setUseMKLDNN(@Cast("bool") boolean useMKLDNN); @@ -3087,6 +3096,7 @@ public native void deleteShapeBuffer(OpaqueConstantDataBuffer ptr); public native OpaqueContext createGraphContext(int nodeId); public native OpaqueRandomGenerator getGraphContextRandomGenerator(OpaqueContext ptr); +public native void ctxAllowHelpers(OpaqueContext ptr, @Cast("bool") boolean reallyAllow); public native void markGraphContextInplace(OpaqueContext ptr, @Cast("bool") boolean reallyInplace); public native void 
setGraphContextCudaContext(OpaqueContext ptr, Pointer stream, Pointer reductionPointer, Pointer allocationPointer); public native void setGraphContextInputArray(OpaqueContext ptr, int index, Pointer buffer, Pointer shapeInfo, Pointer specialBuffer, Pointer specialShapeInfo); @@ -6744,7 +6754,20 @@ NDArray& NDArray::operator()(const Nd4jLong* idx) { public native void setBArguments(@Cast("bool*") BooleanPointer arguments, int numberOfArguments); public native void setBArguments(@Cast("bool*") boolean[] arguments, int numberOfArguments); + public native void setTArguments(@StdVector DoublePointer tArgs); + public native void setTArguments(@StdVector DoubleBuffer tArgs); + public native void setTArguments(@StdVector double[] tArgs); + public native void setIArguments(@Cast("Nd4jLong*") @StdVector LongPointer tArgs); + public native void setIArguments(@Cast("Nd4jLong*") @StdVector LongBuffer tArgs); + public native void setIArguments(@Cast("Nd4jLong*") @StdVector long[] tArgs); + public native void setBArguments(@Cast("bool*") @StdVector BooleanPointer tArgs); + public native void setBArguments(@Cast("bool*") @StdVector boolean[] tArgs); + public native void setCudaContext(@Cast("Nd4jPointer") Pointer cudaStream, @Cast("Nd4jPointer") Pointer reductionPointer, @Cast("Nd4jPointer") Pointer allocationPointer); + + + public native void allowHelpers(@Cast("bool") boolean reallyAllow); + public native @Cast("bool") boolean helpersAllowed(); } @@ -11383,6 +11406,7 @@ public static final int TAD_THRESHOLD = TAD_THRESHOLD(); // #elif _MSC_VER // #define FORCEINLINE __forceinline // #elif __GNUC__ +// #define INLINE_LOOPS // #define FORCEINLINE __attribute__((always_inline)) inline // #elif __CUDACC__ // #else @@ -21680,7 +21704,7 @@ public static final int TAD_THRESHOLD = TAD_THRESHOLD(); // #endif /** - * This operation performs batch normalization of layer, it is based on following article http://arxiv.org/abs/1502.03167. 
+ * This operation performs batch normalization of layer, it is based on following article https://arxiv.org/abs/1502.03167. * Expected arguments: * x: input 4D array of shape [bS,iH,iW,iD] (data format = NHWC) or [bS,iD,iH,iW] (data format = NCHW), where * bS - batch size diff --git a/nd4j/nd4j-backends/nd4j-tests/pom.xml b/nd4j/nd4j-backends/nd4j-tests/pom.xml index 5f1d372ff..50fa24bf9 100644 --- a/nd4j/nd4j-backends/nd4j-tests/pom.xml +++ b/nd4j/nd4j-backends/nd4j-tests/pom.xml @@ -57,12 +57,7 @@ - - org.springframework - spring-core - 5.0.2.RELEASE - test - + junit @@ -105,10 +100,12 @@ logback-core ${logback.version} + - org.nd4j - nd4j-kafka_${scala.binary.version} - ${project.version} + org.springframework + spring-core + 5.0.2.RELEASE + test diff --git a/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/BaseNd4jTest.java b/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/BaseNd4jTest.java index a9582b6ad..c3c94e1ed 100644 --- a/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/BaseNd4jTest.java +++ b/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/BaseNd4jTest.java @@ -20,16 +20,13 @@ package org.nd4j.linalg; import lombok.val; import org.bytedeco.javacpp.Pointer; import org.junit.After; -import org.junit.AfterClass; import org.junit.Before; import org.junit.Rule; import org.junit.rules.TestName; import org.junit.runner.RunWith; import org.junit.runners.Parameterized; -import org.nd4j.config.ND4JEnvironmentVars; import org.nd4j.config.ND4JSystemProperties; import org.nd4j.linalg.api.buffer.DataType; -import org.nd4j.linalg.api.environment.Nd4jEnvironment; import org.nd4j.linalg.api.memory.MemoryWorkspace; import org.nd4j.linalg.factory.Nd4j; import org.nd4j.linalg.factory.Nd4jBackend; @@ -38,7 +35,6 @@ import org.nd4j.linalg.util.ArrayUtil; import org.nd4j.nativeblas.NativeOpsHolder; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import scala.collection.mutable.StringBuilder; import 
java.lang.management.ManagementFactory; import java.util.*; diff --git a/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/ToStringTest.java b/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/ToStringTest.java index 97d952fea..42b895f76 100644 --- a/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/ToStringTest.java +++ b/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/ToStringTest.java @@ -31,6 +31,7 @@ import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.ndarray.INDArray; import org.nd4j.linalg.factory.Nd4j; import org.nd4j.linalg.factory.Nd4jBackend; +import org.nd4j.linalg.util.ArrayUtil; @RunWith(Parameterized.class) @Slf4j @@ -57,6 +58,30 @@ public class ToStringTest extends BaseNd4jTest { Nd4j.createFromArray(1, 2, 3, 4, 5, 6, 7, 8).toString(6, true, 1)); } + @Test + public void testToStringScalars(){ + DataType[] dataTypes = new DataType[]{DataType.FLOAT, DataType.DOUBLE, DataType.BOOL, DataType.INT, DataType.UINT32}; + String[] strs = new String[]{"1.0000", "1.0000", "true", "1", "1"}; + + for(int dt=0; dt<5; dt++ ) { + for (int i = 0; i < 5; i++) { + long[] shape = ArrayUtil.nTimes(i, 1L); + INDArray scalar = Nd4j.scalar(1.0f).castTo(dataTypes[dt]).reshape(shape); + String str = scalar.toString(); + StringBuilder sb = new StringBuilder(); + for (int j = 0; j < i; j++) { + sb.append("["); + } + sb.append(strs[dt]); + for (int j = 0; j < i; j++) { + sb.append("]"); + } + String exp = sb.toString(); + assertEquals("Rank: " + i + ", DT: " + dataTypes[dt], exp, str); + } + } + } + @Override public char ordering() { return 'c'; diff --git a/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/custom/CustomOpsTests.java b/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/custom/CustomOpsTests.java index ad38f39d7..556405c14 100644 --- a/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/custom/CustomOpsTests.java +++ 
b/nd4j/nd4j-backends/nd4j-tests/src/test/java/org/nd4j/linalg/custom/CustomOpsTests.java @@ -931,4 +931,36 @@ public class CustomOpsTests extends BaseNd4jTest { Nd4j.exec(new KnnMinDistance(point, lowest, highest, distance)); System.out.println(distance); } + + + @Test + public void testRange(){ + DynamicCustomOp op = DynamicCustomOp.builder("range") + .addFloatingPointArguments(-1.0, 1.0, 0.01) + .build(); + + List lsd = op.calculateOutputShape(); + //System.out.println("Calculated output shape: " + Arrays.toString(lsd.get(0).getShape())); + op.setOutputArgument(0, Nd4j.create(lsd.get(0))); + + Nd4j.exec(op); + } + + @Test + public void testBitCastShape_1(){ + val out = Nd4j.createUninitialized(1,10); + BitCast op = new BitCast(Nd4j.zeros(DataType.FLOAT,1,10), DataType.INT.toInt(), out); + List lsd = op.calculateOutputShape(); + assertEquals(1, lsd.size()); + assertArrayEquals(new long[]{1,10}, lsd.get(0).getShape()); + } + + @Test + public void testBitCastShape_2(){ + val out = Nd4j.createUninitialized(1,10); + BitCast op = new BitCast(Nd4j.zeros(DataType.DOUBLE,1,10), DataType.INT.toInt(), out); + List lsd = op.calculateOutputShape(); + assertEquals(1, lsd.size()); + assertArrayEquals(new long[]{1,10, 2}, lsd.get(0).getShape()); + } } diff --git a/nd4j/nd4j-common/src/main/java/org/nd4j/config/ND4JEnvironmentVars.java b/nd4j/nd4j-common/src/main/java/org/nd4j/config/ND4JEnvironmentVars.java index 3bcff03f0..c77f945d0 100644 --- a/nd4j/nd4j-common/src/main/java/org/nd4j/config/ND4JEnvironmentVars.java +++ b/nd4j/nd4j-common/src/main/java/org/nd4j/config/ND4JEnvironmentVars.java @@ -137,6 +137,39 @@ public class ND4JEnvironmentVars { */ public static final String ND4J_IGNORE_AVX = "ND4J_IGNORE_AVX"; + /** + * This variable defines how many threads will be used in ThreadPool for parallel execution of linear algebra. + * Default value: number of threads supported by this system. 
+ */ + public static final String SD_MAX_THREADS = "SD_MAX_THREADS"; + + /** + * This variable defines how many threads will be used for any 1 linear algebra operation. + * Default value: number of threads supported by this system. + */ + public static final String SD_MASTER_THREADS = "SD_MASTER_THREADS"; + + /** + * If set, this variable disables use of optimized platform helpers (i.e. mkldnn or cuDNN) + */ + public static final String SD_FORBID_HELPERS = "SD_FORBID_HELPERS"; + + /** + * If set, this variables defines how much memory application is allowed to use off-heap. + * PLEASE NOTE: this option is separate from JVM XMS/XMX options + */ + public static final String SD_MAX_PRIMARY_BYTES = "SD_MAX_PRIMARY_BYTES"; + + /** + * If set, this variable defines how much memory application is allowed to use ON ALL computational devices COMBINED. + */ + public static final String SD_MAX_SPECIAL_BYTES = "SD_MAX_SPECIAL_BYTES"; + + /** + * If set, this variable defines how much memory application is allowed to use on any one computational device + */ + public static final String SD_MAX_DEVICE_BYTES = "SD_MAX_DEVICE_BYTES"; + private ND4JEnvironmentVars() { } } diff --git a/nd4j/nd4j-common/src/main/java/org/nd4j/linalg/util/ArrayUtil.java b/nd4j/nd4j-common/src/main/java/org/nd4j/linalg/util/ArrayUtil.java index caeb0d47b..e1408e298 100644 --- a/nd4j/nd4j-common/src/main/java/org/nd4j/linalg/util/ArrayUtil.java +++ b/nd4j/nd4j-common/src/main/java/org/nd4j/linalg/util/ArrayUtil.java @@ -1495,7 +1495,7 @@ public class ArrayUtil { } - //Credit: http://stackoverflow.com/questions/15533854/converting-byte-array-to-double-array + //Credit: https://stackoverflow.com/questions/15533854/converting-byte-array-to-double-array /** * diff --git a/nd4j/nd4j-common/src/main/java/org/nd4j/linalg/util/MathUtils.java b/nd4j/nd4j-common/src/main/java/org/nd4j/linalg/util/MathUtils.java index a46238a7a..c32b43669 100644 --- 
a/nd4j/nd4j-common/src/main/java/org/nd4j/linalg/util/MathUtils.java +++ b/nd4j/nd4j-common/src/main/java/org/nd4j/linalg/util/MathUtils.java @@ -107,7 +107,7 @@ public class MathUtils { } /** - * See: http://stackoverflow.com/questions/466204/rounding-off-to-nearest-power-of-2 + * See: https://stackoverflow.com/questions/466204/rounding-off-to-nearest-power-of-2 * * @param v the number to getFromOrigin the next power of 2 for * @return the next power of 2 for the passed in value diff --git a/nd4j/nd4j-parameter-server-parent/nd4j-parameter-server-client/pom.xml b/nd4j/nd4j-parameter-server-parent/nd4j-parameter-server-client/pom.xml index 734b1b738..21b3f6b65 100644 --- a/nd4j/nd4j-parameter-server-parent/nd4j-parameter-server-client/pom.xml +++ b/nd4j/nd4j-parameter-server-parent/nd4j-parameter-server-client/pom.xml @@ -29,29 +29,6 @@ nd4j-parameter-server-client - - - commons-codec - commons-codec - ${commons-codec.version} - - - org.apache.httpcomponents - httpclient - ${httpclient.version} - - - org.apache.httpcomponents - httpcore - ${httpcore.version} - - - org.apache.httpcomponents - httpmime - ${httpmime.version} - - - com.mashape.unirest unirest-java @@ -72,11 +49,6 @@ nd4j-aeron ${project.version} - - commons-io - commons-io - ${commons-io.version} - org.zeroturnaround zt-exec @@ -89,12 +61,6 @@ ${project.version} test - - org.nd4j - nd4j-parameter-server-status_2.11 - ${project.version} - test - ch.qos.logback diff --git a/nd4j/nd4j-parameter-server-parent/nd4j-parameter-server-client/src/test/java/org/nd4j/parameterserver/BaseNd4jTest.java b/nd4j/nd4j-parameter-server-parent/nd4j-parameter-server-client/src/test/java/org/nd4j/parameterserver/BaseNd4jTest.java index 8688671bc..36958198d 100644 --- a/nd4j/nd4j-parameter-server-parent/nd4j-parameter-server-client/src/test/java/org/nd4j/parameterserver/BaseNd4jTest.java +++ b/nd4j/nd4j-parameter-server-parent/nd4j-parameter-server-client/src/test/java/org/nd4j/parameterserver/BaseNd4jTest.java @@ -29,7 
+29,6 @@ import org.nd4j.linalg.api.buffer.DataType; import org.nd4j.linalg.api.memory.MemoryWorkspace; import org.nd4j.linalg.factory.Nd4j; import org.nd4j.linalg.profiler.ProfilerConfig; -import scala.collection.mutable.StringBuilder; import java.lang.management.ManagementFactory; import java.util.List; diff --git a/nd4j/nd4j-parameter-server-parent/nd4j-parameter-server-client/src/test/java/org/nd4j/parameterserver/background/BackgroundDaemonStarter.java b/nd4j/nd4j-parameter-server-parent/nd4j-parameter-server-client/src/test/java/org/nd4j/parameterserver/background/BackgroundDaemonStarter.java index 894b20189..aa32ba514 100644 --- a/nd4j/nd4j-parameter-server-parent/nd4j-parameter-server-client/src/test/java/org/nd4j/parameterserver/background/BackgroundDaemonStarter.java +++ b/nd4j/nd4j-parameter-server-parent/nd4j-parameter-server-client/src/test/java/org/nd4j/parameterserver/background/BackgroundDaemonStarter.java @@ -31,7 +31,7 @@ import java.util.concurrent.TimeoutException; /** * Start background daemons for tests * Credit to: - * http://stackoverflow.com/questions/636367/executing-a-java-application-in-a-separate-process + * https://stackoverflow.com/questions/636367/executing-a-java-application-in-a-separate-process * @author Adam Gibson */ @Slf4j diff --git a/nd4j/nd4j-parameter-server-parent/nd4j-parameter-server-status/pom.xml b/nd4j/nd4j-parameter-server-parent/nd4j-parameter-server-status/pom.xml index dd50f938e..62bb98c1c 100644 --- a/nd4j/nd4j-parameter-server-parent/nd4j-parameter-server-status/pom.xml +++ b/nd4j/nd4j-parameter-server-parent/nd4j-parameter-server-status/pom.xml @@ -47,106 +47,11 @@ nd4j-parameter-server ${project.version} - - joda-time - joda-time - ${jodatime.version} - - - org.apache.commons - commons-lang3 - ${commons-lang3.version} - - - org.hibernate - hibernate-validator - ${hibernate.version} - - - org.scala-lang - scala-library - ${scala.version} - - - org.scala-lang - scala-reflect - ${scala.version} - - - org.yaml - 
snakeyaml - ${snakeyaml.version} - - - com.fasterxml.jackson.core - jackson-core - ${jackson.version} - - - com.fasterxml.jackson.core - jackson-databind - ${jackson.version} - - - com.fasterxml.jackson.core - jackson-annotations - ${jackson.version} - - - com.fasterxml.jackson.datatype - jackson-datatype-jdk8 - ${jackson.version} - - - com.fasterxml.jackson.datatype - jackson-datatype-jsr310 - ${jackson.version} - - - com.typesafe - config - ${typesafe.config.version} - - - com.typesafe.play - play-java_2.11 - ${playframework.version} - - - ch.qos.logback - logback-core - - - ch.qos.logback - logback-classic - - - com.google.code.findbugs - jsr305 - - - org.slf4j - jul-to-slf4j - - - org.slf4j - jcl-over-slf4j - - - org.apache.tomcat - tomcat-servlet-api - - - net.jodah - typetools - - - - net.jodah - typetools - ${jodah.typetools.version} + junit + junit + test @@ -156,9 +61,39 @@ - junit - junit - test + com.typesafe.play + play-java_2.11 + ${playframework.version} + + + ch.qos.logback + logback-core + + + ch.qos.logback + logback-classic + + + com.google.code.findbugs + jsr305 + + + org.slf4j + jul-to-slf4j + + + org.slf4j + jcl-over-slf4j + + + org.apache.tomcat + tomcat-servlet-api + + + net.jodah + typetools + + diff --git a/nd4j/nd4j-parameter-server-parent/nd4j-parameter-server/pom.xml b/nd4j/nd4j-parameter-server-parent/nd4j-parameter-server/pom.xml index 1122f90d7..af7316a37 100644 --- a/nd4j/nd4j-parameter-server-parent/nd4j-parameter-server/pom.xml +++ b/nd4j/nd4j-parameter-server-parent/nd4j-parameter-server/pom.xml @@ -34,39 +34,6 @@ nd4j-parameter-server-model ${project.version} - - commons-codec - commons-codec - ${commons-codec.version} - - - org.apache.httpcomponents - httpclient - ${httpclient.version} - - - org.apache.httpcomponents - httpcore - ${httpcore.version} - - - org.apache.httpcomponents - httpmime - ${httpmime.version} - - - - com.mashape.unirest - unirest-java - ${unirest.version} - - - - org.nd4j - nd4j-jackson - ${project.version} 
- - org.slf4j slf4j-log4j12 @@ -76,16 +43,20 @@ junit junit + + org.nd4j + nd4j-aeron + ${project.version} + com.beust jcommander ${jcommander.version} - - org.nd4j - nd4j-aeron - ${project.version} + com.mashape.unirest + unirest-java + ${unirest.version} diff --git a/nd4j/nd4j-parameter-server-parent/nd4j-parameter-server/src/main/java/org/nd4j/parameterserver/util/CheckSocket.java b/nd4j/nd4j-parameter-server-parent/nd4j-parameter-server/src/main/java/org/nd4j/parameterserver/util/CheckSocket.java index c550176c8..de88ff27a 100644 --- a/nd4j/nd4j-parameter-server-parent/nd4j-parameter-server/src/main/java/org/nd4j/parameterserver/util/CheckSocket.java +++ b/nd4j/nd4j-parameter-server-parent/nd4j-parameter-server/src/main/java/org/nd4j/parameterserver/util/CheckSocket.java @@ -20,7 +20,7 @@ import java.io.IOException; import java.net.*; /** - * Credit: http://stackoverflow.com/questions/5226905/test-if-remote-port-is-in-use + * Credit: https://stackoverflow.com/questions/5226905/test-if-remote-port-is-in-use * * */ diff --git a/nd4j/nd4j-remote/nd4j-grpc-client/pom.xml b/nd4j/nd4j-remote/nd4j-grpc-client/pom.xml index aa60e9586..9dbdcbf24 100644 --- a/nd4j/nd4j-remote/nd4j-grpc-client/pom.xml +++ b/nd4j/nd4j-remote/nd4j-grpc-client/pom.xml @@ -74,12 +74,14 @@ ch.qos.logback logback-classic ${logback.version} + test ch.qos.logback logback-core ${logback.version} + test diff --git a/nd4j/nd4j-serde/nd4j-arrow/pom.xml b/nd4j/nd4j-serde/nd4j-arrow/pom.xml index 4e4ba462e..f16583745 100644 --- a/nd4j/nd4j-serde/nd4j-arrow/pom.xml +++ b/nd4j/nd4j-serde/nd4j-arrow/pom.xml @@ -39,16 +39,6 @@ nd4j-api ${project.version} - - com.carrotsearch - hppc - ${hppc.version} - - - joda-time - joda-time - ${jodatime.version} - org.apache.arrow arrow-vector diff --git a/nd4j/nd4j-serde/nd4j-kryo/pom.xml b/nd4j/nd4j-serde/nd4j-kryo/pom.xml index 8d046ecf4..850413b1d 100644 --- a/nd4j/nd4j-serde/nd4j-kryo/pom.xml +++ b/nd4j/nd4j-serde/nd4j-kryo/pom.xml @@ -94,26 +94,7 @@ 
${jkserializers.version} - - commons-codec - commons-codec - ${commons-codec.version} - - - commons-io - commons-io - ${commons-io.version} - - - org.apache.commons - commons-lang3 - ${commons-lang3.version} - - - org.ow2.asm - asm - ${asm.version} - + org.apache.spark spark-core_2.11 diff --git a/pom.xml b/pom.xml index 1f8e0dca9..f907a58ed 100644 --- a/pom.xml +++ b/pom.xml @@ -453,133 +453,6 @@ - - s3-repo - - - local.software.repository - s3-repo - - - - - s3-repo - s3-repo - s3://${s3.repo.url} - - - - - skymindnexus-skil - - - local.software.repository - skymindnexus-skil - - - - - skymindnexus - skymindnexus - https://nexus.skymind.io/repository/skil/ - - - - - local-nexus - - - local.software.repository - nexus - - - - - local-nexus - local-nexus - - http://master-jenkins.skymind.io:8088/repository/snapshots - - - - - - - maven-deploy-plugin - ${maven-deploy-plugin.version} - - true - - - - org.sonatype.plugins - nexus-staging-maven-plugin - 1.6.6 - - - default-deploy - deploy - - deploy - - - - true - - local-nexus - http://master-jenkins.skymind.io:8088/ - true - - - - - - - local-jfrog - - - local.software.repository - jfrog - - - - - local-jfrog - local-jfrog - http://master-jenkins.skymind.io:8081/artifactory/libs-snapshot-local - - - - local-jfrog - local-jfrog - http://master-jenkins.skymind.io:8081/artifactory/libs-release-local - - - - - - Bintray-artifactory - - - local.software.repository - bintray - - - - - bintray-deeplearning4j-maven - deeplearning4j-maven-snapshots - https://oss.jfrog.org/artifactory/oss-snapshot-local - - - bintray-deeplearning4j-maven - deeplearning4j-maven-releases - - https://api.bintray.com/maven/deeplearning4j/maven/${project.artifactId}/;publish=1 - - - - sonatype-nexus diff --git a/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/learning/sync/qlearning/discrete/QLearningDiscrete.java b/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/learning/sync/qlearning/discrete/QLearningDiscrete.java index 
ca5ddf0f2..796780fb9 100644 --- a/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/learning/sync/qlearning/discrete/QLearningDiscrete.java +++ b/rl4j/rl4j-core/src/main/java/org/deeplearning4j/rl4j/learning/sync/qlearning/discrete/QLearningDiscrete.java @@ -44,7 +44,7 @@ import java.util.ArrayList; * * DQN or Deep Q-Learning in the Discrete domain * - * http://arxiv.org/abs/1312.5602 + * https://arxiv.org/abs/1312.5602 * */ public abstract class QLearningDiscrete extends QLearning {