Params change in layers despite FrozenLayerWithBackprop

Hi,

I used FrozenLayerWithBackprop to freeze some layers in a DL4J model (for implementing GANs). But when I run

generatorNetwork.fit(zRandom, labelG);

and compare the network weights (params) before and after the call, I can see that the params have changed, even in the supposedly frozen layers.
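
Concretely, the check I'm doing amounts to something like this (a minimal sketch; index 3 is just the first FrozenLayerWithBackprop in the config below):

    int frozenIndex = 3; // first FrozenLayerWithBackprop in the config below
    INDArray before = generatorNetwork.getLayer(frozenIndex).params().dup(); // copy the params before training
    generatorNetwork.fit(zRandom, labelG);
    INDArray after = generatorNetwork.getLayer(frozenIndex).params();
    System.out.println("frozen layer unchanged: " + before.equals(after)); // I'd expect true, but I get false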

Here’s my code:

  MultiLayerConfiguration genConf = new NeuralNetConfiguration.Builder()
          .seed(seed)
          .weightInit(WeightInit.XAVIER)
          .l2(l2)
          .gradientNormalization(GradientNormalization.RenormalizeL2PerLayer)
          .updater(updater)
          .list()
          .layer(new DenseLayer.Builder().nIn(zRandomSize).nOut(generatorOnlyLayerSize).activation(activation).dropOut(dropOutProbability).build())
          .layer(new DenseLayer.Builder().nIn(generatorOnlyLayerSize).nOut(generatorOnlyLayerSize).activation(activation).dropOut(dropOutProbability).build())
          .layer(new DenseLayer.Builder().nIn(generatorOnlyLayerSize).nOut(width * height).activation(activation).dropOut(dropOutProbability).build())
          .layer(new FrozenLayerWithBackprop(new DenseLayer.Builder().nIn(width * height).nOut(n1).activation(activation).dropOut(dropOutProbability).build()))
          .layer(new BatchNormalization())
          .layer(new FrozenLayerWithBackprop(new DenseLayer.Builder().nIn(n1).nOut(n2).activation(activation).dropOut(dropOutProbability).build()))
          .layer(new BatchNormalization())
          .layer(new FrozenLayerWithBackprop(new DenseLayer.Builder().nIn(n2).nOut(n3).activation(activation).dropOut(dropOutProbability).build()))
          .layer(new BatchNormalization())
          .layer(new FrozenLayerWithBackprop(new DenseLayer.Builder().nIn(n3).nOut(n4).activation(activation).dropOut(dropOutProbability).build()))
          .layer(new BatchNormalization())
          .layer(new FrozenLayerWithBackprop(new OutputLayer.Builder(lossFunction)
                  .activation(activation).nIn(n4).nOut(1).build()))
          .build();

  MultiLayerNetwork genNetwork = new MultiLayerNetwork(genConf);
  genNetwork.init();

Am I using it incorrectly? Am I misunderstanding it? Is there a bug?

Thank you, Don

@DonaldAlan mind setting up a reproducer? You might be running into a bug. Wouldn't hurt to check. The main reason we built those layers in the first place was for the transfer learning API. That should be well tested. I'd be curious to see the results there.
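
For reference, the transfer learning route looks roughly like this (a quick sketch, not tested against your setup; frozenGen is just a placeholder name, and if I remember the semantics right, setFeatureExtractor freezes the early layers up to and including the given index, whereas your GAN setup freezes the later ones):

    import org.deeplearning4j.nn.transferlearning.FineTuneConfiguration;
    import org.deeplearning4j.nn.transferlearning.TransferLearning;

    // Build a copy of an existing network in which layers 0..3 are frozen,
    // so fit() only updates the parameters of the remaining layers.
    MultiLayerNetwork frozenGen = new TransferLearning.Builder(genNetwork)
            .fineTuneConfiguration(new FineTuneConfiguration.Builder()
                    .updater(Adam.builder().learningRate(1e-3).build())
                    .build())
            .setFeatureExtractor(3) // layers up to and including index 3 become frozen
            .build();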

Here’s code that reproduces the problem:

package org.deeplearning4j.examples.advanced.modelling.bug;

import org.deeplearning4j.nn.api.Layer;
import org.deeplearning4j.nn.conf.GradientNormalization;
import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.layers.BatchNormalization;
import org.deeplearning4j.nn.conf.layers.DenseLayer;
import org.deeplearning4j.nn.conf.layers.OutputLayer;
import org.deeplearning4j.nn.conf.layers.misc.FrozenLayerWithBackprop;
import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
import org.deeplearning4j.nn.weights.WeightInit;
import org.nd4j.linalg.activations.Activation;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.api.rng.distribution.impl.NormalDistribution;
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.learning.config.Adam;
import org.nd4j.linalg.learning.config.IUpdater;
import org.nd4j.linalg.lossfunctions.LossFunctions;
import java.text.NumberFormat;
import java.util.Arrays;

/**
 * Despite using FrozenLayerWithBackprop, running fit() changes the params of the frozen layers.
 * @author Donald Smith, zdl
 */
public class FrozenLayerWithBackpropBug {
    private final static int width = 256;
    private final static int height = 3;
    private static final IUpdater updater = Adam.builder().learningRate(1e-3).beta1(0.95).build();
    private final static double dropOutProbability = 0; // 0.25;
    private final static long seed = System.currentTimeMillis();
    private final static int batchSize = 200; // seems to learn better with big batches
    private final static LossFunctions.LossFunction lossFunction = LossFunctions.LossFunction.MSE;
    private final static Activation activation =  Activation.LEAKYRELU; // Activation.RELU;
    private final static int generatorOnlyLayerSize = 150;
    private final static int n1 = width*height;
    private final static int n2 = 200;
    private final static int n3 = 200;
    private final static int n4 = n3;
    private final static int zRandomSize = 90;
    //...................................
    private final static NumberFormat numberFormat8 = NumberFormat.getNumberInstance();
    static {
        numberFormat8.setMinimumFractionDigits(8);
        numberFormat8.setMaximumFractionDigits(8);
    }
    //--------------

    public static void main(String[] args) throws Exception {
        MultiLayerConfiguration genConf = new NeuralNetConfiguration.Builder()
                .seed(seed)
                .weightInit(WeightInit.XAVIER)
                .l2(0.01)
                .gradientNormalization(GradientNormalization.RenormalizeL2PerLayer)
                .updater(updater)
                .list()
                .layer(new DenseLayer.Builder().nIn(zRandomSize).nOut(generatorOnlyLayerSize).activation(activation).dropOut(dropOutProbability).build())
                .layer(new DenseLayer.Builder().nIn(generatorOnlyLayerSize).nOut(generatorOnlyLayerSize).activation(activation).dropOut(dropOutProbability).build())
                .layer(new DenseLayer.Builder().nIn(generatorOnlyLayerSize).nOut(width * height).activation(activation).dropOut(dropOutProbability).build())
                .layer(new FrozenLayerWithBackprop(new DenseLayer.Builder().nIn(width * height).nOut(n1).activation(activation).dropOut(dropOutProbability).build()))
                .layer(new BatchNormalization())
                .layer(new FrozenLayerWithBackprop(new DenseLayer.Builder().nIn(n1).nOut(n2).activation(activation).dropOut(dropOutProbability).build()))
                .layer(new BatchNormalization())
                .layer(new FrozenLayerWithBackprop(new DenseLayer.Builder().nIn(n2).nOut(n3).activation(activation).dropOut(dropOutProbability).build()))
                .layer(new BatchNormalization())
                .layer(new FrozenLayerWithBackprop(new DenseLayer.Builder().nIn(n3).nOut(n4).activation(activation).dropOut(dropOutProbability).build()))
                .layer(new BatchNormalization())
                .layer(new FrozenLayerWithBackprop(new OutputLayer.Builder(lossFunction)
                        .activation(activation).nIn(n4).nOut(1).build()))
                .build();

        MultiLayerNetwork genNetwork = new MultiLayerNetwork(genConf);
        genNetwork.init();
        INDArray zRandom = Nd4j.rand(new NormalDistribution(), batchSize, zRandomSize); // shape [batchSize, zRandomSize]
        INDArray labelG = Nd4j.rand(batchSize, 1); // shape [batchSize, 1]
        showWeightStats(genNetwork, "Generator");
        genNetwork.fit(zRandom, labelG);
        showWeightStats(genNetwork, "Generator");
    }

    private static void showWeightStats(final MultiLayerNetwork net, final String name) {
        System.out.println("weightStats for " + name);
        for (int layerIndex = 0; layerIndex < net.getnLayers(); layerIndex++) {
            Layer layer = net.getLayer(layerIndex);
            INDArray params = layer.params();
            double min = params.minNumber().doubleValue();
            double max = params.maxNumber().doubleValue();
            System.out.println(layerIndex + ": " + Arrays.toString(params.shape())
                    + " in [" + numberFormat8.format(min) + " : " + numberFormat8.format(max) + "]");
        }
    }
}


Even if I remove gradientNormalization and the BatchNormalization layers, the params still change.

And the problem shows up on both CPU and GPU.
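
For a stricter check than the min/max summary in showWeightStats, this variation snapshots every layer's params before fit() and reports exactly which layers changed (a sketch reusing the variables from the reproducer above):

    // Snapshot each layer's parameters before training; dup() copies the data,
    // so fit() cannot mutate the snapshots.
    INDArray[] before = new INDArray[genNetwork.getnLayers()];
    for (int i = 0; i < genNetwork.getnLayers(); i++) {
        before[i] = genNetwork.getLayer(i).params().dup();
    }
    genNetwork.fit(zRandom, labelG);
    for (int i = 0; i < genNetwork.getnLayers(); i++) {
        boolean changed = !before[i].equals(genNetwork.getLayer(i).params());
        System.out.println("layer " + i + " params changed: " + changed);
    }

Only the first three DenseLayers and the BatchNormalization layers should show up as changed, but in my runs the FrozenLayerWithBackprop layers change too.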