Below you will find my code.
In the code below, when
// SDVariable out = sd.nn.softmax("out", layer0.mmul(w1).add(b1));
SDVariable out = sd.nn.softmax("out", layer0.mmul(w1FromArray).add(b1));
the error message is
“An input placeholder “input” is required to calculate the requested outputs, but a placeholder value was not provided”
when
SDVariable out = sd.nn.softmax("out", layer0.mmul(w1).add(b1));
// SDVariable out = sd.nn.softmax("out", layer0.mmul(w1FromArray).add(b1));
the code runs through the first iteration then fails while attempting to execute the second because the value of #timesteps has changed. The error is the following:
"ShapeUtils::evalShapeForMatmul static method: input shapes are inconsistent: xDim 14 != yDim 33 "
private static int lastTrainCount = 0;
private static int lastTestCount = 0;
private static SequenceRecordReader trainFeatures;
private static SequenceRecordReader trainLabels;
private static DataSetIterator trainData;
private static SequenceRecordReader testFeatures;
private static SequenceRecordReader testLabels;
private static DataSetIterator testData;
private static NormalizerStandardize normalizer;
//Properties for dataset:
private static int nIn = 6;
private static int nOut = 2;
private static int miniBatchSize = 32;
private static int numLabelClasses = -1;
private static SameDiff sd = SameDiff.create();
private static long dim0 = 0L;
private static long dim1 = 0L;
private static long dim2 = 0L;
private static DataSet t;
private static INDArray w1Array;
public static void sameDiff3() throws IOException, InterruptedException
{
trainFeatures = new CSVSequenceRecordReader();
trainFeatures.initialize(new NumberedFileInputSplit(featuresDirTrain.getAbsolutePath() + "/%d.csv", 0, lastTrainCount));
trainLabels = new CSVSequenceRecordReader();
trainLabels.initialize(new NumberedFileInputSplit(labelsDirTrain.getAbsolutePath() + "/%d.csv", 0, lastTrainCount));
trainData = new SequenceRecordReaderDataSetIterator(trainFeatures, trainLabels, miniBatchSize, numLabelClasses,
true, SequenceRecordReaderDataSetIterator.AlignmentMode.ALIGN_END);
testFeatures = new CSVSequenceRecordReader();
testFeatures.initialize(new NumberedFileInputSplit(featuresDirTest.getAbsolutePath() + "/%d.csv", 0, lastTestCount));
testLabels = new CSVSequenceRecordReader();
testLabels.initialize(new NumberedFileInputSplit(labelsDirTest.getAbsolutePath() + "/%d.csv", 0, lastTestCount));
testData = new SequenceRecordReaderDataSetIterator(testFeatures, testLabels, miniBatchSize, numLabelClasses,
true, SequenceRecordReaderDataSetIterator.AlignmentMode.ALIGN_END);
normalizer = new NormalizerStandardize();
normalizer.fitLabel(true);
normalizer.fit(trainData); //Collect the statistics (mean/stdev) from the training data. This does not modify the input data
trainData.reset();
while(trainData.hasNext()) {
normalizer.transform(trainData.next()); //Apply normalization to the training data
}
while(testData.hasNext()) {
normalizer.transform(testData.next()); //Apply normalization to the test data. This is using statistics calculated from the *training* set
}
trainData.reset();
testData.reset();
trainData.setPreProcessor(normalizer);
testData.setPreProcessor(normalizer);
System.out.println(" Printing traindata dataset shape - 1");
DataSet data = trainData.next();
System.out.println(Arrays.toString(data.getFeatures().shape()));
System.out.println(" Printing testdata dataset shape - 1");
DataSet data2 = testData.next();
System.out.println(Arrays.toString(data2.getFeatures().shape()));
trainData.reset();
testData.reset();
UIServer uiServer = UIServer.getInstance();
StatsStorage statsStorage = new InMemoryStatsStorage(); //Alternative: new FileStatsStorage(File), for saving and loading later
uiServer.attach(statsStorage);
int listenerFrequency = 1;
sd.setListeners(new ScoreListener());
t = trainData.next();
dim0 = t.getFeatures().size(0);
dim1 = t.getFeatures().size(1);
dim2 = t.getFeatures().size(2);
trainData.reset();
getConfiguration();
int whileLoopIndex = 0;
Map<String,INDArray> placeholderData = new HashMap<>();
whileLoopIndex = -1;
trainData.reset();
while(trainData.hasNext()) {
++whileLoopIndex;
placeholderData = new HashMap<>();
t = trainData.next();
System.out.println(" ======================================================= - ");
System.out.println(" Printing traindata feature and label dataset shape");
System.out.println(Arrays.toString(t.getFeatures().shape()));
System.out.println(Arrays.toString(t.getLabels().shape()));
System.out.println(" ======================================================= - ");
INDArray features = t.getFeatures();
INDArray labels = t.getLabels();
placeholderData.put("input", features);
placeholderData.put("label", labels);
dim0 = t.getFeatures().size(0);
dim1 = t.getFeatures().size(1);
dim2 = t.getFeatures().size(2);
System.out.println(" features - dim0 - "+dim0);
System.out.println(" features - dim1 - "+dim1);
System.out.println(" features - dim2 - "+dim2);
History history = sd.fit(t);
System.out.println(" Completed training run --- ");
}
System.out.println(" Starting test data evaluation --- ");
String outputVariable = "out";
Evaluation evaluation = new Evaluation();
sd.evaluate(testData, outputVariable, evaluation);
System.out.println(" evaluation.stats() - "+evaluation.stats());
String pathToSavedNetwork = "src/main/assets/location_next_neural_network_v6_07.zip";
File savedNetwork = new File(pathToSavedNetwork);
sd.save(savedNetwork, true);
// ModelSerializer.addNormalizerToModel(savedNetwork, normalizer);
System.out.println("----- Example Complete -----");
File saveFileForInference = new File("src/main/assets/sameDiffExampleInference.fb");
try {
sd.asFlatFile(saveFileForInference);
} catch (IOException e) {
throw new RuntimeException(e);
}
}
private static void getConfiguration()
{
SDVariable input = sd.placeHolder("input", DataType.FLOAT, miniBatchSize, nIn, t.getFeatures().size(2));
SDVariable label = sd.placeHolder("label", DataType.FLOAT, miniBatchSize, nOut, t.getLabels().size(2));
LSTMLayerConfig mLSTMConfiguration = LSTMLayerConfig.builder()
.lstmdataformat(LSTMDataFormat.NST)
// .lstmdataformat(LSTMDataFormat.NTS)
.directionMode(LSTMDirectionMode.FWD)
// .directionMode(LSTMDirectionMode.BIDIR_CONCAT)
.gateAct(LSTMActivations.SIGMOID)
.cellAct(LSTMActivations.SOFTPLUS)
.outAct(LSTMActivations.SOFTPLUS)
.retFullSequence(true)
.retLastC(false)
.retLastH(false)
.build();
LSTMLayerOutputs outputs = new LSTMLayerOutputs(sd.rnn.lstmLayer(
input,
LSTMLayerWeights.builder()
.weights(sd.var("weights", Nd4j.rand(DataType.FLOAT, nIn, 4 * nOut)))
.rWeights(sd.var("rWeights", Nd4j.rand(DataType.FLOAT, nOut, 4 * nOut)))
.bias(sd.var("bias", Nd4j.rand(DataType.FLOAT, 4 * nOut)))
.build(),
mLSTMConfiguration), mLSTMConfiguration);
System.out.println(" input.getShape()[0] - "+input.getShape()[0]);
System.out.println(" input.getShape()[1] - "+input.getShape()[1]);
System.out.println(" input.getShape()[2] - "+input.getShape()[2]);
SDVariable layer0 = outputs.getOutput();
// SDVariable layer1 = layer0.mean(1);
SDVariable w1 = sd.var("w1", new XavierInitScheme('c', nIn, nOut), DataType.FLOAT, miniBatchSize, t.getFeatures().size(2), t.getFeatures().size(2));
SDVariable b1 = sd.var("b1", Nd4j.rand(DataType.FLOAT, miniBatchSize, nOut, t.getFeatures().size(2)));
XavierInitScheme xavierInitScheme = new XavierInitScheme('c', nIn, nOut);
w1Array = xavierInitScheme.create(DataType.FLOAT, nIn, miniBatchSize, t.getFeatures().size(2), t.getFeatures().size(2));
SDVariable w1FromArray = sd.placeHolder("w1FromArray", DataType.FLOAT, miniBatchSize, t.getFeatures().size(2), t.getFeatures().size(2));
// SDVariable out = sd.nn.softmax("out", layer0.mmul(w1).add(b1));
SDVariable out = sd.nn.softmax("out", layer0.mmul(w1FromArray).add(b1));
SDVariable loss = sd.loss.logLoss("loss", label, out);
sd.setLossVariables("loss");
double learningRate = 1e-3;
TrainingConfig config = new TrainingConfig.Builder()
.l2(1e-4) //L2 regularization
.updater(new Adam(learningRate)) //Adam optimizer with specified learning rate
.dataSetFeatureMapping("input") //DataSet features array should be associated with variable "input"
.dataSetLabelMapping("label") //DataSet label array should be associated with variable "label"
.build();
sd.setTrainingConfig(config);
System.out.println(" Printing sd information");
// System.out.println(sd.toString());
System.out.println(sd.summary());
}