Hi guys,
I'm trying to run the code from this page: How to build a custom object detector using Yolo. It is an object detector for a Rubik's cube.
Below is the code; I made a few small changes:
package com.dl4j.yolo.sample;
import java.io.File;
import java.io.IOException;
import java.io.Serializable;
import java.net.URI;
import java.util.List;
import java.util.Random;
import org.bytedeco.javacpp.Loader;
import org.bytedeco.opencv.opencv_java;
import org.datavec.api.io.filters.BalancedPathFilter;
import org.datavec.api.io.labels.ParentPathLabelGenerator;
import org.datavec.api.records.metadata.RecordMetaDataImageURI;
import org.datavec.api.split.FileSplit;
import org.datavec.api.split.InputSplit;
import org.datavec.image.loader.NativeImageLoader;
import org.datavec.image.recordreader.objdetect.ObjectDetectionRecordReader;
import org.datavec.image.recordreader.objdetect.impl.VocLabelProvider;
import org.deeplearning4j.datasets.datavec.RecordReaderDataSetIterator;
import org.deeplearning4j.nn.api.OptimizationAlgorithm;
import org.deeplearning4j.nn.conf.ConvolutionMode;
import org.deeplearning4j.nn.conf.GradientNormalization;
import org.deeplearning4j.nn.conf.WorkspaceMode;
import org.deeplearning4j.nn.conf.inputs.InputType;
import org.deeplearning4j.nn.conf.layers.ConvolutionLayer;
import org.deeplearning4j.nn.conf.layers.objdetect.Yolo2OutputLayer;
import org.deeplearning4j.nn.graph.ComputationGraph;
import org.deeplearning4j.nn.layers.objdetect.DetectedObject;
import org.deeplearning4j.nn.transferlearning.FineTuneConfiguration;
import org.deeplearning4j.nn.transferlearning.TransferLearning;
import org.deeplearning4j.nn.weights.WeightInit;
import org.deeplearning4j.optimize.listeners.ScoreIterationListener;
import org.deeplearning4j.util.ModelSerializer;
import org.deeplearning4j.zoo.model.TinyYOLO;
import org.nd4j.linalg.activations.Activation;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.dataset.DataSet;
import org.nd4j.linalg.dataset.api.preprocessor.ImagePreProcessingScaler;
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.learning.config.RmsProp;
import org.opencv.core.Mat;
import org.opencv.core.Point;
import org.opencv.core.Scalar;
import org.opencv.imgcodecs.Imgcodecs;
import org.opencv.imgproc.Imgproc;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
public class YOLOTrainer {
private static final Logger log = LoggerFactory.getLogger(YOLOTrainer.class);
private static final int INPUT_WIDTH = 416;
private static final int INPUT_HEIGHT = 416;
private static final int CHANNELS = 3;
private static final int GRID_WIDTH = 13;
private static final int GRID_HEIGHT = 13;
private static final int CLASSES_NUMBER = 1;
private static final int BOXES_NUMBER = 5;
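// Anchor (prior) box sizes as {width, height} pairs, expressed in grid-cell units of the 13x13 output grid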
private static final double[][] PRIOR_BOXES = {{1.5, 1.5}, {2, 2}, {3, 3}, {3.5, 8}, {4, 9}};
private static final int BATCH_SIZE = 4;
private static final int EPOCHS = 50;
private static final double LEARNING_RATE = 0.0001;
private static final int SEED = 7854;
/* Parent dataset folder "DATA_DIR" contains two subfolders, "images" and "annotations" */
private static final String DATA_DIR = "C:\\Java\\Dataset";
/* YOLO loss function parameters; for more info see
https://stats.stackexchange.com/questions/287486/yolo-loss-function-explanation */
private static final double LAMBDA_COORD = 1.0;
private static final double LAMBDA_NO_OBJECT = 0.5;
public static void main(String[] args) throws IOException, InterruptedException {
Loader.load(opencv_java.class); // load the native OpenCV bindings backing the org.opencv.* classes used below
Random rng = new Random(SEED);
//Initialize the user interface backend; it is similar to TensorBoard.
//it starts at http://localhost:9000
//UIServer uiServer = UIServer.getInstance();
//Configure where the network information (gradients, score vs. time etc) is to be stored. Here: store in memory.
//StatsStorage statsStorage = new InMemoryStatsStorage();
//Attach the StatsStorage instance to the UI: this allows the contents of the StatsStorage to be visualized
//uiServer.attach(statsStorage);
File imageDir = new File(DATA_DIR, "images");
log.info("Load data...");
ParentPathLabelGenerator LABEL_GENERATOR_MAKER = new ParentPathLabelGenerator();
BalancedPathFilter PATH_FILTER = new BalancedPathFilter(rng, NativeImageLoader.ALLOWED_FORMATS, LABEL_GENERATOR_MAKER);
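// Split the image set into 85% training and 15% test data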
InputSplit[] data = new FileSplit(imageDir, NativeImageLoader.ALLOWED_FORMATS, rng).sample(PATH_FILTER, 85, 15);
InputSplit trainData = data[0];
InputSplit testData = data[1];
ObjectDetectionRecordReader recordReaderTrain = new ObjectDetectionRecordReader(INPUT_HEIGHT, INPUT_WIDTH, CHANNELS,
GRID_HEIGHT, GRID_WIDTH, new VocLabelProvider(DATA_DIR));
recordReaderTrain.initialize(trainData);
ObjectDetectionRecordReader recordReaderTest = new ObjectDetectionRecordReader(INPUT_HEIGHT, INPUT_WIDTH, CHANNELS,
GRID_HEIGHT, GRID_WIDTH, new VocLabelProvider(DATA_DIR));
recordReaderTest.initialize(testData);
RecordReaderDataSetIterator train = new RecordReaderDataSetIterator(recordReaderTrain, BATCH_SIZE, 1, 1, true);
train.setPreProcessor(new ImagePreProcessingScaler(0, 1));
RecordReaderDataSetIterator test = new RecordReaderDataSetIterator(recordReaderTest, BATCH_SIZE, 1, 1, true);
test.setPreProcessor(new ImagePreProcessingScaler(0, 1));
// Load the pretrained TinyYOLO model and adapt it via transfer learning for our single-class detector
ComputationGraph pretrained = (ComputationGraph) TinyYOLO.builder().build().initPretrained();
INDArray priors = Nd4j.create(PRIOR_BOXES);
FineTuneConfiguration fineTuneConf = new FineTuneConfiguration.Builder()
.seed(SEED)
.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
.gradientNormalization(GradientNormalization.RenormalizeL2PerLayer)
.gradientNormalizationThreshold(1.0)
.updater(new RmsProp(LEARNING_RATE))
.activation(Activation.IDENTITY).miniBatch(true)
.trainingWorkspaceMode(WorkspaceMode.ENABLED)
.build();
ComputationGraph model = new TransferLearning.GraphBuilder(pretrained)
.fineTuneConfiguration(fineTuneConf)
.setInputTypes(InputType.convolutional(INPUT_HEIGHT, INPUT_WIDTH, CHANNELS))
.removeVertexKeepConnections("conv2d_9")
.removeVertexKeepConnections("outputs")
.addLayer("convolution2d_9",
new ConvolutionLayer.Builder(1, 1)
.nIn(1024)
.nOut(BOXES_NUMBER * (5 + CLASSES_NUMBER))
.stride(1, 1)
.convolutionMode(ConvolutionMode.Same)
.weightInit(WeightInit.UNIFORM)
.hasBias(false)
.activation(Activation.IDENTITY)
.build(), "leaky_re_lu_8")
.addLayer("outputs",
new Yolo2OutputLayer.Builder()
.lambdaNoObj(LAMBDA_NO_OBJECT)
.lambdaCoord(LAMBDA_COORD)
.boundingBoxPriors(priors)
.build(), "convolution2d_9")
.setOutputs("outputs")
.build();
log.info("\n Model Summary \n" + model.summary());
log.info("Train model...");
model.setListeners(new ScoreIterationListener(1));//print score after each iteration on stdout
//model.setListeners(new StatsListener(statsStorage));// visit http://localhost:9000 to track the training process
for (int i = 0; i < EPOCHS; i++) {
train.reset();
while (train.hasNext()) {
model.fit(train.next());
}
log.info("*** Completed epoch {} ***", i);
}
log.info("*** Saving Model ***");
ModelSerializer.writeModel(model, "C:\\Java\\model.data", true);
log.info("*** Training Done ***");
URI[] loc = testData.locations();
for (int i = 0; i < loc.length; i++) {
URI uri = loc[i];
Mat image = Imgcodecs.imread(uri.getPath().substring(1));
List<DetectedObject> objs = detect(image, model);
boolean found = addRects(image, objs);
String name = String.format("NF_%s.jpg", i);
if(found) {
name = String.format("F_%s.jpg", i);
}
Imgcodecs.imwrite("C:\\Java\\test\\" + name, image);
}
}
public static List<DetectedObject> detect(Mat image, ComputationGraph model) throws IOException {
org.deeplearning4j.nn.layers.objdetect.Yolo2OutputLayer yout = (org.deeplearning4j.nn.layers.objdetect.Yolo2OutputLayer) model.getOutputLayer(0);
NativeImageLoader loader = new NativeImageLoader(INPUT_HEIGHT, INPUT_WIDTH, CHANNELS);
INDArray ds = loader.asMatrix(image);
ImagePreProcessingScaler scaler = new ImagePreProcessingScaler(0, 1);
scaler.transform(ds);
INDArray results = model.outputSingle(ds);
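// Decode the network output into boxes, keeping only detections with confidence above 0.4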
List<DetectedObject> objs = yout.getPredictedObjects(results, 0.4);
return objs;
}
public static boolean addRects(Mat image, List<DetectedObject> objs) {
boolean result = false;
Scalar color = new Scalar(0, 0, 255);
for (int i = 0; i < objs.size(); i++) {
DetectedObject obj = objs.get(i);
int imgW = image.width();
int imgH = image.height();
double[] xy1 = obj.getTopLeftXY();
double[] xy2 = obj.getBottomRightXY();
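// DetectedObject coordinates are in grid-cell units, so scale them to image pixels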
int x1 = (int) Math.round(imgW * xy1[0] / GRID_WIDTH);
int y1 = (int) Math.round(imgH * xy1[1] / GRID_HEIGHT);
int x2 = (int) Math.round(imgW * xy2[0] / GRID_WIDTH);
int y2 = (int) Math.round(imgH * xy2[1] / GRID_HEIGHT);
if(x1 == 0 && y1 == 0 && x2 == 0 && y2 == 0) {
continue;
}
result = true;
Imgproc.rectangle(image, new Point(x1, y1), new Point(x2, y2), color);
}
return result;
}
}
The dataset can be downloaded from here.
The problem is that when I try to test the model, all the detected objects return NaN.
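To illustrate (a quick sketch for this post, not part of the code above): printing the fields of each DetectedObject returned by detect() gives NaN for every value:
for (DetectedObject obj : objs) {
    // every field, including the confidence, prints as NaN
    System.out.println("center=(" + obj.getCenterX() + ", " + obj.getCenterY() + ")"
            + " size=" + obj.getWidth() + "x" + obj.getHeight()
            + " confidence=" + obj.getConfidence());
}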
Any hints on this topic would be very helpful.
Thanks.