Tiny YOLO PredictedObjects NaN

Hi guys,

I'm trying to run the code from this page: How to build a custom object detector using Yolo. It is an object detector for a Rubik's cube.

Below is the code; I made a few small changes:

    package com.dl4j.yolo.sample;   
 
    import java.io.File;
    import java.io.IOException;
    import java.io.Serializable;
    import java.net.URI;
    import java.util.List;
    import java.util.Random;
    
    import org.bytedeco.opencv.opencv_java;
    import org.datavec.api.io.filters.BalancedPathFilter;
    import org.datavec.api.io.labels.ParentPathLabelGenerator;
    import org.datavec.api.records.metadata.RecordMetaDataImageURI;
    import org.datavec.api.split.FileSplit;
    import org.datavec.api.split.InputSplit;
    import org.datavec.image.loader.NativeImageLoader;
    import org.datavec.image.recordreader.objdetect.ObjectDetectionRecordReader;
    import org.datavec.image.recordreader.objdetect.impl.VocLabelProvider;
    import org.deeplearning4j.datasets.datavec.RecordReaderDataSetIterator;
    import org.deeplearning4j.nn.api.OptimizationAlgorithm;
    import org.deeplearning4j.nn.conf.ConvolutionMode;
    import org.deeplearning4j.nn.conf.GradientNormalization;
    import org.deeplearning4j.nn.conf.WorkspaceMode;
    import org.deeplearning4j.nn.conf.inputs.InputType;
    import org.deeplearning4j.nn.conf.layers.ConvolutionLayer;
    import org.deeplearning4j.nn.conf.layers.objdetect.Yolo2OutputLayer;
    import org.deeplearning4j.nn.graph.ComputationGraph;
    import org.deeplearning4j.nn.layers.objdetect.DetectedObject;
    import org.deeplearning4j.nn.transferlearning.FineTuneConfiguration;
    import org.deeplearning4j.nn.transferlearning.TransferLearning;
    import org.deeplearning4j.nn.weights.WeightInit;
    import org.deeplearning4j.optimize.listeners.ScoreIterationListener;
    import org.deeplearning4j.util.ModelSerializer;
    import org.deeplearning4j.zoo.model.TinyYOLO;
    import org.nd4j.linalg.activations.Activation;
    import org.nd4j.linalg.api.ndarray.INDArray;
    import org.nd4j.linalg.dataset.DataSet;
    import org.nd4j.linalg.dataset.api.preprocessor.ImagePreProcessingScaler;
    import org.nd4j.linalg.factory.Nd4j;
    import org.nd4j.linalg.learning.config.RmsProp;
    import org.opencv.core.Mat;
    import org.opencv.core.Point;
    import org.opencv.core.Scalar;
    import org.opencv.imgcodecs.Imgcodecs;
    import org.opencv.imgproc.Imgproc;
    import org.slf4j.Logger;
    import org.slf4j.LoggerFactory;
    
    public class YOLOTrainer {
    	 private static final Logger log = LoggerFactory.getLogger(YOLOTrainer.class);
    
    	    private static final int INPUT_WIDTH = 416;
    	    private static final int INPUT_HEIGHT = 416;
    	    private static final int CHANNELS = 3;
    
    	    private static final int GRID_WIDTH = 13;
    	    private static final int GRID_HEIGHT = 13;
    	    private static final int CLASSES_NUMBER = 1;
    	    private static final int BOXES_NUMBER = 5;
    	    private static final double[][] PRIOR_BOXES = {{1.5, 1.5}, {2, 2}, {3, 3}, {3.5, 8}, {4, 9}};
    
    	    private static final int BATCH_SIZE = 4;
    	    private static final int EPOCHS = 50;
    	    private static final double LEARNIGN_RATE = 0.0001;
    	    private static final int SEED = 7854;
    
    	    /*parent Dataset folder "DATA_DIR" contains two subfolder "images" and "annotations" */
    	    private static final String DATA_DIR = "C:\\Java\\Dataset";
    
    	    /* Yolo loss function prameters for more info
    	    https://stats.stackexchange.com/questions/287486/yolo-loss-function-explanation*/
    	    private static final double LAMDBA_COORD = 1.0;
    	    private static final double LAMDBA_NO_OBJECT = 0.5;
    
    	    public static void main(String[] args) throws IOException, InterruptedException {
    
    	        Random rng = new Random(SEED);
    
    	        //Initialize the user interface backend, it is just as tensorboard.
    	        //it starts at http://localhost:9000
    	        //UIServer uiServer = UIServer.getInstance();
    
    	        //Configure where the network information (gradients, score vs. time etc) is to be stored. Here: store in memory.
    	        //StatsStorage statsStorage = new InMemoryStatsStorage();
    
    	        //Attach the StatsStorage instance to the UI: this allows the contents of the StatsStorage to be visualized
    	        //uiServer.attach(statsStorage);
    
    	        File imageDir = new File(DATA_DIR, "images");
    
    	        log.info("Load data...");
    	        
    	        ParentPathLabelGenerator LABEL_GENERATOR_MAKER = new ParentPathLabelGenerator();
    	        BalancedPathFilter PATH_FILTER = new BalancedPathFilter(rng, NativeImageLoader.ALLOWED_FORMATS, LABEL_GENERATOR_MAKER);
    
    	        InputSplit[] data = new FileSplit(imageDir, NativeImageLoader.ALLOWED_FORMATS, rng).sample(PATH_FILTER, 85, 15);
    	        InputSplit trainData = data[0];
    	        InputSplit testData = data[1];
    
    	        ObjectDetectionRecordReader recordReaderTrain = new ObjectDetectionRecordReader(INPUT_HEIGHT, INPUT_WIDTH, CHANNELS,
    	                GRID_HEIGHT, GRID_WIDTH, new VocLabelProvider(DATA_DIR));
    	        recordReaderTrain.initialize(trainData);
    
    	        ObjectDetectionRecordReader recordReaderTest = new ObjectDetectionRecordReader(INPUT_HEIGHT, INPUT_WIDTH, CHANNELS,
    	                GRID_HEIGHT, GRID_WIDTH, new VocLabelProvider(DATA_DIR));
    	        recordReaderTest.initialize(testData);
    
    	        RecordReaderDataSetIterator train = new RecordReaderDataSetIterator(recordReaderTrain, BATCH_SIZE, 1, 1, true);
    	        train.setPreProcessor(new ImagePreProcessingScaler(0, 1));
    
    	        RecordReaderDataSetIterator test = new RecordReaderDataSetIterator(recordReaderTest, BATCH_SIZE, 1, 1, true);
    	        test.setPreProcessor(new ImagePreProcessingScaler(0, 1));
    
    	        /*
    	        ComputationGraph pretrained = (ComputationGraph) TinyYOLO.builder().build().initPretrained();
    
    	        INDArray priors = Nd4j.create(PRIOR_BOXES);
    	        FineTuneConfiguration fineTuneConf = new FineTuneConfiguration.Builder()
    	                .seed(SEED)
    	                .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
    	                .gradientNormalization(GradientNormalization.RenormalizeL2PerLayer)
    	                .gradientNormalizationThreshold(1.0)
    	                .updater(new RmsProp(LEARNIGN_RATE))
    	                .activation(Activation.IDENTITY).miniBatch(true)
    	                .trainingWorkspaceMode(WorkspaceMode.ENABLED)
    	                .build();
    
    	        ComputationGraph model = new TransferLearning.GraphBuilder(pretrained)
    	                .fineTuneConfiguration(fineTuneConf)
    	                .setInputTypes(InputType.convolutional(INPUT_HEIGHT, INPUT_WIDTH, CHANNELS))
    	                .removeVertexKeepConnections("conv2d_9")
    	                .removeVertexKeepConnections("outputs")
    	                .addLayer("convolution2d_9",
    	                        new ConvolutionLayer.Builder(1, 1)
    	                                .nIn(1024)
    	                                .nOut(BOXES_NUMBER * (5 + CLASSES_NUMBER))
    	                                .stride(1, 1)
    	                                .convolutionMode(ConvolutionMode.Same)
    	                                .weightInit(WeightInit.UNIFORM)
    	                                .hasBias(false)
    	                                .activation(Activation.IDENTITY)
    	                                .build(), "leaky_re_lu_8")
    	                .addLayer("outputs",
    	                        new Yolo2OutputLayer.Builder()
    	                                .lambdaNoObj(LAMDBA_NO_OBJECT)
    	                                .lambdaCoord(LAMDBA_COORD)
    	                                .boundingBoxPriors(priors)
    	                                .build(), "convolution2d_9")
    	                .setOutputs("outputs")
    	                .build();
    
    	        log.info("\n Model Summary \n" + model.summary());
    
    	        log.info("Train model...");
    	        model.setListeners(new ScoreIterationListener(1));//print score after each iteration on stout 
    	        //model.setListeners(new StatsListener(statsStorage));// visit http://localhost:9000 to track the training process
    	        for (int i = 0; i < EPOCHS; i++) {
    	            train.reset();
    	            while (train.hasNext()) {
    	                model.fit(train.next());
    	            }
    	            log.info("*** Completed epoch {} ***", i);
    	        }
    
    	        log.info("*** Saving Model ***");
    	        ModelSerializer.writeModel(model, "C:\\Java\\model.data", true);
    	        log.info("*** Training Done ***");
    	           	        
    	        
    	        URI[] loc = testData.locations();
    	        for (int i = 0; i < loc.length; i++) {
    				URI uri = loc[i];
    				Mat image = Imgcodecs.imread(uri.getPath().substring(1));
    				
    				List<DetectedObject> objs = detect(image, model);
    	        	boolean found = addRects(image, objs);
    	        	String name = String.format("NF_%s.jpg", i);
    	        	
    	        	if(found) {
    	        		name = String.format("F_%s.jpg", i);
    	        	}
    	        	
    	        	Imgcodecs.imwrite("C:\\Java\\test\\" + name, image);
    			}	       
    	    }
    	    
    	    public static List<DetectedObject> detect(Mat image, ComputationGraph model) throws IOException {
    	    	org.deeplearning4j.nn.layers.objdetect.Yolo2OutputLayer yout = (org.deeplearning4j.nn.layers.objdetect.Yolo2OutputLayer) model.getOutputLayer(0);
    	    	 
    	    	 NativeImageLoader loader = new NativeImageLoader(INPUT_HEIGHT, INPUT_WIDTH, CHANNELS);
    	         INDArray ds = loader.asMatrix(image);        
    	         ImagePreProcessingScaler scaler = new ImagePreProcessingScaler(0, 1);
    	         scaler.transform(ds);
    	         
    	         INDArray results = model.outputSingle(ds);
    	         List<DetectedObject> objs = yout.getPredictedObjects(results, 0.4);	         
    	         
    	         return objs;
    	    }
    	    
    	    public static boolean addRects(Mat image, List<DetectedObject> objs) {
    	    	boolean result = false;
    	    	Scalar color = new Scalar(0, 0, 255);
    	    	for (int i = 0; i < objs.size(); i++) {
    				DetectedObject obj = objs.get(i);
    				
    				int imgW = image.width();
    				int imgH = image.height();
    				
    				double[] xy1 = obj.getTopLeftXY();
    				double[] xy2 = obj.getBottomRightXY();
    				
    				int x1 = (int) Math.round(imgW * xy1[0] / GRID_WIDTH);
    				int y1 = (int) Math.round(imgH * xy1[1] / GRID_HEIGHT);
    				int x2 = (int) Math.round(imgW * xy2[0] / GRID_WIDTH);
    				int y2 = (int) Math.round(imgH * xy2[1] / GRID_HEIGHT);
    				
    				if(x1 == 0 && y1 == 0 && x2 == 0 && y2 == 0) {
    					continue;
    				}
    				
    				result = true;
    				Imgproc.rectangle(image, new Point(x1, y1), new Point(x2, y2), color);			
    			}
    	    	
    	    	return result;
    	    }
    }

Dataset can be downloaded from here.

The problem is that when I try to test the model, all the detected objects return NaN.
Capture_NaN

Any hints on this topic would be very helpful.
Thanks.

@lquintero07 I have no idea, but this is a cool project. I tried the basic YOLO example, and I think I'll try this soon. Let me know if you get it to work.

Can you add this:

     // Turn on ND4J's profiler checks for both NaN and Inf results so the op that first
     // produces a bad value can be identified. Call this once, before training starts.
     Nd4j.getExecutioner().setProfilingConfig(ProfilerConfig.builder()
                .checkForNAN(true)
                .checkForINF(true)
                .checkElapsedTime(true)
                .checkLocality(true)
                .checkWorkspaces(true)
                .build());

NaNs are generally an indicator of a bad dataset or bad tuning. I’d be curious to know when it starts producing NaNs.

Hey hi,

Thanks for your response.

I deleted the previous model, so I trained it again with the lines you suggested added at the start of the main method.

This time I don't get NaN values, but the results don't seem good:

F_3

F_5

When I was testing, I saw the NaN values coming from this line: INDArray results = model.outputSingle(ds);