After many searches on the internet I managed to make a network that looks a bit like the one in the image. Experimenting I found a new problem. This is the code that I have been testing. But after running it with the same dataIterator that I have used in other models with the same data, I get an error and I’m not sure how to solve it
import com.trusty.NewsIterator
import org.apache.commons.io.FilenameUtils
import org.deeplearning4j.core.storage.StatsStorage
import org.deeplearning4j.models.word2vec.Word2Vec
import org.deeplearning4j.nn.conf.ComputationGraphConfiguration
import org.deeplearning4j.nn.conf.ConvolutionMode
import org.deeplearning4j.nn.conf.NeuralNetConfiguration
import org.deeplearning4j.nn.conf.WorkspaceMode
import org.deeplearning4j.nn.conf.graph.MergeVertex
import org.deeplearning4j.nn.conf.layers.*
import org.deeplearning4j.nn.graph.ComputationGraph
import org.deeplearning4j.nn.weights.WeightInit
import org.deeplearning4j.optimize.api.InvocationType
import org.deeplearning4j.optimize.listeners.EvaluativeListener
import org.deeplearning4j.optimize.listeners.ScoreIterationListener
import org.deeplearning4j.ui.api.UIServer
import org.deeplearning4j.ui.model.stats.StatsListener
import org.deeplearning4j.ui.model.storage.InMemoryStatsStorage
import org.nd4j.evaluation.classification.Evaluation
import org.nd4j.linalg.activations.Activation
import org.nd4j.linalg.learning.config.AdaGrad
import org.nd4j.linalg.lossfunctions.LossFunctions
import org.slf4j.LoggerFactory
import java.io.BufferedOutputStream
import java.io.File
import java.io.FileOutputStream
class FNDNet(
val wordVectors: Word2Vec,
val vectorSize: Int = wordVectors.getWordVector(wordVectors.vocab().wordAtIndex(0)).size,
val batchSize: Int = 64,
val nEpochs: Int = 300,
val cnnLayerFeatureMaps: Int = 100, //Number of feature maps / channels / depth for each CNN layer
val iTrain: NewsIterator,
val iTest: NewsIterator,
val inputNeurons: Int = wordVectors.getWordVector(wordVectors.vocab().wordAtIndex(0)).size,
val outputs: Int = iTrain.labels.size,
) {
private val log = LoggerFactory.getLogger(FNDNet::class.java)
private fun configuration(): ComputationGraphConfiguration {
return NeuralNetConfiguration.Builder()
.trainingWorkspaceMode(WorkspaceMode.ENABLED).inferenceWorkspaceMode(WorkspaceMode.ENABLED)
.weightInit(WeightInit.RELU)
.activation(Activation.LEAKYRELU)
.updater(AdaGrad(0.0018))
.convolutionMode(ConvolutionMode.Same) //This is important so we can 'stack' the results later
.l2(0.0001)
.graphBuilder()
.addInputs("input")
.addLayer(
"cnn3", Convolution1DLayer.Builder()
.kernelSize(3)
.stride(1)
.nIn(inputNeurons)
.nOut(cnnLayerFeatureMaps)
.build(), "input"
)
.addLayer(
"cnn4", Convolution1DLayer.Builder()
.kernelSize(4)
.stride(1)
.nIn(inputNeurons)
.nOut(cnnLayerFeatureMaps)
.build(), "input"
)
.addLayer(
"cnn5", Convolution1DLayer.Builder()
.kernelSize(5)
.stride(1)
.nIn(inputNeurons)
.nOut(cnnLayerFeatureMaps)
.build(), "input"
)
.addVertex("merge", MergeVertex(), "cnn3", "cnn4", "cnn5") //Perform depth concatenation
.addLayer(
"globalPool", GlobalPoolingLayer.Builder()
.poolingType(PoolingType.MAX)
.dropOut(0.5)
.build(), "merge"
).addLayer(
"dense",
DenseLayer.Builder()
.nIn(3 * cnnLayerFeatureMaps)
.nOut(128)
.activation(Activation.RELU)
.weightInit(WeightInit.XAVIER)
.dropOut(0.5)
.build(), "globalPool"
)
.addLayer(
"out", OutputLayer.Builder()
.lossFunction(LossFunctions.LossFunction.MSE)
.activation(Activation.SOFTMAX)
.nIn(128)
.nOut(outputs) //2 classes: positive or negative
.build(), "dense"
)
.setOutputs("out")
.build()
}
fun runModel(pathnameToSaveNetwork: String = "FakesModelFNDNet.net") {
// Obtain configuration
val configuration = configuration()
// Initialize NeuronalNetwork
val net: ComputationGraph = ComputationGraph(configuration)
//Initialize the user interface backend
val uiServer: UIServer = UIServer.getInstance()
//Configure where the network information (gradients, score vs. time etc) is to be stored. Here: store in memory.
val statsStorage: StatsStorage =
InMemoryStatsStorage() // FileStatsStorage(File("statsStorage")) //Alternative: FileStatsStorage(File), for saving and loading later
//Attach the StatsStorage instance to the UI: this allows the contents of the StatsStorage to be visualized
uiServer.attach(statsStorage)
//Then add the StatsListener to collect this information from the network, as it trains
net.setListeners(StatsListener(statsStorage))
net.init()
log.info("Number of parameters by layer:")
for (layer in net.layers) {
log.info("\t ${layer.conf().layer.layerName} \t ${layer.numParams()}")
}
log.info("Network summary : ${net.summary()}")
log.info("Starting training...")
net.setListeners(ScoreIterationListener(1), EvaluativeListener(iTest, 1, InvocationType.EPOCH_END))
net.fit(iTrain, nEpochs)
log.info("Evaluating...")
val eval: Evaluation = net.evaluate(iTest)
log.info(eval.stats())
net.save(File(pathnameToSaveNetwork), true)
BufferedOutputStream(FileOutputStream(File("${FilenameUtils.removeExtension(pathnameToSaveNetwork)}.stats"))).use { stream ->
stream.write(eval.stats().toByteArray())
stream.close()
}
log.info("----- Example complete -----")
}
}
The data iterator class
class NewsIterator private constructor(
private val dataDirectory: String,
wordVectors: WordVectors,
private val batchSize: Int,
truncateLength: Int,
tokenizerFactory: TokenizerFactory,
private var train: Boolean,
) : DataSetIterator {
private val wordVectors: WordVectors
private val vectorSize: Int = wordVectors.getWordVector(wordVectors.vocab().wordAtIndex(0)).size
private val truncateLength: Int
var maxLength = 0
private set
private var cursor = 0
private var totalNews = 0
private val tokenizerFactory: TokenizerFactory
private val labels: MutableList<String>
private var trainArticles: MutableList<Article> = ArrayList()
private var testArticles: MutableList<Article> = ArrayList()
override fun next(num: Int): DataSet {
if (cursor >= totalNews) throw NoSuchElementException()
return nextDataSet(num)
}
private fun nextDataSet(num: Int): DataSet {
val newsArticle: MutableList<Article> = ArrayList(num)
val veracity = IntArray(num)
var i = 0
while (i < num && cursor < this.totalNews) {
val article = if (train) trainArticles[cursor] else testArticles[cursor]
newsArticle.add(article)
veracity[i] = when (article.veracity) {
Veracity.MOSTLY_TRUE -> 0
Veracity.MOSTLY_FALSE -> 1
Veracity.MIXTURE_OF_TRUE_AND_FALSE -> 2
Veracity.NO_FACTUAL_CONTENT -> 3
else -> 0
}
i++
cursor++
}
//Second: tokenize news and filter out unknown words
val allTokens: MutableList<List<String>> = ArrayList<List<String>>(newsArticle.size)
maxLength = 0
for (article in newsArticle) {
val tokens = tokenizerFactory.create(article.mainText).tokens
val tokensFiltered: MutableList<String> = ArrayList()
for (t in tokens) {
if (wordVectors.hasWord(t)) tokensFiltered.add(t)
}
allTokens.add(tokensFiltered)
maxLength = maxLength.coerceAtLeast(tokensFiltered.size) // the max length
}
//If longest news exceeds 'truncateLength': only take the first 'truncateLength' words
if (maxLength > truncateLength) maxLength = truncateLength
//Create data for training
//Here: we have newsArticle.size() examples of varying lengths
val features: INDArray = Nd4j.create(newsArticle.size, vectorSize, maxLength)
//Three labels for veracity: mostly true, mixture of true and false, mostly false, no factual content
val labels: INDArray = Nd4j.create(newsArticle.size, 4, maxLength)
// //Because we are dealing with news of different lengths and only one output at the final time step: use padding arrays
// //Mask arrays contain 1 if data is present at that time step for that example, or 0 if data is just padding
val featuresMask = Nd4j.zeros(newsArticle.size, maxLength)
val labelsMask = Nd4j.zeros(newsArticle.size, maxLength)
val temp = IntArray(2)
for (i in newsArticle.indices) {
val tokens = allTokens[i]
temp[0] = i
//Get word vectors for each word in news, and put them in the training data
var j = 0
while (j < tokens.size && j < maxLength) {
val token = tokens[j]
val vector = wordVectors.getWordVectorMatrix(token)
features.put(
arrayOf(
NDArrayIndex.point(i.toLong()),
NDArrayIndex.all(),
NDArrayIndex.point(j.toLong())
), vector
)
temp[1] = j
featuresMask.putScalar(temp, 1.0)
j++
}
val idx: Int = veracity[i]
val lastIdx = tokens.size.coerceAtMost(maxLength)
labels.putScalar(intArrayOf(i, idx, lastIdx - 1), 1.0)
labelsMask.putScalar(intArrayOf(i, lastIdx - 1), 1.0)
}
return DataSet(features, labels, featuresMask, labelsMask)
}
/* This function is for load data from xmls files*/
private fun populateData() {
val articleFile = File("${dataDirectory}${File.separator}")
val articles: List<Article> = articleFile.walk()
.maxDepth(1) //The directory hierarchy to be traversed is 1, ie no need to check subdirectories
.filter { it.isFile } //Select only files, do not process folders
.filter { it.extension in listOf("xml") } //Select a file with the extension xml
.map { xmlFile ->
//give package name as argument
val context = JAXBContext.newInstance("com.trusty.document")
val unmarshaller = context.createUnmarshaller()
val xmlData = unmarshaller.unmarshal(xmlFile) as JAXBElement<*>
xmlData.value as Article
}.toList()
println("Start to split data in train and test")
val percentage = articles.size * 80 / 100
articles.groupBy { it.veracity }.forEach { group ->
val chunked = group.value.chunked(percentage)
val splitArticles = when (group.key) {
Veracity.MOSTLY_TRUE -> Pair(chunked.first(), chunked.last())
Veracity.MOSTLY_FALSE -> Pair(chunked.first(), chunked.last())
Veracity.MIXTURE_OF_TRUE_AND_FALSE -> Pair(chunked.first(), chunked.last())
Veracity.NO_FACTUAL_CONTENT -> Pair(chunked.first(), chunked.last())
else -> Pair(chunked.first(), chunked.last())
}
trainArticles.addAll(splitArticles.first)
testArticles.addAll(splitArticles.second)
}
totalNews = if (train) trainArticles.size else testArticles.size
}
override fun inputColumns(): Int {
return vectorSize
}
override fun totalOutcomes(): Int {
return 4 // number of labels for dataset
}
override fun reset() {
cursor = 0
}
override fun resetSupported(): Boolean {
return true
}
override fun asyncSupported(): Boolean {
return true
}
override fun batch(): Int {
return batchSize
}
override fun setPreProcessor(preProcessor: DataSetPreProcessor) {
throw UnsupportedOperationException()
}
override fun getLabels(): List<String> {
return labels
}
override fun hasNext(): Boolean {
return cursor < totalNews
}
override fun next(): DataSet {
return next(batchSize)
}
override fun remove() {}
override fun getPreProcessor(): DataSetPreProcessor {
throw UnsupportedOperationException("Not implemented")
}
class Builder internal constructor() {
private lateinit var dataDirectory: String
private lateinit var wordVectors: WordVectors
private var batchSize = 0
private var truncateLength = 0
private lateinit var tokenizerFactory: TokenizerFactory
private var train = false
fun dataDirectory(dataDirectory: String): Builder {
this.dataDirectory = dataDirectory
return this
}
fun wordVectors(wordVectors: WordVectors): Builder {
this.wordVectors = wordVectors
return this
}
fun batchSize(batchSize: Int): Builder {
this.batchSize = batchSize
return this
}
fun truncateLength(truncateLength: Int): Builder {
this.truncateLength = truncateLength
return this
}
fun train(train: Boolean): Builder {
this.train = train
return this
}
fun tokenizerFactory(tokenizerFactory: TokenizerFactory): Builder {
this.tokenizerFactory = tokenizerFactory
return this
}
fun build(): NewsIterator {
return NewsIterator(
dataDirectory,
wordVectors,
batchSize,
truncateLength,
tokenizerFactory,
train
)
}
override fun toString(): String {
return ("com.trusty.NewsIterator.Builder(dataDirectory= $dataDirectory , wordVectors= $wordVectors , batchSize= $batchSize , truncateLength= $truncateLength, train=$train )")
}
}
companion object {
fun Builder(): Builder {
return NewsIterator.Builder()
}
}
/**
* - initialize various class variables
* - calls populateData function to load news data in categoryData vector
* - also populates labels (i.e. category related information) in labels class variable
*/
init {
this.wordVectors = wordVectors
this.truncateLength = truncateLength
this.tokenizerFactory = tokenizerFactory
populateData()
labels = arrayListOf("mostly true", "mixture of true and false", "mostly false", "no factual content")
}
}
The summary of the network
==========================================================================================================
VertexName (VertexType) nIn,nOut TotalParams ParamsShape Vertex Inputs
==========================================================================================================
input (InputVertex) -,- - - -
cnn3 (Convolution1DLayer) 300,100 90,100 W:{100,300,3,1}, b:{1,100} [input]
cnn4 (Convolution1DLayer) 300,100 120,100 W:{100,300,4,1}, b:{1,100} [input]
cnn5 (Convolution1DLayer) 300,100 150,100 W:{100,300,5,1}, b:{1,100} [input]
merge (MergeVertex) -,- - - [cnn3, cnn4, cnn5]
globalPool (GlobalPoolingLayer) -,- 0 - [merge]
dense (DenseLayer) 300,128 38,528 W:{300,128}, b:{1,128} [globalPool]
out (OutputLayer) 128,4 516 W:{128,4}, b:{1,4} [dense]
----------------------------------------------------------------------------------------------------------
Total Parameters: 399,344
Trainable Parameters: 399,344
Frozen Parameters: 0
==========================================================================================================
The error that throws me is the following:
Exception in thread "main" java.lang.IllegalStateException: Invalid mask array: per-example masking should be a column vector, per output masking arrays should be the same shape as the output/labels arrays. Mask shape: [50, 300], output shape: [50, 4](layer name: out, layer index: 7, layer type: OutputLayer)
at org.deeplearning4j.nn.layers.BaseOutputLayer.applyMask(BaseOutputLayer.java:342)
at org.deeplearning4j.nn.layers.BaseLayer.preOutputWithPreNorm(BaseLayer.java:331)
at org.deeplearning4j.nn.layers.BaseLayer.preOutput(BaseLayer.java:291)
at org.deeplearning4j.nn.layers.BaseOutputLayer.preOutput2d(BaseOutputLayer.java:328)
at org.deeplearning4j.nn.layers.BaseOutputLayer.backpropGradient(BaseOutputLayer.java:147)
at org.deeplearning4j.nn.graph.vertex.impl.LayerVertex.doBackward(LayerVertex.java:149)
at org.deeplearning4j.nn.graph.ComputationGraph.calcBackpropGradients(ComputationGraph.java:2713)
at org.deeplearning4j.nn.graph.ComputationGraph.computeGradientAndScore(ComputationGraph.java:1382)
at org.deeplearning4j.nn.graph.ComputationGraph.computeGradientAndScore(ComputationGraph.java:1342)
at org.deeplearning4j.optimize.solvers.BaseOptimizer.gradientAndScore(BaseOptimizer.java:170)
at org.deeplearning4j.optimize.solvers.StochasticGradientDescent.optimize(StochasticGradientDescent.java:63)
at org.deeplearning4j.optimize.Solver.optimize(Solver.java:52)
at org.deeplearning4j.nn.graph.ComputationGraph.fitHelper(ComputationGraph.java:1166)
at org.deeplearning4j.nn.graph.ComputationGraph.fit(ComputationGraph.java:1116)
at org.deeplearning4j.nn.graph.ComputationGraph.fit(ComputationGraph.java:1083)
at org.deeplearning4j.nn.graph.ComputationGraph.fit(ComputationGraph.java:1019)
at org.deeplearning4j.nn.graph.ComputationGraph.fit(ComputationGraph.java:1007)
at com.trusty.models.FNDNet.runModel(FNDNet.kt:128)
at com.trusty.models.FNDNet.runModel$default(FNDNet.kt:104)
at com.trusty.TrainNews.main(TrainNews.kt:78)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at com.intellij.rt.execution.application.AppMainV2.main(AppMainV2.java:128)
What can I do to solve my problem. The code is written in kotlin. I hope you can help me.