Recurrent Neural Networks

Overview

Recurrent Neural Networks (RNNs) are a class of neural networks designed to handle sequential data. Unlike feed‑forward networks, RNNs maintain an internal state that captures information about previous inputs, making them ideal for tasks such as language modeling, time‑series prediction, and music generation.

Theory

An RNN processes a sequence x₁, x₂, …, x_T one time step at a time, updating a hidden state with the same set of weights at every step:

hₜ = f(W_hh·h₍ₜ₋₁₎ + W_xh·xₜ + b_h)
yₜ = g(W_hy·hₜ + b_y)

where hₜ is the hidden state at step t, W_hh, W_xh, and W_hy are weight matrices shared across all time steps, b_h and b_y are bias vectors, f is a non‑linear activation (typically tanh or ReLU), and g produces the output, e.g. a softmax over classes.
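
Only the hidden state changes from step to step; the weights are reused throughout. As a minimal sketch (with illustrative sizes and random weights, unrelated to the model built later in this section), one step of this recurrence can be written directly with low‑level TensorFlow.js ops:

// Illustrative sizes; any values would do for this sketch.
const inputSize = 8, hiddenSize = 16;

// Weights shared across all time steps (randomly initialised here).
const Wxh = tf.randomNormal([inputSize, hiddenSize]);   // input-to-hidden
const Whh = tf.randomNormal([hiddenSize, hiddenSize]);  // hidden-to-hidden
const bh  = tf.zeros([hiddenSize]);                      // hidden bias

// One recurrence step (row-vector convention): hₜ = tanh(h₍ₜ₋₁₎·W_hh + xₜ·W_xh + b_h)
function rnnStep(hPrev, xT) {
    return tf.tanh(tf.matMul(hPrev, Whh).add(tf.matMul(xT, Wxh)).add(bh));
}

// Unroll over a short random sequence; the hidden state carries context forward.
const sequence = [1, 2, 3].map(() => tf.randomNormal([1, inputSize]));
let h = tf.zeros([1, hiddenSize]);
for (const xT of sequence) {
    h = rnnStep(h, xT);
}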

Common variants:

- Vanilla (Elman) RNN: the basic recurrence above; it struggles with long sequences because gradients vanish over many steps.
- Long Short-Term Memory (LSTM): adds input, forget, and output gates plus a cell state, so information can persist over long spans.
- Gated Recurrent Unit (GRU): a lighter gated variant with update and reset gates and fewer parameters.
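
In TensorFlow.js each of these is available as a drop‑in recurrent layer. A quick illustration of the constructors (the units value here is arbitrary):

const vanilla = tf.layers.simpleRNN({units: 128});  // basic tanh recurrence
const lstm    = tf.layers.lstm({units: 128});       // gated cell state for long-range context
const gru     = tf.layers.gru({units: 128});        // simpler gating, fewer parameters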

Implementation with TensorFlow.js

The example below builds a simple character‑level language model using an LSTM layer.

// HTML element IDs: txtInput, btnGenerate, outResult
async function trainModel(text) {
    // Build the character vocabulary and index lookup tables.
    const chars = [...new Set(text)];
    const char2idx = Object.fromEntries(chars.map((c, i) => [c, i]));
    const idx2char = chars;
    const seqLen = 100;
    const step = 1;

    if (text.length <= seqLen) {
        throw new Error(`Need more than ${seqLen} characters of training text.`);
    }

    // Slide a window over the text: each input is seqLen character indices,
    // and the target is the character that immediately follows the window.
    const inputs = [];
    const targets = [];
    for (let i = 0; i < text.length - seqLen; i += step) {
        const inputSeq = text.slice(i, i + seqLen).split('').map(c => char2idx[c]);
        const targetChar = char2idx[text[i + seqLen]];
        inputs.push(inputSeq);
        targets.push(targetChar);
    }

    const xs = tf.tensor2d(inputs, [inputs.length, seqLen], 'int32');
    // One-hot encode the targets and cast to float32 for the cross-entropy loss.
    const ys = tf.oneHot(tf.tensor1d(targets, 'int32'), chars.length).toFloat();

    // Embedding -> single LSTM layer -> softmax over the character vocabulary.
    const model = tf.sequential();
    model.add(tf.layers.embedding({inputDim: chars.length, outputDim: 64, inputLength: seqLen}));
    model.add(tf.layers.lstm({units: 128}));
    model.add(tf.layers.dense({units: chars.length, activation: 'softmax'}));
    model.compile({optimizer: 'adam', loss: 'categoricalCrossentropy'});

    await model.fit(xs, ys, {
        epochs: 20,
        batchSize: 64,
        callbacks: tf.callbacks.earlyStopping({monitor: 'loss', patience: 3})
    });
    // Free the training tensors once fitting is done.
    xs.dispose();
    ys.dispose();
    return {model, char2idx, idx2char, seqLen};
}
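
Once training finishes, the returned model can be inspected from the browser console, for example (assuming trained holds the object returned by trainModel):

// Prints each layer's output shape and parameter count to the console.
trained.model.summary();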

function sample(model, char2idx, idx2char, seed, length = 200, seqLen = 100) {
    // Map the seed to character indices; unknown characters fall back to index 0.
    let input = seed.split('').map(c => char2idx[c] ?? 0);
    let result = seed;
    for (let i = 0; i < length; i++) {
        // tf.tidy disposes the intermediate tensors created at each step.
        const nextIdx = tf.tidy(() => {
            // Use the last seqLen indices, left-padding with 0 if the context is short,
            // so the shape matches the embedding layer's inputLength.
            const context = input.slice(-seqLen);
            while (context.length < seqLen) context.unshift(0);
            const tensor = tf.tensor2d([context], [1, seqLen], 'int32');
            const preds = model.predict(tensor).squeeze();
            // preds is already a probability distribution (softmax output),
            // so tell multinomial the values are normalized.
            return tf.multinomial(preds, 1, null, true).dataSync()[0];
        });
        const nextChar = idx2char[nextIdx];
        result += nextChar;
        input.push(nextIdx);
    }
    return result;
}

// UI handlers
let trained = null;
document.getElementById('btnGenerate').addEventListener('click', async () => {
    const txt = document.getElementById('txtInput').value;
    if (!txt) {
        document.getElementById('outResult').textContent = 'Please paste some training text first.';
        return;
    }
    // Train once on the first click, then reuse the model for later generations.
    if (!trained) {
        document.getElementById('outResult').textContent = 'Training...';
        trained = await trainModel(txt);
    }
    // Seed generation with the tail of the training text.
    const seed = txt.slice(-trained.seqLen);
    const generated = sample(trained.model, trained.char2idx, trained.idx2char, seed, 200, trained.seqLen);
    document.getElementById('outResult').textContent = generated;
});
