Nonsense Text

You can make funny nonsense text by counting words. For every word in a text, you count how often it follows the previous word. Then you generate text by choosing a next word according to how often you counted it!

This is called a Markov chain. It’s not really useful until you start tracking words further back. If you keep track of not just the previous word but of the two or more words before it, the text you generate is a little less nonsense and a little more sense. But you need a lot of text to get reliable counts of word sequences. That’s why many people smooth those counts.

But that’s a whole different subject. For now, press the button at the bottom of the page!

# Download the source text, split it into tokens, and publish the token list
# to the browser as `document.text`.
from IPython.display import Javascript
import json, requests, re
url = 'http://www.gutenberg.org/cache/epub/1524/pg1524.txt'
# Drop everything before offset 12891 of the response body (presumably the
# Project Gutenberg preamble -- TODO confirm), then split on runs of
# whitespace. The capturing group keeps every single non-word character as a
# token of its own; filter(None, ...) removes the None and empty-string
# entries that re.split produces around separators.
# NOTE(review): matching a str pattern against `.content` and passing the
# filter(...) result straight to json.dumps looks like it assumes Python 2
# (str body, list result) -- confirm the kernel version.
text = filter(None, re.split(r'\s+|([\W])', requests.get(url).content[12891:]))
# Last expression of the cell: the Javascript object is what the notebook
# displays, which presumably runs the assignment in the browser -- verify.
Javascript('document.text='+json.dumps(text))

/**
 * Build the word-count tables used to generate text.
 *
 * The result maps each word to a table of the words that followed it:
 * `counts[prev][next]` is how often `next` directly followed `prev`, and
 * `counts[prev]['null']` is the total number of words that followed `prev`.
 * The table stored under the key 'null' counts every word regardless of what
 * preceded it (its own 'null' entry equals the number of tokens); it is also
 * the table used for the start of a text.
 *
 * NOTE: 'null' is the reserved key for totals, so a token that is literally
 * the string "null" collides with it and is folded into the totals.
 *
 * @param {string[]} text - tokens in reading order.
 * @returns {Object} nested count tables as described above.
 */
document.make_counts = function(text) {
    // Prototype-less tables, so tokens such as "constructor" or "toString"
    // are ordinary keys instead of resolving to Object.prototype members.
    function new_table() {
        var table = Object.create(null);
        table['null'] = 0;
        return table;
    }

    var counts = Object.create(null);
    var overall = counts['null'] = new_table();
    var previous_word = null;
    for (var i=0; i<text.length; i++) {
        var word = text[i];
        // The first token has no predecessor. Its "previous word" table would
        // be the overall table itself, so counting it here as well would
        // count it twice.
        if (previous_word !== null) {
            var followers = counts[previous_word] = counts[previous_word] || new_table();
            followers['null']++;
            followers[word] = (followers[word] || 0) + 1;
        }
        // Plain increment: the earlier `x+1 | 1` (bitwise OR) produced the
        // sequence 1, 3, 5, ... instead of 1, 2, 3, ...
        overall['null']++;
        overall[word] = (overall[word] || 0) + 1;
        previous_word = word;
    }
    return counts;
};
document.counts = document.make_counts(document.text);

/**
 * Draw one key from a count table at random, weighted by its count.
 *
 * The entry under 'null' is read as the sum of all weights and is never
 * returned itself. Keys are visited in enumeration order and the first key
 * whose running sum of weights exceeds the random threshold wins.
 *
 * @param {Object} choices - maps each candidate to its weight, plus a
 *     'null' entry holding the total.
 * @returns {string|undefined} the chosen key, or undefined when the running
 *     sum never exceeds the threshold (for example, no candidates).
 */
function weighted_choice(choices) {
    var weight_sum = choices['null'];
    var threshold = Math.random() * weight_sum;
    var cumulative = 0;
    for (var candidate in choices) {
        if (candidate === 'null') {
            continue;
        }
        cumulative += choices[candidate];
        if (cumulative > threshold) {
            return candidate;
        }
    }
}

/**
 * Generate up to 100 words of nonsense text from `document.counts`.
 *
 * Starts from the table stored under 'null' and then repeatedly draws the
 * next word from the table of the word that was just emitted.
 *
 * @returns {string} the generated words, each followed by a single space.
 */
document.sample = function() {
    var has_own = Object.prototype.hasOwnProperty;
    var overall = document.counts['null'];
    var previous_word = 'null';
    var text = '';
    for (var i=0; i<100; i++) {
        // A word that only ever appears as the very last token has no table
        // of followers; continue from the overall table instead of passing
        // undefined to weighted_choice.
        var choices = has_own.call(document.counts, previous_word)
            ? document.counts[previous_word]
            : overall;
        var word = weighted_choice(choices);
        if (word === undefined) {
            // Nothing could be drawn (e.g. counts built from an empty text):
            // stop instead of emitting the string "undefined".
            break;
        }
        text += word + ' ';
        previous_word = word;
    }
    return text;
};