To Infinity & Beyond!
Protocols & Lazy Sequences in Node
Part Deux – Sh*t Just Got Real
Bahul Neel Upadhyaya (@bahulneel)
BraveNewTalent
http://github.com/bahulneel
http://www.bravenewtalent.com
cosy.lang
Library
● Protocols
● Sequences (Lazy & Async)
● Argument length dispatch
● Tail recursion
● Object Identity
● Object Metadata
Protocols
● ISeq
● ISync
● IStream
● IPromise
npm install cosy-lang
An Example (TF-IDF)
“Tf–idf, term frequency–inverse document frequency, is a numerical statistic which reflects how important a word is to a document in a collection or corpus. It is often used as a weighting factor in information retrieval and text mining.”
- Wikipedia
Words
/**
 * Normalises a raw token: removes every character that is not an ASCII
 * letter, digit, underscore or hyphen, then lower-cases what remains.
 *
 * @param {string} word - Raw token, possibly wrapped in punctuation.
 * @returns {string} The cleaned, lower-cased token (may be empty).
 */
function stripWord(word) {
    // The `g` flag matters: without it only the FIRST run of disallowed
    // characters is removed, so '"Hello,"' would keep its trailing ',"'.
    return word.replace(/[^-a-zA-Z_0-9]+/g, '').toLowerCase();
}
/**
 * Tests whether a token is made up exclusively of ASCII letters, digits,
 * underscores and hyphens (at least one character).
 *
 * @param {string} word - Token to test.
 * @returns {Array|null} The regex match array (truthy) when the whole token
 *     matches, otherwise null.
 */
function isWord(word) {
    var wordPattern = /^[-a-zA-Z_0-9]+$/;
    var match = wordPattern.exec(word);
    return match;
}
/**
 * Breaks a piece of text into its normalised words.
 *
 * Each whitespace-separated token is passed through stripWord, and tokens
 * that isWord rejects (including the empty strings produced by leading or
 * trailing whitespace) are dropped.
 *
 * NOTE(review): vec, filter and map are not defined in this snippet;
 * presumably they are the cosy-lang sequence helpers - confirm.
 *
 * @param {string} string - Text to tokenise.
 * @returns {*} Whatever vec() produces for the filtered word sequence.
 */
function words(string) {
    // Split on any run of whitespace. Splitting on a single literal space
    // left newlines and tabs inside tokens, where stripWord then removed
    // them and fused the neighbouring words together.
    var tokens = string.split(/\s+/);
    return vec(filter(isWord, map(stripWord, tokens)));
}
// Term Frequencies
/**
 * Computes normalised term frequencies for a sequence of words.
 *
 * Every word is counted, then each count is divided by the highest count
 * seen, so the most frequent word maps to 1.
 *
 * NOTE(review): clone and reduce are not defined in this snippet;
 * presumably they are the cosy-lang helpers (reduce(fn, seed, seq)) - confirm.
 *
 * @param {*} words - Sequence of words accepted by reduce().
 * @returns {Object} Map of word -> count / maxCount (empty for no words).
 */
function tf(words) {
    var hasOwn = Object.prototype.hasOwnProperty;
    var max = 0, counts, word;

    // Reducer: returns a fresh copy of the running counts with `token`
    // incremented, and tracks the largest count seen so far in `max`.
    function countFreq(running, token) {
        var newCounts = clone(running);
        // Test for "not yet a number" rather than "undefined": inherited
        // members such as Object.prototype.constructor are defined, so the
        // word "constructor" used to end up as a string instead of a count.
        if ('number' !== typeof newCounts[token]) newCounts[token] = 0;
        newCounts[token] += 1;
        if (newCounts[token] > max) max = newCounts[token];
        return newCounts;
    }

    counts = reduce(countFreq, {}, words);
    if (max) {
        for (word in counts) {
            // Call hasOwnProperty via the prototype so a counted word named
            // "hasOwnProperty" cannot shadow it.
            if (hasOwn.call(counts, word)) counts[word] /= max;
        }
    }
    return counts;
}
// Inverse Document Frequency
/**
 * Builds a sequence of inverse-document-frequency maps, one per document.
 *
 * Declared with `var`: the bare assignment created an implicit global and
 * throws a ReferenceError in strict mode / ES modules.
 *
 * fn$ receives a table keyed by 1 and 3.
 * NOTE(review): presumably fn$ is cosy-lang's argument-length dispatch, so
 * the key is the number of arguments - confirm. first, rest, cons, merge and
 * lazy are likewise not defined in this snippet.
 *
 * idf(terms)
 *   Starts the computation with an empty frequency map and docCount 1.
 *
 * idf(freq, docCount, terms)
 *   Returns null when first(terms) is null. Otherwise returns
 *   lazy(terms, calcIdf), where calcIdf:
 *     - merges `freq` with first(terms) into docFreq,
 *     - maps every own key of docFreq to docCount / (1 + docFreq[key]),
 *     - conses that map onto idf(docFreq, docCount + 1, rest(terms)).
 */
var idf = fn$({
    1: function (terms) {
        return idf({}, 1, terms);
    },
    3: function (freq, docCount, terms) {
        var hasOwn = Object.prototype.hasOwnProperty;

        function calcIdf(terms) {
            var docFreq, invDocFreq = {}, word;
            docFreq = merge(freq, first(terms));
            for (word in docFreq) {
                // Prototype call so a term named "hasOwnProperty" cannot shadow it.
                if (hasOwn.call(docFreq, word)) invDocFreq[word] = docCount / (1 + docFreq[word]);
            }
            return cons(invDocFreq, idf(docFreq, docCount + 1, rest(terms)));
        }

        // Nothing left to process.
        if (null === first(terms)) return null;
        return lazy(terms, calcIdf);
    }
});
// TF-IDF
/**
 * Computes TF-IDF weights for a sequence of documents.
 *
 * Each document is tokenised with words() and turned into term frequencies
 * with tf(); idf() is run over those term-frequency maps; the two sequences
 * are then combined pairwise by multiplying the values for each word.
 *
 * NOTE(review): map is not defined in this snippet; presumably it is the
 * cosy-lang map, which here is also called with two sequences - confirm.
 *
 * @param {*} documents - Sequence of document strings accepted by map().
 * @returns {*} Result of map() over the per-document { word: tf * idf } maps.
 */
function tfIdf(documents) {
    var hasOwn = Object.prototype.hasOwnProperty;
    var terms = map(tf, map(words, documents));
    var freq = idf(terms);

    // Locals are named so they do not shadow the outer tf / idf / tfIdf.
    function calcTfIdf(termFreq, invDocFreq) {
        var word, weights = {};
        for (word in termFreq) {
            if (hasOwn.call(termFreq, word)) weights[word] = termFreq[word] * invDocFreq[word];
        }
        return weights;
    }

    return map(calcTfIdf, terms, freq);
}
Making Sequences Asynchronous
Source
● Takes an ISeq & ISync as its argument
● Extends IStream
● Registers a tick callback using the ISync interface
● Emits the first element when the callback is called
Sink
● Takes an IStream as its argument
● Extends ISeq & ISync
● First returns stream.skip until the stream emits
● Calls the tick callback when the stream emits
// Socket IO - Server
// Registers "tap" and "emit" implementations for socketServer.Socket under
// lang.stream.IStream:
//   tap  - subscribes to the socket's "message" event and hands each
//          JSON-decoded payload to fn.
//   emit - JSON-encodes val and sends it over the socket.
lang.protocol.extend(
    lang.stream.IStream,
    socketServer.Socket,
    ["tap", function (socket, fn) {
        socket.on("message", function (data) {
            var decoded = JSON.parse(data);
            fn(decoded);
        });
    }],
    ["emit", function (socket, val) {
        var encoded = JSON.stringify(val);
        socket.send(encoded);
    }]
);
/**
 * Starts listening on `port` and registers `callback` for the 'connection'
 * event of the listener's `sockets` object.
 *
 * @param {number} port - Port passed to socketServer.listen.
 * @param {Function} callback - Handler registered for 'connection'.
 */
function server(port, callback) {
    socketServer.listen(port).sockets.on('connection', callback);
}
Socket IO - Server
// Server entry point: listen on port 1234 and, for every connecting socket,
// pipe tfIdf(socket) back into that same socket.
(function () {
    var lang = require('cosy-lang');
    var tfIdf = require('./lib/tf-idf');
    var server = require('./lib/socket-server').server;

    server(1234, function (socket) {
        var results = tfIdf(socket);
        lang.stream.pipe(results, socket);
    });
})();
// SocketIO - Client
// Registers "tap" and "emit" implementations for socketClient.SocketNamespace
// under lang.stream.IStream:
//   tap  - subscribes to the socket's "message" event and hands each
//          JSON-decoded payload to fn.
//   emit - JSON-encodes val and sends it over the socket.
lang.protocol.extend(
    lang.stream.IStream,
    socketClient.SocketNamespace,
    ["tap", function (socket, fn) {
        socket.on("message", function (data) {
            var decoded = JSON.parse(data);
            fn(decoded);
        });
    }],
    ["emit", function (socket, val) {
        var encoded = JSON.stringify(val);
        socket.send(encoded);
    }]
);
/**
 * Connects to `addr` and, when the 'connect' event fires, calls `callback`
 * with the connection object.
 *
 * @param {string} addr - Address passed to socketClient.connect.
 * @param {Function} callback - Called with the connection on 'connect'.
 */
function client(addr, callback) {
    var connection = socketClient.connect(addr);

    function onConnect() {
        callback(connection);
    }

    connection.on('connect', onConnect);
}
Socket IO - Client
// Client entry point: connect to the local server, log every value the
// socket stream delivers, and pipe `documents` into the socket.
client("http://localhost:1234", function (socket) {
    lang.stream.tap(socket, function (val) {
        // Log label corrected from the typo 'td-idf'.
        console.log('tf-idf', val);
    });
    lang.stream.pipe(documents, socket);
});
Demo
#!
Future work
● Queues
● Persistent Data Structures
● Performance
● Graphs
● Persistence
Links
● Cosy
getcosy.org
github.com/organizations/getcosy
● Demo
github.com/bahulneel/cosy-lang-demo
● Me
@bahulneel
github.com/bahulneel
Fin
Questions