exploring the enron email dataset with kiji and hive

●○○○○

●●●

-- Expose the Kiji table at kiji://.env/enron_email/emails to Hive as the
-- external table `emails`, backed by the Kiji storage handler.
-- Every Hive column is a STRUCT carrying a timestamp (`ts`) and the cell
-- payload (`value`). The six entries of 'kiji.columns' are listed in the same
-- order as the six Hive columns (mid, dateLong, fromStr, toStr, subject, body).
-- NOTE(review): the [0] suffix presumably selects the most recent cell of each
-- Kiji column -- confirm against the Kiji Hive adapter documentation.
CREATE EXTERNAL TABLE emails (
    mid      STRUCT<ts: TIMESTAMP, value: STRING>,
    dateLong STRUCT<ts: TIMESTAMP, value: BIGINT>,
    fromStr  STRUCT<ts: TIMESTAMP, value: STRING>,
    toStr    STRUCT<ts: TIMESTAMP, value: STRING>,
    subject  STRUCT<ts: TIMESTAMP, value: STRING>,
    body     STRUCT<ts: TIMESTAMP, value: STRING>
)
STORED BY 'org.kiji.hive.KijiTableStorageHandler'
WITH SERDEPROPERTIES (
    -- Property values must be single string literals; Hive DDL does not
    -- evaluate expressions such as 'a' + 'b' here.
    'kiji.columns' = 'info:mid[0],info:date[0],info:from[0],info:to[0],info:subject[0],info:body[0]'
)
TBLPROPERTIES (
    -- No surrounding whitespace inside the URI literal.
    'kiji.table.uri' = 'kiji://.env/enron_email/emails'
);

-- Ten fromStr values with the highest row counts in `emails`, largest first.
SELECT fromStr.value AS fromStr,
       COUNT(1)      AS count
  FROM emails
 GROUP BY fromStr.value
 ORDER BY count DESC
 LIMIT 10;

-- Ten most frequent (sender, recipient) pairs.
-- toStr.value is split on ',' and exploded so that each resulting element
-- becomes its own row; elements are trimmed before grouping so that
-- surrounding whitespace does not create distinct groups.
SELECT
    fromStr.value    AS fromStr,
    trim(splitToStr) AS toStr,
    count(1)         AS count
FROM emails
LATERAL VIEW explode(split(toStr.value, ',')) tos AS splitToStr
GROUP BY
    fromStr.value,
    trim(splitToStr)
ORDER BY count DESC
LIMIT 10;

●●

●○

User Emails

Emails Table Sentiment

Producer

-- Per-week sentiment statistics (mean, standard deviation, message count) for
-- rows whose fromStr.value, with everything up to and including the last '@'
-- removed, equals 'enron.com'.
-- Requires `emails` to expose a `sentiment` STRUCT column with a numeric `value`.
-- weeknum = 52 * (years since 1999) + week-of-year, both taken from datelong.ts
-- (the struct's timestamp field, not datelong.value).
-- NOTE(review): weekofyear() can report week 52/53 for early-January dates and
-- week 1 for late-December dates, which would place those rows in a bucket
-- about a year off -- confirm whether that matters for this analysis.
SELECT
    ((year(datelong.ts) - 1999) * 52 + weekofyear(datelong.ts)) AS weeknum,
    avg(sentiment.value)    AS avgsentiment,
    stddev(sentiment.value) AS stddevsentiment,
    count(1)                AS nummessages
FROM emails
WHERE regexp_replace(fromStr.value, '.*@', '') = 'enron.com'
GROUP BY ((year(datelong.ts) - 1999) * 52 + weekofyear(datelong.ts));

-- Total sentiment per lower-cased word, most negative total first.
-- The inner query emits one row per word produced by
-- sentences(body.value)[0], paired with that email's sentiment.value, for
-- rows whose fromStr.value with everything up to and including the last '@'
-- removed equals 'enron.com'. The outer query sums the sentiment per word.
-- Requires `emails` to expose a `sentiment` STRUCT column with a numeric `value`.
-- NOTE(review): the [0] index keeps only the first element returned by
-- sentences() -- presumably the first sentence of the body; confirm that
-- restricting to it is intended.
SELECT
    lword          AS word,
    sum(sentiment) AS totalsentiment
FROM (
    SELECT
        mid.value       AS mid,
        lower(word)     AS lword,
        sentiment.value AS sentiment
    FROM emails
    LATERAL VIEW explode(sentences(body.value)[0]) wds AS word
    WHERE regexp_replace(fromStr.value, '.*@', '') = 'enron.com'
) subquery
GROUP BY lword
ORDER BY totalsentiment ASC;

●●

exploring the enron email dataset with kiji and hive

Technology

enron enron

enron final2

hive hive hive hive

introduction to enron communications & epowered tm media...

enron presentation

paper enron

enron arreglado

enron -...

hive 101: hive query language

kiji russie

enron scandal

the collapse de-enron; collapse of enron

hive honeyscribe hive - princesshay

enron, social network analysis, dynamic social networks...

enron fall

-hive- hive insulation valuation experiment

kiji la perle de la carélie la traversée du lac onega se...

eglises - kiji

kiji cassandra la june 2014 - v02 clint-kelly

enron scam