home | news | discuss | issues | license LURE: sym |
|
---|---|
Incrementally watch a stream of symbolstim@menzies.us Use this code to incrementally monitor the frequency counts and entropy of of a stream of numbers. Simple usage:
For those who like less typing:
|
local the=require "config"
|
CreationCreate a new watcher. |
local function create()
return {n=0, nk=0,counts={}, most=0,mode=nil,_ent=nil } end
|
UpdateUpdate a watcher |
local function update(i,x)
if x ~= the.ignore then
i._ent = nil
i.n = i.n + 1
if not i.counts[x] then
i.nk = i.nk + 1
i.counts[x] = 0 end
local seen = i.counts[x] + 1
i.counts[x] = seen
if seen > i.most then
i.most, i.mode = seen,x end end end
local function about(i)
local syms=0
for _,__ in pairs(i.counts) do syms=syms+1 end
return {{pos=i.pos}, {txt=i.txt}, {n=i.n},
{mode=i.mode},{most=i.most},{syms=syms}} end
|
Handy short cout |
local function watch()
local i = create()
return i, function (x) return update(i,x) end end
|
UpdatesUpdate a watcher
|
local function updates(lst,f,i)
i = i or create()
f = f or function (z) return z end
for _,one in pairs(lst) do
update(i, f(one)) end
return i end
|
DistanceUsing a watcher Returns two numbers |
local function distance(i,j,k)
local no = the.ignore
if j==no and k==no then return 0,0
elseif j == the.ignore then return 1,1
elseif k == the.ignore then return 1,1
elseif j==k then return 0,1
else return 1,1
end
end
|
Normalization |
local function norm(i,x)
return x end
local function like(i,x,prior)
return ((i.counts[x] or 0) + the.nb.m*prior) /
(i.n + the.nb.m) end
|
Discretizing a symnolJust returns that symbol.
If this |
local function discretize(i,x)
local r
if x==the.ignore then return x end
if not i.bins then return x end
for _,b in pairs(i.bins) do
r = b.label
if x<=b.most then break end end
return r end
|
Entropy |
|
Takes a little time to calculate. So we cache the
result (and note that |
local function ent(i)
if i._ent == nil then
local e = 0
for _,f in pairs(i.counts) do
e = e - (f/i.n) * math.log((f/i.n), 2)
end
i._ent = e
end
return i._ent end
|
KENeeded for Fayyad-Irrani. Is this even used? |
local function ke(i)
local e,k = 0,0
for _,f in pairs(i.counts) do
e = e + (f/i.n) * math.log((f/i.n), 2)
k = k + 1
end
e = -1*e
return k,e,k*e end
|
External Interface |
return {create=create, update=update, updates=updates,
distance=distance,about=about,like=like,
ent=ent, spread=ent, discretize=discretize,
ke=ke,watch=watch}
|
LegalLURE, Copyright (c) 2017, Tim Menzies All rights reserved, BSD 3-Clause License Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
|