home | news | discuss | issues | license LURE: tbl |
|
---|---|
Handling Tables of Datatim@menzies.us One of my core data structures is
Each column in
Note that there is nothing hard-wired in this code about What
Within
Note that a column can have multiple categories (see
Note that each column gets one, and only one |
local the=require "config"
local NUM=require "num"
local SYM=require "sym"
local ROW=require "row"
local CSV=require "csv"
local LST=require "lists"
local SUPER=require "superrange"
|
Create a new table |
local function create() return {
rows={},
spec={},
goals={} , less={}, more={},
name={},
all={nums={}, syms={}, cols={}}, -- all columns
x ={nums={}, syms={}, cols={}}, -- all independent columns
y ={nums={}, syms={}, cols={}} -- all depednent columns
} end
|
Define column categories.Input the column header |
local function categories(i,txt)
local spec = {
{when= "%$", what= NUM, weight= 1, where= {i.all.cols, i.x.cols, i.all.nums, i.x.nums}},
{when= "<", what= NUM, weight=-1, where= {i.all.cols, i.y.cols, i.all.nums, i.goals, i.less, i.y.nums}},
{when= ">", what= NUM, weight= 1, where= {i.all.cols, i.y.cols, i.all.nums, i.goals, i.more, i.y.nums}},
{when= "!", what= SYM, weight= 1, where= {i.all.cols, i.y.syms, i.y.cols, i.all.syms}},
{when= "", what= SYM, weight= 1, where= {i.all.cols, i.x.cols, i.all.syms, i.x.syms}}}
for _,want in pairs(spec) do
if string.find(txt,want.when) ~= nil then
return want.what, want.weight, want.where end end end
|
Define table column headersCalled on the first row ever passed to a table. |
local function header(i,cells)
i.spec = cells
for col,cell in ipairs(cells) do
local what, weight, wheres = categories(i,cell)
local one = what.create()
one.pos = col
one.txt = cell
one.what = what
one.weight= weight
i.name[one.txt] = one
for _,where in pairs(wheres) do
where[ #where + 1 ] = one end end
return i end
|
Define a row of dataCalled on every row after row one.
Note that such rows are stored in the |
local function data(i,cells,old)
local new = ROW.update(ROW.create(),cells,i)
i.rows[#i.rows+1] = new
if old then
new.id=old.id end
return new end
|
Update a table with a row of cellsCalls |
local function update(i,cells)
local fn= #i.spec==0 and header or data
return fn(i,cells) end
|
Copy a tableIf If If Note that when rows are copied as deep copies; i.e. the contents move but not the container (so updates to the old table will not effect the new table). |
local function copy(i, from)
local j=create()
header(j, LST.copy(i.spec))
if from=="full" then
for _,r in pairs(i.rows) do
data(j, LST.copy(r.cells)) end
elseif type(from)=='number' then
LST.shuffle(i.rows)
for k=1,from do
data(j, LST.copy(i.rows[k].cells)) end
elseif type(from)=='table' then
for _,r in pairs(from) do
data(j, LST.copy(r.cells),r) end end
return j
end
local function show(i)
local function worker(t)
if #t > 0 then
print("")
local rows={{}}
for _,head in pairs(t) do
local about = head.what.about(head)
local row,cols={},{}
for i=1,#about do
for col,val in pairs(about[i]) do
cols[#cols+1] = col
row[#row+1] = val end end
rows[1] = cols
rows[#rows+1] = row
end
LST.mprint(rows," | ") end end
worker(i.all.syms)
worker(i.all.nums) end
|
near,far |
local function nearfar(i,row1, other, best, better)
out = row1
for _,row2 in pairs(other or i.rows) do
if row1.id ~= row2.id then
d = ROW.distance(row1,row2,i)
if better(d,best) then
best,out = d, row2 end end end
return out end
local function furthest(i,row1, other)
return nearfar(i, row1, other,
-1, function (x,y) return x > y end) end
local function nearest(i,row1, other)
return nearfar(i, row1, other,
10^32, function (x,y) return x < y end) end
local function knn(i,row1,k,other)
k= k or the.tbl.k
tmp,out = {},{}
for _,row2 in pairs(other or i.rows) do
tmp[#tmp + 1] = {d= ROW.distance(row1,row2), row=row2} end
table.sort(tmp, function (x,y) return x.d < y.d end)
for j=1,k do out[j] = tmp[j] end
return out end
|
suspect this is broken |
local function discretizeCells(i,cells)
out=LST.copy(cells)
for _,head in pairs(i.x.cols) do
out[head.pos] = SYM.discretize(head,cells[head.pos]) end
return out
end
|
Goal functionsFeature extractors from a ROW. |
local function klass(t,cells)
return cells[t.y.syms[1].pos] end
local function klassfn(t)
return function (r) return klass(t,r.cells) end end
local function goallast(t,n)
return function(r)
return LST.last(r.cells ) end end
local function goaln(t,n)
return function(r)
return r.cells [ t.y.nums[n].pos ] end end
local function goal1(t)
return goaln(t,1) end
local function dom(t)
local b4={}
return function (r)
if not b4[r.id] then
b4[r.id]=ROW.dominate(r,t) end
return b4[r.id] end end
local funs={goaln=goaln, goal1=goal1,dom=dom,goallast=goallast}
|
Convert a numeric header to symbolic |
local function discretizeHeaders(spec)
return LST.collect(spec,
function(txt)
return string.gsub(txt,"%$","") end) end
|
Discretize rowsReturn a new table with the numerics broken up into
symbolic ranges.
Each header on the new table gets an extra variable, |
local function discretizeRows(i, y)
local j = header(create(),
discretizeHeaders(i.spec))
local yfun = funs[y](i)
for _,head in pairs(i.x.nums) do
local cooked = j.all.cols[head.pos]
local function x(r) return r.cells[cooked.pos] end
cooked.bins = SUPER(i.rows,x, yfun)
end
for _,r in pairs(i.rows) do
local tmp=LST.copy(r.cells)
for _,head in pairs(i.x.nums) do
local cooked = j.all.cols[head.pos]
local old = tmp[cooked.pos]
local new = SYM.discretize(cooked, old)
tmp[cooked.pos] = new end
data(j,tmp,r) end
return j end
|
Create a table from a csv file |
local function fromCsv(f)
local out = create()
CSV(f, function (cells) update(out,cells) end)
return out end
|
External interface |
return {copy=copy, header=header,update=update,
furthest=furthest, nearest=nearest, knn=knn,
createPrim=create,show=show,
create=fromCsv,klassfn=klassfn, klass=klass,
goal1=goal1,dom=dom,goaln=goaln,goallast=goallast,
discretize=discretizeRows,discretize1=discretizeCells}
|
LegalLURE, Copyright (c) 2017, Tim Menzies All rights reserved, BSD 3-Clause License Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
|