
# | Current Path : /home/cgabriel/20_dev/10_dev2017/1310__algorithms/Julia/ |
# Linux ift1.ift-informatik.de 5.4.0-216-generic #236-Ubuntu SMP Fri Apr 11 19:53:21 UTC 2025 x86_64 |
# | Current File : //home/cgabriel/20_dev/10_dev2017/1310__algorithms/Julia/locsenshash.jl |
#
# Approximate Nearest Neighbour search
# implemented by Stefan Schwarz.
#
# Copyright 2014 <stschwarz@ift-intern.de>
# Stefan Schwarz
using Distance
# Abstract definitions.
# LSHash: supertype of the hash implementations (RandomBinProjection).
abstract LSHash
# Storage: supertype of the bucket stores (MemoryStorage).
abstract Storage
# VectorFilter: supertype of the candidate filters (NearestFilter).
abstract VectorFilter
#type definitions
# Hash based on projections onto random hyperplane normals.
#
# Fields:
#   hash_name        - name under which this hash's buckets are kept in storage
#   projection_count - number of rows reset_hash! generates for `normals`
#   dim              - number of columns reset_hash! generates for `normals`
#   normals          - matrix whose rows are the hyperplane normals used by
#                      hash_vector
#
# Declared as a mutable type: reset_hash! reassigns `dim` and `normals`,
# which is not permitted on an immutable.
type RandomBinProjection <: LSHash
    hash_name::String
    projection_count::Integer
    dim::Integer
    normals::AbstractArray{Float64, 2}
end
# In-memory bucket store.
# `buckets` maps a hash name to a table, which in turn maps a bucket key to
# the list of entries stored under it (both levels are filled lazily by
# store_vector!).
type MemoryStorage <: Storage
buckets::Dict{String, Dict}
end
# A single stored item: wraps the vector `v` that is kept in a bucket.
type Entry
v::Vector
end
# Filter that limits a candidate list to the `nearest` closest entries
# (applied by filter_vector!).
immutable type NearestFilter <: VectorFilter
nearest::Integer
# `nearest` defaults to 5 when the constructor is called without arguments.
NearestFilter(nearest = 5) = new(nearest)
end
# Bundles the components used for storing and querying vectors.
#
# Fields:
#   lshash   - hash used by store_vector! to compute bucket keys; its
#              hash_name selects the bucket table in storage
#   distance - metric passed to evaluate() in neighbours
#   storage  - store that holds the buckets
#   filter   - a VectorFilter; NOTE(review): none of the functions in this
#              file read this field - confirm where it is applied
immutable type Engine
lshash::LSHash
distance::Metric
storage::Storage
filter::VectorFilter
end
# Function definitions
# Computes the bucket key of v.
#
# v is projected onto every row of a.normals (one hyperplane normal per
# row). Each projection contributes one character to the key: "1" when
# the dot product is strictly positive, "0" otherwise. The characters are
# concatenated in row order and the resulting string is returned; it is
# used as the bucket key for storage.
#
function hash_vector(a::LSHash, v::Vector)
    return join([dot(vec(a.normals[row, :]), v) > 0.0 ? "1" : "0"
                 for row = 1:size(a.normals, 1)])
end
# Re-initialises the hash for vectors of dimension dim: stores dim and
# replaces a.normals with a fresh a.projection_count x dim matrix of
# normally distributed random numbers. Returns the new matrix.
# Both fields are assigned, so `a` has to be a mutable type.
function reset_hash!(a::LSHash, dim::Integer)
    a.dim = dim
    fresh_normals = randn(a.projection_count, dim)
    a.normals = fresh_normals
    return fresh_normals
end
# Stores the raw vector v in bucket bucket_key of hash hash_name.
# The vector is wrapped in an Entry and handed to the Entry method, which
# creates the hash table and the bucket on first use; this keeps the
# bucket-creation logic in one place. Returns the bucket's entry list.
function store_vector!(a::Storage, hash_name::String, bucket_key::String, v::Vector)
    return store_vector!(a, hash_name, bucket_key, Entry(v))
end
# Appends ent to bucket bucket_key of hash hash_name.
# The per-hash table and the bucket are created lazily the first time they
# are needed. Returns the bucket's entry list (the result of push!).
function store_vector!(a::Storage, hash_name::String, bucket_key::String, ent::Entry)
    haskey(a.buckets, hash_name) || (a.buckets[hash_name] = Dict())
    table = a.buckets[hash_name]
    haskey(table, bucket_key) || (table[bucket_key] = Entry[])
    return push!(table[bucket_key], ent)
end
# Hashes v with the engine's hash and stores it in the engine's storage
# under the hash's name and the computed bucket key.
function store_vector!(a::Engine, v::Vector)
    bucket_key = hash_vector(a.lshash, v)
    return store_vector!(a.storage, a.lshash.hash_name, bucket_key, v)
end
#
# Hashes vector v with b, looks up the bucket with that key in the
# engine's storage (in the table named a.lshash.hash_name) and evaluates
# the engine's distance between v and every vector in the bucket.
#
# Returns a list of (vector, distance) tuples, one per bucket entry. The
# list is empty when the bucket does not exist, including the case where
# nothing has been stored under the hash name yet. No filter is applied
# here; use filter_vector! on the result.
function neighbours(a::Engine, b::LSHash, v::Vector)
    bucket_content = Entry[]
    key = hash_vector(b, v)
    buckets = a.storage.buckets
    hash_name = a.lshash.hash_name
    # Check the outer table first: indexing it with an unknown hash name
    # would throw a KeyError on a storage that has never been written to.
    if haskey(buckets, hash_name) && haskey(buckets[hash_name], key)
        bucket_content = buckets[hash_name][key]
    end
    return (Array{Float64, 1}, result_type(a.distance, None, None))[(x.v, evaluate(a.distance, x.v, v)) for x in bucket_content]
end
# Orders the (vector, distance) tuples by ascending distance, in place, and
# returns the a.nearest closest ones. When the list holds fewer than
# a.nearest tuples, the sorted list itself is returned.
function filter_vector!{T}(a::NearestFilter, list::Array{(Array{Float64,1}, T), 1})
    sort!(list, by = candidate -> candidate[2])
    if length(list) < a.nearest
        return list
    end
    return list[1:a.nearest]
end