Skip to content

Commit

Permalink
Merge pull request #12 from paulgoetze/feature/string-attributes
Browse files Browse the repository at this point in the history
Feature/string attributes
  • Loading branch information
paulgoetze authored Dec 22, 2016
2 parents 287f26e + d8202b6 commit c0a1a09
Show file tree
Hide file tree
Showing 10 changed files with 441 additions and 51 deletions.
1 change: 1 addition & 0 deletions .rspec
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
--format documentation
--color
--order rand
1 change: 1 addition & 0 deletions lib/weka/classifiers/evaluation.rb
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ class Evaluation

alias summary to_summary_string
alias class_details to_class_details_string
alias confusion_matrix to_matrix_string

alias instance_count num_instances
alias correct_count correct
Expand Down
65 changes: 62 additions & 3 deletions lib/weka/core/attribute.rb
Original file line number Diff line number Diff line change
@@ -1,23 +1,82 @@
require 'weka/concerns/persistent'

module Weka
module Core
java_import 'weka.core.Attribute'

class Attribute
include Weka::Concerns::Persistent

TYPES = %i(numeric nominal string date).freeze

class << self
  # Builds a numeric attribute.
  #
  # @param name [#to_s] the attribute name
  def new_numeric(name)
    attribute_name = name.to_s
    new(attribute_name)
  end

  # Builds a nominal attribute with the given allowed values.
  #
  # @param name [#to_s] the attribute name
  # @param values [Array, Object, nil] one value or a list of values;
  #   coerced with Array() and every entry is converted to a String
  def new_nominal(name, values)
    attribute_name = name.to_s
    labels = Array(values).map { |value| value.to_s }
    new(attribute_name, labels)
  end

  # Builds a date attribute.
  #
  # @param name [#to_s] the attribute name
  # @param format [#to_s] the date format passed on to the constructor
  def new_date(name, format)
    attribute_name = name.to_s
    date_format = format.to_s
    new(attribute_name, date_format)
  end

  ##
  # Creates a new Attribute instance of type string.
  #
  # The Java class defines the same constructor
  #   Attribute(java.lang.String, java.util.List<java.lang.String>)
  # for nominal and string attributes and decides on the type internally,
  # based on the second argument.
  #
  # In Java you would write the following to create a string Attribute:
  #   Attribute attribute = new Attribute("name", (FastVector) null);
  #
  # The analogous JRuby call
  #   attribute = Attribute.new('name', nil)
  # throws a Java::JavaLang::NullPointerException.
  #
  # Thus, we use reflection here and call the constructor explicitly, see
  # https://github.com/jruby/jruby/wiki/CallingJavaFromJRuby#constructors
  #
  # The object returned by the Java constructor only has the class
  # Java::JavaObject, so it has to be cast to the proper class.
  #
  # See also:
  # https://stackoverflow.com/questions/1792495/casting-objects-in-jruby
  #
  # @param name [#to_s] the attribute name
  def new_string(name)
    java_class = Attribute.java_class
    constructor = java_class.declared_constructor(java.lang.String, java.util.List)

    # A nil value list is what marks the attribute as a string attribute.
    java_object = constructor.new_instance(name.to_s, nil)
    java_object.to_java(Attribute)
  end
end

# Returns the attribute's values as a Ruby Array, built from the
# enumeration that +enumerate_values+ yields.
def values
  enumeration = enumerate_values
  enumeration.to_a
end

##
# Returns the attribute's type as a string.
# This overwrites the Java method weka.core.Attribute#type, which returns
# an integer based on the defined type constants; the conversion itself is
# delegated to the class-level +type_to_string+.
def type
  attribute_class = self.class
  attribute_class.type_to_string(self)
end

##
# Converts a value into the representation this attribute stores internally.
#
# The order of the checks is important here, because a date is also
# a numeric.
#
# @param value [Object] the value to convert; nil and '?' denote a
#   missing value
# @return [Object, nil] the value itself if it already is NaN, Float::NAN
#   for missing values, the result of +parse_date+ for date attributes,
#   a Float for numeric attributes, the result of +index_of_value+ for
#   nominal and string attributes, and nil if none of the type checks match
def internal_value_of(value)
  # NaN is never equal to itself, so `value === Float::NAN` can never
  # detect it; ask the value directly instead.
  return value if value.respond_to?(:nan?) && value.nan?
  return Float::NAN if [nil, '?'].include?(value)
  return parse_date(value.to_s) if date?
  return value.to_f if numeric?
  return index_of_value(value.to_s) if nominal? || string?
end
end

Weka::Core::Attribute.__persistent__ = true
end
end
2 changes: 1 addition & 1 deletion lib/weka/core/dense_instance.rb
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ def value_from(value, index)
format_date(value, attribute.date_format)
elsif attribute.numeric?
value
elsif attribute.nominal?
elsif attribute.nominal? || attribute.string?
attribute.value(value)
end
end
Expand Down
51 changes: 44 additions & 7 deletions lib/weka/core/instances.rb
Original file line number Diff line number Diff line change
Expand Up @@ -62,9 +62,29 @@ def add_attributes(&block)
self
end

# Ruby-style aliases; each alias is declared exactly once.
alias with_attributes       add_attributes
alias instances_count       num_instances
alias attributes_count      num_attributes
alias has_string_attribute? check_for_string_attributes

##
# Checks whether the instances have any attribute of the given type.
#
# @param type [String, Symbol, Integer] the attribute type to look for.
#   Anything that is not an Integer is first converted with
#   +map_attribute_type+; an Integer is handed to
#   +check_for_attribute_type+ unchanged.
#
# @example Passing a String
#   instances.has_attribute_type?('string')
#   instances.has_attribute_type?('String')
#
# @example Passing a Symbol
#   instances.has_attribute_type?(:String)
#
# @example Passing an Integer
#   instances.has_attribute_type?(Attribute::STRING)
def has_attribute_type?(type)
  type_constant = type.is_a?(Integer) ? type : map_attribute_type(type)
  check_for_attribute_type(type_constant)
end

def each
if block_given?
Expand Down Expand Up @@ -120,25 +140,25 @@ def to_c45(file)
end

# Adds a numeric attribute.
#
# @param name [#to_s] the attribute name
# @param class_attribute [Boolean] when truthy, +name+ is also assigned
#   as the class attribute
def numeric(name, class_attribute: false)
  # Build the attribute once, through the factory (the previous direct
  # constructor call was a dead assignment).
  attribute = Attribute.new_numeric(name)
  add_attribute(attribute)
  self.class_attribute = name if class_attribute
end

# Adds a nominal attribute.
#
# @param name [#to_s] the attribute name
# @param values [Array, Object] the allowed values, handed on to
#   Attribute.new_nominal
# @param class_attribute [Boolean] when truthy, +name+ is also assigned
#   as the class attribute
def nominal(name, values:, class_attribute: false)
  # Build the attribute once, through the factory (the previous direct
  # constructor call was a dead assignment).
  attribute = Attribute.new_nominal(name, values)
  add_attribute(attribute)
  self.class_attribute = name if class_attribute
end

# Adds a string attribute.
#
# @param name [#to_s] the attribute name
# @param class_attribute [Boolean] when truthy, +name+ is also assigned
#   as the class attribute
def string(name, class_attribute: false)
  # Build the attribute once, through the factory. The previous
  # `Attribute.new(name.to_s, [])` call was a dead assignment.
  attribute = Attribute.new_string(name)
  add_attribute(attribute)
  self.class_attribute = name if class_attribute
end

# Adds a date attribute.
#
# @param name [#to_s] the attribute name
# @param format [#to_s] the date format, 'yyyy-MM-dd HH:mm' by default
# @param class_attribute [Boolean] when truthy, +name+ is also assigned
#   as the class attribute
def date(name, format: 'yyyy-MM-dd HH:mm', class_attribute: false)
  # Build the attribute once, through the factory (the previous direct
  # constructor call was a dead assignment).
  attribute = Attribute.new_date(name, format)
  add_attribute(attribute)
  self.class_attribute = name if class_attribute
end
Expand Down Expand Up @@ -226,9 +246,26 @@ def instance_from(instance_or_values, weight:)
instance_or_values
else
data = internal_values_of(instance_or_values)

# String attributes have an unlimited range of possible values.
# Check the returned index: if it is -1, add the value to
# the attribute before creating the instance.
data.map!.with_index do |value, index|
if value == -1 && attribute(index).string?
attribute(index).add_string_value(instance_or_values[index].to_s)
else
value
end
end

DenseInstance.new(data, weight: weight)
end
end

# Maps a type name onto the type constant of the same (upper-cased) name
# defined on Attribute.
#
# @param type [String, Symbol, #to_s] the type name, in any letter case
# @return [Object] the value of the matching Attribute constant, or -1 if
#   the name is not listed in Attribute::TYPES
def map_attribute_type(type)
  # Normalise through to_s so that nil or other objects without
  # #downcase/#upcase yield -1 instead of raising NoMethodError.
  type_name = type.to_s
  return -1 unless Attribute::TYPES.include?(type_name.downcase.to_sym)

  Attribute.const_get(type_name.upcase)
end
end

Java::WekaCore::Instances.__persistent__ = true
Expand Down
3 changes: 2 additions & 1 deletion spec/classifiers/evaluation_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,8 @@
unclassified_count: :unclassified,
unclassified_percentage: :pct_unclassified,
weighted_f_measure: :weighted_fmeasure,
cumulative_margin_distribution: :toCumulativeMarginDistributionString
cumulative_margin_distribution: :toCumulativeMarginDistributionString,
confusion_matrix: :to_matrix_string
}.each do |alias_method, method|
it "defines the alias ##{alias_method} for ##{method}" do
expect(subject.method(method)).to eq subject.method(alias_method)
Expand Down
Loading

0 comments on commit c0a1a09

Please sign in to comment.