Skip to content

Commit

Permalink
Working gem
Browse files Browse the repository at this point in the history
  • Loading branch information
David Roberts committed Oct 9, 2014
1 parent 1a82650 commit 7040f39
Show file tree
Hide file tree
Showing 12 changed files with 216 additions and 34 deletions.
1 change: 1 addition & 0 deletions .ruby-version
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
2.1.3
2 changes: 1 addition & 1 deletion Gemfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
source 'https://rubygems.org'

# Specify your gem's dependencies in activerecord_pg_histogram.gemspec
# Specify your gem's dependencies in pg_histogram.gemspec
gemspec
44 changes: 33 additions & 11 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,29 +1,51 @@
# ActiverecordPgHistogram
# PostgreSQL Histogram (for ActiveRecord)

This gem allows you to efficiently create a histogram from large data sets in your Rails applications.

It uses PostgreSQL's [width_bucket](http://www.postgresql.org/docs/9.3/static/functions-math.html) function to handle the majority of the processing in the database, and only requires 3 database queries.


TODO: Write a gem description

## Installation

Add this line to your application's Gemfile:

gem 'activerecord_pg_histogram'
gem 'pg_histogram'

And then execute:

$ bundle

Or install it yourself as:

$ gem install activerecord_pg_histogram
$ gem install pg_histogram

## Usage

TODO: Write usage instructions here
Create a Histogram object using the following three parameters:
1. ActiveRecord query to use
2. Name of column to count frequency of
3. Bucket size (OPTIONAL - default is 0.5)


histogram = PgHistogram::Histogram.new(Widget.all, 'price', 0.5)


Call the results method to retrieve a Hash of bucket minimums and frequency counts

# create sample data
5.times { Widget.create(price: 1.2) }
10.times { Widget.create(price: 2.9) }

# get the results
@histogram_data = histogram.results
=> {1.0=>5, 2.5=>10}


The results can be used by your favorite charting library, such as [Chartkick](https://github.com/ankane/chartkick), to plot the data.

<%= column_chart @histogram_data %>

## Contributing
## Dependencies

1. Fork it
2. Create your feature branch (`git checkout -b my-new-feature`)
3. Commit your changes (`git commit -am 'Add some feature'`)
4. Push to the branch (`git push origin my-new-feature`)
5. Create new Pull Request
This gem has been tested with Ruby 2.1.3 and ActiveRecord 4.1.6. Please open an issue or PR if you experience issues with other versions.
5 changes: 0 additions & 5 deletions lib/activerecord_pg_histogram.rb

This file was deleted.

Empty file.
3 changes: 0 additions & 3 deletions lib/activerecord_pg_histogram/version.rb

This file was deleted.

5 changes: 5 additions & 0 deletions lib/pg_histogram.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
require 'pg_histogram/version'
require 'pg_histogram/histogram'

# Top-level namespace for the gem. The version constant and the Histogram
# class are loaded into it by the requires at the top of this file.
module PgHistogram
end
86 changes: 86 additions & 0 deletions lib/pg_histogram/histogram.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
module PgHistogram
  # Builds a frequency histogram for one numeric column of an ActiveRecord
  # query. The bucketing itself is done in the database with PostgreSQL's
  # width_bucket function; Ruby only computes the bounds and labels the rows.
  class Histogram
    attr_reader :query, :column, :bucket_size

    # Column aliases used in the generated SQL and when reading result rows.
    BUCKET_COL = 'bucket'.freeze
    FREQUENCY_COL = 'frequency'.freeze
    # Maps the optional rounding direction to the Numeric method applied.
    ROUND_METHODS_BY_DIRECTION = {
      nil => :round,
      down: :floor,
      up: :ceil
    }.freeze

    # query       - ActiveRecord relation providing the rows to count.
    # column_name - name of the numeric column to bucket. It is interpolated
    #               into raw SQL without escaping, so it must come from
    #               trusted code and never from user input.
    # bucket_size - width of each bucket (default 0.5); must be positive.
    #
    # Raises ArgumentError when bucket_size is not a positive number.
    def initialize(query, column_name, bucket_size = 0.5)
      unless bucket_size.is_a?(Numeric) && bucket_size > 0
        raise ArgumentError, 'bucket_size must be a positive number'
      end

      @query = query
      @column = column_name.to_s
      @bucket_size = bucket_size
    end

    # Returns the histogram as a Hash:
    #   key   - bucket minimum
    #   value - number of rows falling in that bucket
    # Buckets without rows are not included.
    def results
      # With identical bounds there is nothing to divide into buckets (and
      # width_bucket needs distinct bounds), so count the single value directly.
      if max == min
        { min => query.where("#{column} = ?", min).count }
      else
        labeled_histogram
      end
    end

    # Smallest column value, rounded down to a bucket_size increment.
    def min
      @min ||= round_to_increment(query.minimum(column), :down)
    end

    # Largest column value, rounded up to a bucket_size increment.
    def max
      @max ||= round_to_increment(query.maximum(column), :up)
    end

    private

    # Number of equal-width buckets between min and max. Both bounds are
    # multiples of bucket_size, so the quotient is a whole number up to float
    # error; round instead of truncating so 2.9999999999999996 becomes 3.
    def num_buckets
      @num_buckets ||= ((max - min) / bucket_size).round
    end

    # Returns the bucket label (minimum which can be in bucket) based on the
    # 1-based bucket number. The result is snapped back onto the increment
    # grid so float error does not leak into the hash keys.
    def bucket_num_to_label(bucket_num)
      round_to_increment(min + bucket_size * (bucket_num - 1))
    end

    # Rounds num to the nearest bucket_size increment.
    # Pass :up or :down as direction to always round one way.
    # Returns 0 when num is nil (e.g. the query matched no non-NULL values).
    def round_to_increment(num, direction = nil)
      return 0 if num.nil?

      round_method = ROUND_METHODS_BY_DIRECTION.fetch(direction)
      # Force float division: with an Integer bucket_size, 1 / bucket_size
      # would truncate to 0 and turn every result into NaN.
      denominator = 1 / bucket_size.to_f
      (num * denominator).send(round_method) / denominator
    end

    # Executes the query and converts bucket numbers to bucket minimums.
    # Rows whose bucket is NULL (NULL column values) are skipped.
    def labeled_histogram
      query_for_buckets.each_with_object({}) do |row, histogram|
        bucket = row[BUCKET_COL]
        next if bucket.nil?

        histogram[bucket_num_to_label(bucket.to_i)] = row[FREQUENCY_COL].to_i
      end
    end

    # Runs the width_bucket aggregation over the subquery and returns the raw
    # result rows (bucket number and frequency per bucket).
    def query_for_buckets
      ActiveRecord::Base.connection.execute(
        <<-SQL
          SELECT width_bucket(#{column}, #{min}, #{max}, #{num_buckets}) as #{BUCKET_COL},
            count(*) as #{FREQUENCY_COL}
          FROM (#{subquery.to_sql}) as subq_results
          GROUP BY #{BUCKET_COL}
          ORDER BY #{BUCKET_COL}
        SQL
      )
    end

    # Use the passed AR query as a subquery so it does not interfere with the
    # GROUP BY clause of the outer statement.
    def subquery
      # order by the first selected column instead of any default ordering
      query.select(column).order('1')
    end
  end
end
3 changes: 3 additions & 0 deletions lib/pg_histogram/version.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
module PgHistogram
  # Release number of the gem; the gemspec reads it as spec.version.
  VERSION = '0.1'
end
7 changes: 3 additions & 4 deletions activerecord_pg_histogram.gemspec → pg_histogram.gemspec
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
# coding: utf-8
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'activerecord_pg_histogram/version'
require 'pg_histogram/version'

Gem::Specification.new do |spec|
spec.name = "activerecord_pg_histogram"
spec.version = ActiverecordPgHistogram::VERSION
spec.name = "pg_histogram"
spec.version = PgHistogram::VERSION
spec.authors = ["David Roberts"]
spec.email = ["[email protected]"]
spec.description = %q{Creates a Histogram fron an ActiveRecord query}
Expand All @@ -22,5 +22,4 @@ Gem::Specification.new do |spec|
spec.add_dependency "pg"
spec.add_development_dependency "bundler", "~> 1.3"
spec.add_development_dependency "rake"
spec.add_development_dependency "factory_girl"
end
74 changes: 71 additions & 3 deletions test/histogram_test.rb
Original file line number Diff line number Diff line change
@@ -1,7 +1,75 @@
require_relative 'test_helper'

class HistogramTest < Minitest::Test
def test_test
assert true

# Runs before each test: empties the widgets table and builds the shared
# histogram over every widget's price with a bucket size of 0.5.
def setup
  Widget.delete_all
  every_widget = Widget.all
  @hist = PgHistogram::Histogram.new(every_widget, 'price', 0.5)
end

# A single row: min and max collapse onto the same value and the histogram
# holds exactly one bucket for it.
def test_with_1_result
  Widget.create!(price: 2.00)

  assert_equal 2.0, @hist.min, 'Minimum is the single price'
  # message previously said "Minimum" for the max assertion (copy-paste slip)
  assert_equal 2.0, @hist.max, 'Maximum is the single price'
  assert_equal 1, @hist.results[2.0], 'Frequency of 2.0 bucket'
end

# A NULL price next to a single real price must not create a bucket of its
# own nor affect the bounds.
def test_ignores_nils_with_1_result
  Widget.create!(price: 3.00)
  Widget.create!(price: nil)

  results = @hist.results
  assert_equal 3.0, @hist.min, 'Minimum is the single price'
  # message previously said "Minimum" for the max assertion (copy-paste slip)
  assert_equal 3.0, @hist.max, 'Maximum is the single price'
  assert_equal 1, results.count, 'Histogram bucket count'
  assert_equal 1, results[3.0], 'Frequency of 3.0 bucket'
end

# Two real prices plus a NULL one: the NULL row must be left out while the
# two real prices each land in their own bucket.
def test_ignores_nils_with_multiple_results
  [3.00, 2.25, nil].each { |price| Widget.create!(price: price) }

  buckets = @hist.results
  assert_equal 2.0, @hist.min, 'Minimum'
  assert_equal 3.0, @hist.max, 'Maximum'
  assert_equal 2, buckets.count, 'Histogram bucket count'
  { 3.0 => 1, 2.0 => 1 }.each do |label, frequency|
    assert_equal frequency, buckets[label], "Frequency of #{label} bucket"
  end
end

# Larger data set with a 0.25 bucket size: checks the rounded bounds, the
# bucket count and the frequency of every populated bucket.
def test_with_many_results
  # use a different bucket size than the shared @hist
  hist = PgHistogram::Histogram.new(Widget.all, 'price', 0.25)

  10.times { Widget.create!(price: 3.0) }
  8.times { Widget.create!(price: 5.76) }
  Widget.create!(price: 0.98) # lowest price; expected to round down to 0.75
  Widget.create!(price: 6.0)  # highest price; already on a 0.25 increment
  results = hist.results

  assert_equal 0.75, hist.min, 'Histogram minimum price'
  assert_equal 6.0, hist.max, 'Histogram maximum price'
  assert_equal 21, hist.send(:num_buckets), 'Histogram buckets'
  assert_equal 4, results.size, 'Histogram buckets with results'
  assert_equal 1, results[0.75], 'Frequency of 0.75 bucket'
  assert_equal 10, results[3.0], 'Frequency of 3.0 bucket'
  assert_equal 8, results[5.75], 'Frequency of 5.75 bucket'
  assert_equal 1, results[6.0], 'Frequency of 6.0 bucket'
end

# Exercises the private round_to_increment helper on a 0.25 grid for the
# default (nearest), :down and :up rounding modes.
def test_rounding_to_bucket_size
  hist = PgHistogram::Histogram.new(nil, nil, 0.25)

  # rounding direction => { input => value expected on the 0.25 grid }
  expectations = {
    nil   => { 0.478 => 0.5,  1.1 => 1.0,  0.5 => 0.5 },
    :down => { 0.478 => 0.25, 1.1 => 1.0,  0.5 => 0.5 },
    :up   => { 0.478 => 0.5,  1.1 => 1.25, 0.5 => 0.5 }
  }

  expectations.each do |direction, cases|
    cases.each do |input, expected|
      # the default mode is exercised by omitting the direction argument
      args = [input, direction].compact
      label = [input, 'rounded', direction, 'to 0.25 interval'].compact.join(' ')
      assert_equal expected, hist.send(:round_to_increment, *args), label
    end
  end
end
end
end
20 changes: 13 additions & 7 deletions test/test_helper.rb
Original file line number Diff line number Diff line change
@@ -1,21 +1,27 @@
# -*- encoding: utf-8 -*-

require 'minitest/spec'
require 'minitest/autorun'
require 'minitest/spec'
require 'logger'
require 'active_record'
require 'factory_girl'
require 'yaml'
require 'pg_histogram'

# Connect to the database described under the "test" key of
# test/database.yml (path is relative to the working directory).
db_settings = YAML.load(File.read('test/database.yml'))
ActiveRecord::Base.establish_connection(db_settings['test'])

# Send ActiveRecord's log output to tmp/test.log at DEBUG level and keep
# migration output quiet.
test_logger = Logger.new('tmp/test.log')
test_logger.level = Logger::DEBUG
ActiveRecord::Base.logger = test_logger
ActiveRecord::Migration.verbose = false

class ActiveSupport::TestCase
include FactoryGirl::Syntax::Methods

# Define the schema the tests run against: a single widgets table with a
# float price column plus timestamps, created with force: true.
ActiveRecord::Schema.define do
  create_table(:widgets, force: true) do |table|
    table.float :price
    table.timestamps
  end
end


FactoryGirl.find_definitions

# ActiveRecord model used by the tests as the histogram's data source;
# the tests create widgets with a price and bucket on that column.
class Widget < ActiveRecord::Base
end

0 comments on commit 7040f39

Please sign in to comment.