diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..061685d --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,23 @@ +# Changelog + +All notable changes to this project will be documented in this file. + +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), +and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). + +## [1.2.0] - 2019-11-15 + +### Added + +- Checks can be set to a `warn` status when non-critical errors occur. +- The handler now exposes a `timer` method that wraps a check and marks it ok/warn/fail based on how long it takes to run. + +### Changed + +- The handler will report an `HTTP 302` status when a run is successful but there are warnings. + +## [1.1.0] - 2019-09-13 + +### Changed + +- Forked from [tribune/is_it_working](https://github.com/tribune/is_it_working). diff --git a/README.rdoc b/README.rdoc index 6584551..a347f13 100644 --- a/README.rdoc +++ b/README.rdoc @@ -18,28 +18,29 @@ Suppose you have a Rails application that uses the following services: * NFS shared directory symlinked to from system/data in the Rails root directory * SMTP server at mail.example.com * A black box service encapsulated in AwesomeService +* A Sidekiq queue that should be monitored for retries that aren't clearing out A monitoring handler for this set up could be set up in config/initializers/is_it_working.rb like this: Rails.configuration.middleware.use(IsItWorking::Handler) do |h| # Check the ActiveRecord database connection without spawning a new thread h.check :active_record, :async => false - + # Check the memcache servers used by Rails.cache if using the DalliStore implementation h.check :dalli, :cache => Rails.cache if defined?(ActiveSupport::Cache::DalliStore) && Rails.cache.is_a?(ActiveSupport::Cache::DalliStore) - + # Check that the web service is working by hitting a known URL with Basic authentication h.check :url, :get => "http://api.example.com/version", :username => "appname", :password 
=> "abc123" - + # Check that the NFS mount directory is available with read/write permissions h.check :directory, :path => Rails.root + "system/data", :permission => [:read, :write] - + # Check the mail server configured for ActionMailer h.check :action_mailer if ActionMailer::Base.delivery_method == :smtp - + # Ping another mail server h.check :ping, :host => "mail.example.com", :port => "smtp" - + # Check that AwesomeService is working using the service's own logic h.check :awesome_service do |status| if AwesomeService.active? diff --git a/lib/is_it_working.rb b/lib/is_it_working.rb index b01f852..0718acb 100644 --- a/lib/is_it_working.rb +++ b/lib/is_it_working.rb @@ -6,7 +6,8 @@ module IsItWorking autoload :Filter, File.expand_path("../is_it_working/filter.rb", __FILE__) autoload :Handler, File.expand_path("../is_it_working/handler.rb", __FILE__) autoload :Status, File.expand_path("../is_it_working/status.rb", __FILE__) - + autoload :Timer, File.expand_path("../is_it_working/timer.rb", __FILE__) + # Predefined checks autoload :ActionMailerCheck, File.expand_path("../is_it_working/checks/action_mailer_check.rb", __FILE__) autoload :ActiveRecordCheck, File.expand_path("../is_it_working/checks/active_record_check.rb", __FILE__) diff --git a/lib/is_it_working/filter.rb b/lib/is_it_working/filter.rb index 9e55093..b74496c 100644 --- a/lib/is_it_working/filter.rb +++ b/lib/is_it_working/filter.rb @@ -4,32 +4,32 @@ class Filter class AsyncRunner < Thread attr_accessor :filter_status end - + class SyncRunner attr_accessor :filter_status - + def initialize yield end - + def join end end - - attr_reader :name, :async - + + attr_reader :async, :name, :runner, :status + # Create a new filter to run a status check. The name is used for display purposes. def initialize(name, check, async = true) @name = name @check = check @async = async end - - # Run a status the status check. This method keeps track of the time it took to run + + # Run a status check. 
This method keeps track of the time it took to run # the check and will trap any unexpected exceptions and report them as failures. def run - status = Status.new(name) - runner = (async ? AsyncRunner : SyncRunner).new do + @status = Status.new(name) + @runner = (async ? AsyncRunner : SyncRunner).new do t = Time.now begin @check.call(status) @@ -41,14 +41,12 @@ def run runner.filter_status = status runner end - + class << self # Run a list of filters and return their status objects - def run_filters (filters) - runners = filters.collect{|f| f.run} - statuses = runners.collect{|runner| runner.filter_status} - runners.each{|runner| runner.join} - statuses + def run_filters(filters) + filters.map(&:run).each(&:join) + filters.map(&:status) end end end diff --git a/lib/is_it_working/handler.rb b/lib/is_it_working/handler.rb index 4f6472b..269874a 100644 --- a/lib/is_it_working/handler.rb +++ b/lib/is_it_working/handler.rb @@ -33,6 +33,7 @@ def initialize(app=nil, route_path="/is_it_working", &block) @app = app @route_path = route_path @hostname = `hostname`.chomp + @timers = [] @filters = [] @mutex = Mutex.new yield self if block_given? @@ -43,6 +44,7 @@ def call(env) statuses = [] t = Time.now statuses = Filter.run_filters(@filters) + Timer.run_timers(@timers) render(statuses, Time.now - t) else @app.call(env) @@ -95,7 +97,9 @@ def check (name, *options_or_check, &block) end end - @filters << Filter.new(name, check, options[:async]) + Filter.new(name, check, options[:async]).tap do |f| + @filters << f + end end # Helper method to synchronize a block of code so it can be thread safe. @@ -107,6 +111,10 @@ def synchronize end end + def timer(*args, **kwargs) + @timers << Timer.new(*args, **kwargs) { yield } + end + protected # Lookup a status check filter from the name and arguments @@ -124,12 +132,12 @@ def lookup_check(name, options) #:nodoc: # Output the plain text response from calling all the filters. 
def render(statuses, elapsed_time) #:nodoc: - code = if statuses.any?(&:warnings?) - 203 - elsif statuses.all?(&:success?) - 200 - else + code = if statuses.any?(&:failures?) 500 + elsif statuses.any?(&:warnings?) + 302 + else + 200 end headers = { "Content-Type" => "text/plain; charset=utf8", diff --git a/lib/is_it_working/status.rb b/lib/is_it_working/status.rb index e869286..8ec2dd2 100644 --- a/lib/is_it_working/status.rb +++ b/lib/is_it_working/status.rb @@ -23,6 +23,10 @@ def ok? def warn? result == :warn end + + def fail? + result == :fail + end end # The name of the status check for display purposes. @@ -56,12 +60,17 @@ def fail(message) # Returns +true+ only if all checks were OK. def success? - @messages.all?{|m| m.ok?} + @messages.all?(&:ok?) end # Returns +true+ if all checks were OK but warnings were present. def warnings? - success? && @messages.any?{|m| m.warn?} + success? && @messages.any?(&:warn?) + end + + # Returns +true+ if any checks were FAIL. + def failures? + @messages.any?(&:fail?) end end end diff --git a/lib/is_it_working/timer.rb b/lib/is_it_working/timer.rb new file mode 100644 index 0000000..f81d726 --- /dev/null +++ b/lib/is_it_working/timer.rb @@ -0,0 +1,40 @@ +module IsItWorking + class Timer + attr_reader :failure_threshold, :filter, :warning_threshold + + def initialize(warn_after: Float::INFINITY, fail_after: Float::INFINITY) + @failure_threshold = fail_after + @warning_threshold = warn_after + @filter = yield + end + + def call + status = filter.status + if fail_timeout_exceeded?(status.time) + status.fail("runtime exceeded critical threshold: #{failure_threshold}ms") + elsif warn_timeout_exceeded?(status.time) + status.warn("runtime exceeded warning threshold: #{warning_threshold}ms") + end + end + + class << self + def run_timers(timers) + timers.each(&:call) + end + end + + private + + def warn_timeout_exceeded?(time) + timeout_exceeded? time, warning_threshold + end + + def fail_timeout_exceeded?(time) + timeout_exceeded? 
time, failure_threshold + end + + def timeout_exceeded?(time, val) + time * 1000 > val + end + end +end diff --git a/lib/is_it_working/version.rb b/lib/is_it_working/version.rb index ebdcae2..a5b3603 100644 --- a/lib/is_it_working/version.rb +++ b/lib/is_it_working/version.rb @@ -1,3 +1,3 @@ module IsItWorking - VERSION = '1.1.0'.freeze + VERSION = '1.2.0'.freeze end diff --git a/spec/filter_spec.rb b/spec/filter_spec.rb index 014594a..d73365b 100644 --- a/spec/filter_spec.rb +++ b/spec/filter_spec.rb @@ -1,17 +1,17 @@ require File.expand_path('../spec_helper', __FILE__) describe IsItWorking::Filter do - + it "should have a name" do filter = IsItWorking::Filter.new(:test, lambda{}) filter.name.should == :test end - + it "should run a check and return a thread" do check = lambda do |status| status.ok("success") end - + filter = IsItWorking::Filter.new(:test, check) runner = filter.run status = runner.filter_status @@ -20,12 +20,12 @@ status.messages.first.message.should == "success" status.time.should_not be_nil end - - it "should run a check and recue an errors" do + + it "should run a check and rescue an error" do check = lambda do |status| raise "boom!" 
end - + filter = IsItWorking::Filter.new(:test, check) runner = filter.run status = runner.filter_status @@ -34,7 +34,13 @@ status.messages.first.message.should include("boom") status.time.should_not be_nil end - + + it "should have a warn state when exceeding the warn_threshold" do + end + + it "should have a warn state when exceeding the warn_threshold" do + end + it "should run multiple filters and return their statuses" do filter_1 = IsItWorking::Filter.new(:test, lambda{|status| status.ok("OK")}) filter_2 = IsItWorking::Filter.new(:test, lambda{|status| status.fail("FAIL")}) @@ -42,5 +48,5 @@ statuses.first.should be_success statuses.last.should_not be_success end - + end diff --git a/spec/handler_spec.rb b/spec/handler_spec.rb index 5df40aa..20400d1 100644 --- a/spec/handler_spec.rb +++ b/spec/handler_spec.rb @@ -1,7 +1,7 @@ require 'spec_helper' describe IsItWorking::Handler do - + it "should lookup filters from the pre-defined checks" do handler = IsItWorking::Handler.new do |h| h.check :directory, :path => ".", :permissions => :read @@ -11,7 +11,7 @@ response.last.flatten.join("").should include("OK") response.last.flatten.join("").should include("directory") end - + it "should use blocks as filters" do handler = IsItWorking::Handler.new do |h| h.check :block do |status| @@ -23,7 +23,7 @@ response.last.flatten.join("").should include("OK") response.last.flatten.join("").should include("block - Okey dokey") end - + it "should use object as filters" do handler = IsItWorking::Handler.new do |h| h.check :lambda, lambda{|status| status.ok("A-okay")} @@ -33,7 +33,7 @@ response.last.flatten.join("").should include("OK") response.last.flatten.join("").should include("lambda - A-okay") end - + it "should create asynchronous filters by default" do handler = IsItWorking::Handler.new do |h| h.check :block do |status| @@ -44,7 +44,7 @@ IsItWorking::Filter::AsyncRunner.should_receive(:new).and_return(runner) response = handler.call({}) end - + it "should be able to 
create synchronous filters" do handler = IsItWorking::Handler.new do |h| h.check :block, :async => false do |status| @@ -55,7 +55,7 @@ IsItWorking::Filter::SyncRunner.should_receive(:new).and_return(runner) response = handler.call({}) end - + it "should work with synchronous checks" do handler = IsItWorking::Handler.new do |h| h.check :block, :async => false do |status| @@ -67,7 +67,7 @@ response.last.flatten.join("").should include("OK") response.last.flatten.join("").should include("block - Okey dokey") end - + it "should return a success response if all checks pass" do handler = IsItWorking::Handler.new do |h| h.check :block do |status| @@ -82,7 +82,7 @@ response.last.flatten.join("").should include("block - success") response.last.flatten.join("").should include("block - worked") end - + it "should return an error response if any check fails" do handler = IsItWorking::Handler.new do |h| h.check :block do |status| @@ -97,7 +97,43 @@ response.last.flatten.join("").should include("block - success") response.last.flatten.join("").should include("block - down") end - + + it "should return a warning status if a check exceeds a warning timeout" do + handler = IsItWorking::Handler.new do |h| + h.timer(warn_after: 1) do + h.check :block do |status| + sleep 0.02 + status.ok('That took a while! 😅') + end + end + end + response = handler.call({}) + response.first.should == 302 + response.last.flatten.join.should include("OK: block - That took a while") + response.last.flatten.join.should include("WARN: block - runtime exceeded warning threshold") + end + + it "should return a failure status if a check exceeds a warning timeout and a timeout" do + handler = IsItWorking::Handler.new do |h| + h.timer(warn_after: 5, fail_after: 9) do + h.check :block do |status| + sleep 0.1 + status.ok('That took a while! 😅') + end + end + h.timer(warn_after: 5) do + h.check :warn do |status| + sleep 0.1 + status.ok('That took a while! 
😅') + end + end + end + response = handler.call({}) + response.first.should == 500 + response.last.flatten.join.should include("OK: block - That took a while") + response.last.flatten.join.should include("FAIL: block - runtime exceeded critical threshold") + end + it "should be able to be used in a middleware stack with the route /is_it_working" do app_response = [200, {"Content-Type" => "text/plain"}, ["OK"]] app = lambda{|env| app_response} @@ -105,13 +141,13 @@ stack = IsItWorking::Handler.new(app) do |h| h.check(:test){|status| check_called = true; status.ok("Woot!")} end - + stack.call("PATH_INFO" => "/").should == app_response check_called.should == false stack.call("PATH_INFO" => "/is_it_working").last.flatten.join("").should include("Woot!") check_called.should == true end - + it "should be able to be used in a middleware stack with a custom route" do app_response = [200, {"Content-Type" => "text/plain"}, ["OK"]] app = lambda{|env| app_response} @@ -119,19 +155,19 @@ stack = IsItWorking::Handler.new(app, "/woot") do |h| h.check(:test){|status| check_called = true; status.ok("Woot!")} end - + stack.call("PATH_INFO" => "/is_it_working").should == app_response check_called.should == false stack.call("PATH_INFO" => "/woot").last.flatten.join("").should include("Woot!") check_called.should == true end - + it "should be able to synchronize access to a block" do handler = IsItWorking::Handler.new handler.synchronize{1}.should == 1 handler.synchronize{2}.should == 2 end - + it "should be able to set the host name reported in the output" do handler = IsItWorking::Handler.new handler.hostname = "woot" diff --git a/spec/status_spec.rb b/spec/status_spec.rb index 3fd2506..db998c6 100644 --- a/spec/status_spec.rb +++ b/spec/status_spec.rb @@ -34,6 +34,16 @@ status.messages.first.message.should == "uh oh" end + it "should have failures" do + status.fail("uh oh") + status.should be_failures + status.messages.size.should == 1 + status.messages.first.should_not be_ok + 
status.messages.first.should_not be_warn + status.messages.first.should be_fail + status.messages.first.message.should == "uh oh" + end + it "should have both errors and successes" do status.fail("boom") status.ok("wow")