From b0a87060fdab5513528bf40d3fca2680bf488091 Mon Sep 17 00:00:00 2001 From: Scott Behrens Date: Wed, 20 Sep 2017 07:59:03 -0700 Subject: [PATCH] adds language metadata for github repos --- lib/scumblr_tasks/sync_tasks/github_sync.rb | 86 +++++++++++++-------- test/models/task_test.rb | 7 ++ 2 files changed, 60 insertions(+), 33 deletions(-) diff --git a/lib/scumblr_tasks/sync_tasks/github_sync.rb b/lib/scumblr_tasks/sync_tasks/github_sync.rb index 97e91311..8d4cdd43 100644 --- a/lib/scumblr_tasks/sync_tasks/github_sync.rb +++ b/lib/scumblr_tasks/sync_tasks/github_sync.rb @@ -47,23 +47,23 @@ def self.config_options def self.options { :sync_type => {name: "Sync Type (Organization/User)", - description: "Should this task retrieve repos for an organization or for a user?", - required: false, - type: :choice, - default: :both, - choices: [:org, :user]}, + description: "Should this task retrieve repos for an organization or for a user?", + required: false, + type: :choice, + default: :both, + choices: [:org, :user]}, :owner => {name: "Organization/User", - description: "Specify the organization or user.", - required: false, - type: :string}, + description: "Specify the organization or user.", + required: false, + type: :string}, :owner_metadata => {name: "Organization/Users from Metadata", - description: "Provide a metadata key to pull organizations or users from.", - required: false, - type: :system_metadata}, + description: "Provide a metadata key to pull organizations or users from.", + required: false, + type: :system_metadata}, :members => {name: "Import Organization Members' Repos", - description: "If syncing for an organization, should the task also import Repos owned by members of the organization.", - required: false, - type: :boolean}, + description: "If syncing for an organization, should the task also import Repos owned by members of the organization.", + required: false, + type: :boolean}, :tags => {name: "Tag Results", description: "Provide a tag for newly 
created results", required: false, @@ -71,11 +71,11 @@ def self.options type: :tag }, :scope_visibility => {name: "Repo Visibility", - description: "Should the task sync public repos, private repos, or both.", - required: true, - type: :choice, - default: :both, - choices: [:both, :public, :private]}, + description: "Should the task sync public repos, private repos, or both.", + required: true, + type: :choice, + default: :both, + choices: [:both, :public, :private]}, } end @@ -104,9 +104,10 @@ def initialize(options={}) end def run + @completed=0 @last_total = 0 - + owners =[] if(@options[:owner_metadata]) begin @@ -120,7 +121,7 @@ def run previous_results = @options.try(:[],:_self).try(:metadata).try(:[],"previous_results") if(previous_results) - @last_total = previous_results["created"].to_a.count + previous_results["updated"].to_a.count + @last_total = previous_results["created"].to_a.count + previous_results["updated"].to_a.count end owners.each do |owner| @@ -137,7 +138,7 @@ def run end end - + return [] @@ -146,27 +147,42 @@ def run private + def get_languages(name, repo) + begin + response = @github.repos.languages name, repo + rescue Github::Error::Forbidden=>e + handle_rate_limit(e) + retry + rescue + + return nil + end + return response.body + end def get_repos(name, type) + if(type == "org") begin response = @github.repos.list org: name rescue Github::Error::Forbidden=>e - handle_rate_limit(e) + retry + end else begin + response = @github.repos.list user: name rescue Github::Error::Forbidden=>e handle_rate_limit(e) retry + rescue => e + end end parse_results(response) - - while(response.has_next_page?) puts "Getting new page" response = response.next_page @@ -183,7 +199,6 @@ def handle_rate_limit(e) sleep(wait_for + 1) if wait_for.to_i > 0 elsif(e.try(:http_headers).try(:[],"x-ratelimit-remaining").present? 
&& e.try(:http_headers).try(:[],"x-ratelimit-remaining").to_i <= 1) - wait_for = e.http_headers["x-ratelimit-reset"].to_i - Time.now.to_i puts "Sleeping for #{wait_for}" @@ -196,18 +211,18 @@ def handle_rate_limit(e) def parse_results(response) puts "Rate limit: #{response.headers.ratelimit_remaining} of #{response.headers.ratelimit_limit} remaining. Reset in #{response.response.headers["x-ratelimit-reset"].to_i - DateTime.now.to_i} seconds (#{response.response.headers["x-ratelimit-reset"]})" - - response.each do |repo| if(@options[:scope_visibility] == "both" || (repo.private == true && @options[:scope_visibility] == "private") || (repo.private == false && @options[:scope_visibility] == "public")) + + res = Result.where(url: repo.html_url.downcase).first_or_initialize res.title = repo.full_name.to_s + " (Github)" res.domain = "github.com" res.metadata ||={} #search_metadata[:github_analyzer] = true - + res.metadata["repository_data"] ||= {} res.metadata["repository_data"]["name"] = repo["name"] res.metadata["repository_data"]["slug"] = repo["name"] @@ -221,7 +236,16 @@ def parse_results(response) res.metadata["repository_data"]["link"] = repo["html_url"] res.metadata["repository_data"]["repository_host"] = @github_api_endpoint.gsub(/\Ahttps?:\/\//,"").gsub(/\/.+/,"") + # Add programming language metadata including primary language as well as language per LOC + if repo["language"].present? + res.metadata["repository_data"]["primary_language"] = repo["language"] + end + + languages = get_languages(repo["owner"]["login"], repo["name"]) + if languages.present? + res.metadata["repository_data"]["languages"] = languages.to_hash + end if @options[:tags].present? 
res.add_tags(@options[:tags]) @@ -238,10 +262,6 @@ def parse_results(response) end end end - - - - end diff --git a/test/models/task_test.rb b/test/models/task_test.rb index 2e3ad82a..89836da6 100644 --- a/test/models/task_test.rb +++ b/test/models/task_test.rb @@ -71,8 +71,15 @@ class TaskTest < ActiveSupport::TestCase test "should execute github sync task" do skip("Github OAuth Token not defined") if Rails.configuration.try(:github_oauth_token).blank? github_sync.perform_task + res = Result.find(github_sync.metadata[:current_results]["updated"].first) assert_equal(1, github_sync.metadata[:current_results].count) + + # assert that the primary language was recorded + assert_equal("Ruby", res.metadata["repository_data"]["primary_language"]) + + # assert that the per-language breakdown includes Ruby + assert(res.metadata["repository_data"]["languages"].keys.include? "Ruby") end test "should execute google search task" do skip("Google developer key not defined") if Rails.configuration.try(:google_developer_key).blank?