From 4000399ded0a7ac6e5165aba90bfecb0dbf93fda Mon Sep 17 00:00:00 2001
From: ZippyDev
Date: Sun, 10 Nov 2019 15:41:38 -0700
Subject: [PATCH] Add html parser

---
 lib/ncaa/parser/html/base.rb      | 62 +++++++++++++++++++++++++++++++++
 lib/ncaa/parser/html/rankings.rb  | 66 +++++++++++++++++++++++++++++++++++
 lib/ncaa/parser/html/standings.rb | 15 ++++++++
 3 files changed, 143 insertions(+)
 create mode 100644 lib/ncaa/parser/html/base.rb
 create mode 100644 lib/ncaa/parser/html/rankings.rb
 create mode 100644 lib/ncaa/parser/html/standings.rb

diff --git a/lib/ncaa/parser/html/base.rb b/lib/ncaa/parser/html/base.rb
new file mode 100644
index 0000000..6f2fb95
--- /dev/null
+++ b/lib/ncaa/parser/html/base.rb
@@ -0,0 +1,62 @@
+# frozen_string_literal: true
+
+require 'nokogiri'
+
+module Ncaa
+  module Parser
+    module Html
+      # Shared plumbing for HTML response parsers: captures the raw response,
+      # parses its body with Nokogiri and leaves the shaping of the result to
+      # subclasses via #build.
+      class Base
+        # Raised when the response body cannot be parsed by Nokogiri.
+        class InvalidHtmlResponseError < StandardError; end
+
+        attr_reader :body, :code, :html, :response, :options
+
+        # Instantiates the parser and immediately builds its result.
+        #
+        # @param resp [#body, #code] response object to parse
+        # @param options [Hash] options stored on the parser instance
+        # @return [Object] whatever the subclass's #build returns
+        def self.parse(resp, options: {})
+          new(resp, options: options).build
+        end
+
+        # @param resp [#body, #code] response object to parse
+        # @param options [Hash] options exposed through #options
+        # @raise [InvalidHtmlResponseError] if the body cannot be parsed
+        def initialize(resp, options: {})
+          @response = resp
+          @options = options
+          @body = resp.body
+          @code = resp.code
+          # Parsed last: the rescue in #parse_html reports #response, which
+          # must already be assigned for the error to carry it.
+          @html = parse_html
+        end
+
+        # Builds the parsed representation; subclasses must override.
+        #
+        # @raise [NotImplementedError] always, in the base class
+        def build
+          raise NotImplementedError
+        end
+
+        private
+
+        # @return [Nokogiri::HTML::Document]
+        # @raise [InvalidHtmlResponseError] if Nokogiri fails on the body
+        def parse_html
+          Nokogiri::HTML(body)
+        rescue StandardError => e
+          raise InvalidHtmlResponseError.new(error: 'Invalid Html', response: response, message: e.message)
+        end
+      end
+    end
+  end
+end
+
+# require after Base is defined
+require 'ncaa/parser/html/rankings'
+require 'ncaa/parser/html/standings'
diff --git a/lib/ncaa/parser/html/rankings.rb b/lib/ncaa/parser/html/rankings.rb
new file mode 100644
index 0000000..83a6ca3
--- /dev/null
+++ b/lib/ncaa/parser/html/rankings.rb
@@ -0,0 +1,66 @@
+# frozen_string_literal: true
+
+require 'time'
+
+module Ncaa
+  module Parser
+    module Html
+      # Parses a rankings page into a hash holding the selected ranking type,
+      # its URI, the last-updated timestamp and one hash per table row.
+      class Rankings < Base
+        # Matches dates shaped like "Nov. 10, 2019" or "November 10, 2019".
+        DATE_PATTERN = /\w{3,}\.? \d{1,2}, \d{2,4}/.freeze
+
+        # @return [Hash] :type, :uri, :last_updated and :data entries
+        def build
+          {
+            type: type.inner_html.strip,
+            # NOTE(review): takes the option's first attribute value; presumably
+            # that is its `value` attribute - confirm against the page markup.
+            uri: type.values.first.strip,
+            last_updated: last_updated,
+            data: rankings
+          }
+        end
+
+        private
+
+        # @return [Nokogiri::XML::NodeSet] tables matched inside the rankings article
+        def table
+          @table ||= html.css('article.rankings-content table')
+        end
+
+        # Header cell texts with whitespace removed, underscored and symbolized.
+        # NOTE(review): String#remove and #underscore look like ActiveSupport
+        # extensions - confirm it is loaded before this parser runs.
+        #
+        # @return [Array<Symbol>]
+        def keys
+          @keys ||= table.css('thead th').map { |x| x.text.remove(/\s/).underscore.to_sym }
+        end
+
+        # @return [Nokogiri::XML::Element, nil] selected menu option, nil if none
+        def type
+          @type ||= html.css('form#rankings-menu-form select#edit-rankings-menu option[selected="selected"]').first
+        end
+
+        # @return [Time, nil] nil when the figure text holds no recognizable date
+        def last_updated
+          @last_updated ||= begin
+            date = html.css('figure.rankings-last-updated').text[DATE_PATTERN]
+            date && Time.parse(date)
+          end
+        end
+
+        # @return [Array<Hash{Symbol => String, nil}>] one hash per table body row
+        def rankings
+          @rankings ||= table.css('tbody tr').map do |row|
+            # zip pads with nil when a row has fewer cells than the header,
+            # where indexing a missing cell would raise.
+            keys.zip(row.css('td').map(&:text)).to_h
+          end
+        end
+      end
+    end
+  end
+end
diff --git a/lib/ncaa/parser/html/standings.rb b/lib/ncaa/parser/html/standings.rb
new file mode 100644
index 0000000..6c10e29
--- /dev/null
+++ b/lib/ncaa/parser/html/standings.rb
@@ -0,0 +1,15 @@
+# frozen_string_literal: true
+
+module Ncaa
+  module Parser
+    module Html
+      # Placeholder parser for standings pages; not implemented yet.
+      class Standings < Base
+        # @raise [NotImplementedError] always
+        def build
+          raise NotImplementedError
+        end
+      end
+    end
+  end
+end