Add html parser
This commit is contained in:
parent
94fa6091a0
commit
4000399ded
|
@ -0,0 +1,46 @@
|
|||
# frozen_string_literal: true
|
||||
|
||||
require 'nokogiri'
|
||||
|
||||
module Ncaa
  module Parser
    module Html
      # Base class for HTML response parsers.
      #
      # Wraps a response object (anything responding to +body+ and +code+),
      # parses its body with Nokogiri, and leaves it to subclasses to turn the
      # parsed document into a result by implementing {#build}.
      class Base
        # Raised when the response body cannot be parsed as HTML.
        class InvalidHtmlResponseError < StandardError
          # @return [String, nil] short label describing the failure
          attr_reader :error

          # @return [Object, nil] the response whose body failed to parse
          attr_reader :response

          # @param msg [String, nil] positional message (plain StandardError style)
          # @param error [String, nil] short label describing the failure
          # @param response [Object, nil] the offending response
          # @param message [String, nil] detail, usually the underlying error's message
          def initialize(msg = nil, error: nil, response: nil, message: nil)
            @error = error
            @response = response
            text = [error, message || msg].compact.join(': ')
            # Passing nil keeps Ruby's default message (the class name).
            super(text.empty? ? nil : text)
          end
        end

        attr_reader :body, :code, :html, :response, :options

        # Instantiates the parser and immediately builds its result.
        #
        # @param resp [#body, #code] response to parse
        # @param options [Hash] stored on the instance as given
        # @return [Object] whatever the subclass's {#build} returns
        def self.parse(resp, options: {})
          new(resp, options: options).build
        end

        # @param resp [#body, #code] response to parse
        # @param options [Hash] stored on the instance as given
        # @raise [InvalidHtmlResponseError] if parsing the body raises
        def initialize(resp, options: {})
          # The response must be assigned before parsing: parse_html attaches
          # it to the error it raises on failure.
          @response = resp
          @options = options
          @body = resp.body
          @code = resp.code
          @html = parse_html
        end

        # Turns the parsed document into a result. Subclasses must override.
        #
        # @raise [NotImplementedError] always, in this base class
        def build
          raise NotImplementedError, "#{self.class} must implement #build"
        end

        private

        # Parses {#body} with Nokogiri.
        #
        # @return [Object] the result of Nokogiri::HTML(body)
        # @raise [InvalidHtmlResponseError] wrapping any StandardError raised while parsing
        def parse_html
          Nokogiri::HTML(body)
        rescue StandardError => e
          raise InvalidHtmlResponseError.new(error: 'Invalid Html', response: response, message: e.message)
        end
      end
    end
  end
end
|
||||
|
||||
# require after Base is defined
|
||||
require 'ncaa/parser/html/rankings'
|
||||
require 'ncaa/parser/html/standings'
|
|
@ -0,0 +1,58 @@
|
|||
# frozen_string_literal: true
|
||||
|
||||
module Ncaa
  module Parser
    module Html
      # Parses a rankings page into a Hash describing the selected ranking
      # and one Hash per table row.
      class Rankings < Base
        # Matches dates such as "Dec. 3, 2023" or "December 3, 2023".
        # Interval quantifiers are applied directly to the character classes
        # so the day and year lengths are actually bounded.
        LAST_UPDATED_PATTERN = /\w{3,}\.? \d{1,2}, \d{2,4}/.freeze

        # Builds the parsed representation of the page.
        #
        # @return [Hash] with keys :type, :uri, :last_updated and :data;
        #   :type and :uri are nil when no option is marked as selected
        def build
          {
            type: type&.inner_html&.strip,
            uri: uri,
            last_updated: last_updated,
            data: rankings
          }
        end

        private

        # @return [Nokogiri::XML::NodeSet] tables inside the rankings article
        def table
          @table ||= html.css('article.rankings-content table')
        end

        # Column names taken from the table header: whitespace removed,
        # underscored and symbolized.
        #
        # NOTE(review): String#remove and String#underscore are not core Ruby;
        # presumably ActiveSupport is loaded elsewhere - confirm.
        #
        # @return [Array<Symbol>]
        def keys
          @keys ||= table.css('thead th').map { |x| x.text.remove(/\s/).underscore.to_sym }
        end

        # @return [Nokogiri::XML::Element, nil] the selected option of the
        #   rankings menu, or nil when none is selected
        def type
          @type ||= html.css('form#rankings-menu-form select#edit-rankings-menu option[selected="selected"]').first
        end

        # First attribute value of the selected option (presumably its
        # +value+ attribute - confirm against the page markup).
        #
        # @return [String, nil]
        def uri
          return unless type

          type.values.first&.strip
        end

        # Date found in the "last updated" figure.
        #
        # @return [Time, nil] nil when the figure contains no recognizable date
        def last_updated
          @last_updated ||= begin
            updated = html.css('figure.rankings-last-updated').text
            date = updated[LAST_UPDATED_PATTERN]
            # Time.parse raises TypeError on nil, so skip it when nothing matched.
            Time.parse(date) if date
          end
        end

        # One Hash per body row, keyed by {#keys}. A row with fewer cells than
        # headers gets nil for the missing columns.
        #
        # @return [Array<Hash{Symbol => String, nil}>]
        def rankings
          @rankings ||= table.css('tbody tr').map do |row|
            cells = row.css('td').map(&:text)
            # zip pads with nil when the row is shorter than the header.
            keys.zip(cells).to_h
          end
        end
      end
    end
  end
end
|
|
@ -0,0 +1,13 @@
|
|||
# frozen_string_literal: true
|
||||
|
||||
module Ncaa
  module Parser
    module Html
      # Parser for standings pages. Only a stub so far: building a result is
      # not implemented.
      class Standings < Base
        # @raise [NotImplementedError] always
        def build
          raise NotImplementedError
        end
      end
    end
  end
end
|
Loading…
Reference in New Issue