forked from max-si-m/up_work_skills
-
Notifications
You must be signed in to change notification settings - Fork 1
/
parser.rb
40 lines (34 loc) · 991 Bytes
/
parser.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
require 'rubygems'
require 'net/http'
require 'nokogiri'
# Parser that collects skill names from UpWork's freelancer-skills pages.
# Example usage:
#   skill_parser = Parsers::UpWork::SkillsParser.new('a'..'z')
#   skill_parser.scrape_pages
#   p skill_parser.data
module Parsers
  module UpWork
    # Collects skill names from UpWork's freelancer-skills listing pages.
    #
    # Every entry of +pages+ is interpolated into the listing URL
    # ("https://www.upwork.com/i/freelancer-skills-<page>/"); the response body
    # is parsed with Nokogiri and the link text of each ".skill-item" element
    # is appended to +data+.
    class SkillsParser
      # Pages used when the caller does not supply any.
      DEFAULT_PAGES = ('a'..'z').freeze

      attr_accessor :pages, :data

      # @param pages [Array, Range, nil] page identifiers to scrape; when nil
      #   or omitted, DEFAULT_PAGES is used
      # @raise [ArgumentError] if +pages+ is neither an Array, a Range nor nil
      def initialize(pages = nil)
        pages = DEFAULT_PAGES if pages.nil?
        supported = pages.is_a?(Array) || pages.is_a?(Range)
        # The comma matters: without it Ruby parses `ArgumentError '...'` as a
        # method call and raises NoMethodError instead.
        raise ArgumentError, 'Only array, or range' unless supported

        @pages = pages
        @data = []
      end

      # Downloads every page in +pages+ and appends the skill names found on
      # it to +data+. Results accumulate across calls; +data+ is not reset.
      #
      # @return [Array, Range] the +pages+ collection that was iterated
      def scrape_pages
        @pages.each do |page|
          skill_names(fetch_document(page)).each { |name| @data << name }
        end
      end

      private

      # Fetches the listing page for +page+ and parses it into a Nokogiri document.
      def fetch_document(page)
        uri = URI("https://www.upwork.com/i/freelancer-skills-#{page}/")
        # NOTE(review): `strict` switches off Nokogiri's recover mode; confirm
        # the live pages still parse without raising under these options.
        Nokogiri::HTML(Net::HTTP.get(uri)) do |config|
          config.strict.nonet.noblanks.noerror
        end
      end

      # Returns the link text of each ".skill-item" element in +doc+, dropping
      # blank entries (the text of an element without links is "", which is
      # truthy in Ruby and would otherwise be stored).
      def skill_names(doc)
        names = doc.css('.skill-item').map { |item| item.css('a').text }
        names.reject { |name| name.strip.empty? }
      end
    end
  end
end