-
Notifications
You must be signed in to change notification settings - Fork 15
/
Copy pathisc.rb
executable file
·70 lines (54 loc) · 1.27 KB
/
isc.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
#!/usr/bin/ruby1.9.1
require 'csv'
require 'mechanize'
# Scrapes per-student result pages and stores them in two CSV files.
#
# For each school id in 9001..9793 and each student number 1..999 the page
# "<base><server>/<school_id><3-digit student number>.html" is requested.
# Every fetched page yields one row in isc_students.csv and one row per
# score-table row in isc_scores.csv (prefixed with the first student field).
# Scanning of a school stops once more than `max_failure` consecutive
# student pages could not be fetched from any server.

# Fetches the page of one student, trying servers 1..servers in order.
#
# @param agent [Mechanize] agent used for the HTTP requests
# @param base [String] URL prefix that the server number is appended to
# @param servers [Integer] number of servers to try
# @param school_id [Integer] school part of the page name
# @param student_id [Integer] student number, zero-padded to three digits
# @return [Mechanize::Page, nil] the first page fetched, or nil if every
#   server raised
def fetch_student_page(agent, base, servers, school_id, student_id)
  (1..servers).each do |server|
    begin
      return agent.get("#{base}#{server}/#{school_id}%03d.html" % student_id)
    rescue StandardError
      # Best effort on purpose: any error simply moves on to the next server.
      next
    end
  end
  nil
end

# History is capped at zero entries so fetched pages are not retained.
agent = Mechanize.new { |a| a.history.max_size = 0 }
agent.user_agent = 'Mozilla/5.0'
base = "http://www.cisce.ndtv.com/web/12th/12-"
student_path = "/html/body/table[1]/tr"
score_path = "/html/body/tr/td/table/tr[position()>1]"
servers = 4
max_failure = 3

# Block form of CSV.open closes both files even if the run is interrupted
# or an unexpected error escapes the loops.
CSV.open("isc_students.csv", "w") do |students|
  CSV.open("isc_scores.csv", "w") do |scores|
    (9001..9793).each do |school_id|
      failure = 0 # consecutive student pages that no server could deliver
      print "Pulling #{school_id}\n"
      print " "
      (1..999).each do |student_id|
        page = fetch_student_page(agent, base, servers, school_id, student_id)
        unless page
          failure += 1
          print failure.to_s
          break if failure > max_failure
          next
        end
        failure = 0

        # Student record: second cell of every row of the first table.
        row = []
        page.parser.xpath(student_path).each do |tr|
          tr.xpath("td[2]").each { |td| row << td.text.strip }
        end
        students << row

        # One CSV row per score-table row, keyed by the first student field.
        page.parser.xpath(score_path).each do |tr|
          cells = tr.xpath("td").map { |td| td.text.strip }
          scores << (row.first(1) + cells)
        end
      end
      # Always terminate the progress line, also when all 999 numbers were
      # scanned without reaching the failure threshold.
      print " - done\n"
      scores.flush
      students.flush
    end
  end
end