Update refresh_channels to properly utilize workers

pull/107/head
Omar Roth 6 years ago
parent f588132cad
commit f3c7409d72

@ -83,15 +83,7 @@ crawl_threads.times do
end end
end end
total_channels = PG_DB.query_one("SELECT count(*) FROM channels", as: Int64) refresh_channels(PG_DB, channel_threads)
channel_threads.times do |i|
limit = total_channels / channel_threads
offset = limit.not_nil! * i
spawn do
refresh_channels(PG_DB, limit, offset)
end
end
video_threads.times do |i| video_threads.times do |i|
spawn do spawn do

@ -44,25 +44,44 @@ def crawl_videos(db)
end end
end end
def refresh_channels(db, limit = 0, offset = 0) def refresh_channels(db, max_threads = 1)
loop do max_channel = Channel(Int32).new
db.query("SELECT id FROM channels ORDER BY updated limit $1 offset $2", limit, offset) do |rs|
rs.each do
client = make_client(YT_URL)
begin spawn do
max_threads = max_channel.receive
active_threads = 0
active_channel = Channel(Bool).new
loop do
db.query("SELECT id FROM channels ORDER BY updated") do |rs|
rs.each do
id = rs.read(String) id = rs.read(String)
channel = fetch_channel(id, client, db, false)
db.exec("UPDATE channels SET updated = $1 WHERE id = $2", Time.now, id) if active_threads >= max_threads
rescue ex if active_channel.receive
STDOUT << id << " : " << ex.message << "\n" active_threads -= 1
next end
end
active_threads += 1
spawn do
begin
client = make_client(YT_URL)
channel = fetch_channel(id, client, db, false)
db.exec("UPDATE channels SET updated = $1 WHERE id = $2", Time.now, id)
rescue ex
STDOUT << id << " : " << ex.message << "\n"
end
active_channel.send(true)
end
end end
end end
end end
Fiber.yield
end end
max_channel.send(max_threads)
end end
def refresh_videos(db) def refresh_videos(db)

Loading…
Cancel
Save