Update refresh_channels to properly utilize workers

pull/107/head
Omar Roth 7 years ago
parent f588132cad
commit f3c7409d72

@ -83,15 +83,7 @@ crawl_threads.times do
end
end
total_channels = PG_DB.query_one("SELECT count(*) FROM channels", as: Int64)
channel_threads.times do |i|
limit = total_channels / channel_threads
offset = limit.not_nil! * i
spawn do
refresh_channels(PG_DB, limit, offset)
end
end
refresh_channels(PG_DB, channel_threads)
video_threads.times do |i|
spawn do

@ -44,25 +44,44 @@ def crawl_videos(db)
end
end
def refresh_channels(db, limit = 0, offset = 0)
def refresh_channels(db, max_threads = 1)
max_channel = Channel(Int32).new
spawn do
max_threads = max_channel.receive
active_threads = 0
active_channel = Channel(Bool).new
loop do
db.query("SELECT id FROM channels ORDER BY updated limit $1 offset $2", limit, offset) do |rs|
db.query("SELECT id FROM channels ORDER BY updated") do |rs|
rs.each do
client = make_client(YT_URL)
id = rs.read(String)
if active_threads >= max_threads
if active_channel.receive
active_threads -= 1
end
end
active_threads += 1
spawn do
begin
id = rs.read(String)
client = make_client(YT_URL)
channel = fetch_channel(id, client, db, false)
db.exec("UPDATE channels SET updated = $1 WHERE id = $2", Time.now, id)
rescue ex
STDOUT << id << " : " << ex.message << "\n"
next
end
active_channel.send(true)
end
end
end
Fiber.yield
end
end
max_channel.send(max_threads)
end
def refresh_videos(db)

Loading…
Cancel
Save