@ -144,7 +144,7 @@ class ChannelRedirect < Exception
end
end
end
end
def get_batch_channels ( channels , db , refresh = false , pull_all_videos = true , max_threads = 10 )
def get_batch_channels ( channels , db , logger, refresh = false , pull_all_videos = true , max_threads = 10 )
finished_channel = Channel ( String | Nil ) . new
finished_channel = Channel ( String | Nil ) . new
spawn do
spawn do
@ -160,7 +160,7 @@ def get_batch_channels(channels, db, refresh = false, pull_all_videos = true, ma
active_threads += 1
active_threads += 1
spawn do
spawn do
begin
begin
get_channel ( ucid , db , refresh, pull_all_videos )
get_channel ( ucid , db , logger, refresh, pull_all_videos )
finished_channel . send ( ucid )
finished_channel . send ( ucid )
rescue ex
rescue ex
finished_channel . send ( nil )
finished_channel . send ( nil )
@ -181,10 +181,10 @@ def get_batch_channels(channels, db, refresh = false, pull_all_videos = true, ma
return final
return final
end
end
def get_channel ( id , db , refresh = true , pull_all_videos = true )
def get_channel ( id , db , logger, refresh = true , pull_all_videos = true )
if channel = db . query_one? ( " SELECT * FROM channels WHERE id = $1 " , id , as : InvidiousChannel )
if channel = db . query_one? ( " SELECT * FROM channels WHERE id = $1 " , id , as : InvidiousChannel )
if refresh && Time . utc - channel . updated > 10 . minutes
if refresh && Time . utc - channel . updated > 10 . minutes
channel = fetch_channel ( id , db , pull_all_videos : pull_all_videos )
channel = fetch_channel ( id , db , logger , pull_all_videos : pull_all_videos )
channel_array = channel . to_a
channel_array = channel . to_a
args = arg_array ( channel_array )
args = arg_array ( channel_array )
@ -192,7 +192,7 @@ def get_channel(id, db, refresh = true, pull_all_videos = true)
ON CONFLICT ( id ) DO UPDATE SET author = $2 , updated = $3 " , args: channel_array)
ON CONFLICT ( id ) DO UPDATE SET author = $2 , updated = $3 " , args: channel_array)
end
end
else
else
channel = fetch_channel ( id , db , pull_all_videos : pull_all_videos )
channel = fetch_channel ( id , db , logger , pull_all_videos : pull_all_videos )
channel_array = channel . to_a
channel_array = channel . to_a
args = arg_array ( channel_array )
args = arg_array ( channel_array )
@ -202,8 +202,12 @@ def get_channel(id, db, refresh = true, pull_all_videos = true)
return channel
return channel
end
end
def fetch_channel ( ucid , db , pull_all_videos = true , locale = nil )
def fetch_channel ( ucid , db , logger , pull_all_videos = true , locale = nil )
logger . trace ( " fetch_channel: #{ ucid } : pull_all_videos = #{ pull_all_videos } , locale = #{ locale } " )
logger . trace ( " fetch_channel: #{ ucid } : Downloading RSS feed " )
rss = YT_POOL . client & . get ( " /feeds/videos.xml?channel_id= #{ ucid } " ) . body
rss = YT_POOL . client & . get ( " /feeds/videos.xml?channel_id= #{ ucid } " ) . body
logger . trace ( " fetch_channel: #{ ucid } : Parsing RSS feed " )
rss = XML . parse_html ( rss )
rss = XML . parse_html ( rss )
author = rss . xpath_node ( % q ( / / feed / title ) )
author = rss . xpath_node ( % q ( / / feed / title ) )
@ -219,14 +223,19 @@ def fetch_channel(ucid, db, pull_all_videos = true, locale = nil)
auto_generated = true
auto_generated = true
end
end
logger . trace ( " fetch_channel: #{ ucid } : author = #{ author } , auto_generated = #{ auto_generated } " )
page = 1
page = 1
logger . trace ( " fetch_channel: #{ ucid } : Downloading channel videos page " )
response = get_channel_videos_response ( ucid , page , auto_generated : auto_generated )
response = get_channel_videos_response ( ucid , page , auto_generated : auto_generated )
videos = [ ] of SearchVideo
videos = [ ] of SearchVideo
begin
begin
initial_data = JSON . parse ( response . body ) . as_a . find & . [ " response " ]?
initial_data = JSON . parse ( response . body ) . as_a . find & . [ " response " ]?
raise InfoException . new ( " Could not extract channel JSON " ) if ! initial_data
raise InfoException . new ( " Could not extract channel JSON " ) if ! initial_data
logger . trace ( " fetch_channel: #{ ucid } : Extracting videos from channel videos page initial_data " )
videos = extract_videos ( initial_data . as_h , author , ucid )
videos = extract_videos ( initial_data . as_h , author , ucid )
rescue ex
rescue ex
if response . body . includes? ( " To continue with your YouTube experience, please fill out the form below. " ) ||
if response . body . includes? ( " To continue with your YouTube experience, please fill out the form below. " ) ||
@ -236,6 +245,7 @@ def fetch_channel(ucid, db, pull_all_videos = true, locale = nil)
raise ex
raise ex
end
end
logger . trace ( " fetch_channel: #{ ucid } : Extracting videos from channel RSS feed " )
rss . xpath_nodes ( " //feed/entry " ) . each do | entry |
rss . xpath_nodes ( " //feed/entry " ) . each do | entry |
video_id = entry . xpath_node ( " videoid " ) . not_nil! . content
video_id = entry . xpath_node ( " videoid " ) . not_nil! . content
title = entry . xpath_node ( " title " ) . not_nil! . content
title = entry . xpath_node ( " title " ) . not_nil! . content
@ -269,6 +279,8 @@ def fetch_channel(ucid, db, pull_all_videos = true, locale = nil)
views : views ,
views : views ,
} )
} )
logger . trace ( " fetch_channel: #{ ucid } : video #{ video_id } : Updating or inserting video " )
# We don't include the 'premiere_timestamp' here because channel pages don't include them,
# We don't include the 'premiere_timestamp' here because channel pages don't include them,
# meaning the above timestamp is always null
# meaning the above timestamp is always null
was_insert = db . query_one ( " INSERT INTO channel_videos VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10) \
was_insert = db . query_one ( " INSERT INTO channel_videos VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10) \
@ -276,8 +288,13 @@ def fetch_channel(ucid, db, pull_all_videos = true, locale = nil)
updated = $4 , ucid = $5 , author = $6 , length_seconds = $7 , \
updated = $4 , ucid = $5 , author = $6 , length_seconds = $7 , \
live_now = $8 , views = $10 returning ( xmax = 0 ) as was_insert " , *video.to_tuple, as: Bool)
live_now = $8 , views = $10 returning ( xmax = 0 ) as was_insert " , *video.to_tuple, as: Bool)
if was_insert
logger . trace ( " fetch_channel: #{ ucid } : video #{ video_id } : Inserted, updating subscriptions " )
db . exec ( " UPDATE users SET notifications = array_append(notifications, $1), \
db . exec ( " UPDATE users SET notifications = array_append(notifications, $1), \
feed_needs_update = true WHERE $2 = ANY ( subscriptions ) " , video.id, video.ucid) if was_insert
feed_needs_update = true WHERE $2 = ANY ( subscriptions ) " , video.id, video.ucid)
else
logger . trace ( " fetch_channel: #{ ucid } : video #{ video_id } : Updated " )
end
end
end
if pull_all_videos
if pull_all_videos