From 2903e896ecf2404bf932438a33432125a6ad1fca Mon Sep 17 00:00:00 2001 From: Samantaz Fox Date: Fri, 11 Nov 2022 20:26:34 +0100 Subject: [PATCH] channel: use YT API + extractors to fetch videos --- src/invidious/channels/channels.cr | 67 +++++++++---------- src/invidious/channels/playlists.cr | 2 +- src/invidious/channels/videos.cr | 88 ++++++++++++++++++------- src/invidious/routes/api/v1/channels.cr | 66 ++++++++----------- src/invidious/routes/channels.cr | 4 +- 5 files changed, 125 insertions(+), 102 deletions(-) diff --git a/src/invidious/channels/channels.cr b/src/invidious/channels/channels.cr index e3d3d9ee..27369f12 100644 --- a/src/invidious/channels/channels.cr +++ b/src/invidious/channels/channels.cr @@ -180,11 +180,16 @@ def fetch_channel(ucid, pull_all_videos : Bool) LOGGER.trace("fetch_channel: #{ucid} : author = #{author}, auto_generated = #{auto_generated}") - page = 1 + channel = InvidiousChannel.new({ + id: ucid, + author: author, + updated: Time.utc, + deleted: false, + subscribed: nil, + }) LOGGER.trace("fetch_channel: #{ucid} : Downloading channel videos page") - initial_data = get_channel_videos_response(ucid, page, auto_generated: auto_generated) - videos = extract_videos(initial_data, author, ucid) + videos, continuation = IV::Channel::Tabs.get_videos(channel) LOGGER.trace("fetch_channel: #{ucid} : Extracting videos from channel RSS feed") rss.xpath_nodes("//feed/entry").each do |entry| @@ -197,7 +202,9 @@ def fetch_channel(ucid, pull_all_videos : Bool) views = entry.xpath_node("group/community/statistics").try &.["views"]?.try &.to_i64? views ||= 0_i64 - channel_video = videos.select { |video| video.id == video_id }[0]? + channel_video = videos + .select(SearchVideo) + .select(&.id.== video_id)[0]? length_seconds = channel_video.try &.length_seconds length_seconds ||= 0 @@ -235,30 +242,25 @@ def fetch_channel(ucid, pull_all_videos : Bool) end if pull_all_videos - page += 1 - - ids = [] of String - loop do - initial_data = get_channel_videos_response(ucid, page, auto_generated: auto_generated) - videos = extract_videos(initial_data, author, ucid) - - count = videos.size - videos = videos.map { |video| ChannelVideo.new({ - id: video.id, - title: video.title, - published: video.published, - updated: Time.utc, - ucid: video.ucid, - author: video.author, - length_seconds: video.length_seconds, - live_now: video.live_now, - premiere_timestamp: video.premiere_timestamp, - views: video.views, - }) } - - videos.each do |video| - ids << video.id + # Keep fetching videos using the continuation token retrieved earlier + videos, continuation = IV::Channel::Tabs.get_videos(channel, continuation: continuation) + + count = 0 + videos.select(SearchVideo).each do |video| + count += 1 + video = ChannelVideo.new({ + id: video.id, + title: video.title, + published: video.published, + updated: Time.utc, + ucid: video.ucid, + author: video.author, + length_seconds: video.length_seconds, + live_now: video.live_now, + premiere_timestamp: video.premiere_timestamp, + views: video.views, + }) # We are notified of Red videos elsewhere (PubSub), which includes a correct published date, # so since they don't provide a published date here we can safely ignore them. @@ -269,17 +271,10 @@ def fetch_channel(ucid, pull_all_videos : Bool) end break if count < 25 - page += 1 + sleep 500.milliseconds end end - channel = InvidiousChannel.new({ - id: ucid, - author: author, - updated: Time.utc, - deleted: false, - subscribed: nil, - }) - + channel.updated = Time.utc return channel end diff --git a/src/invidious/channels/playlists.cr b/src/invidious/channels/playlists.cr index 8fdac3a7..772eecb9 100644 --- a/src/invidious/channels/playlists.cr +++ b/src/invidious/channels/playlists.cr @@ -24,7 +24,7 @@ def fetch_channel_playlists(ucid, author, continuation, sort_by) initial_data = YoutubeAPI.browse(ucid, params: params || "") end - return extract_items(initial_data, ucid, author) + return extract_items(initial_data, author, ucid) end # ## NOTE: DEPRECATED diff --git a/src/invidious/channels/videos.cr b/src/invidious/channels/videos.cr index b495e597..23ad4e02 100644 --- a/src/invidious/channels/videos.cr +++ b/src/invidious/channels/videos.cr @@ -16,6 +16,14 @@ def produce_channel_videos_continuation(ucid, page = 1, auto_generated = nil, so .try { |i| Base64.urlsafe_encode(i) } .try { |i| URI.encode_www_form(i) } + sort_by_numerical = + case sort_by + when "newest" then 1_i64 + when "popular" then 2_i64 + when "oldest" then 3_i64 # Broken as of 10/2022 :c + else 1_i64 # Fallback to "newest" + end + object_inner_1 = { "110:embedded" => { "3:embedded" => { @@ -24,7 +32,7 @@ def produce_channel_videos_continuation(ucid, page = 1, auto_generated = nil, so "1:string" => object_inner_2_encoded, "2:string" => "00000000-0000-0000-0000-000000000000", }, - "3:varint" => 1_i64, + "3:varint" => sort_by_numerical, }, }, }, @@ -52,34 +60,66 @@ def produce_channel_videos_continuation(ucid, page = 1, auto_generated = nil, so return continuation end -def get_channel_videos_response(ucid, page = 1, auto_generated = nil, sort_by = "newest") - continuation = produce_channel_videos_continuation(ucid, page, - auto_generated: auto_generated, sort_by: sort_by, v2: true) - - return YoutubeAPI.browse(continuation) +# Used in bypass_captcha_job.cr +def produce_channel_videos_url(ucid, page = 1, auto_generated = nil, sort_by = "newest", v2 = false) + continuation = produce_channel_videos_continuation(ucid, page, auto_generated, sort_by, v2) + return "/browse_ajax?continuation=#{continuation}&gl=US&hl=en" end -def get_60_videos(ucid, author, page, auto_generated, sort_by = "newest") - videos = [] of SearchVideo +module Invidious::Channel::Tabs + extend self - # 2.times do |i| - # initial_data = get_channel_videos_response(ucid, page * 2 + (i - 1), auto_generated: auto_generated, sort_by: sort_by) - initial_data = get_channel_videos_response(ucid, 1, auto_generated: auto_generated, sort_by: sort_by) - videos = extract_videos(initial_data, author, ucid) - # end + # ------------------- + # Regular videos + # ------------------- - return videos.size, videos -end + def make_initial_video_ctoken(ucid, sort_by) : String + return produce_channel_videos_continuation(ucid, sort_by: sort_by) + end -def get_latest_videos(ucid) - initial_data = get_channel_videos_response(ucid) - author = initial_data["metadata"]?.try &.["channelMetadataRenderer"]?.try &.["title"]?.try &.as_s + # Wrapper for AboutChannel, as we still need to call get_videos with + # an author name and ucid directly (e.g in RSS feeds). + # TODO: figure out how to get rid of that + def get_videos(channel : AboutChannel, *, continuation : String? = nil, sort_by = "newest") + return get_videos( + channel.author, channel.ucid, + continuation: continuation, sort_by: sort_by + ) + end - return extract_videos(initial_data, author, ucid) -end + # Wrapper for InvidiousChannel, as we still need to call get_videos with + # an author name and ucid directly (e.g in RSS feeds). + # TODO: figure out how to get rid of that + def get_videos(channel : InvidiousChannel, *, continuation : String? = nil, sort_by = "newest") + return get_videos( + channel.author, channel.id, + continuation: continuation, sort_by: sort_by + ) + end -# Used in bypass_captcha_job.cr -def produce_channel_videos_url(ucid, page = 1, auto_generated = nil, sort_by = "newest", v2 = false) - continuation = produce_channel_videos_continuation(ucid, page, auto_generated, sort_by, v2) - return "/browse_ajax?continuation=#{continuation}&gl=US&hl=en" + def get_videos(author : String, ucid : String, *, continuation : String? = nil, sort_by = "newest") + continuation ||= make_initial_video_ctoken(ucid, sort_by) + initial_data = YoutubeAPI.browse(continuation: continuation) + + return extract_items(initial_data, author, ucid) + end + + def get_60_videos(channel : AboutChannel, *, continuation : String? = nil, sort_by = "newest") + if continuation.nil? + # Fetch the first "page" of video + items, next_continuation = get_videos(channel, sort_by: sort_by) + else + # Fetch a "page" of videos using the given continuation token + items, next_continuation = get_videos(channel, continuation: continuation) + end + + # If there is more to load, then load a second "page" + # and replace the previous continuation token + if !next_continuation.nil? + items_2, next_continuation = get_videos(channel, continuation: next_continuation) + items.concat items_2 + end + + return items, next_continuation + end end diff --git a/src/invidious/routes/api/v1/channels.cr b/src/invidious/routes/api/v1/channels.cr index 6b81c546..72d9ae5f 100644 --- a/src/invidious/routes/api/v1/channels.cr +++ b/src/invidious/routes/api/v1/channels.cr @@ -5,8 +5,6 @@ module Invidious::Routes::API::V1::Channels env.response.content_type = "application/json" ucid = env.params.url["ucid"] - sort_by = env.params.query["sort_by"]?.try &.downcase - sort_by ||= "newest" begin channel = get_about_info(ucid, locale) @@ -19,16 +17,13 @@ module Invidious::Routes::API::V1::Channels return error_json(500, ex) end - page = 1 - if channel.auto_generated - videos = [] of SearchVideo - count = 0 - else - begin - count, videos = get_60_videos(channel.ucid, channel.author, page, channel.auto_generated, sort_by) - rescue ex - return error_json(500, ex) - end + # Retrieve "sort by" setting from URL parameters + sort_by = env.params.query["sort_by"]?.try &.downcase || "newest" + + begin + videos, _ = Channel::Tabs.get_videos(channel, sort_by: sort_by) + rescue ex + return error_json(500, ex) end JSON.build do |json| @@ -134,25 +129,11 @@ module Invidious::Routes::API::V1::Channels end def self.latest(env) - locale = env.get("preferences").as(Preferences).locale - - env.response.content_type = "application/json" - - ucid = env.params.url["ucid"] - - begin - videos = get_latest_videos(ucid) - rescue ex - return error_json(500, ex) - end + # Remove parameters that could affect this endpoint's behavior + env.params.query.delete("sort_by") if env.params.query.has_key?("sort_by") + env.params.query.delete("continuation") if env.params.query.has_key?("continuation") - JSON.build do |json| - json.array do - videos.each do |video| - video.to_json(locale, json) - end - end - end + return self.videos(env) end def self.videos(env) @@ -161,11 +142,6 @@ module Invidious::Routes::API::V1::Channels env.response.content_type = "application/json" ucid = env.params.url["ucid"] - page = env.params.query["page"]?.try &.to_i? - page ||= 1 - sort_by = env.params.query["sort"]?.try &.downcase - sort_by ||= env.params.query["sort_by"]?.try &.downcase - sort_by ||= "newest" begin channel = get_about_info(ucid, locale) @@ -178,17 +154,27 @@ module Invidious::Routes::API::V1::Channels return error_json(500, ex) end + # Retrieve some URL parameters + sort_by = env.params.query["sort_by"]?.try &.downcase || "newest" + continuation = env.params.query["continuation"]? + begin - count, videos = get_60_videos(channel.ucid, channel.author, page, channel.auto_generated, sort_by) + videos, next_continuation = Channel::Tabs.get_60_videos( + channel, continuation: continuation, sort_by: sort_by + ) rescue ex return error_json(500, ex) end - JSON.build do |json| - json.array do - videos.each do |video| - video.to_json(locale, json) + return JSON.build do |json| + json.object do + json.field "videos" do + json.array do + videos.each &.to_json(locale, json) + end end + + json.field "continuation", next_continuation if next_continuation end end end diff --git a/src/invidious/routes/channels.cr b/src/invidious/routes/channels.cr index f26f29f5..2773deb7 100644 --- a/src/invidious/routes/channels.cr +++ b/src/invidious/routes/channels.cr @@ -32,7 +32,9 @@ module Invidious::Routes::Channels sort_options = {"newest", "oldest", "popular"} sort_by ||= "newest" - count, items = get_60_videos(channel.ucid, channel.author, 1, channel.auto_generated, sort_by) + items, continuation = Channel::Tabs.get_60_videos( + channel, continuation: continuation, sort_by: sort_by + ) end templated "channel"