From f267394bbe2bd972e0157913ae253bfaa79ead0f Mon Sep 17 00:00:00 2001
From: Samantaz Fox
Date: Mon, 31 Oct 2022 20:40:43 +0100
Subject: [PATCH 1/3] extractors: Add support for richGridRenderer

---
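Notes: the 2022 channel layout replaces the old sectionListRenderer /
gridRenderer containers with a richGridRenderer whose "contents" array
holds the items directly. The old unpacking loop moves into a new
unpack_section_list() helper so the same extractor can accept either
container. Below is a rough, standalone sketch of the resulting
fallback, against a made-up, heavily trimmed InnerTube response
(illustrative only, not part of the patch):

    require "json"

    # Made-up, heavily trimmed InnerTube tab content.
    content = JSON.parse(<<-JSON)
      {
        "richGridRenderer": {
          "contents": [
            {"richItemRenderer": {"content": {"videoRenderer": {"videoId": "jNQXAC9IVRw"}}}}
          ]
        }
      }
      JSON

    raw_items = [] of JSON::Any

    # Same fallback as the reworked tab extractor: old layout first,
    # then the 2022 richGridRenderer layout.
    if section_list_contents = content.dig?("sectionListRenderer", "contents")
      raw_items = section_list_contents.as_a # unpack_section_list() in the real code
    elsif rich_grid_contents = content.dig?("richGridRenderer", "contents")
      raw_items = rich_grid_contents.as_a
    end

    puts raw_items.size # => 1
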
 src/invidious/yt_backend/extractors.cr | 49 +++++++++++++++-----------
 1 file changed, 28 insertions(+), 21 deletions(-)

diff --git a/src/invidious/yt_backend/extractors.cr b/src/invidious/yt_backend/extractors.cr
index dc65cc52..18b48152 100644
--- a/src/invidious/yt_backend/extractors.cr
+++ b/src/invidious/yt_backend/extractors.cr
@@ -436,21 +436,31 @@ private module Extractors
     content = extract_selected_tab(target["tabs"])["content"]
 
     if section_list_contents = content.dig?("sectionListRenderer", "contents")
-      section_list_contents.as_a.each do |renderer_container|
-        renderer_container_contents = renderer_container["itemSectionRenderer"]["contents"][0]
-
-        # Category extraction
-        if items_container = renderer_container_contents["shelfRenderer"]?
-          raw_items << renderer_container_contents
-          next
-        elsif items_container = renderer_container_contents["gridRenderer"]?
-        else
-          items_container = renderer_container_contents
-        end
+      raw_items = unpack_section_list(section_list_contents)
+    elsif rich_grid_contents = content.dig?("richGridRenderer", "contents")
+      raw_items = rich_grid_contents.as_a
+    end
 
-        items_container["items"]?.try &.as_a.each do |item|
-          raw_items << item
-        end
+    return raw_items
+  end
+
+  private def self.unpack_section_list(contents)
+    raw_items = [] of JSON::Any
+
+    contents.as_a.each do |renderer_container|
+      renderer_container_contents = renderer_container["itemSectionRenderer"]["contents"][0]
+
+      # Category extraction
+      if items_container = renderer_container_contents["shelfRenderer"]?
+        raw_items << renderer_container_contents
+        next
+      elsif items_container = renderer_container_contents["gridRenderer"]?
+      else
+        items_container = renderer_container_contents
+      end
+
+      items_container["items"]?.try &.as_a.each do |item|
+        raw_items << item
       end
     end
 
@@ -525,14 +535,11 @@ private module Extractors
   end
 
   private def self.extract(target)
-    raw_items = [] of JSON::Any
-
-    if content = target["gridContinuation"]?
-      raw_items = content["items"].as_a
-    elsif content = target["continuationItems"]?
-      raw_items = content.as_a
-    end
+    content = target["continuationItems"]?
+    content ||= target.dig?("gridContinuation", "items")
+    content ||= target.dig?("richGridContinuation", "contents")
 
-    return raw_items
+    return content.nil? ? [] of JSON::Any : content.as_a
   end
 
   def self.extractor_name

From 46a63e6150f83bca90563068ebb12ecdf5e0d3c6 Mon Sep 17 00:00:00 2001
From: Samantaz Fox
Date: Mon, 31 Oct 2022 21:30:10 +0100
Subject: [PATCH 2/3] extractors: Add support for reelItemRenderer

---
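Notes: reelItemRenderer is used by the "shorts" tab of the 2022 channel
layout. It does not appear to carry the usual lengthSeconds field; the
only place the duration shows up is the accessibility label, hence the
regex in the parser below. A standalone sketch of that parsing (the
label is a made-up example of the strings YouTube emits):

    a11y_data = "Example short - 1 minute 23 seconds - play video"

    regex_match = /- (?<min>\d+ minutes? )?(?<sec>\d+ seconds?)+ -/.match(a11y_data)

    # The "min" group is optional (shorts under a minute omit it), so the
    # nilable []? accessor avoids a KeyError on unmatched groups.
    minutes = regex_match.try &.["min"]?.try &.to_i(strict: false) || 0
    seconds = regex_match.try &.["sec"]?.try &.to_i(strict: false) || 0

    puts minutes * 60 + seconds # => 83
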
 src/invidious/yt_backend/extractors.cr | 87 ++++++++++++++++++++++++++++++++-
 1 file changed, 86 insertions(+), 1 deletion(-)

diff --git a/src/invidious/yt_backend/extractors.cr b/src/invidious/yt_backend/extractors.cr
index 18b48152..8112930d 100644
--- a/src/invidious/yt_backend/extractors.cr
+++ b/src/invidious/yt_backend/extractors.cr
@@ -17,6 +17,7 @@ private ITEM_PARSERS = {
   Parsers::PlaylistRendererParser,
   Parsers::CategoryRendererParser,
   Parsers::RichItemRendererParser,
+  Parsers::ReelItemRendererParser,
 }
 
 record AuthorFallback, name : String, id : String
@@ -369,7 +370,7 @@ private module Parsers
   end
 
   # Parses an InnerTube richItemRenderer into a SearchVideo.
-  # Returns nil when the given object isn't a shelfRenderer
+  # Returns nil when the given object isn't a RichItemRenderer
   #
   # A richItemRenderer seems to be a simple wrapper for a videoRenderer, used
   # by the result page for hashtags. It is located inside a continuationItems
@@ -390,6 +391,90 @@ private module Parsers
       return {{@type.name}}
     end
   end
+
+  # Parses an InnerTube reelItemRenderer into a SearchVideo.
+  # Returns nil when the given object isn't a reelItemRenderer
+  #
+  # reelItemRenderer items are used in the new (2022) channel layout,
+  # in the "shorts" tab.
+  #
+  module ReelItemRendererParser
+    def self.process(item : JSON::Any, author_fallback : AuthorFallback)
+      if item_contents = item["reelItemRenderer"]?
+        return self.parse(item_contents, author_fallback)
+      end
+    end
+
+    private def self.parse(item_contents, author_fallback)
+      video_id = item_contents["videoId"].as_s
+
+      video_details_container = item_contents.dig(
+        "navigationEndpoint", "reelWatchEndpoint",
+        "overlay", "reelPlayerOverlayRenderer",
+        "reelPlayerHeaderSupportedRenderers",
+        "reelPlayerHeaderRenderer"
+      )
+
+      # Author infos
+
+      author = video_details_container
+        .dig?("channelTitleText", "runs", 0, "text")
+        .try &.as_s || author_fallback.name
+
+      ucid = video_details_container
+        .dig?("channelNavigationEndpoint", "browseEndpoint", "browseId")
+        .try &.as_s || author_fallback.id
+
+      # Title & publication date
+
+      title = video_details_container.dig?("reelTitleText")
+        .try { |t| extract_text(t) } || ""
+
+      published = video_details_container
+        .dig?("timestampText", "simpleText")
+        .try { |t| decode_date(t.as_s) } || Time.utc
+
+      # View count
+
+      view_count_text = video_details_container.dig?("viewCountText", "simpleText")
+      view_count_text ||= video_details_container
+        .dig?("viewCountText", "accessibility", "accessibilityData", "label")
+
+      view_count = view_count_text.try &.as_s.gsub(/\D+/, "").to_i64? || 0_i64
+
+      # Duration
+
+      a11y_data = item_contents
+        .dig?("accessibility", "accessibilityData", "label")
+        .try &.as_s || ""
+
+      regex_match = /- (?<min>\d+ minutes? )?(?<sec>\d+ seconds?)+ -/.match(a11y_data)
+
+      minutes = regex_match.try &.["min"]?.try &.to_i(strict: false) || 0
+      seconds = regex_match.try &.["sec"]?.try &.to_i(strict: false) || 0
+
+      duration = (minutes*60 + seconds)
+
+      SearchVideo.new({
+        title:              title,
+        id:                 video_id,
+        author:             author,
+        ucid:               ucid,
+        published:          published,
+        views:              view_count,
+        description_html:   "",
+        length_seconds:     duration,
+        live_now:           false,
+        premium:            false,
+        premiere_timestamp: Time.unix(0),
+        author_verified:    false,
+      })
+    end
+
+    def self.parser_name
+      return {{@type.name}}
+    end
+  end
 end
 
 # The following are the extractors for extracting an array of items from

From 9da1827e957f9a8c4a370968b85007ad0f85c196 Mon Sep 17 00:00:00 2001
From: Samantaz Fox
Date: Wed, 2 Nov 2022 00:58:33 +0100
Subject: [PATCH 3/3] Dirty fix to get back the channel videos

---
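Notes: the new browse continuation is a protobuf message nested inside
another one: the inner message (paging parameters) is serialized,
base64- and URL-encoded, then embedded as a plain string field of the
outer message, which is encoded the same way. A toy sketch of that
two-stage encoding (field numbers and values are arbitrary, for
illustration only; requires the protodec shard the patch already uses):

    require "base64"
    require "uri"
    require "protodec/utils"

    inner = {
      "1:varint" => 30_i64, # e.g. an item offset
    }

    inner_encoded = inner
      .try { |i| Protodec::Any.cast_json(i) }
      .try { |i| Protodec::Any.from_json(i) }
      .try { |i| Base64.urlsafe_encode(i) }
      .try { |i| URI.encode_www_form(i) }

    outer = {
      "80226972:embedded" => {
        "2:string" => "UCXuqSBlHAE6Xw-yeJA0Tunw",
        "3:string" => inner_encoded, # inner message rides along as an opaque string
      },
    }

    token = outer
      .try { |i| Protodec::Any.cast_json(i) }
      .try { |i| Protodec::Any.from_json(i) }
      .try { |i| Base64.urlsafe_encode(i) }
      .try { |i| URI.encode_www_form(i) }

    puts token
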
 spec/invidious/helpers_spec.cr   | 10 ++--
 src/invidious/channels/videos.cr | 90 +++++++++++++++++----------------
 2 files changed, 48 insertions(+), 52 deletions(-)

diff --git a/spec/invidious/helpers_spec.cr b/spec/invidious/helpers_spec.cr
index 5ecebef3..ab361770 100644
--- a/spec/invidious/helpers_spec.cr
+++ b/spec/invidious/helpers_spec.cr
@@ -5,13 +5,13 @@ CONFIG = Config.from_yaml(File.open("config/config.example.yml"))
 Spectator.describe "Helper" do
   describe "#produce_channel_videos_url" do
     it "correctly produces url for requesting page `x` of a channel's videos" do
-      expect(produce_channel_videos_url(ucid: "UCXuqSBlHAE6Xw-yeJA0Tunw")).to eq("/browse_ajax?continuation=4qmFsgI8EhhVQ1h1cVNCbEhBRTZYdy15ZUpBMFR1bncaIEVnWjJhV1JsYjNNd0FqZ0JZQUZxQUxnQkFDQUFlZ0V4&gl=US&hl=en")
+      # expect(produce_channel_videos_url(ucid: "UCXuqSBlHAE6Xw-yeJA0Tunw")).to eq("/browse_ajax?continuation=4qmFsgI8EhhVQ1h1cVNCbEhBRTZYdy15ZUpBMFR1bncaIEVnWjJhV1JsYjNNd0FqZ0JZQUZxQUxnQkFDQUFlZ0V4&gl=US&hl=en")
+      #
+      # expect(produce_channel_videos_url(ucid: "UCXuqSBlHAE6Xw-yeJA0Tunw", sort_by: "popular")).to eq("/browse_ajax?continuation=4qmFsgJAEhhVQ1h1cVNCbEhBRTZYdy15ZUpBMFR1bncaJEVnWjJhV1JsYjNNd0FqZ0JZQUZxQUxnQkFDQUFlZ0V4R0FFPQ%3D%3D&gl=US&hl=en")
 
-      expect(produce_channel_videos_url(ucid: "UCXuqSBlHAE6Xw-yeJA0Tunw", sort_by: "popular")).to eq("/browse_ajax?continuation=4qmFsgJAEhhVQ1h1cVNCbEhBRTZYdy15ZUpBMFR1bncaJEVnWjJhV1JsYjNNd0FqZ0JZQUZxQUxnQkFDQUFlZ0V4R0FFPQ%3D%3D&gl=US&hl=en")
+      # expect(produce_channel_videos_url(ucid: "UCXuqSBlHAE6Xw-yeJA0Tunw", page: 20)).to eq("/browse_ajax?continuation=4qmFsgJAEhhVQ1h1cVNCbEhBRTZYdy15ZUpBMFR1bncaJEVnWjJhV1JsYjNNd0FqZ0JZQUZxQUxnQkFDQUFlZ0l5TUE9PQ%3D%3D&gl=US&hl=en")
 
-      expect(produce_channel_videos_url(ucid: "UCXuqSBlHAE6Xw-yeJA0Tunw", page: 20)).to eq("/browse_ajax?continuation=4qmFsgJAEhhVQ1h1cVNCbEhBRTZYdy15ZUpBMFR1bncaJEVnWjJhV1JsYjNNd0FqZ0JZQUZxQUxnQkFDQUFlZ0l5TUE9PQ%3D%3D&gl=US&hl=en")
-
-      expect(produce_channel_videos_url(ucid: "UC-9-kyTW8ZkZNDHQJ6FgpwQ", page: 20, sort_by: "popular")).to eq("/browse_ajax?continuation=4qmFsgJAEhhVQy05LWt5VFc4WmtaTkRIUUo2Rmdwd1EaJEVnWjJhV1JsYjNNd0FqZ0JZQUZxQUxnQkFDQUFlZ0l5TUJnQg%3D%3D&gl=US&hl=en")
+      # expect(produce_channel_videos_url(ucid: "UC-9-kyTW8ZkZNDHQJ6FgpwQ", page: 20, sort_by: "popular")).to eq("/browse_ajax?continuation=4qmFsgJAEhhVQy05LWt5VFc4WmtaTkRIUUo2Rmdwd1EaJEVnWjJhV1JsYjNNd0FqZ0JZQUZxQUxnQkFDQUFlZ0l5TUJnQg%3D%3D&gl=US&hl=en")
     end
   end

diff --git a/src/invidious/channels/videos.cr b/src/invidious/channels/videos.cr
index 48453bb7..b495e597 100644
--- a/src/invidious/channels/videos.cr
+++ b/src/invidious/channels/videos.cr
@@ -1,53 +1,48 @@
 def produce_channel_videos_continuation(ucid, page = 1,
                                         auto_generated = nil, sort_by = "newest", v2 = false)
-  object = {
-    "80226972:embedded" => {
-      "2:string" => ucid,
-      "3:base64" => {
-        "2:string" => "videos",
-        "6:varint" => 2_i64,
-        "7:varint" => 1_i64,
-        "12:varint" => 1_i64,
-        "13:string" => "",
-        "23:varint" => 0_i64,
-      },
+  object_inner_2 = {
+    "2:0:embedded" => {
+      "1:0:varint" => 0_i64,
     },
+    "5:varint" => 50_i64,
+    "6:varint" => 1_i64,
+    "7:varint" => (page * 30).to_i64,
+    "9:varint" => 1_i64,
+    "10:varint" => 0_i64,
   }
 
-  if !v2
-    if auto_generated
-      seed = Time.unix(1525757349)
-      until seed >= Time.utc
-        seed += 1.month
-      end
-      timestamp = seed - (page - 1).months
-
-      object["80226972:embedded"]["3:base64"].as(Hash)["4:varint"] = 0x36_i64
-      object["80226972:embedded"]["3:base64"].as(Hash)["15:string"] = "#{timestamp.to_unix}"
-    else
-      object["80226972:embedded"]["3:base64"].as(Hash)["4:varint"] = 0_i64
-      object["80226972:embedded"]["3:base64"].as(Hash)["15:string"] = "#{page}"
-    end
-  else
-    object["80226972:embedded"]["3:base64"].as(Hash)["4:varint"] = 0_i64
+  object_inner_2_encoded = object_inner_2
+    .try { |i| Protodec::Any.cast_json(i) }
+    .try { |i| Protodec::Any.from_json(i) }
+    .try { |i| Base64.urlsafe_encode(i) }
+    .try { |i| URI.encode_www_form(i) }
 
-    object["80226972:embedded"]["3:base64"].as(Hash)["61:string"] = Base64.urlsafe_encode(Protodec::Any.from_json(Protodec::Any.cast_json({
-      "1:string" => Base64.urlsafe_encode(Protodec::Any.from_json(Protodec::Any.cast_json({
-        "1:varint" => 30_i64 * (page - 1),
-      }))),
-    })))
-  end
+  object_inner_1 = {
+    "110:embedded" => {
+      "3:embedded" => {
+        "15:embedded" => {
+          "1:embedded" => {
+            "1:string" => object_inner_2_encoded,
+            "2:string" => "00000000-0000-0000-0000-000000000000",
+          },
+          "3:varint" => 1_i64,
+        },
+      },
+    },
+  }
 
-  case sort_by
-  when "newest"
-  when "popular"
-    object["80226972:embedded"]["3:base64"].as(Hash)["3:varint"] = 0x01_i64
-  when "oldest"
-    object["80226972:embedded"]["3:base64"].as(Hash)["3:varint"] = 0x02_i64
-  else nil # Ignore
-  end
+  object_inner_1_encoded = object_inner_1
+    .try { |i| Protodec::Any.cast_json(i) }
+    .try { |i| Protodec::Any.from_json(i) }
+    .try { |i| Base64.urlsafe_encode(i) }
+    .try { |i| URI.encode_www_form(i) }
 
-  object["80226972:embedded"]["3:string"] = Base64.urlsafe_encode(Protodec::Any.from_json(Protodec::Any.cast_json(object["80226972:embedded"]["3:base64"])))
-  object["80226972:embedded"].delete("3:base64")
+  object = {
+    "80226972:embedded" => {
+      "2:string"  => ucid,
+      "3:string"  => object_inner_1_encoded,
+      "35:string" => "browse-feed#{ucid}videos102",
+    },
+  }
 
   continuation = object.try { |i| Protodec::Any.cast_json(i) }
     .try { |i| Protodec::Any.from_json(i) }
@@ -67,10 +62,11 @@ end
 def get_60_videos(ucid, author, page, auto_generated, sort_by = "newest")
   videos = [] of SearchVideo
 
-  2.times do |i|
-    initial_data = get_channel_videos_response(ucid, page * 2 + (i - 1), auto_generated: auto_generated, sort_by: sort_by)
-    videos.concat extract_videos(initial_data, author, ucid)
-  end
+  # 2.times do |i|
+  #   initial_data = get_channel_videos_response(ucid, page * 2 + (i - 1), auto_generated: auto_generated, sort_by: sort_by)
+  initial_data = get_channel_videos_response(ucid, 1, auto_generated: auto_generated, sort_by: sort_by)
+  videos = extract_videos(initial_data, author, ucid)
+  # end
 
   return videos.size, videos
 end
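
Note on the specs: the /browse_ajax continuations asserted above no
longer exist on YouTube's side, which is why the expectations are
commented out rather than updated. Until fixture data for the new token
format is available, about the only property a spec can pin down is the
shape of the token. A possible (untested) replacement, assuming
Spectator's match matcher:

    it "produces a URL-safe continuation token" do
      token = produce_channel_videos_continuation("UCXuqSBlHAE6Xw-yeJA0Tunw", 1)
      expect(token).to match(/\A[A-Za-z0-9_%-]+\z/)
    end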