extractors: Add continuation token parser

2 years ago · ce7db8d2cb
parent bdc51cd20f
commit ce7db8d2cb
7 changed files with 63 additions and 62 deletions
--- a/spec/invidious/hashtag_spec.cr
+++ b/spec/invidious/hashtag_spec.cr
@ -4,7 +4,7 @@ Spectator.describe Invidious::Hashtag do
  it "parses richItemRenderer containers (test 1)" do
    # Enable mock
    test_content = load_mock("hashtag/martingarrix_page1")
-    videos = extract_items(test_content)
+    videos, _ = extract_items(test_content)

    expect(typeof(videos)).to eq(Array(SearchItem))
    expect(videos.size).to eq(60)
@ -57,7 +57,7 @@ Spectator.describe Invidious::Hashtag do
  it "parses richItemRenderer containers (test 2)" do
    # Enable mock
    test_content = load_mock("hashtag/martingarrix_page2")
-    videos = extract_items(test_content)
+    videos, _ = extract_items(test_content)

    expect(typeof(videos)).to eq(Array(SearchItem))
    expect(videos.size).to eq(60)
--- a/src/invidious/channels/playlists.cr
+++ b/src/invidious/channels/playlists.cr
@ -1,18 +1,7 @@
 def fetch_channel_playlists(ucid, author, continuation, sort_by)
  if continuation
    response_json = YoutubeAPI.browse(continuation)
-    continuation_items = response_json["onResponseReceivedActions"]?
-      .try &.[0]["appendContinuationItemsAction"]["continuationItems"]
-
-    return [] of SearchItem, nil if !continuation_items
-
-    items = [] of SearchItem
-    continuation_items.as_a.select(&.as_h.has_key?("gridPlaylistRenderer")).each { |item|
-      parse_item(item, author, ucid).try { |t| items << t }
-    }
-
-    continuation = continuation_items.as_a.last["continuationItemRenderer"]?
-      .try &.["continuationEndpoint"]["continuationCommand"]["token"].as_s
+    items, continuation = extract_items(response_json, author, ucid)
  else
    url = "/channel/#{ucid}/playlists?flow=list&view=1"

@ -30,8 +19,7 @@ def fetch_channel_playlists(ucid, author, continuation, sort_by)
    initial_data = extract_initial_data(response.body)
    return [] of SearchItem, nil if !initial_data

-    items = extract_items(initial_data, author, ucid)
-    continuation = response.body.match(/"token":"(?<continuation>[^"]+)"/).try &.["continuation"]?
+    items, continuation = extract_items(initial_data, author, ucid)
  end

  return items, continuation
--- a/src/invidious/hashtag.cr
+++ b/src/invidious/hashtag.cr
@ -8,7 +8,8 @@ module Invidious::Hashtag
    client_config = YoutubeAPI::ClientConfig.new(region: region)
    response = YoutubeAPI.browse(continuation: ctoken, client_config: client_config)

-    return extract_items(response)
+    items, _ = extract_items(response)
+    return items
  end

  def generate_continuation(hashtag : String, cursor : Int)
--- a/src/invidious/helpers/serialized_yt_data.cr
+++ b/src/invidious/helpers/serialized_yt_data.cr
@ -265,4 +265,11 @@ class Category
  end
 end

+struct Continuation
+  getter token
+
+  def initialize(@token : String)
+  end
+end
+
 alias SearchItem = SearchVideo | SearchChannel | SearchPlaylist | Category
--- a/src/invidious/search/processors.cr
+++ b/src/invidious/search/processors.cr
@ -9,7 +9,8 @@ module Invidious::Search
      client_config = YoutubeAPI::ClientConfig.new(region: query.region)
      initial_data = YoutubeAPI.search(query.text, search_params, client_config: client_config)

-      return extract_items(initial_data)
+      items, _ = extract_items(initial_data)
+      return items
    end

    # Search a youtube channel
@ -30,16 +31,7 @@ module Invidious::Search
      continuation = produce_channel_search_continuation(ucid, query.text, query.page)
      response_json = YoutubeAPI.browse(continuation)

-      continuation_items = response_json["onResponseReceivedActions"]?
-        .try &.[0]["appendContinuationItemsAction"]["continuationItems"]
-
-      return [] of SearchItem if !continuation_items
-
-      items = [] of SearchItem
-      continuation_items.as_a.select(&.as_h.has_key?("itemSectionRenderer")).each do |item|
-        parse_item(item["itemSectionRenderer"]["contents"].as_a[0]).try { |t| items << t }
-      end
-
+      items, _ = extract_items(response_json, "", ucid)
      return items
    end

--- a/src/invidious/yt_backend/extractors.cr
+++ b/src/invidious/yt_backend/extractors.cr
@ -7,7 +7,7 @@ require "../helpers/serialized_yt_data"
 private ITEM_CONTAINER_EXTRACTOR = {
  Extractors::YouTubeTabs,
  Extractors::SearchResults,
-  Extractors::Continuation,
+  Extractors::ContinuationContent,
 }

 private ITEM_PARSERS = {
@ -18,6 +18,7 @@ private ITEM_PARSERS = {
  Parsers::CategoryRendererParser,
  Parsers::RichItemRendererParser,
  Parsers::ReelItemRendererParser,
+  Parsers::ContinuationItemRendererParser,
 }

 private alias InitialData = Hash(String, JSON::Any)
@ -347,14 +348,9 @@ private module Parsers
        content_container = item_contents["contents"]
      end

-      raw_contents = content_container["items"]?.try &.as_a
-      if !raw_contents.nil?
-        raw_contents.each do |item|
-          result = parse_item(item)
-          if !result.nil?
-            contents << result
-          end
-        end
+      content_container["items"]?.try &.as_a.each do |item|
+        result = parse_item(item, author_fallback.name, author_fallback.id)
+        contents << result if result.is_a?(SearchItem)
      end

      Category.new({
@ -477,6 +473,35 @@ private module Parsers
      return {{@type.name}}
    end
  end
+
+  # Parses an InnerTube continuationItemRenderer into a Continuation.
+  # Returns nil when the given object isn't a continuationItemRenderer.
+  #
+  # continuationItemRenderer contains various metadata used to load more
+  # content (i.e. when the user scrolls down). The interesting bit is the
+  # protobuf object known as the "continuation token". Previously, those
+  # were generated from scratch, but recent (as of 11/2022) Youtube changes
+  # are forcing us to extract them from replies.
+  #
+  module ContinuationItemRendererParser
+    def self.process(item : JSON::Any, author_fallback : AuthorFallback)
+      if item_contents = item["continuationItemRenderer"]?
+        return self.parse(item_contents)
+      end
+    end
+
+    private def self.parse(item_contents)
+      token = item_contents
+        .dig?("continuationEndpoint", "continuationCommand", "token")
+        .try &.as_s
+
+      return Continuation.new(token) if token
+    end
+
+    def self.parser_name
+      return {{@type.name}}
+    end
+  end
 end

 # The following are the extractors for extracting an array of items from
@ -746,13 +771,18 @@ def extract_items(
  initial_data : InitialData,
  author_fallback : String? = nil,
  author_id_fallback : String? = nil
-) : Array(SearchItem)
+) : {Array(SearchItem), String?}
  items = [] of SearchItem
+  continuation = nil

  extract_items(initial_data) do |item|
    parsed = parse_item(item, author_fallback, author_id_fallback)
-    items << parsed if !parsed.nil?
+
+    case parsed
+    when .is_a?(Continuation) then continuation = parsed.token
+    when .is_a?(SearchItem)   then items << parsed
+    end
  end

-  return items
+  return items, continuation
 end
--- a/src/invidious/yt_backend/extractors_utils.cr
+++ b/src/invidious/yt_backend/extractors_utils.cr
@ -68,10 +68,10 @@ rescue ex
  return false
 end

-def extract_videos(initial_data : Hash(String, JSON::Any), author_fallback : String? = nil, author_id_fallback : String? = nil)
-  extracted = extract_items(initial_data, author_fallback, author_id_fallback)
+def extract_videos(initial_data : Hash(String, JSON::Any), author_fallback : String? = nil, author_id_fallback : String? = nil) : Array(SearchVideo)
+  extracted, _ = extract_items(initial_data, author_fallback, author_id_fallback)

-  target = [] of SearchItem
+  target = [] of (SearchItem | Continuation)
  extracted.each do |i|
    if i.is_a?(Category)
      i.contents.each { |cate_i| target << cate_i if !cate_i.is_a? Video }
@ -79,28 +79,11 @@ def extract_videos(initial_data : Hash(String, JSON::Any), author_fallback : Str
      target << i
    end
  end
-  return target.select(SearchVideo).map(&.as(SearchVideo))
+
+  return target.select(SearchVideo)
 end

 def extract_selected_tab(tabs)
  # Extract the selected tab from the array of tabs Youtube returns
  return selected_target = tabs.as_a.select(&.["tabRenderer"]?.try &.["selected"]?.try &.as_bool)[0]["tabRenderer"]
 end
-
-def fetch_continuation_token(items : Array(JSON::Any))
-  # Fetches the continuation token from an array of items
-  return items.last["continuationItemRenderer"]?
-    .try &.["continuationEndpoint"]["continuationCommand"]["token"].as_s
-end
-
-def fetch_continuation_token(initial_data : Hash(String, JSON::Any))
-  # Fetches the continuation token from initial data
-  if initial_data["onResponseReceivedActions"]?
-    continuation_items = initial_data["onResponseReceivedActions"][0]["appendContinuationItemsAction"]["continuationItems"]
-  else
-    tab = extract_selected_tab(initial_data["contents"]["twoColumnBrowseResultsRenderer"]["tabs"])
-    continuation_items = tab["content"]["sectionListRenderer"]["contents"][0]["itemSectionRenderer"]["contents"][0]["gridRenderer"]["items"]
-  end
-
-  return fetch_continuation_token(continuation_items.as_a)
-end