Use new youtube API to fetch channel videos (#1355)

* Use new API to fetch videos from channels

  This mirrors the process used by subscriptions.gir.st. The old API is tried first, and if it fails then the new one is used.

* Use the new API whenever getting videos from a channel

  I created the get_channel_videos_response function because now, instead of just getting a single URL, there are extra steps involved in getting the API response for channel videos, and these steps don't need to be repeated throughout the code.

  The only remaining exception is the bypass_captcha function, which still only makes a request to the old API. I don't know whether this code needs to be updated to use the new API for captcha bypassing to work correctly.

* Correctly determine video length with new API

* Remove unnecessary line
4 years ago · 4a6e920d0e
parent 13f58d602f
commit 4a6e920d0e
2 changed files with 205 additions and 157 deletions
--- a/src/invidious/channels.cr
+++ b/src/invidious/channels.cr
@ -213,8 +213,7 @@ def fetch_channel(ucid, db, pull_all_videos = true, locale = nil)
  page = 1
-  url = produce_channel_videos_url(ucid, page, auto_generated: auto_generated)
+  response = get_channel_videos_response(ucid, page, auto_generated: auto_generated)
  response = YT_POOL.client &.get(url)
  videos = [] of SearchVideo
  begin
@ -291,8 +290,7 @@ def fetch_channel(ucid, db, pull_all_videos = true, locale = nil)
    ids = [] of String
    loop do
-      url = produce_channel_videos_url(ucid, page, auto_generated: auto_generated)
+      response = get_channel_videos_response(ucid, page, auto_generated: auto_generated)
      response = YT_POOL.client &.get(url)
      initial_data = JSON.parse(response.body).as_a.find &.["response"]?
      raise "Could not extract JSON" if !initial_data
      videos = extract_videos(initial_data.as_h, author, ucid)
@ -396,7 +394,7 @@ def fetch_channel_playlists(ucid, author, auto_generated, continuation, sort_by)
  return items, continuation
 end
-def produce_channel_videos_url(ucid, page = 1, auto_generated = nil, sort_by = "newest")
+def produce_channel_videos_url(ucid, page = 1, auto_generated = nil, sort_by = "newest", v2 = false)
  object = {
    "80226972:embedded" => {
      "2:string" => ucid,
@ -411,6 +409,7 @@ def produce_channel_videos_url(ucid, page = 1, auto_generated = nil, sort_by = "
    },
  }
  if !v2
    if auto_generated
      seed = Time.unix(1525757349)
      until seed >= Time.utc
@ -424,6 +423,20 @@ def produce_channel_videos_url(ucid, page = 1, auto_generated = nil, sort_by = "
      object["80226972:embedded"]["3:base64"].as(Hash)["4:varint"] = 0_i64
      object["80226972:embedded"]["3:base64"].as(Hash)["15:string"] = "#{page}"
    end
  else
    object["80226972:embedded"]["3:base64"].as(Hash)["4:varint"] = 0_i64
    object["80226972:embedded"]["3:base64"].as(Hash)["61:string"] = Base64.urlsafe_encode(Protodec::Any.from_json(Protodec::Any.cast_json({
      "1:embedded" => {
        "1:varint" => 6307666885028338688_i64,
        "2:embedded" => {
          "1:string" => Base64.urlsafe_encode(Protodec::Any.from_json(Protodec::Any.cast_json({
            "1:varint" => 30_i64 * (page - 1),
          }))),
        },
      },
    })))
  end
  case sort_by
  when "newest"
@ -901,12 +914,28 @@ def get_about_info(ucid, locale)
  })
 end
 # Fetches one page of a channel's videos and returns the raw HTTP response.
 #
 # The request is first made with the URL built for `v2: false`. If the parsed
 # body carries an alert whose `alertRenderer.type` is "ERROR", the request is
 # repeated with the URL built for `v2: true` and that second response is
 # returned instead.
 #
 # Parameters are forwarded unchanged to `produce_channel_videos_url`:
 # - ucid: channel identifier
 # - page: page number (defaults to 1)
 # - auto_generated: forwarded as-is (defaults to nil)
 # - sort_by: sort order (defaults to "newest")
 #
 # Returns whatever `YT_POOL.client &.get(url)` yields; callers parse
 # `response.body` themselves.
 def get_channel_videos_response(ucid, page = 1, auto_generated = nil, sort_by = "newest")
  # First attempt: URL built with v2 disabled.
  url = produce_channel_videos_url(ucid, page, auto_generated: auto_generated, sort_by: sort_by, v2: false)
  response = YT_POOL.client &.get(url)
  # The body is parsed as a JSON array; pick the first element that has a "response" key.
  initial_data = JSON.parse(response.body).as_a.find &.["response"]?
  # No such element: hand the response back untouched and let the caller deal with it.
  return response if !initial_data
  # Truthy only when response.alerts contains an alert whose alertRenderer.type equals "ERROR".
  needs_v2 = initial_data
    .try &.["response"]?.try &.["alerts"]?
    .try &.as_a.any? { |alert|
      alert.try &.["alertRenderer"]?.try &.["type"]?.try { |t| t == "ERROR" }
    }
  if needs_v2
    # Retry with the v2 URL; this response replaces the first one.
    url = produce_channel_videos_url(ucid, page, auto_generated: auto_generated, sort_by: sort_by, v2: true)
    response = YT_POOL.client &.get(url)
  end
  response
 end
 def get_60_videos(ucid, author, page, auto_generated, sort_by = "newest")
  videos = [] of SearchVideo
  2.times do |i|
-    url = produce_channel_videos_url(ucid, page * 2 + (i - 1), auto_generated: auto_generated, sort_by: sort_by)
+    response = get_channel_videos_response(ucid, page * 2 + (i - 1), auto_generated: auto_generated, sort_by: sort_by)
    response = YT_POOL.client &.get(url)
    initial_data = JSON.parse(response.body).as_a.find &.["response"]?
    break if !initial_data
    videos.concat extract_videos(initial_data.as_h, author, ucid)
@ -916,8 +945,7 @@ def get_60_videos(ucid, author, page, auto_generated, sort_by = "newest")
 end
 def get_latest_videos(ucid)
-  url = produce_channel_videos_url(ucid, 0)
+  response = get_channel_videos_response(ucid, 1)
  response = YT_POOL.client &.get(url)
  initial_data = JSON.parse(response.body).as_a.find &.["response"]?
  return [] of SearchVideo if !initial_data
  author = initial_data["response"]?.try &.["metadata"]?.try &.["channelMetadataRenderer"]?.try &.["title"]?.try &.as_s
--- a/src/invidious/helpers/helpers.cr
+++ b/src/invidious/helpers/helpers.cr
@ -164,20 +164,8 @@ def extract_videos(initial_data : Hash(String, JSON::Any), author_fallback : Str
  extract_items(initial_data, author_fallback, author_id_fallback).select(&.is_a?(SearchVideo)).map(&.as(SearchVideo))
 end
-def extract_items(initial_data : Hash(String, JSON::Any), author_fallback : String? = nil, author_id_fallback : String? = nil)
+def extract_item(item : JSON::Any, author_fallback : String? = nil, author_id_fallback : String? = nil)
-  items = [] of SearchItem
+  if i = (item["videoRenderer"]? || item["gridVideoRenderer"]?)
  initial_data.try { |t| t["contents"]? || t["response"]? }
    .try { |t| t["twoColumnBrowseResultsRenderer"]?.try &.["tabs"].as_a.select(&.["tabRenderer"]?.try &.["selected"].as_bool)[0]?.try &.["tabRenderer"]["content"] ||
      t["twoColumnSearchResultsRenderer"]?.try &.["primaryContents"] ||
      t["continuationContents"]? }
    .try { |t| t["sectionListRenderer"]? || t["sectionListContinuation"]? }
    .try &.["contents"].as_a
      .each { |c| c.try &.["itemSectionRenderer"]?.try &.["contents"].as_a
        .try { |t| t[0]?.try &.["shelfRenderer"]?.try &.["content"]["expandedShelfContentsRenderer"]?.try &.["items"].as_a ||
          t[0]?.try &.["gridRenderer"]?.try &.["items"].as_a || t }
        .each { |item|
          if i = item["videoRenderer"]?
    video_id = i["videoId"].as_s
    title = i["title"].try { |t| t["simpleText"]?.try &.as_s || t["runs"]?.try &.as_a.map(&.["text"].as_s).join("") } || ""
@ -188,7 +176,9 @@ def extract_items(initial_data : Hash(String, JSON::Any), author_fallback : Stri
    published = i["publishedTimeText"]?.try &.["simpleText"]?.try { |t| decode_date(t.as_s) } || Time.local
    view_count = i["viewCountText"]?.try &.["simpleText"]?.try &.as_s.gsub(/\D+/, "").to_i64? || 0_i64
    description_html = i["descriptionSnippet"]?.try { |t| parse_content(t) } || ""
-            length_seconds = i["lengthText"]?.try &.["simpleText"]?.try &.as_s.try { |t| decode_length_seconds(t) } || 0
+    length_seconds = i["lengthText"]?.try &.["simpleText"]?.try &.as_s.try { |t| decode_length_seconds(t) } ||
      i["thumbnailOverlays"]?.try &.as_a.find(&.["thumbnailOverlayTimeStatusRenderer"]?).try &.["thumbnailOverlayTimeStatusRenderer"]?
        .try &.["text"]?.try &.["simpleText"]?.try &.as_s.try { |t| decode_length_seconds(t) } || 0
    live_now = false
    paid = false
@ -212,7 +202,7 @@ def extract_items(initial_data : Hash(String, JSON::Any), author_fallback : Stri
      end
    end
-            items << SearchVideo.new({
+    SearchVideo.new({
      title:              title,
      id:                 video_id,
      author:             author,
@ -238,7 +228,7 @@ def extract_items(initial_data : Hash(String, JSON::Any), author_fallback : Stri
    video_count = i["videoCountText"]?.try &.["runs"].as_a[0]?.try &.["text"].as_s.gsub(/\D/, "").to_i || 0
    description_html = i["descriptionSnippet"]?.try { |t| parse_content(t) } || ""
-            items << SearchChannel.new({
+    SearchChannel.new({
      author:           author,
      ucid:             author_id,
      author_thumbnail: author_thumbnail,
@ -254,7 +244,7 @@ def extract_items(initial_data : Hash(String, JSON::Any), author_fallback : Stri
    video_count = i["videoCountText"]["runs"].as_a[0]?.try &.["text"].as_s.gsub(/\D/, "").to_i || 0
    playlist_thumbnail = i["thumbnail"]["thumbnails"][0]?.try &.["url"]?.try &.as_s || ""
-            items << SearchPlaylist.new({
+    SearchPlaylist.new({
      title:       title,
      id:          plid,
      author:      author_fallback || "",
@ -288,7 +278,7 @@ def extract_items(initial_data : Hash(String, JSON::Any), author_fallback : Stri
    # TODO: i["publishedTimeText"]?
-            items << SearchPlaylist.new({
+    SearchPlaylist.new({
      title:       title,
      id:          plid,
      author:      author,
@ -305,7 +295,37 @@ def extract_items(initial_data : Hash(String, JSON::Any), author_fallback : Stri
  elsif i = item["horizontalCardListRenderer"]?
  elsif i = item["searchPyvRenderer"]? # Ad
  end
 end
 # Collects every item that `extract_item` can build from `initial_data` and
 # returns them as an array of `SearchItem`.
 #
 # Two response layouts are handled:
 # - response.continuationContents.gridContinuation.items, when present, is
 #   iterated directly;
 # - otherwise the sectionListRenderer / sectionListContinuation contents are
 #   walked (see the else branch).
 #
 # `author_fallback` and `author_id_fallback` are passed through to
 # `extract_item` unchanged. Entries for which `extract_item` returns nil are
 # skipped.
 def extract_items(initial_data : Hash(String, JSON::Any), author_fallback : String? = nil, author_id_fallback : String? = nil)
  items = [] of SearchItem
  # Nil unless every key on the path response -> continuationContents -> gridContinuation -> items exists.
  channel_v2_response = initial_data
    .try &.["response"]?
    .try &.["continuationContents"]?
    .try &.["gridContinuation"]?
    .try &.["items"]?
  if channel_v2_response
    # Grid continuation layout: each entry is handed straight to extract_item.
    channel_v2_response.try &.as_a.each { |item|
        extract_item(item, author_fallback, author_id_fallback)
          .try { |t| items << t }
    }
  else
    # Section-list layout: start from "contents" or "response", descend into the
    # selected browse tab, the search results' primaryContents, or
    # continuationContents, then into the section list. For each section, take the
    # first entry's shelf items or grid items when present, otherwise the
    # section's contents themselves.
    initial_data.try { |t| t["contents"]? || t["response"]? }
      .try { |t| t["twoColumnBrowseResultsRenderer"]?.try &.["tabs"].as_a.select(&.["tabRenderer"]?.try &.["selected"].as_bool)[0]?.try &.["tabRenderer"]["content"] ||
        t["twoColumnSearchResultsRenderer"]?.try &.["primaryContents"] ||
        t["continuationContents"]? }
      .try { |t| t["sectionListRenderer"]? || t["sectionListContinuation"]? }
      .try &.["contents"].as_a
        .each { |c| c.try &.["itemSectionRenderer"]?.try &.["contents"].as_a
          .try { |t| t[0]?.try &.["shelfRenderer"]?.try &.["content"]["expandedShelfContentsRenderer"]?.try &.["items"].as_a ||
            t[0]?.try &.["gridRenderer"]?.try &.["items"].as_a || t }
          .each { |item|
            extract_item(item, author_fallback, author_id_fallback)
              .try { |t| items << t }
          } }
    end
  items
 end