From 6c8f9a69914395616aa0e5ec29f3c775b0dc00b1 Mon Sep 17 00:00:00 2001
From: Chunky programmer <78101139+ChunkyProgrammer@users.noreply.github.com>
Date: Tue, 6 Jun 2023 19:31:55 -0400
Subject: [PATCH] Parse hashtag header when getting the first hashtag page

---
 src/invidious/hashtag.cr                    | 37 +++++++++++--
 src/invidious/helpers/serialized_yt_data.cr | 59 +++++++++++++++++++++
 src/invidious/routes/api/v1/search.cr       | 12 +----
 src/invidious/routes/search.cr              |  3 +-
 src/invidious/yt_backend/extractors.cr      | 34 ++++++++++++
 5 files changed, 129 insertions(+), 16 deletions(-)

diff --git a/src/invidious/hashtag.cr b/src/invidious/hashtag.cr
index d9d584c9..fd468ed6 100644
--- a/src/invidious/hashtag.cr
+++ b/src/invidious/hashtag.cr
@@ -1,15 +1,42 @@
 module Invidious::Hashtag
   extend self
 
-  def fetch(hashtag : String, page : Int, region : String? = nil) : Array(SearchItem)
+  def fetch(hashtag : String, page : Int, region : String? = nil) : HashtagPage
     cursor = (page - 1) * 60
-    ctoken = generate_continuation(hashtag, cursor)
-
+    header = nil
     client_config = YoutubeAPI::ClientConfig.new(region: region)
-    response = YoutubeAPI.browse(continuation: ctoken, client_config: client_config)
+    # Any page after the first: fetch the list of videos via a continuation token
+    if cursor > 0
+      ctoken = generate_continuation(hashtag, cursor)
+      response = YoutubeAPI.browse(continuation: ctoken, client_config: client_config)
+    else
+      # First page: fetch the videos together with the hashtag header
+      response = YoutubeAPI.browse("FEhashtag", params: get_hashtag_first_page(hashtag), client_config: client_config)
+      if header = response.dig?("header")
+        header = parse_item(header).as(HashtagHeader)
+      end
+    end
 
     items, _ = extract_items(response)
-    return items
+    return HashtagPage.new({
+      videos: items,
+      header: header,
+    })
+  end
+
+  def get_hashtag_first_page(hashtag : String)
+    object = {
+      "93:embedded" => {
+        "1:string" => hashtag,
+        "2:varint" => 0_i64,
+        "3:varint" => 1_i64,
+      },
+    }
+
+    return object.try { |i| Protodec::Any.cast_json(i) }
+      .try { |i| Protodec::Any.from_json(i) }
+      .try { |i| Base64.urlsafe_encode(i) }
+      .try { |i| URI.encode_www_form(i) }
   end
 
   def generate_continuation(hashtag : String, cursor : Int)
diff --git a/src/invidious/helpers/serialized_yt_data.cr b/src/invidious/helpers/serialized_yt_data.cr
index 7c12ad0e..632f045d 100644
--- a/src/invidious/helpers/serialized_yt_data.cr
+++ b/src/invidious/helpers/serialized_yt_data.cr
@@ -274,4 +274,63 @@ struct Continuation
   end
 end
 
+struct HashtagPage
+  include DB::Serializable
+
+  property videos : Array(SearchItem) | Array(Video)
+  property header : HashtagHeader?
+
+  def to_json(locale : String?, json : JSON::Builder)
+    json.object do
+      json.field "type", "hashtag"
+      if header = self.header
+        json.field "header" do
+          header.to_json(json)
+        end
+      end
+      json.field "results" do
+        json.array do
+          self.videos.each do |item|
+            item.to_json(locale, json)
+          end
+        end
+      end
+    end
+  end
+
+  # TODO: remove the locale and follow the Crystal convention
+  def to_json(locale : String?, _json : Nil)
+    JSON.build do |json|
+      to_json(locale, json)
+    end
+  end
+
+  def to_json(json : JSON::Builder)
+    to_json(nil, json)
+  end
+end
+
+struct HashtagHeader
+  include DB::Serializable
+
+  property tag : String
+  property channel_count : Int64
+  property video_count : Int64
+
+  def to_json(json : JSON::Builder)
+    json.object do
+      json.field "type", "hashtagHeader"
+      json.field "hashtag", self.tag
+      json.field "channelCount", self.channel_count
+      json.field "videoCount", self.video_count
+    end
+  end
+
+  def to_json(_json : Nil)
+    JSON.build do |json|
+      to_json(json)
+    end
+  end
+end
+
 alias SearchItem = SearchVideo | SearchChannel | SearchPlaylist | Category
diff --git a/src/invidious/routes/api/v1/search.cr b/src/invidious/routes/api/v1/search.cr
index 9fb283c2..0070ee47 100644
--- a/src/invidious/routes/api/v1/search.cr
+++ b/src/invidious/routes/api/v1/search.cr
@@ -66,21 +66,13 @@ module Invidious::Routes::API::V1::Search
     env.response.content_type = "application/json"
 
     begin
-      results = Invidious::Hashtag.fetch(hashtag, page, region)
+      hashtag_page = Invidious::Hashtag.fetch(hashtag, page, region)
     rescue ex
       return error_json(400, ex)
     end
 
     JSON.build do |json|
-      json.object do
-        json.field "results" do
-          json.array do
-            results.each do |item|
-              item.to_json(locale, json)
-            end
-          end
-        end
-      end
+      hashtag_page.to_json(locale, json)
     end
   end
 end
diff --git a/src/invidious/routes/search.cr b/src/invidious/routes/search.cr
index 6c3088de..5161b0d7 100644
--- a/src/invidious/routes/search.cr
+++ b/src/invidious/routes/search.cr
@@ -91,7 +91,8 @@ module Invidious::Routes::Search
     end
 
     begin
-      videos = Invidious::Hashtag.fetch(hashtag, page)
+      hashtag_page = Invidious::Hashtag.fetch(hashtag, page)
+      videos = hashtag_page.videos
     rescue ex
       return error_template(500, ex)
     end
diff --git a/src/invidious/yt_backend/extractors.cr b/src/invidious/yt_backend/extractors.cr
index 6686e6e7..32537de4 100644
--- a/src/invidious/yt_backend/extractors.cr
+++ b/src/invidious/yt_backend/extractors.cr
@@ -20,6 +20,7 @@ private ITEM_PARSERS = {
   Parsers::ReelItemRendererParser,
   Parsers::ItemSectionRendererParser,
   Parsers::ContinuationItemRendererParser,
+  Parsers::HashtagHeaderRenderer,
 }
 
 private alias InitialData = Hash(String, JSON::Any)
@@ -550,6 +551,39 @@ private module Parsers
       return {{@type.name}}
     end
   end
+
+  # Parses an InnerTube hashtagHeaderRenderer into a HashtagHeader.
+  # Returns nil when the given object isn't a hashtagHeaderRenderer.
+  #
+  # hashtagHeaderRenderer contains metadata of the hashtag page, such as the video count and channel count.
+  #
+  module HashtagHeaderRenderer
+    def self.process(item : JSON::Any, author_fallback : AuthorFallback)
+      if item_contents = item["hashtagHeaderRenderer"]?
+        return self.parse(item_contents)
+      end
+    end
+
+    private def self.parse(item_contents)
+      info = extract_text(item_contents.dig?("hashtagInfoText")) || ""
+
+      regex_match = /(?<videos>\d+\S)\D+(?<channels>\d+\S)/.match(info)
+
+      hashtag = extract_text(item_contents.dig?("hashtag")) || ""
+      videos = short_text_to_number(regex_match.try &.["videos"]?.try &.to_s || "0")
+      channels = short_text_to_number(regex_match.try &.["channels"]?.try &.to_s || "0")
+
+      return HashtagHeader.new({
+        tag: hashtag,
+        channel_count: channels,
+        video_count: videos,
+      })
+    end
+
+    def self.parser_name
+      return {{@type.name}}
+    end
+  end
 end
 
 # The following are the extractors for extracting an array of items from