Add parser for categories (shelfRenderer)
This commit adds a new parser for YT's shelfRenderers, which are typically used to denote different categories. The code for featured channels parsing has also been moved to use the new parser, but some additional refactoring is needed there. The ContinuationExtractor has also been improved and is now capable of extracting continuation data that is packaged under "appendContinuationItemsAction". In addition, this commit adds some useful helper functions to extract the currently selected tab and the continuation token. This is mainly to reduce code size and repetition.
pull/2059/head
parent
a027fbf7af
commit
8000d538db
@ -1,170 +0,0 @@
|
|||||||
# A single channel entry as produced by `_extract_channel_data`.
struct FeaturedChannel
  include DB::Serializable

  property author : String
  # Channel identifier; emitted as "authorId" and used to build "/channel/<ucid>".
  property ucid : String
  property author_thumbnail : String
  property subscriber_count : Int32
  property video_count : Int32
  property description_html : String?

  # Writes this channel as a JSON object into *json*.
  #
  # *locale* is accepted for signature parity with the other `to_json`
  # implementations in this file; it is not read here.
  def to_json(locale, json : JSON::Builder)
    json.object do
      json.field "author", self.author
      json.field "authorId", self.ucid
      json.field "authorUrl", "/channel/#{self.ucid}"

      json.field "authorThumbnails" do
        json.array do
          # One square variant per edge length; the size marker inside the
          # thumbnail URL ("=<digits>") is rewritten to "=s<size>".
          {32, 48, 76, 100, 176, 512}.each do |quality|
            json.object do
              json.field "url", self.author_thumbnail.gsub(/=\d+/, "=s#{quality}")
              json.field "width", quality
              json.field "height", quality
            end
          end
        end
      end

      # NOTE(review): description_html is nilable; confirm that
      # html_to_content accepts nil.
      json.field "description", html_to_content(self.description_html)
      json.field "descriptionHtml", self.description_html
      json.field "subCount", self.subscriber_count
      json.field "videoCount", self.video_count
      # The previous `json.field "badges", self.badges` was dropped: this
      # struct declares no `badges` property or method.
    end
  end

  # Convenience overload: writes into *json* when a builder is given,
  # otherwise builds and returns a standalone JSON string.
  def to_json(locale, json : JSON::Builder | Nil = nil)
    if json
      to_json(locale, json)
    else
      JSON.build do |builder|
        to_json(locale, builder)
      end
    end
  end
end
|
|
||||||
|
|
||||||
# A titled group of featured channels.
struct Category
  include DB::Serializable

  property title : String
  # Either a list of channels or a single channel.
  property contents : Array(FeaturedChannel) | FeaturedChannel
  property browse_endpoint_param : String?
  property continuation_token : String?

  # Writes the category as a JSON object with "title" and "contents".
  #
  # The contents are serialised explicitly so that *locale* reaches every
  # item. `json.field "contents", self.contents` would call
  # `item.to_json(json)`, which binds the builder to the `locale` parameter of
  # the two-argument overload and renders the item into a discarded string.
  def to_json(locale, json : JSON::Builder)
    json.object do
      json.field "title", self.title

      json.field "contents" do
        items = self.contents
        if items.is_a?(Array)
          json.array do
            items.each { |item| item.to_json(locale, json) }
          end
        else
          items.to_json(locale, json)
        end
      end
    end
  end

  # Convenience overload: writes into *json* when a builder is given,
  # otherwise builds and returns a standalone JSON string.
  def to_json(locale, json : JSON::Builder | Nil = nil)
    if json
      to_json(locale, json)
    else
      JSON.build do |builder|
        to_json(locale, builder)
      end
    end
  end
end
|
|
||||||
|
|
||||||
# Builds a `FeaturedChannel` from a raw channel renderer node.
#
# *channel* is indexed like a JSON object. "channelId", "title" and
# "thumbnail" are required; "subscriberCountText", "videoCountText" and
# "descriptionSnippet" are optional and fall back to 0 / 0 / nil.
def _extract_channel_data(channel)
  ucid = channel["channelId"].as_s
  author = channel["title"]["simpleText"].as_s
  author_thumbnail = channel["thumbnail"]["thumbnails"].as_a[0]["url"].as_s

  # Only the first space-separated token of the text is converted.
  subscriber_count = channel["subscriberCountText"]?.try &.["simpleText"]?.try &.as_s?
    .try { |text| short_text_to_number(text.split(" ")[0]) } || 0

  # `to_i?` instead of `to_i`: when the text has no digits the stripped string
  # is empty, `to_i` would raise, and the `|| 0` fallback would never apply.
  video_count = channel["videoCountText"]?.try &.["runs"][0]["text"].as_s.gsub(/\D/, "").to_i? || 0

  # Only the first run of the snippet is used; newlines are stripped after escaping.
  description_html = channel["descriptionSnippet"]?.try do |snippet|
    HTML.escape(snippet["runs"][0]["text"].as_s).gsub("\n", "")
  end

  FeaturedChannel.new({
    author:           author,
    ucid:             ucid,
    author_thumbnail: author_thumbnail,
    subscriber_count: subscriber_count,
    video_count:      video_count,
    description_html: description_html,
  })
end
|
|
||||||
|
|
||||||
# Turns the raw featured-channels payload into a list of `Category` values.
#
# * *data* - list of raw sections; more than one entry means several categories.
# * *submenu_data* - a Bool yields an empty result; otherwise the first
#   entry's "title" is used as the title when there is a single category.
# * *title* - fallback title for the single-category case.
# * *continuation_items* - when truthy, `data[0]` is already the list of channel items.
def process_featured_channels(data, submenu_data, title = nil, continuation_items = false)
  categories = [] of Category

  return categories if submenu_data.is_a?(Bool)

  # Extraction process differs when there's more than one category
  if data.size > 1
    data.each do |section|
      shelf = section["itemSectionRenderer"]["contents"].as_a[0]["shelfRenderer"]

      shelf_title = shelf["title"]["runs"][0]["text"].as_s
      endpoint_param = shelf["endpoint"]["browseEndpoint"]["params"].as_s

      shelf_content = shelf["content"]
      if shelf_content.as_h.has_key?("horizontalListRenderer")
        # Category has multiple channels
        channels = shelf_content["horizontalListRenderer"]["items"].as_a.map do |item|
          _extract_channel_data(item["gridChannelRenderer"])
        end
      else
        # Single channel
        channels = _extract_channel_data(
          shelf_content["expandedShelfContentsRenderer"]["items"][0]["channelRenderer"]
        )
      end

      categories << Category.new({
        title:                 shelf_title,
        contents:              channels,
        browse_endpoint_param: endpoint_param,
        continuation_token:    nil,
      })
    end
  else
    grid_items = if continuation_items
                   data[0].as_a
                 else
                   data[0]["itemSectionRenderer"]["contents"].as_a[0]["gridRenderer"]["items"].as_a
                 end

    single_title = submenu_data.try &.[0]["title"].as_s || title || ""

    # If a continuation token is needed it'll always be after at least twelve channels
    next_token = nil
    if grid_items.size > 12
      next_token = grid_items[-1]["continuationItemRenderer"]?.try &.["continuationEndpoint"]["continuationCommand"]["token"].as_s

      # The trailing continuation item is not a channel; drop it before extraction.
      grid_items = grid_items[0..-2] if next_token
    end

    channels = grid_items.map do |item|
      _extract_channel_data(item["gridChannelRenderer"])
    end

    categories << Category.new({
      title:                 single_title,
      contents:              channels,
      browse_endpoint_param: nil, # Not needed
      continuation_token:    next_token,
    })
  end

  categories
end
|
|
@ -0,0 +1,258 @@
|
|||||||
|
# A video item that can be rendered as a feed `entry` element or as JSON.
struct SearchVideo
  include DB::Serializable

  property title : String
  property id : String
  property author : String
  property ucid : String
  property published : Time
  property views : Int64
  property description_html : String
  property length_seconds : Int32
  property live_now : Bool
  property paid : Bool
  property premium : Bool
  property premiere_timestamp : Time?

  # Writes this video as an `entry` element into *xml*.
  #
  # Side effect: sets `query_params["v"]` to this video's id before the watch
  # links are built. *auto_generated* is kept for interface compatibility;
  # the author element was identical in both of its branches.
  def to_xml(auto_generated, query_params, xml : XML::Builder)
    query_params["v"] = self.id

    watch_url = "#{HOST_URL}/watch?#{query_params}"
    thumbnail_url = "#{HOST_URL}/vi/#{self.id}/mqdefault.jpg"

    xml.element("entry") do
      xml.element("id") { xml.text "yt:video:#{self.id}" }
      xml.element("yt:videoId") { xml.text self.id }
      xml.element("yt:channelId") { xml.text self.ucid }
      xml.element("title") { xml.text self.title }
      xml.element("link", rel: "alternate", href: watch_url)

      xml.element("author") do
        xml.element("name") { xml.text self.author }
        xml.element("uri") { xml.text "#{HOST_URL}/channel/#{self.ucid}" }
      end

      xml.element("content", type: "xhtml") do
        xml.element("div", xmlns: "http://www.w3.org/1999/xhtml") do
          xml.element("a", href: watch_url) do
            xml.element("img", src: thumbnail_url)
          end

          xml.element("p", style: "word-break:break-word;white-space:pre-wrap") { xml.text html_to_content(self.description_html) }
        end
      end

      xml.element("published") { xml.text self.published.to_s("%Y-%m-%dT%H:%M:%S%:z") }

      xml.element("media:group") do
        xml.element("media:title") { xml.text self.title }
        xml.element("media:thumbnail", url: thumbnail_url,
          width: "320", height: "180")
        xml.element("media:description") { xml.text html_to_content(self.description_html) }
      end

      xml.element("media:community") do
        xml.element("media:statistics", views: self.views)
      end
    end
  end

  # Convenience overload: writes into *xml* when a builder is given,
  # otherwise builds and returns a standalone XML document string.
  #
  # Fixes two defects of the previous version: it called a non-existent
  # four-argument `to_xml(HOST_URL, ...)`, and inside `XML.build` it passed
  # the nil *xml* argument instead of the builder yielded by the block.
  def to_xml(auto_generated, query_params, xml : XML::Builder | Nil = nil)
    if xml
      to_xml(auto_generated, query_params, xml)
    else
      XML.build do |builder|
        to_xml(auto_generated, query_params, builder)
      end
    end
  end

  # Writes this video as a JSON object (type "video") into *json*.
  def to_json(locale : Hash(String, JSON::Any), json : JSON::Builder)
    json.object do
      json.field "type", "video"
      json.field "title", self.title
      json.field "videoId", self.id

      json.field "author", self.author
      json.field "authorId", self.ucid
      json.field "authorUrl", "/channel/#{self.ucid}"

      json.field "videoThumbnails" do
        generate_thumbnails(json, self.id)
      end

      json.field "description", html_to_content(self.description_html)
      json.field "descriptionHtml", self.description_html

      json.field "viewCount", self.views
      json.field "published", self.published.to_unix
      json.field "publishedText", translate(locale, "`x` ago", recode_date(self.published, locale))
      json.field "lengthSeconds", self.length_seconds
      json.field "liveNow", self.live_now
      json.field "paid", self.paid
      json.field "premium", self.premium
      json.field "isUpcoming", self.is_upcoming

      # Only emitted when a premiere time is known.
      if timestamp = self.premiere_timestamp
        json.field "premiereTimestamp", timestamp.to_unix
      end
    end
  end

  # Convenience overload: writes into *json* when a builder is given,
  # otherwise builds and returns a standalone JSON string.
  def to_json(locale, json : JSON::Builder | Nil = nil)
    if json
      to_json(locale, json)
    else
      JSON.build do |builder|
        to_json(locale, builder)
      end
    end
  end

  # True when a premiere timestamp is set.
  def is_upcoming
    !premiere_timestamp.nil?
  end
end
|
||||||
|
|
||||||
|
# Minimal video record embedded in a `SearchPlaylist` (see `SearchPlaylist#videos`).
struct SearchPlaylistVideo
  include DB::Serializable

  # Display title of the video.
  property title : String
  # Video identifier; emitted as "videoId" by `SearchPlaylist#to_json`.
  property id : String
  # Duration in seconds.
  property length_seconds : Int32
end
|
||||||
|
|
||||||
|
# A playlist item together with the videos listed in its `videos` property.
struct SearchPlaylist
  include DB::Serializable

  property title : String
  property id : String
  property author : String
  property ucid : String
  property video_count : Int32
  property videos : Array(SearchPlaylistVideo)
  property thumbnail : String?

  # Writes this playlist as a JSON object (type "playlist") into *json*.
  # *locale* is accepted for signature parity and is not read here.
  def to_json(locale, json : JSON::Builder)
    json.object do
      json.field "type", "playlist"
      json.field "title", self.title
      json.field "playlistId", self.id
      json.field "playlistThumbnail", self.thumbnail

      json.field "author", self.author
      json.field "authorId", self.ucid
      json.field "authorUrl", "/channel/#{self.ucid}"

      json.field "videoCount", self.video_count
      json.field "videos" do
        json.array do
          self.videos.each do |entry|
            json.object do
              json.field "title", entry.title
              json.field "videoId", entry.id
              json.field "lengthSeconds", entry.length_seconds

              json.field "videoThumbnails" do
                generate_thumbnails(json, entry.id)
              end
            end
          end
        end
      end
    end
  end

  # Convenience overload: with a builder it delegates to the method above;
  # without one it builds and returns a standalone JSON string.
  def to_json(locale, json : JSON::Builder | Nil = nil)
    return to_json(locale, json) if json

    JSON.build do |builder|
      to_json(locale, builder)
    end
  end
end
|
||||||
|
|
||||||
|
# A channel item.
struct SearchChannel
  include DB::Serializable

  property author : String
  property ucid : String
  property author_thumbnail : String
  property subscriber_count : Int32
  property video_count : Int32
  property description_html : String
  property auto_generated : Bool

  # Writes this channel as a JSON object (type "channel") into *json*.
  # *locale* is accepted for signature parity and is not read here.
  def to_json(locale, json : JSON::Builder)
    json.object do
      json.field "type", "channel"
      json.field "author", self.author
      json.field "authorId", self.ucid
      json.field "authorUrl", "/channel/#{self.ucid}"

      json.field "authorThumbnails" do
        json.array do
          # One square variant per edge length; the "=<digits>" size marker
          # in the thumbnail URL is rewritten to "=s<size>".
          {32, 48, 76, 100, 176, 512}.each do |size|
            json.object do
              json.field "url", self.author_thumbnail.gsub(/=\d+/, "=s#{size}")
              json.field "width", size
              json.field "height", size
            end
          end
        end
      end

      json.field "autoGenerated", self.auto_generated
      json.field "subCount", self.subscriber_count
      json.field "videoCount", self.video_count

      json.field "description", html_to_content(self.description_html)
      json.field "descriptionHtml", self.description_html
    end
  end

  # Convenience overload: with a builder it delegates to the method above;
  # without one it builds and returns a standalone JSON string.
  def to_json(locale, json : JSON::Builder | Nil = nil)
    return to_json(locale, json) if json

    JSON.build do |builder|
      to_json(locale, builder)
    end
  end
end
|
||||||
|
|
||||||
|
# A titled group of items. Its contents may be any `SearchItem`, including
# other categories.
class Category
  include DB::Serializable

  property title : String
  # Either a list of items or a single item.
  property contents : Array(SearchItem) | SearchItem
  property browse_endpoint_data : String?
  property continuation_token : String?
  property badges : Array(Tuple(String, String))?

  # Writes the category as a JSON object with "title" and "contents".
  #
  # The contents are serialised explicitly so that *locale* reaches every
  # item. `json.field "contents", self.contents` would call
  # `item.to_json(json)`, which binds the builder to the `locale` parameter of
  # the items' two-argument overload and renders them into a discarded string.
  def to_json(locale, json : JSON::Builder)
    json.object do
      json.field "title", self.title

      json.field "contents" do
        items = self.contents
        if items.is_a?(Array)
          json.array do
            items.each { |item| item.to_json(locale, json) }
          end
        else
          items.to_json(locale, json)
        end
      end
    end
  end

  # Convenience overload: writes into *json* when a builder is given,
  # otherwise builds and returns a standalone JSON string.
  def to_json(locale, json : JSON::Builder | Nil = nil)
    if json
      to_json(locale, json)
    else
      JSON.build do |builder|
        to_json(locale, builder)
      end
    end
  end
end
|
||||||
|
|
||||||
|
# Union of the item types defined in this file. `Category#contents` is itself
# typed in terms of `SearchItem`, so a category can hold any of these items.
alias SearchItem = SearchVideo | SearchChannel | SearchPlaylist | Category
|
Loading…
Reference in New Issue