Refactor 'description_html'
This commit is contained in:
parent
12b2ab5da8
commit
b43e9ed7e7
8 changed files with 69 additions and 103 deletions
|
@ -473,9 +473,8 @@ get "/watch" do |env|
|
|||
|
||||
aspect_ratio = "16:9"
|
||||
|
||||
video.description = fill_links(video.description, "https", "www.youtube.com")
|
||||
video.description = replace_links(video.description)
|
||||
description = video.short_description
|
||||
video.description_html = fill_links(video.description_html, "https", "www.youtube.com")
|
||||
video.description_html = replace_links(video.description_html)
|
||||
|
||||
host_url = make_host_url(config, Kemal.config)
|
||||
host_params = env.request.query_params
|
||||
|
@ -648,9 +647,8 @@ get "/embed/:id" do |env|
|
|||
|
||||
aspect_ratio = nil
|
||||
|
||||
video.description = fill_links(video.description, "https", "www.youtube.com")
|
||||
video.description = replace_links(video.description)
|
||||
description = video.short_description
|
||||
video.description_html = fill_links(video.description_html, "https", "www.youtube.com")
|
||||
video.description_html = replace_links(video.description_html)
|
||||
|
||||
host_url = make_host_url(config, Kemal.config)
|
||||
host_params = env.request.query_params
|
||||
|
@ -2466,7 +2464,7 @@ get "/feed/channel/:ucid" do |env|
|
|||
|
||||
author = entry.xpath_node("author/name").not_nil!.content
|
||||
ucid = entry.xpath_node("channelid").not_nil!.content
|
||||
description = entry.xpath_node("group/description").not_nil!.content
|
||||
description_html = entry.xpath_node("group/description").not_nil!.to_s
|
||||
views = entry.xpath_node("group/community/statistics").not_nil!.["views"].to_i64
|
||||
|
||||
videos << SearchVideo.new(
|
||||
|
@ -2476,8 +2474,7 @@ get "/feed/channel/:ucid" do |env|
|
|||
ucid: ucid,
|
||||
published: published,
|
||||
views: views,
|
||||
description: description,
|
||||
description_html: "",
|
||||
description_html: description_html,
|
||||
length_seconds: 0,
|
||||
live_now: false,
|
||||
paid: false,
|
||||
|
@ -3460,11 +3457,8 @@ get "/api/v1/top" do |env|
|
|||
json.field "published", video.published.to_unix
|
||||
json.field "publishedText", translate(locale, "`x` ago", recode_date(video.published, locale))
|
||||
|
||||
description = video.description.gsub("<br>", "\n")
|
||||
description = description.gsub("<br/>", "\n")
|
||||
description = XML.parse_html(description)
|
||||
json.field "description", description.content
|
||||
json.field "descriptionHtml", video.description
|
||||
json.field "description", html_to_content(video.description_html)
|
||||
json.field "descriptionHtml", video.description_html
|
||||
end
|
||||
end
|
||||
end
|
||||
|
@ -3511,8 +3505,7 @@ get "/api/v1/channels/:ucid" do |env|
|
|||
author = channel_html.xpath_node(%q(//a[contains(@class, "branded-page-header-title-link")])).not_nil!.content
|
||||
author_url = channel_html.xpath_node(%q(//a[@class="channel-header-profile-image-container spf-link"])).not_nil!["href"]
|
||||
author_thumbnail = channel_html.xpath_node(%q(//img[@class="channel-header-profile-image"])).not_nil!["src"]
|
||||
description_html = channel_html.xpath_node(%q(//div[contains(@class,"about-description")]))
|
||||
description_html, description = html_to_content(description_html)
|
||||
description_html = channel_html.xpath_node(%q(//div[contains(@class,"about-description")])).try &.to_s || ""
|
||||
|
||||
paid = channel_html.xpath_node(%q(//meta[@itemprop="paid"])).not_nil!["content"] == "True"
|
||||
is_family_friendly = channel_html.xpath_node(%q(//meta[@itemprop="isFamilyFriendly"])).not_nil!["content"] == "True"
|
||||
|
@ -3607,7 +3600,7 @@ get "/api/v1/channels/:ucid" do |env|
|
|||
|
||||
json.field "autoGenerated", auto_generated
|
||||
json.field "isFamilyFriendly", is_family_friendly
|
||||
json.field "description", description
|
||||
json.field "description", html_to_content(description_html)
|
||||
json.field "descriptionHtml", description_html
|
||||
|
||||
json.field "allowedRegions", allowed_regions
|
||||
|
@ -3884,7 +3877,7 @@ get "/api/v1/playlists/:plid" do |env|
|
|||
end
|
||||
end
|
||||
|
||||
json.field "description", playlist.description
|
||||
json.field "description", html_to_content(playlist.description_html)
|
||||
json.field "descriptionHtml", playlist.description_html
|
||||
json.field "videoCount", playlist.video_count
|
||||
|
||||
|
|
|
@ -138,13 +138,8 @@ def fetch_youtube_comments(id, db, continuation, proxies, format, locale, thin_m
|
|||
node_comment = node["commentRenderer"]
|
||||
end
|
||||
|
||||
content_html = node_comment["contentText"]["simpleText"]?.try &.as_s.rchop('\ufeff')
|
||||
if content_html
|
||||
content_html = HTML.escape(content_html)
|
||||
end
|
||||
|
||||
content_html ||= content_to_comment_html(node_comment["contentText"]["runs"].as_a)
|
||||
content_html, content = html_to_content(content_html)
|
||||
content_html = node_comment["contentText"]["simpleText"]?.try &.as_s.rchop('\ufeff').try { |block| HTML.escape(block) }.to_s ||
|
||||
content_to_comment_html(node_comment["contentText"]["runs"].as_a).try &.to_s || ""
|
||||
|
||||
author = node_comment["authorText"]?.try &.["simpleText"]
|
||||
author ||= ""
|
||||
|
@ -179,7 +174,7 @@ def fetch_youtube_comments(id, db, continuation, proxies, format, locale, thin_m
|
|||
json.field "isEdited", false
|
||||
end
|
||||
|
||||
json.field "content", content
|
||||
json.field "content", html_to_content(content_html)
|
||||
json.field "contentHtml", content_html
|
||||
json.field "published", published.to_unix
|
||||
json.field "publishedText", translate(locale, "`x` ago", recode_date(published, locale))
|
||||
|
|
|
@ -177,23 +177,17 @@ def login_req(login_form, f_req)
|
|||
return HTTP::Params.encode(data)
|
||||
end
|
||||
|
||||
def html_to_content(description_html)
|
||||
if !description_html
|
||||
description = ""
|
||||
description_html = ""
|
||||
else
|
||||
description_html = description_html.to_s
|
||||
description = description_html.gsub("<br>", "\n")
|
||||
description = description.gsub("<br/>", "\n")
|
||||
def html_to_content(description_html : String)
|
||||
description = description_html.gsub(/(<br>)|(<br\/>)/, {
|
||||
"<br>": "\n",
|
||||
"<br/>": "\n",
|
||||
})
|
||||
|
||||
if description.empty?
|
||||
description = ""
|
||||
else
|
||||
description = XML.parse_html(description).content.strip("\n ")
|
||||
end
|
||||
if !description.empty?
|
||||
description = XML.parse_html(description).content.strip("\n ")
|
||||
end
|
||||
|
||||
return description_html, description
|
||||
return description
|
||||
end
|
||||
|
||||
def extract_videos(nodeset, ucid = nil, author_name = nil)
|
||||
|
@ -230,8 +224,7 @@ def extract_items(nodeset, ucid = nil, author_name = nil)
|
|||
author ||= ""
|
||||
author_id ||= ""
|
||||
|
||||
description_html = node.xpath_node(%q(.//div[contains(@class, "yt-lockup-description")]))
|
||||
description_html, description = html_to_content(description_html)
|
||||
description_html = node.xpath_node(%q(.//div[contains(@class, "yt-lockup-description")])).try &.to_s || ""
|
||||
|
||||
tile = node.xpath_node(%q(.//div[contains(@class, "yt-lockup-tile")]))
|
||||
if !tile
|
||||
|
@ -330,7 +323,6 @@ def extract_items(nodeset, ucid = nil, author_name = nil)
|
|||
author_thumbnail: author_thumbnail,
|
||||
subscriber_count: subscriber_count,
|
||||
video_count: video_count,
|
||||
description: description,
|
||||
description_html: description_html
|
||||
)
|
||||
else
|
||||
|
@ -396,7 +388,6 @@ def extract_items(nodeset, ucid = nil, author_name = nil)
|
|||
ucid: author_id,
|
||||
published: published,
|
||||
views: view_count,
|
||||
description: description,
|
||||
description_html: description_html,
|
||||
length_seconds: length_seconds,
|
||||
live_now: live_now,
|
||||
|
|
|
@ -47,7 +47,6 @@ struct Playlist
|
|||
author: String,
|
||||
author_thumbnail: String,
|
||||
ucid: String,
|
||||
description: String,
|
||||
description_html: String,
|
||||
video_count: Int32,
|
||||
views: Int64,
|
||||
|
@ -214,9 +213,8 @@ def fetch_playlist(plid, locale)
|
|||
end
|
||||
title = title.content.strip(" \n")
|
||||
|
||||
description_html = document.xpath_node(%q(//span[@class="pl-header-description-text"]/div/div[1]))
|
||||
description_html ||= document.xpath_node(%q(//span[@class="pl-header-description-text"]))
|
||||
description_html, description = html_to_content(description_html)
|
||||
description_html = document.xpath_node(%q(//span[@class="pl-header-description-text"]/div/div[1])).try &.to_s ||
|
||||
document.xpath_node(%q(//span[@class="pl-header-description-text"])).try &.to_s || ""
|
||||
|
||||
# YouTube allows anonymous playlists, so most of this can be empty or optional
|
||||
anchor = document.xpath_node(%q(//ul[@class="pl-header-details"]))
|
||||
|
@ -245,7 +243,6 @@ def fetch_playlist(plid, locale)
|
|||
author: author,
|
||||
author_thumbnail: author_thumbnail,
|
||||
ucid: ucid,
|
||||
description: description,
|
||||
description_html: description_html,
|
||||
video_count: video_count,
|
||||
views: views,
|
||||
|
|
|
@ -31,7 +31,7 @@ struct SearchVideo
|
|||
xml.element("media:title") { xml.text self.title }
|
||||
xml.element("media:thumbnail", url: "#{host_url}/vi/#{self.id}/mqdefault.jpg",
|
||||
width: "320", height: "180")
|
||||
xml.element("media:description") { xml.text self.description }
|
||||
xml.element("media:description") { xml.text html_to_content(self.description_html) }
|
||||
end
|
||||
|
||||
xml.element("media:community") do
|
||||
|
@ -64,7 +64,7 @@ struct SearchVideo
|
|||
generate_thumbnails(json, self.id, config, kemal_config)
|
||||
end
|
||||
|
||||
json.field "description", self.description
|
||||
json.field "description", html_to_content(self.description_html)
|
||||
json.field "descriptionHtml", self.description_html
|
||||
|
||||
json.field "viewCount", self.views
|
||||
|
@ -94,7 +94,6 @@ struct SearchVideo
|
|||
ucid: String,
|
||||
published: Time,
|
||||
views: Int64,
|
||||
description: String,
|
||||
description_html: String,
|
||||
length_seconds: Int32,
|
||||
live_now: Bool,
|
||||
|
@ -187,7 +186,7 @@ struct SearchChannel
|
|||
|
||||
json.field "subCount", self.subscriber_count
|
||||
json.field "videoCount", self.video_count
|
||||
json.field "description", self.description
|
||||
json.field "description", html_to_content(self.description_html)
|
||||
json.field "descriptionHtml", self.description_html
|
||||
end
|
||||
end
|
||||
|
@ -208,7 +207,6 @@ struct SearchChannel
|
|||
author_thumbnail: String,
|
||||
subscriber_count: Int32,
|
||||
video_count: Int32,
|
||||
description: String,
|
||||
description_html: String,
|
||||
})
|
||||
end
|
||||
|
|
|
@ -286,10 +286,8 @@ struct Video
|
|||
generate_storyboards(json, self.id, self.storyboards, config, kemal_config)
|
||||
end
|
||||
|
||||
description_html, description = html_to_content(self.description)
|
||||
|
||||
json.field "description", description
|
||||
json.field "descriptionHtml", description_html
|
||||
json.field "description", html_to_content(self.description_html)
|
||||
json.field "descriptionHtml", self.description_html
|
||||
json.field "published", self.published.to_unix
|
||||
json.field "publishedText", translate(locale, "`x` ago", recode_date(self.published, locale))
|
||||
json.field "keywords", self.keywords
|
||||
|
@ -467,6 +465,17 @@ struct Video
|
|||
end
|
||||
end
|
||||
|
||||
# `description_html` is stored in DB as `description`, which can be
|
||||
# quite confusing. Since it currently isn't very practical to rename
|
||||
# it, we instead define a getter and setter here.
|
||||
def description_html
|
||||
self.description
|
||||
end
|
||||
|
||||
def description_html=(other : String)
|
||||
self.description = other
|
||||
end
|
||||
|
||||
def allow_ratings
|
||||
allow_ratings = player_response["videoDetails"]?.try &.["allowRatings"]?.try &.as_bool
|
||||
|
||||
|
@ -796,14 +805,19 @@ struct Video
|
|||
end
|
||||
|
||||
def short_description
|
||||
description = self.description.gsub("<br>", " ")
|
||||
description = description.gsub("<br/>", " ")
|
||||
description = XML.parse_html(description).content[0..200].gsub('"', "&quot;").gsub("\n", " ").strip(" ")
|
||||
if description.empty?
|
||||
description = " "
|
||||
short_description = self.description_html.gsub(/(<br>)|(<br\/>|"|\n)/, {
|
||||
"<br>" => " ",
|
||||
"<br/>" => " ",
|
||||
"\"" => "&quot;",
|
||||
"\n" => " ",
|
||||
})
|
||||
short_description = XML.parse_html(short_description).content[0..200].strip(" ")
|
||||
|
||||
if short_description.empty?
|
||||
short_description = " "
|
||||
end
|
||||
|
||||
return description
|
||||
return short_description
|
||||
end
|
||||
|
||||
def length_seconds
|
||||
|
@ -1151,28 +1165,23 @@ def fetch_video(id, proxies, region)
|
|||
end
|
||||
|
||||
title = info["title"]
|
||||
author = info["author"]
|
||||
ucid = info["ucid"]
|
||||
author = info["author"]? || ""
|
||||
ucid = info["ucid"]? || ""
|
||||
|
||||
views = html.xpath_node(%q(//meta[@itemprop="interactionCount"]))
|
||||
views = views.try &.["content"].to_i64?
|
||||
views ||= 0_i64
|
||||
.try &.["content"].to_i64? || 0_i64
|
||||
|
||||
likes = html.xpath_node(%q(//button[@title="I like this"]/span))
|
||||
likes = likes.try &.content.delete(",").try &.to_i?
|
||||
likes ||= 0
|
||||
.try &.content.delete(",").try &.to_i? || 0
|
||||
|
||||
dislikes = html.xpath_node(%q(//button[@title="I dislike this"]/span))
|
||||
dislikes = dislikes.try &.content.delete(",").try &.to_i?
|
||||
dislikes ||= 0
|
||||
.try &.content.delete(",").try &.to_i? || 0
|
||||
|
||||
avg_rating = (likes.to_f/(likes.to_f + dislikes.to_f) * 4 + 1)
|
||||
avg_rating = avg_rating.nan? ? 0.0 : avg_rating
|
||||
info["avg_rating"] = "#{avg_rating}"
|
||||
|
||||
description = html.xpath_node(%q(//p[@id="eow-description"]))
|
||||
description = description ? description.to_xml(options: XML::SaveOptions::NO_DECL) : %q(<p id="eow-description"></p>)
|
||||
|
||||
description_html = html.xpath_node(%q(//p[@id="eow-description"])).try &.to_xml(options: XML::SaveOptions::NO_DECL) || ""
|
||||
wilson_score = ci_lower_bound(likes, likes + dislikes)
|
||||
|
||||
published = html.xpath_node(%q(//meta[@itemprop="datePublished"])).try &.["content"]
|
||||
|
@ -1188,7 +1197,8 @@ def fetch_video(id, proxies, region)
|
|||
genre = html.xpath_node(%q(//meta[@itemprop="genre"])).try &.["content"]
|
||||
genre ||= ""
|
||||
|
||||
genre_url = html.xpath_node(%(//ul[contains(@class, "watch-info-tag-list")]/li/a[text()="#{genre}"])).try &.["href"]
|
||||
genre_url = html.xpath_node(%(//ul[contains(@class, "watch-info-tag-list")]/li/a[text()="#{genre}"])).try &.["href"]?
|
||||
genre_url ||= ""
|
||||
|
||||
# YouTube provides invalid URLs for some genres, so we fix that here
|
||||
case genre
|
||||
|
@ -1205,30 +1215,12 @@ def fetch_video(id, proxies, region)
|
|||
when "Trailers"
|
||||
genre_url = "/channel/UClgRkhTL3_hImCAmdLfDE4g"
|
||||
end
|
||||
genre_url ||= ""
|
||||
|
||||
license = html.xpath_node(%q(//h4[contains(text(),"License")]/parent::*/ul/li))
|
||||
if license
|
||||
license = license.content
|
||||
else
|
||||
license = ""
|
||||
end
|
||||
license = html.xpath_node(%q(//h4[contains(text(),"License")]/parent::*/ul/li)).try &.content || ""
|
||||
sub_count_text = html.xpath_node(%q(//span[contains(@class, "yt-subscriber-count")])).try &.["title"]? || "0"
|
||||
author_thumbnail = html.xpath_node(%(//span[@class="yt-thumb-clip"]/img)).try &.["data-thumb"]? || ""
|
||||
|
||||
sub_count_text = html.xpath_node(%q(//span[contains(@class, "yt-subscriber-count")]))
|
||||
if sub_count_text
|
||||
sub_count_text = sub_count_text["title"]
|
||||
else
|
||||
sub_count_text = "0"
|
||||
end
|
||||
|
||||
author_thumbnail = html.xpath_node(%(//span[@class="yt-thumb-clip"]/img))
|
||||
if author_thumbnail
|
||||
author_thumbnail = author_thumbnail["data-thumb"]
|
||||
else
|
||||
author_thumbnail = ""
|
||||
end
|
||||
|
||||
video = Video.new(id, info, Time.utc, title, views, likes, dislikes, wilson_score, published, description,
|
||||
video = Video.new(id, info, Time.utc, title, views, likes, dislikes, wilson_score, published, description_html,
|
||||
nil, author, ucid, allowed_regions, is_family_friendly, genre, genre_url, license, sub_count_text, author_thumbnail)
|
||||
|
||||
return video
|
||||
|
|
|
@ -43,7 +43,7 @@
|
|||
var player_data = {
|
||||
aspect_ratio: '<%= aspect_ratio %>',
|
||||
title: "<%= video.title.dump_unquoted %>",
|
||||
description: "<%= HTML.escape(description) %>",
|
||||
description: "<%= HTML.escape(video.short_description) %>",
|
||||
thumbnail: "<%= thumbnail %>"
|
||||
}
|
||||
</script>
|
||||
|
|
|
@ -1,12 +1,12 @@
|
|||
<% content_for "header" do %>
|
||||
<meta name="thumbnail" content="<%= thumbnail %>">
|
||||
<meta name="description" content="<%= description %>">
|
||||
<meta name="description" content="<%= video.short_description %>">
|
||||
<meta name="keywords" content="<%= video.keywords.join(",") %>">
|
||||
<meta property="og:site_name" content="Invidious">
|
||||
<meta property="og:url" content="<%= host_url %>/watch?v=<%= video.id %>">
|
||||
<meta property="og:title" content="<%= HTML.escape(video.title) %>">
|
||||
<meta property="og:image" content="/vi/<%= video.id %>/maxres.jpg">
|
||||
<meta property="og:description" content="<%= description %>">
|
||||
<meta property="og:description" content="<%= video.short_description %>">
|
||||
<meta property="og:type" content="video.other">
|
||||
<meta property="og:video:url" content="<%= host_url %>/embed/<%= video.id %>">
|
||||
<meta property="og:video:secure_url" content="<%= host_url %>/embed/<%= video.id %>">
|
||||
|
@ -17,7 +17,7 @@
|
|||
<meta name="twitter:site" content="@omarroth1">
|
||||
<meta name="twitter:url" content="<%= host_url %>/watch?v=<%= video.id %>">
|
||||
<meta name="twitter:title" content="<%= HTML.escape(video.title) %>">
|
||||
<meta name="twitter:description" content="<%= description %>">
|
||||
<meta name="twitter:description" content="<%= video.short_description %>">
|
||||
<meta name="twitter:image" content="<%= host_url %>/vi/<%= video.id %>/maxres.jpg">
|
||||
<meta name="twitter:player" content="<%= host_url %>/embed/<%= video.id %>">
|
||||
<meta name="twitter:player:width" content="1280">
|
||||
|
@ -185,7 +185,7 @@ var video_data = {
|
|||
</p>
|
||||
|
||||
<div>
|
||||
<%= video.description %>
|
||||
<%= video.description_html %>
|
||||
</div>
|
||||
|
||||
<hr>
|
||||
|
|
Loading…
Reference in a new issue