Refactor 'description_html'

pull/592/head
Omar Roth 6 years ago
parent 12b2ab5da8
commit b43e9ed7e7
No known key found for this signature in database
GPG Key ID: B8254FB7EC3D37F2

@ -473,9 +473,8 @@ get "/watch" do |env|
aspect_ratio = "16:9"
video.description = fill_links(video.description, "https", "www.youtube.com")
video.description = replace_links(video.description)
description = video.short_description
video.description_html = fill_links(video.description_html, "https", "www.youtube.com")
video.description_html = replace_links(video.description_html)
host_url = make_host_url(config, Kemal.config)
host_params = env.request.query_params
@ -648,9 +647,8 @@ get "/embed/:id" do |env|
aspect_ratio = nil
video.description = fill_links(video.description, "https", "www.youtube.com")
video.description = replace_links(video.description)
description = video.short_description
video.description_html = fill_links(video.description_html, "https", "www.youtube.com")
video.description_html = replace_links(video.description_html)
host_url = make_host_url(config, Kemal.config)
host_params = env.request.query_params
@ -2466,7 +2464,7 @@ get "/feed/channel/:ucid" do |env|
author = entry.xpath_node("author/name").not_nil!.content
ucid = entry.xpath_node("channelid").not_nil!.content
description = entry.xpath_node("group/description").not_nil!.content
description_html = entry.xpath_node("group/description").not_nil!.to_s
views = entry.xpath_node("group/community/statistics").not_nil!.["views"].to_i64
videos << SearchVideo.new(
@ -2476,8 +2474,7 @@ get "/feed/channel/:ucid" do |env|
ucid: ucid,
published: published,
views: views,
description: description,
description_html: "",
description_html: description_html,
length_seconds: 0,
live_now: false,
paid: false,
@ -3460,11 +3457,8 @@ get "/api/v1/top" do |env|
json.field "published", video.published.to_unix
json.field "publishedText", translate(locale, "`x` ago", recode_date(video.published, locale))
description = video.description.gsub("<br>", "\n")
description = description.gsub("<br/>", "\n")
description = XML.parse_html(description)
json.field "description", description.content
json.field "descriptionHtml", video.description
json.field "description", html_to_content(video.description_html)
json.field "descriptionHtml", video.description_html
end
end
end
@ -3511,8 +3505,7 @@ get "/api/v1/channels/:ucid" do |env|
author = channel_html.xpath_node(%q(//a[contains(@class, "branded-page-header-title-link")])).not_nil!.content
author_url = channel_html.xpath_node(%q(//a[@class="channel-header-profile-image-container spf-link"])).not_nil!["href"]
author_thumbnail = channel_html.xpath_node(%q(//img[@class="channel-header-profile-image"])).not_nil!["src"]
description_html = channel_html.xpath_node(%q(//div[contains(@class,"about-description")]))
description_html, description = html_to_content(description_html)
description_html = channel_html.xpath_node(%q(//div[contains(@class,"about-description")])).try &.to_s || ""
paid = channel_html.xpath_node(%q(//meta[@itemprop="paid"])).not_nil!["content"] == "True"
is_family_friendly = channel_html.xpath_node(%q(//meta[@itemprop="isFamilyFriendly"])).not_nil!["content"] == "True"
@ -3607,7 +3600,7 @@ get "/api/v1/channels/:ucid" do |env|
json.field "autoGenerated", auto_generated
json.field "isFamilyFriendly", is_family_friendly
json.field "description", description
json.field "description", html_to_content(description_html)
json.field "descriptionHtml", description_html
json.field "allowedRegions", allowed_regions
@ -3884,7 +3877,7 @@ get "/api/v1/playlists/:plid" do |env|
end
end
json.field "description", playlist.description
json.field "description", html_to_content(playlist.description_html)
json.field "descriptionHtml", playlist.description_html
json.field "videoCount", playlist.video_count

@ -138,13 +138,8 @@ def fetch_youtube_comments(id, db, continuation, proxies, format, locale, thin_m
node_comment = node["commentRenderer"]
end
content_html = node_comment["contentText"]["simpleText"]?.try &.as_s.rchop('\ufeff')
if content_html
content_html = HTML.escape(content_html)
end
content_html ||= content_to_comment_html(node_comment["contentText"]["runs"].as_a)
content_html, content = html_to_content(content_html)
content_html = node_comment["contentText"]["simpleText"]?.try &.as_s.rchop('\ufeff').try { |block| HTML.escape(block) }.to_s ||
content_to_comment_html(node_comment["contentText"]["runs"].as_a).try &.to_s || ""
author = node_comment["authorText"]?.try &.["simpleText"]
author ||= ""
@ -179,7 +174,7 @@ def fetch_youtube_comments(id, db, continuation, proxies, format, locale, thin_m
json.field "isEdited", false
end
json.field "content", content
json.field "content", html_to_content(content_html)
json.field "contentHtml", content_html
json.field "published", published.to_unix
json.field "publishedText", translate(locale, "`x` ago", recode_date(published, locale))

@ -177,23 +177,17 @@ def login_req(login_form, f_req)
return HTTP::Params.encode(data)
end
def html_to_content(description_html)
if !description_html
description = ""
description_html = ""
else
description_html = description_html.to_s
description = description_html.gsub("<br>", "\n")
description = description.gsub("<br/>", "\n")
def html_to_content(description_html : String)
description = description_html.gsub(/(<br>)|(<br\/>)/, {
"<br>": "\n",
"<br/>": "\n",
})
if description.empty?
description = ""
else
description = XML.parse_html(description).content.strip("\n ")
end
if !description.empty?
description = XML.parse_html(description).content.strip("\n ")
end
return description_html, description
return description
end
def extract_videos(nodeset, ucid = nil, author_name = nil)
@ -230,8 +224,7 @@ def extract_items(nodeset, ucid = nil, author_name = nil)
author ||= ""
author_id ||= ""
description_html = node.xpath_node(%q(.//div[contains(@class, "yt-lockup-description")]))
description_html, description = html_to_content(description_html)
description_html = node.xpath_node(%q(.//div[contains(@class, "yt-lockup-description")])).try &.to_s || ""
tile = node.xpath_node(%q(.//div[contains(@class, "yt-lockup-tile")]))
if !tile
@ -330,7 +323,6 @@ def extract_items(nodeset, ucid = nil, author_name = nil)
author_thumbnail: author_thumbnail,
subscriber_count: subscriber_count,
video_count: video_count,
description: description,
description_html: description_html
)
else
@ -396,7 +388,6 @@ def extract_items(nodeset, ucid = nil, author_name = nil)
ucid: author_id,
published: published,
views: view_count,
description: description,
description_html: description_html,
length_seconds: length_seconds,
live_now: live_now,

@ -47,7 +47,6 @@ struct Playlist
author: String,
author_thumbnail: String,
ucid: String,
description: String,
description_html: String,
video_count: Int32,
views: Int64,
@ -214,9 +213,8 @@ def fetch_playlist(plid, locale)
end
title = title.content.strip(" \n")
description_html = document.xpath_node(%q(//span[@class="pl-header-description-text"]/div/div[1]))
description_html ||= document.xpath_node(%q(//span[@class="pl-header-description-text"]))
description_html, description = html_to_content(description_html)
description_html = document.xpath_node(%q(//span[@class="pl-header-description-text"]/div/div[1])).try &.to_s ||
document.xpath_node(%q(//span[@class="pl-header-description-text"])).try &.to_s || ""
# YouTube allows anonymous playlists, so most of this can be empty or optional
anchor = document.xpath_node(%q(//ul[@class="pl-header-details"]))
@ -245,7 +243,6 @@ def fetch_playlist(plid, locale)
author: author,
author_thumbnail: author_thumbnail,
ucid: ucid,
description: description,
description_html: description_html,
video_count: video_count,
views: views,

@ -31,7 +31,7 @@ struct SearchVideo
xml.element("media:title") { xml.text self.title }
xml.element("media:thumbnail", url: "#{host_url}/vi/#{self.id}/mqdefault.jpg",
width: "320", height: "180")
xml.element("media:description") { xml.text self.description }
xml.element("media:description") { xml.text html_to_content(self.description_html) }
end
xml.element("media:community") do
@ -64,7 +64,7 @@ struct SearchVideo
generate_thumbnails(json, self.id, config, kemal_config)
end
json.field "description", self.description
json.field "description", html_to_content(self.description_html)
json.field "descriptionHtml", self.description_html
json.field "viewCount", self.views
@ -94,7 +94,6 @@ struct SearchVideo
ucid: String,
published: Time,
views: Int64,
description: String,
description_html: String,
length_seconds: Int32,
live_now: Bool,
@ -187,7 +186,7 @@ struct SearchChannel
json.field "subCount", self.subscriber_count
json.field "videoCount", self.video_count
json.field "description", self.description
json.field "description", html_to_content(self.description_html)
json.field "descriptionHtml", self.description_html
end
end
@ -208,7 +207,6 @@ struct SearchChannel
author_thumbnail: String,
subscriber_count: Int32,
video_count: Int32,
description: String,
description_html: String,
})
end

@ -286,10 +286,8 @@ struct Video
generate_storyboards(json, self.id, self.storyboards, config, kemal_config)
end
description_html, description = html_to_content(self.description)
json.field "description", description
json.field "descriptionHtml", description_html
json.field "description", html_to_content(self.description_html)
json.field "descriptionHtml", self.description_html
json.field "published", self.published.to_unix
json.field "publishedText", translate(locale, "`x` ago", recode_date(self.published, locale))
json.field "keywords", self.keywords
@ -467,6 +465,17 @@ struct Video
end
end
# Returns the video's HTML description.
#
# `description_html` is stored in the DB as `description`, which can be
# quite confusing. Since it currently isn't very practical to rename it,
# this getter (together with the matching `description_html=` setter)
# exposes the same value under the clearer name.
def description_html
self.description
end
# Setter counterpart of `description_html`: assigns *other* to
# `description`, the name under which the HTML description is kept.
def description_html=(other : String)
self.description = other
end
def allow_ratings
allow_ratings = player_response["videoDetails"]?.try &.["allowRatings"]?.try &.as_bool
@ -796,14 +805,19 @@ struct Video
end
def short_description
description = self.description.gsub("<br>", " ")
description = description.gsub("<br/>", " ")
description = XML.parse_html(description).content[0..200].gsub('"', "&quot;").gsub("\n", " ").strip(" ")
if description.empty?
description = " "
short_description = self.description_html.gsub(/(<br>)|(<br\/>|"|\n)/, {
"<br>" => " ",
"<br/>" => " ",
"\"" => "&quot;",
"\n" => " ",
})
short_description = XML.parse_html(short_description).content[0..200].strip(" ")
if short_description.empty?
short_description = " "
end
return description
return short_description
end
def length_seconds
@ -1151,28 +1165,23 @@ def fetch_video(id, proxies, region)
end
title = info["title"]
author = info["author"]
ucid = info["ucid"]
author = info["author"]? || ""
ucid = info["ucid"]? || ""
views = html.xpath_node(%q(//meta[@itemprop="interactionCount"]))
views = views.try &.["content"].to_i64?
views ||= 0_i64
.try &.["content"].to_i64? || 0_i64
likes = html.xpath_node(%q(//button[@title="I like this"]/span))
likes = likes.try &.content.delete(",").try &.to_i?
likes ||= 0
.try &.content.delete(",").try &.to_i? || 0
dislikes = html.xpath_node(%q(//button[@title="I dislike this"]/span))
dislikes = dislikes.try &.content.delete(",").try &.to_i?
dislikes ||= 0
.try &.content.delete(",").try &.to_i? || 0
avg_rating = (likes.to_f/(likes.to_f + dislikes.to_f) * 4 + 1)
avg_rating = avg_rating.nan? ? 0.0 : avg_rating
info["avg_rating"] = "#{avg_rating}"
description = html.xpath_node(%q(//p[@id="eow-description"]))
description = description ? description.to_xml(options: XML::SaveOptions::NO_DECL) : %q(<p id="eow-description"></p>)
description_html = html.xpath_node(%q(//p[@id="eow-description"])).try &.to_xml(options: XML::SaveOptions::NO_DECL) || ""
wilson_score = ci_lower_bound(likes, likes + dislikes)
published = html.xpath_node(%q(//meta[@itemprop="datePublished"])).try &.["content"]
@ -1188,7 +1197,8 @@ def fetch_video(id, proxies, region)
genre = html.xpath_node(%q(//meta[@itemprop="genre"])).try &.["content"]
genre ||= ""
genre_url = html.xpath_node(%(//ul[contains(@class, "watch-info-tag-list")]/li/a[text()="#{genre}"])).try &.["href"]
genre_url = html.xpath_node(%(//ul[contains(@class, "watch-info-tag-list")]/li/a[text()="#{genre}"])).try &.["href"]?
genre_url ||= ""
# YouTube provides invalid URLs for some genres, so we fix that here
case genre
@ -1205,30 +1215,12 @@ def fetch_video(id, proxies, region)
when "Trailers"
genre_url = "/channel/UClgRkhTL3_hImCAmdLfDE4g"
end
genre_url ||= ""
license = html.xpath_node(%q(//h4[contains(text(),"License")]/parent::*/ul/li))
if license
license = license.content
else
license = ""
end
sub_count_text = html.xpath_node(%q(//span[contains(@class, "yt-subscriber-count")]))
if sub_count_text
sub_count_text = sub_count_text["title"]
else
sub_count_text = "0"
end
author_thumbnail = html.xpath_node(%(//span[@class="yt-thumb-clip"]/img))
if author_thumbnail
author_thumbnail = author_thumbnail["data-thumb"]
else
author_thumbnail = ""
end
license = html.xpath_node(%q(//h4[contains(text(),"License")]/parent::*/ul/li)).try &.content || ""
sub_count_text = html.xpath_node(%q(//span[contains(@class, "yt-subscriber-count")])).try &.["title"]? || "0"
author_thumbnail = html.xpath_node(%(//span[@class="yt-thumb-clip"]/img)).try &.["data-thumb"]? || ""
video = Video.new(id, info, Time.utc, title, views, likes, dislikes, wilson_score, published, description,
video = Video.new(id, info, Time.utc, title, views, likes, dislikes, wilson_score, published, description_html,
nil, author, ucid, allowed_regions, is_family_friendly, genre, genre_url, license, sub_count_text, author_thumbnail)
return video

@ -43,7 +43,7 @@
var player_data = {
aspect_ratio: '<%= aspect_ratio %>',
title: "<%= video.title.dump_unquoted %>",
description: "<%= HTML.escape(description) %>",
description: "<%= HTML.escape(video.short_description) %>",
thumbnail: "<%= thumbnail %>"
}
</script>

@ -1,12 +1,12 @@
<% content_for "header" do %>
<meta name="thumbnail" content="<%= thumbnail %>">
<meta name="description" content="<%= description %>">
<meta name="description" content="<%= video.short_description %>">
<meta name="keywords" content="<%= video.keywords.join(",") %>">
<meta property="og:site_name" content="Invidious">
<meta property="og:url" content="<%= host_url %>/watch?v=<%= video.id %>">
<meta property="og:title" content="<%= HTML.escape(video.title) %>">
<meta property="og:image" content="/vi/<%= video.id %>/maxres.jpg">
<meta property="og:description" content="<%= description %>">
<meta property="og:description" content="<%= video.short_description %>">
<meta property="og:type" content="video.other">
<meta property="og:video:url" content="<%= host_url %>/embed/<%= video.id %>">
<meta property="og:video:secure_url" content="<%= host_url %>/embed/<%= video.id %>">
@ -17,7 +17,7 @@
<meta name="twitter:site" content="@omarroth1">
<meta name="twitter:url" content="<%= host_url %>/watch?v=<%= video.id %>">
<meta name="twitter:title" content="<%= HTML.escape(video.title) %>">
<meta name="twitter:description" content="<%= description %>">
<meta name="twitter:description" content="<%= video.short_description %>">
<meta name="twitter:image" content="<%= host_url %>/vi/<%= video.id %>/maxres.jpg">
<meta name="twitter:player" content="<%= host_url %>/embed/<%= video.id %>">
<meta name="twitter:player:width" content="1280">
@ -185,7 +185,7 @@ var video_data = {
</p>
<div>
<%= video.description %>
<%= video.description_html %>
</div>
<hr>

Loading…
Cancel
Save