From 1eca969cf6b4096789014619285c98d1def40ee3 Mon Sep 17 00:00:00 2001
From: Omar Roth
Date: Mon, 15 Jun 2020 17:33:23 -0500
Subject: [PATCH] Add support for polymer redesign
---
config/sql/videos.sql | 17 -
spec/helpers_spec.cr | 4 +-
src/invidious.cr | 173 ++---
src/invidious/channels.cr | 32 +-
src/invidious/comments.cr | 24 +-
src/invidious/helpers/helpers.cr | 168 +++-
src/invidious/helpers/jobs.cr | 2 +-
src/invidious/helpers/signatures.cr | 4 +-
src/invidious/helpers/utils.cr | 2 +-
src/invidious/mixes.cr | 1 -
src/invidious/search.cr | 69 +-
src/invidious/trending.cr | 24 +-
src/invidious/users.cr | 4 +-
src/invidious/videos.cr | 890 ++++++++--------------
src/invidious/views/components/item.ecr | 4 +-
src/invidious/views/components/player.ecr | 12 +-
src/invidious/views/watch.ecr | 54 +-
17 files changed, 616 insertions(+), 868 deletions(-)
diff --git a/config/sql/videos.sql b/config/sql/videos.sql
index 6ded01de..8def2f83 100644
--- a/config/sql/videos.sql
+++ b/config/sql/videos.sql
@@ -7,23 +7,6 @@ CREATE TABLE public.videos
id text NOT NULL,
info text,
updated timestamp with time zone,
- title text,
- views bigint,
- likes integer,
- dislikes integer,
- wilson_score double precision,
- published timestamp with time zone,
- description text,
- language text,
- author text,
- ucid text,
- allowed_regions text[],
- is_family_friendly boolean,
- genre text,
- genre_url text,
- license text,
- sub_count_text text,
- author_thumbnail text,
CONSTRAINT videos_pkey PRIMARY KEY (id)
);
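
The migration above collapses every cached metadata column into the single `info` text column, which now stores the raw player JSON. A minimal sketch of the intended round trip (sample value fabricated; the real converter is `Video::JSONConverter`, added in videos.cr below): the stored text is parsed back into a `Hash(String, JSON::Any)` whenever a row is read.

    require "json"

    # Hypothetical stored value for videos.info (shape assumed for illustration).
    stored = %({"videoDetails": {"title": "Example", "viewCount": "42"}})

    info = JSON.parse(stored).as_h
    puts info["videoDetails"]["title"].as_s     # => Example
    puts info["videoDetails"]["viewCount"].as_s # => 42
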
diff --git a/spec/helpers_spec.cr b/spec/helpers_spec.cr
index 37e36c61..26922bb2 100644
--- a/spec/helpers_spec.cr
+++ b/spec/helpers_spec.cr
@@ -27,9 +27,9 @@ describe "Helper" do
describe "#produce_channel_search_url" do
it "correctly produces token for searching a specific channel" do
- produce_channel_search_url("UCXuqSBlHAE6Xw-yeJA0Tunw", "", 100).should eq("/browse_ajax?continuation=4qmFsgI-EhhVQ1h1cVNCbEhBRTZYdy15ZUpBMFR1bncaIEVnWnpaV0Z5WTJnd0FqZ0JZQUZxQUxnQkFIb0RNVEF3WgA%3D&gl=US&hl=en")
+ produce_channel_search_url("UCXuqSBlHAE6Xw-yeJA0Tunw", "", 100).should eq("/browse_ajax?continuation=4qmFsgI2EhhVQ1h1cVNCbEhBRTZYdy15ZUpBMFR1bncaGEVnWnpaV0Z5WTJnNEFYb0RNVEF3dUFFQVoA&gl=US&hl=en")
- produce_channel_search_url("UCXuqSBlHAE6Xw-yeJA0Tunw", "По ожиशुपतिरपि子而時ஸ்றீனி", 0).should eq("/browse_ajax?continuation=4qmFsgJ8EhhVQ1h1cVNCbEhBRTZYdy15ZUpBMFR1bncaIEVnWnpaV0Z5WTJnd0FqZ0JZQUZxQUxnQkFIb0JNQT09Wj7Qn9C-INC-0LbQuOCktuClgeCkquCkpOCkv-CksOCkquCkv-WtkOiAjOaZguCuuOCvjeCuseCvgOCuqeCuvw%3D%3D&gl=US&hl=en")
+ produce_channel_search_url("UCXuqSBlHAE6Xw-yeJA0Tunw", "По ожиशुपतिरपि子而時ஸ்றீனி", 0).should eq("/browse_ajax?continuation=4qmFsgJ0EhhVQ1h1cVNCbEhBRTZYdy15ZUpBMFR1bncaGEVnWnpaV0Z5WTJnNEFYb0JNTGdCQUE9PVo-0J_QviDQvtC20LjgpLbgpYHgpKrgpKTgpL_gpLDgpKrgpL_lrZDogIzmmYLgrrjgr43grrHgr4Dgrqngrr8%3D&gl=US&hl=en")
end
end
diff --git a/src/invidious.cr b/src/invidious.cr
index 958f95f7..c95c6419 100644
--- a/src/invidious.cr
+++ b/src/invidious.cr
@@ -510,16 +510,16 @@ get "/watch" do |env|
comment_html ||= ""
end
- fmt_stream = video.fmt_stream(decrypt_function)
- adaptive_fmts = video.adaptive_fmts(decrypt_function)
+ fmt_stream = video.fmt_stream
+ adaptive_fmts = video.adaptive_fmts
if params.local
- fmt_stream.each { |fmt| fmt["url"] = URI.parse(fmt["url"]).full_path }
- adaptive_fmts.each { |fmt| fmt["url"] = URI.parse(fmt["url"]).full_path }
+ fmt_stream.each { |fmt| fmt["url"] = JSON::Any.new(URI.parse(fmt["url"].as_s).full_path) }
+ adaptive_fmts.each { |fmt| fmt["url"] = JSON::Any.new(URI.parse(fmt["url"].as_s).full_path) }
end
- video_streams = video.video_streams(adaptive_fmts)
- audio_streams = video.audio_streams(adaptive_fmts)
+ video_streams = video.video_streams
+ audio_streams = video.audio_streams
# Older videos may not have audio sources available.
# We redirect here so they're not unplayable
@@ -549,33 +549,23 @@ get "/watch" do |env|
aspect_ratio = "16:9"
- video.description_html = fill_links(video.description_html, "https", "www.youtube.com")
- video.description_html = replace_links(video.description_html)
-
- host_url = make_host_url(config, Kemal.config)
-
- if video.player_response["streamingData"]?.try &.["hlsManifestUrl"]?
- hlsvp = video.player_response["streamingData"]["hlsManifestUrl"].as_s
- hlsvp = hlsvp.gsub("https://manifest.googlevideo.com", host_url)
- end
-
thumbnail = "/vi/#{video.id}/maxres.jpg"
if params.raw
if params.listen
- url = audio_streams[0]["url"]
+ url = audio_streams[0]["url"].as_s
audio_streams.each do |fmt|
- if fmt["bitrate"] == params.quality.rchop("k")
- url = fmt["url"]
+ if fmt["bitrate"].as_i == params.quality.rchop("k").to_i
+ url = fmt["url"].as_s
end
end
else
- url = fmt_stream[0]["url"]
+ url = fmt_stream[0]["url"].as_s
fmt_stream.each do |fmt|
- if fmt["label"].split(" - ")[0] == params.quality
- url = fmt["url"]
+ if fmt["quality"].as_s == params.quality
+ url = fmt["url"].as_s
end
end
end
@@ -583,24 +573,6 @@ get "/watch" do |env|
next env.redirect url
end
- rvs = [] of Hash(String, String)
- video.info["rvs"]?.try &.split(",").each do |rv|
- rvs << HTTP::Params.parse(rv).to_h
- end
-
- rating = video.info["avg_rating"].to_f64
- if video.views > 0
- engagement = ((video.dislikes.to_f + video.likes.to_f)/video.views * 100)
- else
- engagement = 0
- end
-
- playability_status = video.player_response["playabilityStatus"]?
- if playability_status && playability_status["status"] == "LIVE_STREAM_OFFLINE" && !video.premiere_timestamp
- reason = playability_status["reason"]?.try &.as_s
- end
- reason ||= ""
-
templated "watch"
end
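
With formats now kept as `Hash(String, JSON::Any)`, quality matching compares the polymer `quality` field directly instead of splitting the old `label` string. A minimal sketch of the selection loop above, with fabricated format hashes:

    require "json"

    # Fabricated format hashes mirroring the shape of video.fmt_stream.
    fmt_stream = [
      {"quality" => JSON::Any.new("medium"), "url" => JSON::Any.new("https://example.com/medium")},
      {"quality" => JSON::Any.new("hd720"), "url" => JSON::Any.new("https://example.com/hd720")},
    ]

    requested = "hd720"
    url = fmt_stream[0]["url"].as_s # default to the first stream
    fmt_stream.each do |fmt|
      url = fmt["url"].as_s if fmt["quality"].as_s == requested
    end
    puts url # => https://example.com/hd720
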
@@ -752,16 +724,16 @@ get "/embed/:id" do |env|
notifications.delete(id)
end
- fmt_stream = video.fmt_stream(decrypt_function)
- adaptive_fmts = video.adaptive_fmts(decrypt_function)
+ fmt_stream = video.fmt_stream
+ adaptive_fmts = video.adaptive_fmts
if params.local
- fmt_stream.each { |fmt| fmt["url"] = URI.parse(fmt["url"]).full_path }
- adaptive_fmts.each { |fmt| fmt["url"] = URI.parse(fmt["url"]).full_path }
+ fmt_stream.each { |fmt| fmt["url"] = JSON::Any.new(URI.parse(fmt["url"].as_s).full_path) }
+ adaptive_fmts.each { |fmt| fmt["url"] = JSON::Any.new(URI.parse(fmt["url"].as_s).full_path) }
end
- video_streams = video.video_streams(adaptive_fmts)
- audio_streams = video.audio_streams(adaptive_fmts)
+ video_streams = video.video_streams
+ audio_streams = video.audio_streams
if audio_streams.empty? && !video.live_now
if params.quality == "dash"
@@ -788,25 +760,13 @@ get "/embed/:id" do |env|
aspect_ratio = nil
- video.description_html = fill_links(video.description_html, "https", "www.youtube.com")
- video.description_html = replace_links(video.description_html)
-
- host_url = make_host_url(config, Kemal.config)
-
- if video.player_response["streamingData"]?.try &.["hlsManifestUrl"]?
- hlsvp = video.player_response["streamingData"]["hlsManifestUrl"].as_s
- hlsvp = hlsvp.gsub("https://manifest.googlevideo.com", host_url)
- end
-
thumbnail = "/vi/#{video.id}/maxres.jpg"
if params.raw
- url = fmt_stream[0]["url"]
+ url = fmt_stream[0]["url"].as_s
fmt_stream.each do |fmt|
- if fmt["label"].split(" - ")[0] == params.quality
- url = fmt["url"]
- end
+ url = fmt["url"].as_s if fmt["quality"].as_s == params.quality
end
next env.redirect url
@@ -1469,7 +1429,6 @@ post "/login" do |env|
traceback = IO::Memory.new
# See https://github.com/ytdl-org/youtube-dl/blob/2019.04.07/youtube_dl/extractor/youtube.py#L82
- # TODO: Convert to QUIC
begin
client = QUIC::Client.new(LOGIN_URL)
headers = HTTP::Headers.new
@@ -2329,8 +2288,7 @@ get "/modify_notifications" do |env|
end
headers = cookies.add_request_headers(headers)
- match = html.body.match(/'XSRF_TOKEN': "(?<session_token>[A-Za-z0-9\_\-\=]+)"/)
- if match
+ if match = html.body.match(/'XSRF_TOKEN': "(?<session_token>[^"]+)"/)
session_token = match["session_token"]
else
next env.redirect referer
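
The XSRF token pattern is loosened here from an explicit base64 alphabet to anything up to the closing quote, since the token format has changed. A quick sketch with a made-up body:

    # Sketch of the broadened XSRF token match (sample body fabricated).
    body = %('XSRF_TOKEN': "QUFF-abc_123=")
    if match = body.match(/'XSRF_TOKEN': "(?<session_token>[^"]+)"/)
      puts match["session_token"] # => QUFF-abc_123=
    end
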
@@ -3575,14 +3533,14 @@ get "/channel/:ucid" do |env|
item.author
end
end
- items = items.select { |item| item.is_a?(SearchPlaylist) }.map { |item| item.as(SearchPlaylist) }
+ items = items.select(&.is_a?(SearchPlaylist)).map(&.as(SearchPlaylist))
items.each { |item| item.author = "" }
else
sort_options = {"newest", "oldest", "popular"}
sort_by ||= "newest"
- items, count = get_60_videos(channel.ucid, channel.author, page, channel.auto_generated, sort_by)
- items.select! { |item| !item.paid }
+ count, items = get_60_videos(channel.ucid, channel.author, page, channel.auto_generated, sort_by)
+ items.reject! &.paid
env.set "search", "channel:#{channel.ucid} "
end
@@ -5125,7 +5083,7 @@ get "/api/manifest/dash/id/:id" do |env|
next
end
- if dashmpd = video.player_response["streamingData"]?.try &.["dashManifestUrl"]?.try &.as_s
+ if dashmpd = video.dash_manifest_url
manifest = YT_POOL.client &.get(URI.parse(dashmpd).full_path).body
manifest = manifest.gsub(/[^<]+<\/BaseURL>/) do |baseurl|
@@ -5142,16 +5100,16 @@ get "/api/manifest/dash/id/:id" do |env|
next manifest
end
- adaptive_fmts = video.adaptive_fmts(decrypt_function)
+ adaptive_fmts = video.adaptive_fmts
if local
adaptive_fmts.each do |fmt|
- fmt["url"] = URI.parse(fmt["url"]).full_path
+ fmt["url"] = JSON::Any.new(URI.parse(fmt["url"].as_s).full_path)
end
end
- audio_streams = video.audio_streams(adaptive_fmts)
- video_streams = video.video_streams(adaptive_fmts).sort_by { |stream| {stream["size"].split("x")[0].to_i, stream["fps"].to_i} }.reverse
+ audio_streams = video.audio_streams
+ video_streams = video.video_streams.sort_by { |stream| {stream["width"].as_i, stream["fps"].as_i} }.reverse
XML.build(indent: " ", encoding: "UTF-8") do |xml|
xml.element("MPD", "xmlns": "urn:mpeg:dash:schema:mpd:2011",
@@ -5161,24 +5119,22 @@ get "/api/manifest/dash/id/:id" do |env|
i = 0
{"audio/mp4", "audio/webm"}.each do |mime_type|
- mime_streams = audio_streams.select { |stream| stream["type"].starts_with? mime_type }
- if mime_streams.empty?
- next
- end
+ mime_streams = audio_streams.select { |stream| stream["mimeType"].as_s.starts_with? mime_type }
+ next if mime_streams.empty?
xml.element("AdaptationSet", id: i, mimeType: mime_type, startWithSAP: 1, subsegmentAlignment: true) do
mime_streams.each do |fmt|
- codecs = fmt["type"].split("codecs=")[1].strip('"')
- bandwidth = fmt["bitrate"].to_i * 1000
- itag = fmt["itag"]
- url = fmt["url"]
+ codecs = fmt["mimeType"].as_s.split("codecs=")[1].strip('"')
+ bandwidth = fmt["bitrate"].as_i
+ itag = fmt["itag"].as_i
+ url = fmt["url"].as_s
xml.element("Representation", id: fmt["itag"], codecs: codecs, bandwidth: bandwidth) do
xml.element("AudioChannelConfiguration", schemeIdUri: "urn:mpeg:dash:23003:3:audio_channel_configuration:2011",
value: "2")
xml.element("BaseURL") { xml.text url }
- xml.element("SegmentBase", indexRange: fmt["index"]) do
- xml.element("Initialization", range: fmt["init"])
+ xml.element("SegmentBase", indexRange: "#{fmt["indexRange"]["start"]}-#{fmt["indexRange"]["end"]}") do
+ xml.element("Initialization", range: "#{fmt["initRange"]["start"]}-#{fmt["initRange"]["end"]}")
end
end
end
@@ -5187,21 +5143,24 @@ get "/api/manifest/dash/id/:id" do |env|
i += 1
end
+ potential_heights = {4320, 2160, 1440, 1080, 720, 480, 360, 240, 144}
+
{"video/mp4", "video/webm"}.each do |mime_type|
- mime_streams = video_streams.select { |stream| stream["type"].starts_with? mime_type }
+ mime_streams = video_streams.select { |stream| stream["mimeType"].as_s.starts_with? mime_type }
next if mime_streams.empty?
heights = [] of Int32
xml.element("AdaptationSet", id: i, mimeType: mime_type, startWithSAP: 1, subsegmentAlignment: true, scanType: "progressive") do
mime_streams.each do |fmt|
- codecs = fmt["type"].split("codecs=")[1].strip('"')
- bandwidth = fmt["bitrate"]
- itag = fmt["itag"]
- url = fmt["url"]
- width, height = fmt["size"].split("x").map { |i| i.to_i }
+ codecs = fmt["mimeType"].as_s.split("codecs=")[1].strip('"')
+ bandwidth = fmt["bitrate"].as_i
+ itag = fmt["itag"].as_i
+ url = fmt["url"].as_s
+ width = fmt["width"].as_i
+ height = fmt["height"].as_i
# Resolutions reported by YouTube player (may not accurately reflect source)
- height = [4320, 2160, 1440, 1080, 720, 480, 360, 240, 144].sort_by { |i| (height - i).abs }[0]
+ height = potential_heights.min_by { |i| (height - i).abs }
next if unique_res && heights.includes? height
heights << height
@@ -5209,8 +5168,8 @@ get "/api/manifest/dash/id/:id" do |env|
startWithSAP: "1", maxPlayoutRate: "1",
bandwidth: bandwidth, frameRate: fmt["fps"]) do
xml.element("BaseURL") { xml.text url }
- xml.element("SegmentBase", indexRange: fmt["index"]) do
- xml.element("Initialization", range: fmt["init"])
+ xml.element("SegmentBase", indexRange: "#{fmt["indexRange"]["start"]}-#{fmt["indexRange"]["end"]}") do
+ xml.element("Initialization", range: "#{fmt["initRange"]["start"]}-#{fmt["initRange"]["end"]}")
end
end
end
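
Polymer reports byte ranges as `{"start": ..., "end": ...}` objects rather than the old preformatted `start-end` strings, so the manifest builder flattens them as above. A small sketch with fabricated ranges:

    require "json"

    # Fabricated polymer format: byte ranges arrive as start/end objects.
    fmt = JSON.parse(%({
      "indexRange": {"start": "911", "end": "1422"},
      "initRange":  {"start": "0",   "end": "910"}
    }))

    index_range = "#{fmt["indexRange"]["start"]}-#{fmt["indexRange"]["end"]}"
    init_range  = "#{fmt["initRange"]["start"]}-#{fmt["initRange"]["end"]}"
    puts index_range # => 911-1422
    puts init_range  # => 0-910
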
@@ -5224,10 +5183,10 @@ get "/api/manifest/dash/id/:id" do |env|
end
get "/api/manifest/hls_variant/*" do |env|
- manifest = YT_POOL.client &.get(env.request.path)
+ response = YT_POOL.client &.get(env.request.path)
- if manifest.status_code != 200
- env.response.status_code = manifest.status_code
+ if response.status_code != 200
+ env.response.status_code = response.status_code
next
end
@@ -5247,10 +5206,10 @@ get "/api/manifest/hls_variant/*" do |env|
end
get "/api/manifest/hls_playlist/*" do |env|
- manifest = YT_POOL.client &.get(env.request.path)
+ response = YT_POOL.client &.get(env.request.path)
- if manifest.status_code != 200
- env.response.status_code = manifest.status_code
+ if response.status_code != 200
+ env.response.status_code = response.status_code
next
end
@@ -5320,7 +5279,7 @@ get "/latest_version" do |env|
end
id ||= env.params.query["id"]?
- itag ||= env.params.query["itag"]?
+ itag ||= env.params.query["itag"]?.try &.to_i
region = env.params.query["region"]?
@@ -5335,26 +5294,16 @@ get "/latest_version" do |env|
video = get_video(id, PG_DB, region: region)
- fmt_stream = video.fmt_stream(decrypt_function)
- adaptive_fmts = video.adaptive_fmts(decrypt_function)
+ fmt = video.fmt_stream.find(nil) { |f| f["itag"].as_i == itag } || video.adaptive_fmts.find(nil) { |f| f["itag"].as_i == itag }
+ url = fmt.try &.["url"]?.try &.as_s
- urls = (fmt_stream + adaptive_fmts).select { |fmt| fmt["itag"] == itag }
- if urls.empty?
+ if !url
env.response.status_code = 404
next
- elsif urls.size > 1
- env.response.status_code = 409
- next
- end
-
- url = urls[0]["url"]
- if local
- url = URI.parse(url).full_path.not_nil!
end
- if title
- url += "&title=#{title}"
- end
+ url = URI.parse(url).full_path.not_nil! if local
+ url = "#{url}&title=#{title}" if title
env.redirect url
end
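
`itag` is now an integer in the format JSON, so the query parameter is converted with `.to_i` before the lookup. A minimal sketch of the new single-pass search across both format lists (sample hashes fabricated):

    require "json"

    # Fabricated format lists; real entries carry many more keys.
    fmt_stream    = [{"itag" => JSON::Any.new(18_i64), "url" => JSON::Any.new("https://example.com/18")}]
    adaptive_fmts = [{"itag" => JSON::Any.new(140_i64), "url" => JSON::Any.new("https://example.com/140")}]

    itag = "140".to_i
    fmt = fmt_stream.find(nil) { |f| f["itag"].as_i == itag } ||
          adaptive_fmts.find(nil) { |f| f["itag"].as_i == itag }
    puts fmt.try &.["url"]?.try &.as_s # => https://example.com/140
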
diff --git a/src/invidious/channels.cr b/src/invidious/channels.cr
index f1a57eee..cbfa521d 100644
--- a/src/invidious/channels.cr
+++ b/src/invidious/channels.cr
@@ -232,9 +232,9 @@ def fetch_channel(ucid, db, pull_all_videos = true, locale = nil)
nodeset = document.xpath_nodes(%q(//li[contains(@class, "feed-item-container")]))
if auto_generated
- videos = extract_videos(nodeset)
+ videos = extract_videos_html(nodeset)
else
- videos = extract_videos(nodeset, ucid, author)
+ videos = extract_videos_html(nodeset, ucid, author)
end
end
@@ -317,9 +317,9 @@ def fetch_channel(ucid, db, pull_all_videos = true, locale = nil)
nodeset = nodeset.not_nil!
if auto_generated
- videos = extract_videos(nodeset)
+ videos = extract_videos_html(nodeset)
else
- videos = extract_videos(nodeset, ucid, author)
+ videos = extract_videos_html(nodeset, ucid, author)
end
count = nodeset.size
@@ -429,7 +429,7 @@ def fetch_channel_playlists(ucid, author, auto_generated, continuation, sort_by)
if auto_generated
items = extract_shelf_items(nodeset, ucid, author)
else
- items = extract_items(nodeset, ucid, author)
+ items = extract_items_html(nodeset, ucid, author)
end
return items, continuation
@@ -584,16 +584,8 @@ def fetch_channel_community(ucid, continuation, locale, format, thin_mode)
headers = HTTP::Headers.new
headers["cookie"] = response.cookies.add_request_headers(headers)["cookie"]
- headers["content-type"] = "application/x-www-form-urlencoded"
- headers["x-client-data"] = "CIi2yQEIpbbJAQipncoBCNedygEIqKPKAQ=="
- headers["x-spf-previous"] = ""
- headers["x-spf-referer"] = ""
-
- headers["x-youtube-client-name"] = "1"
- headers["x-youtube-client-version"] = "2.20180719"
-
- session_token = response.body.match(/"XSRF_TOKEN":"(?<session_token>[A-Za-z0-9\_\-\=]+)"/).try &.["session_token"]? || ""
+ session_token = response.body.match(/"XSRF_TOKEN":"(?<session_token>[^"]+)"/).try &.["session_token"]? || ""
post_req = {
session_token: session_token,
}
@@ -633,13 +625,7 @@ def fetch_channel_community(ucid, continuation, locale, format, thin_mode)
next if !post
- if !post["contentText"]?
- content_html = ""
- else
- content_html = post["contentText"]["simpleText"]?.try &.as_s.rchop('\ufeff').try { |b| HTML.escape(b) }.to_s ||
- post["contentText"]["runs"]?.try &.as_a.try { |r| content_to_comment_html(r).try &.to_s } || ""
- end
-
+ content_html = post["contentText"]?.try { |t| parse_content(t) } || ""
author = post["authorText"]?.try &.["simpleText"]? || ""
json.object do
@@ -960,7 +946,7 @@ def get_60_videos(ucid, author, page, auto_generated, sort_by = "newest")
2.times do |i|
url = produce_channel_videos_url(ucid, page * 2 + (i - 1), auto_generated: auto_generated, sort_by: sort_by)
- response = YT_POOL.client &.get(url, headers)
+ response = YT_POOL.client &.get(url)
initial_data = JSON.parse(response.body).as_a.find &.["response"]?
break if !initial_data
videos.concat extract_videos(initial_data.as_h)
@@ -980,7 +966,7 @@ def get_latest_videos(ucid)
document = XML.parse_html(json["content_html"].as_s)
nodeset = document.xpath_nodes(%q(//li[contains(@class, "feed-item-container")]))
- videos = extract_videos(nodeset, ucid)
+ videos = extract_videos_html(nodeset, ucid)
end
return videos
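
`/browse_ajax` now returns a JSON array in which only one element carries the `response` payload, hence the `.as_a.find &.["response"]?` unwrapping used above. A rough sketch with a fabricated body:

    require "json"

    # Fabricated /browse_ajax reply: an array where one element holds "response".
    body = %([{"page": "browse"}, {"response": {"contents": {}}}])

    initial_data = JSON.parse(body).as_a.find &.["response"]?
    puts initial_data.try &.["response"]["contents"] # => {}
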
diff --git a/src/invidious/comments.cr b/src/invidious/comments.cr
index 24564bb9..5490d2ea 100644
--- a/src/invidious/comments.cr
+++ b/src/invidious/comments.cr
@@ -59,7 +59,7 @@ end
def fetch_youtube_comments(id, db, cursor, format, locale, thin_mode, region, sort_by = "top")
video = get_video(id, db, region: region)
- session_token = video.info["session_token"]?
+ session_token = video.session_token
case cursor
when nil, ""
@@ -85,17 +85,9 @@ def fetch_youtube_comments(id, db, cursor, format, locale, thin_mode, region, so
session_token: session_token,
}
- headers = HTTP::Headers.new
-
- headers["content-type"] = "application/x-www-form-urlencoded"
- headers["cookie"] = video.info["cookie"]
-
- headers["x-client-data"] = "CIi2yQEIpbbJAQipncoBCNedygEIqKPKAQ=="
- headers["x-spf-previous"] = "https://www.youtube.com/watch?v=#{id}&gl=US&hl=en&disable_polymer=1&has_verified=1&bpctr=9999999999"
- headers["x-spf-referer"] = "https://www.youtube.com/watch?v=#{id}&gl=US&hl=en&disable_polymer=1&has_verified=1&bpctr=9999999999"
-
- headers["x-youtube-client-name"] = "1"
- headers["x-youtube-client-version"] = "2.20180719"
+ headers = HTTP::Headers{
+ "cookie" => video.cookie,
+ }
response = YT_POOL.client(region, &.post("/comment_service_ajax?action_get_comments=1&hl=en&gl=US", headers, form: post_req))
response = JSON.parse(response.body)
@@ -150,8 +142,7 @@ def fetch_youtube_comments(id, db, cursor, format, locale, thin_mode, region, so
node_comment = node["commentRenderer"]
end
- content_html = node_comment["contentText"]["simpleText"]?.try &.as_s.rchop('\ufeff').try { |b| HTML.escape(b) }.to_s ||
- node_comment["contentText"]["runs"]?.try &.as_a.try { |r| content_to_comment_html(r).try &.to_s } || ""
+ content_html = node_comment["contentText"]?.try { |t| parse_content(t) } || ""
author = node_comment["authorText"]?.try &.["simpleText"]? || ""
json.field "author", author
@@ -523,6 +514,11 @@ def fill_links(html, scheme, host)
return html.to_xml(options: XML::SaveOptions::NO_DECL)
end
+def parse_content(content : JSON::Any) : String
+ content["simpleText"]?.try &.as_s.rchop('\ufeff').try { |b| HTML.escape(b) }.to_s ||
+ content["runs"]?.try &.as_a.try { |r| content_to_comment_html(r).try &.to_s } || ""
+end
+
def content_to_comment_html(content)
comment_html = content.map do |run|
text = HTML.escape(run["text"].as_s)
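
The new `parse_content` helper unifies the two shapes YouTube uses for rich text: a flat `simpleText` string or an array of `runs`. A simplified, self-contained sketch of the same idea (the real helper routes `runs` through `content_to_comment_html` to preserve links and formatting):

    require "json"
    require "html"

    # Simplified stand-in for parse_content; handles both content shapes.
    def parse_content_sketch(content : JSON::Any) : String
      content["simpleText"]?.try { |t| HTML.escape(t.as_s) } ||
        content["runs"]?.try &.as_a.map { |r| HTML.escape(r["text"].as_s) }.join ||
        ""
    end

    puts parse_content_sketch(JSON.parse(%({"simpleText": "plain text"})))
    puts parse_content_sketch(JSON.parse(%({"runs": [{"text": "a"}, {"text": "b"}]})))
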
diff --git a/src/invidious/helpers/helpers.cr b/src/invidious/helpers/helpers.cr
index b572ee1c..7a251052 100644
--- a/src/invidious/helpers/helpers.cr
+++ b/src/invidious/helpers/helpers.cr
@@ -313,13 +313,149 @@ def html_to_content(description_html : String)
return description
end
-def extract_videos(nodeset, ucid = nil, author_name = nil)
- videos = extract_items(nodeset, ucid, author_name)
- videos.select { |item| item.is_a?(SearchVideo) }.map { |video| video.as(SearchVideo) }
+def extract_videos(initial_data : Hash(String, JSON::Any))
+ extract_items(initial_data).select(&.is_a?(SearchVideo)).map(&.as(SearchVideo))
end
-def extract_items(nodeset, ucid = nil, author_name = nil)
- # TODO: Make this a 'common', so it makes more sense to be used here
+def extract_items(initial_data : Hash(String, JSON::Any))
+ items = [] of SearchItem
+
+ initial_data.try { |t|
+ t["contents"]? || t["response"]?
+ }.try { |t|
+ t["twoColumnBrowseResultsRenderer"]?.try &.["tabs"].as_a[0]?.try &.["tabRenderer"]["content"] ||
+ t["twoColumnSearchResultsRenderer"]?.try &.["primaryContents"] ||
+ t["continuationContents"]?
+ }.try { |t| t["sectionListRenderer"]? || t["sectionListContinuation"]? }
+ .try &.["contents"]
+ .as_a.each { |c|
+ c.try &.["itemSectionRenderer"]["contents"].as_a
+ .try { |t| t[0]?.try &.["shelfRenderer"]?.try &.["content"]["expandedShelfContentsRenderer"]?.try &.["items"].as_a || t }
+ .each { |item|
+ if i = item["videoRenderer"]?
+ video_id = i["videoId"].as_s
+ title = i["title"].try { |t| t["simpleText"]?.try &.as_s || t["runs"]?.try &.as_a.map(&.["text"].as_s).join("") } || ""
+
+ author_info = i["ownerText"]?.try &.["runs"].as_a[0]?
+ author = author_info.try &.["text"].as_s || ""
+ author_id = author_info.try &.["navigationEndpoint"]?.try &.["browseEndpoint"]["browseId"].as_s || ""
+
+ published = i["publishedTimeText"]?.try &.["simpleText"]?.try { |t| decode_date(t.as_s) } || Time.local
+ view_count = i["viewCountText"]?.try &.["simpleText"]?.try &.as_s.gsub(/\D+/, "").to_i64? || 0_i64
+ description_html = i["descriptionSnippet"]?.try { |t| parse_content(t) } || ""
+ length_seconds = i["lengthText"]?.try &.["simpleText"]?.try &.as_s.try { |t| decode_length_seconds(t) } || 0
+
+ live_now = false
+ paid = false
+ premium = false
+
+ premiere_timestamp = i["upcomingEventData"]?.try &.["startTime"]?.try { |t| Time.unix(t.as_s.to_i64) }
+
+ i["badges"]?.try &.as_a.each do |badge|
+ b = badge["metadataBadgeRenderer"]
+ case b["label"].as_s
+ when "LIVE NOW"
+ live_now = true
+ when "New", "4K", "CC"
+ # TODO
+ when "Premium"
+ paid = true
+
+ # TODO: Potentially available as i["topStandaloneBadge"]["metadataBadgeRenderer"]
+ premium = true
+ else nil # Ignore
+ end
+ end
+
+ items << SearchVideo.new(
+ title: title,
+ id: video_id,
+ author: author,
+ ucid: author_id,
+ published: published,
+ views: view_count,
+ description_html: description_html,
+ length_seconds: length_seconds,
+ live_now: live_now,
+ paid: paid,
+ premium: premium,
+ premiere_timestamp: premiere_timestamp
+ )
+ elsif i = item["channelRenderer"]?
+ author = i["title"]["simpleText"]?.try &.as_s || ""
+ author_id = i["channelId"]?.try &.as_s || ""
+
+ author_thumbnail = i["thumbnail"]["thumbnails"]?.try &.as_a[0]?.try { |u| "https:#{u["url"]}" } || ""
+ subscriber_count = i["subscriberCountText"]?.try &.["simpleText"]?.try &.as_s.try { |s| short_text_to_number(s.split(" ")[0]) } || 0
+
+ auto_generated = false
+ auto_generated = true if !i["videoCountText"]?
+ video_count = i["videoCountText"]?.try &.["runs"].as_a[0]?.try &.["text"].as_s.gsub(/\D/, "").to_i || 0
+ description_html = i["descriptionSnippet"]?.try { |t| parse_content(t) } || ""
+
+ items << SearchChannel.new(
+ author: author,
+ ucid: author_id,
+ author_thumbnail: author_thumbnail,
+ subscriber_count: subscriber_count,
+ video_count: video_count,
+ description_html: description_html,
+ auto_generated: auto_generated,
+ )
+ elsif i = item["playlistRenderer"]?
+ title = i["title"]["simpleText"]?.try &.as_s || ""
+ plid = i["playlistId"]?.try &.as_s || ""
+
+ video_count = i["videoCount"]?.try &.as_s.to_i || 0
+ playlist_thumbnail = i["thumbnails"].as_a[0]?.try &.["thumbnails"]?.try &.as_a[0]?.try &.["url"].as_s || ""
+
+ author_info = i["shortBylineText"]["runs"].as_a[0]?
+ author = author_info.try &.["text"].as_s || ""
+ author_id = author_info.try &.["navigationEndpoint"]?.try &.["browseEndpoint"]["browseId"].as_s || ""
+
+ videos = i["videos"]?.try &.as_a.map do |v|
+ v = v["childVideoRenderer"]
+ v_title = v["title"]["simpleText"]?.try &.as_s || ""
+ v_id = v["videoId"]?.try &.as_s || ""
+ v_length_seconds = v["lengthText"]?.try &.["simpleText"]?.try { |t| decode_length_seconds(t.as_s) } || 0
+ SearchPlaylistVideo.new(
+ title: v_title,
+ id: v_id,
+ length_seconds: v_length_seconds
+ )
+ end || [] of SearchPlaylistVideo
+
+ # TODO: i["publishedTimeText"]?
+
+ items << SearchPlaylist.new(
+ title: title,
+ id: plid,
+ author: author,
+ ucid: author_id,
+ video_count: video_count,
+ videos: videos,
+ thumbnail: playlist_thumbnail
+ )
+ elsif i = item["radioRenderer"]? # Mix
+ # TODO
+ elsif i = item["showRenderer"]? # Show
+ # TODO
+ elsif i = item["shelfRenderer"]?
+ elsif i = item["horizontalCardListRenderer"]?
+ elsif i = item["searchPyvRenderer"]? # Ad
+ end
+ }
+ }
+
+ items
+end
+
+def extract_videos_html(nodeset, ucid = nil, author_name = nil)
+ extract_items_html(nodeset, ucid, author_name).select(&.is_a?(SearchVideo)).map(&.as(SearchVideo))
+end
+
+def extract_items_html(nodeset, ucid = nil, author_name = nil)
+ # TODO: Make this a 'CommonItem', so it makes more sense to be used here
items = [] of SearchItem
nodeset.each do |node|
@@ -456,7 +592,7 @@ def extract_items(nodeset, ucid = nil, author_name = nil)
paid = true
end
- premiere_timestamp = node.xpath_node(%q(.//ul[@class="yt-lockup-meta-info"]/li/span[@class="localized-date"])).try &.["data-timestamp"]?.try &.to_i64
+ premiere_timestamp = node.xpath_node(%q(.//ul[@class="yt-lockup-meta-info"]/li/span[@class="localized-date"])).try &.["data-timestamp"]?.try &.to_i64?
if premiere_timestamp
premiere_timestamp = Time.unix(premiere_timestamp)
end
@@ -683,12 +819,12 @@ def check_table(db, logger, table_name, struct_type = nil)
return if column_array.size <= struct_array.size
- # column_array.each do |column|
- # if !struct_array.includes? column
- # logger.puts("ALTER TABLE #{table_name} DROP COLUMN #{column} CASCADE")
- # db.exec("ALTER TABLE #{table_name} DROP COLUMN #{column} CASCADE")
- # end
- # end
+ column_array.each do |column|
+ if !struct_array.includes? column
+ logger.puts("ALTER TABLE #{table_name} DROP COLUMN #{column} CASCADE")
+ db.exec("ALTER TABLE #{table_name} DROP COLUMN #{column} CASCADE")
+ end
+ end
end
class PG::ResultSet
@@ -864,12 +1000,12 @@ def create_notification_stream(env, topics, connection_channel)
end
end
-def extract_initial_data(body)
- initial_data = body.match(/window\["ytInitialData"\] = (?<info>.*?);\n/).try &.["info"] || "{}"
+def extract_initial_data(body) : Hash(String, JSON::Any)
+ initial_data = body.match(/window\["ytInitialData"\]\s*=\s*(?<info>.*?);+\n/).try &.["info"] || "{}"
if initial_data.starts_with?("JSON.parse(\"")
- return JSON.parse(JSON.parse(%({"initial_data":"#{initial_data[12..-3]}"}))["initial_data"].as_s)
+ return JSON.parse(JSON.parse(%({"initial_data":"#{initial_data[12..-3]}"}))["initial_data"].as_s).as_h
else
- return JSON.parse(initial_data)
+ return JSON.parse(initial_data).as_h
end
end
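
`extract_initial_data` now tolerates flexible whitespace and trailing semicolons, and still handles both body shapes: raw JSON and the `JSON.parse("...")` wrapper. A runnable sketch with fabricated bodies:

    require "json"

    # Same logic as the patched helper, exercised on both body shapes.
    def initial_data_sketch(body) : Hash(String, JSON::Any)
      initial_data = body.match(/window\["ytInitialData"\]\s*=\s*(?<info>.*?);+\n/).try &.["info"] || "{}"
      if initial_data.starts_with?("JSON.parse(\"")
        JSON.parse(JSON.parse(%({"initial_data":"#{initial_data[12..-3]}"}))["initial_data"].as_s).as_h
      else
        JSON.parse(initial_data).as_h
      end
    end

    plain   = %(window["ytInitialData"] = {"a": 1};\n)
    wrapped = %(window["ytInitialData"] = JSON.parse("{\\"a\\": 1}");\n)
    puts initial_data_sketch(plain)["a"]   # => 1
    puts initial_data_sketch(wrapped)["a"] # => 1
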
diff --git a/src/invidious/helpers/jobs.cr b/src/invidious/helpers/jobs.cr
index a9aee064..e3d7b520 100644
--- a/src/invidious/helpers/jobs.cr
+++ b/src/invidious/helpers/jobs.cr
@@ -201,7 +201,7 @@ end
def bypass_captcha(captcha_key, logger)
loop do
begin
- {"/watch?v=CvFH_6DNRCY&gl=US&hl=en&disable_polymer=1&has_verified=1&bpctr=9999999999", produce_channel_videos_url(ucid: "UCXuqSBlHAE6Xw-yeJA0Tunw")}.each do |path|
+ {"/watch?v=CvFH_6DNRCY&gl=US&hl=en&has_verified=1&bpctr=9999999999", produce_channel_videos_url(ucid: "UCXuqSBlHAE6Xw-yeJA0Tunw")}.each do |path|
response = YT_POOL.client &.get(path)
if response.body.includes?("To continue with your YouTube experience, please fill out the form below.")
html = XML.parse_html(response.body)
diff --git a/src/invidious/helpers/signatures.cr b/src/invidious/helpers/signatures.cr
index 0aaacd04..5eabb91b 100644
--- a/src/invidious/helpers/signatures.cr
+++ b/src/invidious/helpers/signatures.cr
@@ -1,8 +1,8 @@
alias SigProc = Proc(Array(String), Int32, Array(String))
def fetch_decrypt_function(id = "CvFH_6DNRCY")
- document = YT_POOL.client &.get("/watch?v=#{id}&gl=US&hl=en&disable_polymer=1").body
- url = document.match(/src="(?<url>.*player_ias[^\/]+\/en_US\/base.js)"/).not_nil!["url"]
+ document = YT_POOL.client &.get("/watch?v=#{id}&gl=US&hl=en").body
+ url = document.match(/src="(?<url>\/yts\/jsbin\/player_ias-[^\/]+\/en_US\/base.js)"/).not_nil!["url"]
player = YT_POOL.client &.get(url).body
function_name = player.match(/^(?<name>[^=]+)=function\(\w\){\w=\w\.split\(""\);[^\. ]+\.[^( ]+/m).not_nil!["name"]
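
`SigProc` models the three transform kinds the player's `base.js` composes for signature decryption: reverse, splice, and swap. An illustrative sketch only; the real operations and their arguments are scraped from the player at runtime:

    # Toy signature transform pipeline; ops and arguments are fabricated.
    alias SigProc = Proc(Array(String), Int32, Array(String))

    reverse = SigProc.new { |a, _| a.reverse }
    splice  = SigProc.new { |a, b| a[b..-1] }
    swap    = SigProc.new { |a, b| a.swap(0, b % a.size) }

    ops = [{reverse, 0}, {splice, 2}, {swap, 1}]
    sig = "abcdefg".split("")
    ops.each { |proc, value| sig = proc.call(sig, value) }
    puts sig.join # => decba
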
diff --git a/src/invidious/helpers/utils.cr b/src/invidious/helpers/utils.cr
index a39a0b16..a51f15ce 100644
--- a/src/invidious/helpers/utils.cr
+++ b/src/invidious/helpers/utils.cr
@@ -8,7 +8,7 @@ def add_yt_headers(request)
request.headers["accept-language"] ||= "en-us,en;q=0.5"
return if request.resource.starts_with? "/sorry/index"
request.headers["x-youtube-client-name"] ||= "1"
- request.headers["x-youtube-client-version"] ||= "1.20180719"
+ request.headers["x-youtube-client-version"] ||= "2.20200609"
if !CONFIG.cookies.empty?
request.headers["cookie"] = "#{(CONFIG.cookies.map { |c| "#{c.name}=#{c.value}" }).join("; ")}; #{request.headers["cookie"]?}"
end
diff --git a/src/invidious/mixes.cr b/src/invidious/mixes.cr
index 04a37b87..6c01d78b 100644
--- a/src/invidious/mixes.cr
+++ b/src/invidious/mixes.cr
@@ -20,7 +20,6 @@ end
def fetch_mix(rdid, video_id, cookies = nil, locale = nil)
headers = HTTP::Headers.new
- headers["User-Agent"] = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36"
if cookies
headers = cookies.add_request_headers(headers)
diff --git a/src/invidious/search.cr b/src/invidious/search.cr
index 7a88f316..b4bd6226 100644
--- a/src/invidious/search.cr
+++ b/src/invidious/search.cr
@@ -96,6 +96,10 @@ struct SearchVideo
end
end
+ def is_upcoming
+ premiere_timestamp ? true : false
+ end
+
db_mapping({
title: String,
id: String,
@@ -227,61 +231,35 @@ end
alias SearchItem = SearchVideo | SearchChannel | SearchPlaylist
def channel_search(query, page, channel)
- response = YT_POOL.client &.get("/channel/#{channel}?disable_polymer=1&hl=en&gl=US")
- document = XML.parse_html(response.body)
- canonical = document.xpath_node(%q(//link[@rel="canonical"]))
-
- if !canonical
- response = YT_POOL.client &.get("/c/#{channel}?disable_polymer=1&hl=en&gl=US")
- document = XML.parse_html(response.body)
- canonical = document.xpath_node(%q(//link[@rel="canonical"]))
- end
-
- if !canonical
- response = YT_POOL.client &.get("/user/#{channel}?disable_polymer=1&hl=en&gl=US")
- document = XML.parse_html(response.body)
- canonical = document.xpath_node(%q(//link[@rel="canonical"]))
- end
+ response = YT_POOL.client &.get("/channel/#{channel}?hl=en&gl=US")
+ response = YT_POOL.client &.get("/user/#{channel}?hl=en&gl=US") if response.headers["location"]?
+ response = YT_POOL.client &.get("/c/#{channel}?hl=en&gl=US") if response.headers["location"]?
- if !canonical
- return 0, [] of SearchItem
- end
+ ucid = response.body.match(/\\"channelId\\":\\"(?<ucid>[^\\]+)\\"/).try &.["ucid"]?
- ucid = canonical["href"].split("/")[-1]
+ return 0, [] of SearchItem if !ucid
url = produce_channel_search_url(ucid, query, page)
response = YT_POOL.client &.get(url)
- json = JSON.parse(response.body)
+ initial_data = JSON.parse(response.body).as_a.find &.["response"]?
+ return 0, [] of SearchItem if !initial_data
+ items = extract_items(initial_data.as_h)
- if json["content_html"]? && !json["content_html"].as_s.empty?
- document = XML.parse_html(json["content_html"].as_s)
- nodeset = document.xpath_nodes(%q(//li[contains(@class, "feed-item-container")]))
-
- count = nodeset.size
- items = extract_items(nodeset)
- else
- count = 0
- items = [] of SearchItem
- end
-
- return count, items
+ return items.size, items
end
def search(query, page = 1, search_params = produce_search_params(content_type: "all"), region = nil)
- if query.empty?
- return {0, [] of SearchItem}
- end
+ return 0, [] of SearchItem if query.empty?
- html = YT_POOL.client(region, &.get("/results?q=#{URI.encode_www_form(query)}&page=#{page}&sp=#{search_params}&hl=en&disable_polymer=1").body)
- if html.empty?
- return {0, [] of SearchItem}
- end
+ body = YT_POOL.client(region, &.get("/results?q=#{URI.encode_www_form(query)}&page=#{page}&sp=#{search_params}&hl=en").body)
+ return 0, [] of SearchItem if body.empty?
- html = XML.parse_html(html)
- nodeset = html.xpath_nodes(%q(//ol[@class="item-section"]/li))
- items = extract_items(nodeset)
+ initial_data = extract_initial_data(body)
+ items = extract_items(initial_data)
- return {nodeset.size, items}
+ # initial_data["estimatedResults"]?.try &.as_s.to_i64
+
+ return items.size, items
end
def produce_search_params(sort : String = "relevance", date : String = "", content_type : String = "",
@@ -387,12 +365,9 @@ def produce_channel_search_url(ucid, query, page)
"2:string" => ucid,
"3:base64" => {
"2:string" => "search",
- "6:varint" => 2_i64,
"7:varint" => 1_i64,
- "12:varint" => 1_i64,
- "13:string" => "",
- "23:varint" => 0_i64,
"15:string" => "#{page}",
+ "23:varint" => 0_i64,
},
"11:string" => query,
},
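
The continuation layout changes above (dropping fields 6, 12, and 13, and moving 23) produce the new tokens asserted in spec/helpers_spec.cr. As a hedged sketch, a token can be decoded for inspection with the same Protodec chain `extract_plid` uses in trending.cr (shard path and return shape assumed):

    require "base64"
    require "protodec/utils" # shard used by Invidious; API assumed from extract_plid

    # Token taken from the updated spec above.
    token = "4qmFsgI2EhhVQ1h1cVNCbEhBRTZYdy15ZUpBMFR1bncaGEVnWnpaV0Z5WTJnNEFYb0RNVEF3dUFFQVoA"

    decoded = Base64.decode(token)
      .try { |i| IO::Memory.new(i) }
      .try { |i| Protodec::Any.parse(i) }
    puts decoded.to_json # prints the decoded field numbers and values for inspection
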
diff --git a/src/invidious/trending.cr b/src/invidious/trending.cr
index 017c42f5..8d078387 100644
--- a/src/invidious/trending.cr
+++ b/src/invidious/trending.cr
@@ -1,7 +1,4 @@
def fetch_trending(trending_type, region, locale)
- headers = HTTP::Headers.new
- headers["User-Agent"] = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36"
-
region ||= "US"
region = region.upcase
@@ -11,7 +8,7 @@ def fetch_trending(trending_type, region, locale)
if trending_type && trending_type != "Default"
trending_type = trending_type.downcase.capitalize
- response = YT_POOL.client &.get("/feed/trending?gl=#{region}&hl=en", headers).body
+ response = YT_POOL.client &.get("/feed/trending?gl=#{region}&hl=en").body
initial_data = extract_initial_data(response)
@@ -21,31 +18,28 @@ def fetch_trending(trending_type, region, locale)
if url
url["channelListSubMenuAvatarRenderer"]["navigationEndpoint"]["commandMetadata"]["webCommandMetadata"]["url"]
url = url["channelListSubMenuAvatarRenderer"]["navigationEndpoint"]["commandMetadata"]["webCommandMetadata"]["url"].as_s
- url += "&disable_polymer=1&gl=#{region}&hl=en"
+ url = "#{url}&gl=#{region}&hl=en"
trending = YT_POOL.client &.get(url).body
plid = extract_plid(url)
else
- trending = YT_POOL.client &.get("/feed/trending?gl=#{region}&hl=en&disable_polymer=1").body
+ trending = YT_POOL.client &.get("/feed/trending?gl=#{region}&hl=en").body
end
else
- trending = YT_POOL.client &.get("/feed/trending?gl=#{region}&hl=en&disable_polymer=1").body
+ trending = YT_POOL.client &.get("/feed/trending?gl=#{region}&hl=en").body
end
- trending = XML.parse_html(trending)
- nodeset = trending.xpath_nodes(%q(//ul/li[@class="expanded-shelf-content-item-wrapper"]))
- trending = extract_videos(nodeset)
+ initial_data = extract_initial_data(trending)
+ trending = extract_videos(initial_data)
return {trending, plid}
end
def extract_plid(url)
- plid = URI.parse(url)
- .try { |i| HTTP::Params.parse(i.query.not_nil!)["bp"] }
+ return url.try { |i| URI.parse(i).query }
+ .try { |i| HTTP::Params.parse(i)["bp"] }
.try { |i| URI.decode_www_form(i) }
.try { |i| Base64.decode(i) }
.try { |i| IO::Memory.new(i) }
.try { |i| Protodec::Any.parse(i) }
- .try { |i| i["44:0:embedded"]["2:1:string"].as_s }
-
- return plid
+ .try &.["44:0:embedded"]?.try &.["2:1:string"]?.try &.as_s
end
diff --git a/src/invidious/users.cr b/src/invidious/users.cr
index 0aa94d82..ba15692c 100644
--- a/src/invidious/users.cr
+++ b/src/invidious/users.cr
@@ -267,7 +267,7 @@ def subscribe_ajax(channel_id, action, env_headers)
end
headers = cookies.add_request_headers(headers)
- if match = html.body.match(/'XSRF_TOKEN': "(?<session_token>[A-Za-z0-9\_\-\=]+)"/)
+ if match = html.body.match(/'XSRF_TOKEN': "(?<session_token>[^"]+)"/)
session_token = match["session_token"]
headers["content-type"] = "application/x-www-form-urlencoded"
@@ -300,7 +300,7 @@ end
# end
# headers = cookies.add_request_headers(headers)
#
-# if match = html.body.match(/'XSRF_TOKEN': "(?<session_token>[A-Za-z0-9\_\-\=]+)"/)
+# if match = html.body.match(/'XSRF_TOKEN': "(?<session_token>[^"]+)"/)
# session_token = match["session_token"]
#
# headers["content-type"] = "application/x-www-form-urlencoded"
diff --git a/src/invidious/videos.cr b/src/invidious/videos.cr
index ed5847e4..f2638f14 100644
--- a/src/invidious/videos.cr
+++ b/src/invidious/videos.cr
@@ -246,12 +246,9 @@ struct VideoPreferences
end
struct Video
- property player_json : JSON::Any?
- property recommended_json : JSON::Any?
-
- module HTTPParamConverter
+ module JSONConverter
def self.from_rs(rs)
- HTTP::Params.parse(rs.read(String))
+ JSON.parse(rs.read(String)).as_h
end
end
@@ -271,7 +268,7 @@ struct Video
generate_storyboards(json, self.id, self.storyboards)
end
- json.field "description", html_to_content(self.description_html)
+ json.field "description", self.description
json.field "descriptionHtml", self.description_html
json.field "published", self.published.to_unix
json.field "publishedText", translate(locale, "`x` ago", recode_date(self.published, locale))
@@ -310,13 +307,13 @@ struct Video
json.field "lengthSeconds", self.length_seconds
json.field "allowRatings", self.allow_ratings
- json.field "rating", self.info["avg_rating"].to_f32
+ json.field "rating", self.average_rating
json.field "isListed", self.is_listed
json.field "liveNow", self.live_now
json.field "isUpcoming", self.is_upcoming
if self.premiere_timestamp
- json.field "premiereTimestamp", self.premiere_timestamp.not_nil!.to_unix
+ json.field "premiereTimestamp", self.premiere_timestamp.try &.to_unix
end
if hlsvp = self.hls_manifest_url
@@ -328,21 +325,21 @@ struct Video
json.field "adaptiveFormats" do
json.array do
- self.adaptive_fmts(decrypt_function).each do |fmt|
+ self.adaptive_fmts.each do |fmt|
json.object do
- json.field "index", fmt["index"]
- json.field "bitrate", fmt["bitrate"]
- json.field "init", fmt["init"]
+ json.field "index", "#{fmt["indexRange"]["start"]}-#{fmt["indexRange"]["end"]}"
+ json.field "bitrate", fmt["bitrate"].as_i.to_s
+ json.field "init", "#{fmt["initRange"]["start"]}-#{fmt["initRange"]["end"]}"
json.field "url", fmt["url"]
- json.field "itag", fmt["itag"]
- json.field "type", fmt["type"]
- json.field "clen", fmt["clen"]
- json.field "lmt", fmt["lmt"]
- json.field "projectionType", fmt["projection_type"]
+ json.field "itag", fmt["itag"].as_i.to_s
+ json.field "type", fmt["mimeType"]
+ json.field "clen", fmt["contentLength"]
+ json.field "lmt", fmt["lastModified"]
+ json.field "projectionType", fmt["projectionType"]
fmt_info = itag_to_metadata?(fmt["itag"])
if fmt_info
- fps = fmt_info["fps"]?.try &.to_i || fmt["fps"]?.try &.to_i || 30
+ fps = fmt_info["fps"]?.try &.to_i || fmt["fps"]?.try &.as_i || 30
json.field "fps", fps
json.field "container", fmt_info["ext"]
json.field "encoding", fmt_info["vcodec"]? || fmt_info["acodec"]
@@ -368,16 +365,16 @@ struct Video
json.field "formatStreams" do
json.array do
- self.fmt_stream(decrypt_function).each do |fmt|
+ self.fmt_stream.each do |fmt|
json.object do
json.field "url", fmt["url"]
- json.field "itag", fmt["itag"]
- json.field "type", fmt["type"]
+ json.field "itag", fmt["itag"].as_i.to_s
+ json.field "type", fmt["mimeType"]
json.field "quality", fmt["quality"]
fmt_info = itag_to_metadata?(fmt["itag"])
if fmt_info
- fps = fmt_info["fps"]?.try &.to_i || fmt["fps"]?.try &.to_i || 30
+ fps = fmt_info["fps"]?.try &.to_i || fmt["fps"]?.try &.as_i || 30
json.field "fps", fps
json.field "container", fmt_info["ext"]
json.field "encoding", fmt_info["vcodec"]? || fmt_info["acodec"]
@@ -415,9 +412,7 @@ struct Video
json.field "recommendedVideos" do
json.array do
- self.info["rvs"]?.try &.split(",").each do |rv|
- rv = HTTP::Params.parse(rv)
-
+ self.related_videos.each do |rv|
if rv["id"]?
json.object do
json.field "videoId", rv["id"]
@@ -436,7 +431,7 @@ struct Video
qualities.each do |quality|
json.object do
- json.field "url", rv["author_thumbnail"].gsub(/s\d+-/, "s#{quality}-")
+ json.field "url", rv["author_thumbnail"]?.try &.gsub(/s\d+-/, "s#{quality}-")
json.field "width", quality
json.field "height", quality
end
@@ -445,9 +440,9 @@ struct Video
end
end
- json.field "lengthSeconds", rv["length_seconds"].to_i
- json.field "viewCountText", rv["short_view_count_text"]
- json.field "viewCount", rv["view_count"]?.try &.to_i64
+ json.field "lengthSeconds", rv["length_seconds"]?.try &.to_i
+ json.field "viewCountText", rv["short_view_count_text"]?
+ json.field "viewCount", rv["view_count"]?.try &.empty? ? nil : rv["view_count"].to_i64
end
end
end
@@ -466,256 +461,150 @@ struct Video
end
end
- # `description_html` is stored in DB as `description`, which can be
- # quite confusing. Since it currently isn't very practical to rename
- # it, we instead define a getter and setter here.
- def description_html
- self.description
+ def title
+ info["videoDetails"]["title"]?.try &.as_s || ""
end
- def description_html=(other : String)
- self.description = other
+ def ucid
+ info["videoDetails"]["channelId"]?.try &.as_s || ""
end
- def allow_ratings
- allow_ratings = player_response["videoDetails"]?.try &.["allowRatings"]?.try &.as_bool
-
- if allow_ratings.nil?
- return true
- end
+ def author
+ info["videoDetails"]["author"]?.try &.as_s || ""
+ end
- return allow_ratings
+ def length_seconds : Int32
+ info["microformat"]?.try &.["playerMicroformatRenderer"]?.try &.["lengthSeconds"]?.try &.as_s.to_i ||
+ info["videoDetails"]["lengthSeconds"]?.try &.as_s.to_i || 0
end
- def live_now
- live_now = player_response["videoDetails"]?.try &.["isLive"]?.try &.as_bool
+ def views : Int64
+ info["videoDetails"]["viewCount"]?.try &.as_s.to_i64 || 0_i64
+ end
- if live_now.nil?
- return false
- end
+ def likes : Int64
+ info["likes"]?.try &.as_i64 || 0_i64
+ end
- return live_now
+ def dislikes : Int64
+ info["dislikes"]?.try &.as_i64 || 0_i64
end
- def is_listed
- is_listed = player_response["videoDetails"]?.try &.["isCrawlable"]?.try &.as_bool
+ def average_rating : Float64
+ # (likes / (likes + dislikes) * 4 + 1)
+ info["videoDetails"]["averageRating"]?.try { |t| t.as_f? || t.as_i64?.try &.to_f64 }.try &.round(4) || 0.0
+ end
- if is_listed.nil?
- return true
- end
+ def published : Time
+ info["microformat"]?.try &.["playerMicroformatRenderer"]?.try &.["publishDate"]?.try { |t| Time.parse(t.as_s, "%Y-%m-%d", Time::Location.local) } || Time.local
+ end
- return is_listed
+ def published=(other : Time)
+ info["microformat"].as_h["playerMicroformatRenderer"].as_h["publishDate"] = JSON::Any.new(other.to_s("%Y-%m-%d"))
end
- def is_upcoming
- is_upcoming = player_response["videoDetails"]?.try &.["isUpcoming"]?.try &.as_bool
+ def cookie
+ info["cookie"]?.try &.as_h.map { |k, v| "#{k}=#{v}" }.join("; ") || ""
+ end
- if is_upcoming.nil?
- return false
- end
+ def allow_ratings
+ r = info["videoDetails"]["allowRatings"]?.try &.as_bool
+ r.nil? ? false : r
+ end
- return is_upcoming
+ def live_now
+ info["videoDetails"]["isLiveContent"]?.try &.as_bool || false
end
- def premiere_timestamp
- if self.is_upcoming
- premiere_timestamp = player_response["playabilityStatus"]?
- .try &.["liveStreamability"]?
- .try &.["liveStreamabilityRenderer"]?
- .try &.["offlineSlate"]?
- .try &.["liveStreamOfflineSlateRenderer"]?
- .try &.["scheduledStartTime"]?.try &.as_s.to_i64
- end
+ def is_listed
+ info["videoDetails"]["isCrawlable"]?.try &.as_bool || false
+ end
- if premiere_timestamp
- premiere_timestamp = Time.unix(premiere_timestamp)
- end
+ def is_upcoming
+ info["videoDetails"]["isUpcoming"]?.try &.as_bool || false
+ end
- return premiere_timestamp
+ def premiere_timestamp : Time?
+ info["microformat"]?.try &.["playerMicroformatRenderer"]?
+ .try &.["liveBroadcastDetails"]?.try &.["startTimestamp"]?.try { |t| Time.parse_rfc3339(t.as_s) }
end
def keywords
- keywords = player_response["videoDetails"]?.try &.["keywords"]?.try &.as_a
- keywords ||= [] of String
-
- return keywords
+ info["videoDetails"]["keywords"]?.try &.as_a.map &.as_s || [] of String
end
- def fmt_stream(decrypt_function)
- streams = [] of HTTP::Params
-
- if fmt_streams = player_response["streamingData"]?.try &.["formats"]?
- fmt_streams.as_a.each do |fmt_stream|
- if !fmt_stream.as_h?
- next
- end
-
- fmt = {} of String => String
-
- fmt["lmt"] = fmt_stream["lastModified"]?.try &.as_s || "0"
- fmt["projection_type"] = "1"
- fmt["type"] = fmt_stream["mimeType"].as_s
- fmt["clen"] = fmt_stream["contentLength"]?.try &.as_s || "0"
- fmt["bitrate"] = fmt_stream["bitrate"]?.try &.as_i.to_s || "0"
- fmt["itag"] = fmt_stream["itag"].as_i.to_s
- if fmt_stream["url"]?
- fmt["url"] = fmt_stream["url"].as_s
- end
- if cipher = fmt_stream["cipher"]? || fmt_stream["signatureCipher"]?
- HTTP::Params.parse(cipher.as_s).each do |key, value|
- fmt[key] = value
- end
- end
- fmt["quality"] = fmt_stream["quality"].as_s
-
- if fmt_stream["width"]?
- fmt["size"] = "#{fmt_stream["width"]}x#{fmt_stream["height"]}"
- fmt["height"] = fmt_stream["height"].as_i.to_s
- end
-
- if fmt_stream["fps"]?
- fmt["fps"] = fmt_stream["fps"].as_i.to_s
- end
+ def related_videos
+ info["relatedVideos"]?.try &.as_a.map { |h| h.as_h.transform_values &.as_s } || [] of Hash(String, String)
+ end
- if fmt_stream["qualityLabel"]?
- fmt["quality_label"] = fmt_stream["qualityLabel"].as_s
- end
+ def allowed_regions
+ info["microformat"]?.try &.["playerMicroformatRenderer"]?
+ .try &.["availableCountries"]?.try &.as_a.map &.as_s || [] of String
+ end
- params = HTTP::Params.new
- fmt.each do |key, value|
- params[key] = value
- end
+ def author_thumbnail : String
+ info["authorThumbnail"]?.try &.as_s || ""
+ end
- streams << params
- end
+ def sub_count_text : String
+ info["subCountText"]?.try &.as_s || "-"
+ end
- streams.sort_by! { |stream| stream["height"].to_i }.reverse!
- elsif fmt_stream = self.info["url_encoded_fmt_stream_map"]?
- fmt_stream.split(",").each do |string|
- if !string.empty?
- streams << HTTP::Params.parse(string)
+ def fmt_stream
+ return @fmt_stream.as(Array(Hash(String, JSON::Any))) if @fmt_stream
+ fmt_stream = info["streamingData"]?.try &.["formats"]?.try &.as_a.map &.as_h || [] of Hash(String, JSON::Any)
+ fmt_stream.each do |fmt|
+ if s = (fmt["cipher"]? || fmt["signatureCipher"]?).try { |h| HTTP::Params.parse(h.as_s) }
+ s.each do |k, v|
+ fmt[k] = JSON::Any.new(v)
end
+ fmt["url"] = JSON::Any.new("#{fmt["url"]}#{decrypt_signature(fmt)}")
end
- end
-
- streams.each { |s| s.add("label", "#{s["quality"]} - #{s["type"].split(";")[0].split("/")[1]}") }
- streams = streams.uniq { |s| s["label"] }
- if self.info["region"]?
- streams.each do |fmt|
- fmt["url"] += "®ion=" + self.info["region"]
- end
- end
-
- streams.each do |fmt|
- fmt["url"] += "&host=" + (URI.parse(fmt["url"]).host || "")
- fmt["url"] += decrypt_signature(fmt, decrypt_function)
+ fmt["url"] = JSON::Any.new("#{fmt["url"]}&host=#{URI.parse(fmt["url"].as_s).host}")
+ fmt["url"] = JSON::Any.new("#{fmt["url"]}®ion=#{self.info["region"]}") if self.info["region"]?
end
-
- return streams
+ fmt_stream.sort_by! { |f| f["width"]?.try &.as_i || 0 }
+ @fmt_stream = fmt_stream
+ return @fmt_stream.as(Array(Hash(String, JSON::Any)))
end
- def adaptive_fmts(decrypt_function)
- adaptive_fmts = [] of HTTP::Params
-
- if fmts = player_response["streamingData"]?.try &.["adaptiveFormats"]?
- fmts.as_a.each do |adaptive_fmt|
- next if !adaptive_fmt.as_h?
- fmt = {} of String => String
-
- if init = adaptive_fmt["initRange"]?
- fmt["init"] = "#{init["start"]}-#{init["end"]}"
+ def adaptive_fmts
+ return @adaptive_fmts.as(Array(Hash(String, JSON::Any))) if @adaptive_fmts
+ fmt_stream = info["streamingData"]?.try &.["adaptiveFormats"]?.try &.as_a.map &.as_h || [] of Hash(String, JSON::Any)
+ fmt_stream.each do |fmt|
+ if s = (fmt["cipher"]? || fmt["signatureCipher"]?).try { |h| HTTP::Params.parse(h.as_s) }
+ s.each do |k, v|
+ fmt[k] = JSON::Any.new(v)
end
- fmt["init"] ||= "0-0"
-
- fmt["lmt"] = adaptive_fmt["lastModified"]?.try &.as_s || "0"
- fmt["projection_type"] = "1"
- fmt["type"] = adaptive_fmt["mimeType"].as_s
- fmt["clen"] = adaptive_fmt["contentLength"]?.try &.as_s || "0"
- fmt["bitrate"] = adaptive_fmt["bitrate"]?.try &.as_i.to_s || "0"
- fmt["itag"] = adaptive_fmt["itag"].as_i.to_s
- if adaptive_fmt["url"]?
- fmt["url"] = adaptive_fmt["url"].as_s
- end
- if cipher = adaptive_fmt["cipher"]? || adaptive_fmt["signatureCipher"]?
- HTTP::Params.parse(cipher.as_s).each do |key, value|
- fmt[key] = value
- end
- end
- if index = adaptive_fmt["indexRange"]?
- fmt["index"] = "#{index["start"]}-#{index["end"]}"
- end
- fmt["index"] ||= "0-0"
-
- if adaptive_fmt["width"]?
- fmt["size"] = "#{adaptive_fmt["width"]}x#{adaptive_fmt["height"]}"
- end
-
- if adaptive_fmt["fps"]?
- fmt["fps"] = adaptive_fmt["fps"].as_i.to_s
- end
-
- if adaptive_fmt["qualityLabel"]?
- fmt["quality_label"] = adaptive_fmt["qualityLabel"].as_s
- end
-
- params = HTTP::Params.new
- fmt.each do |key, value|
- params[key] = value
- end
-
- adaptive_fmts << params
- end
- elsif fmts = self.info["adaptive_fmts"]?
- fmts.split(",") do |string|
- adaptive_fmts << HTTP::Params.parse(string)
+ fmt["url"] = JSON::Any.new("#{fmt["url"]}#{decrypt_signature(fmt)}")
end
- end
-
- if self.info["region"]?
- adaptive_fmts.each do |fmt|
- fmt["url"] += "®ion=" + self.info["region"]
- end
- end
- adaptive_fmts.each do |fmt|
- fmt["url"] += "&host=" + (URI.parse(fmt["url"]).host || "")
- fmt["url"] += decrypt_signature(fmt, decrypt_function)
+ fmt["url"] = JSON::Any.new("#{fmt["url"]}&host=#{URI.parse(fmt["url"].as_s).host}")
+ fmt["url"] = JSON::Any.new("#{fmt["url"]}®ion=#{self.info["region"]}") if self.info["region"]?
end
-
- return adaptive_fmts
+ fmt_stream.sort_by! { |f| f["width"]?.try &.as_i || 0 }
+ @adaptive_fmts = fmt_stream
+ return @adaptive_fmts.as(Array(Hash(String, JSON::Any)))
end
- def video_streams(adaptive_fmts)
- video_streams = adaptive_fmts.select { |s| s["type"].starts_with? "video" }
-
- return video_streams
+ def video_streams
+ adaptive_fmts.select &.["mimeType"]?.try &.as_s.starts_with?("video")
end
- def audio_streams(adaptive_fmts)
- audio_streams = adaptive_fmts.select { |s| s["type"].starts_with? "audio" }
- audio_streams.sort_by! { |s| s["bitrate"].to_i }.reverse!
- audio_streams.each do |stream|
- stream["bitrate"] = (stream["bitrate"].to_f64/1000).to_i.to_s
- end
-
- return audio_streams
- end
-
- def player_response
- @player_json = JSON.parse(@info["player_response"]) if !@player_json
- @player_json.not_nil!
+ def audio_streams
+ adaptive_fmts.select &.["mimeType"]?.try &.as_s.starts_with?("audio")
end
def storyboards
- storyboards = player_response["storyboards"]?
+ storyboards = info["storyboards"]?
.try &.as_h
.try &.["playerStoryboardSpecRenderer"]?
.try &.["spec"]?
.try &.as_s.split("|")
if !storyboards
- if storyboard = player_response["storyboards"]?
+ if storyboard = info["storyboards"]?
.try &.as_h
.try &.["playerLiveStoryboardSpecRenderer"]?
.try &.["spec"]?
@@ -743,9 +632,7 @@ struct Video
storyboard_height: Int32,
storyboard_count: Int32)
- if !storyboards
- return items
- end
+ return items if !storyboards
url = URI.parse(storyboards.shift)
params = HTTP::Params.parse(url.query || "")
@@ -779,82 +666,98 @@ struct Video
end
def paid
- reason = player_response["playabilityStatus"]?.try &.["reason"]?
+ reason = info["playabilityStatus"]?.try &.["reason"]?
paid = reason == "This video requires payment to watch." ? true : false
-
- return paid
+ paid
end
def premium
- if info["premium"]?
- self.info["premium"] == "true"
- else
- false
+ keywords.includes? "YouTube Red"
+ end
+
+ def captions : Array(Caption)
+ return @captions.as(Array(Caption)) if @captions
+ captions = info["captions"]?.try &.["playerCaptionsTracklistRenderer"]?.try &.["captionTracks"]?.try &.as_a.map do |caption|
+ caption = Caption.from_json(caption.to_json)
+ caption.name.simpleText = caption.name.simpleText.split(" - ")[0]
+ caption
end
+ captions ||= [] of Caption
+ @captions = captions
+ return @captions.as(Array(Caption))
end
- def captions
- captions = [] of Caption
- if player_response["captions"]?
- caption_list = player_response["captions"]["playerCaptionsTracklistRenderer"]["captionTracks"]?.try &.as_a
- caption_list ||= [] of JSON::Any
+ def description
+ description = info["microformat"]?.try &.["playerMicroformatRenderer"]?
+ .try &.["description"]?.try &.["simpleText"]?.try &.as_s || ""
+ end
- caption_list.each do |caption|
- caption = Caption.from_json(caption.to_json)
- caption.name.simpleText = caption.name.simpleText.split(" - ")[0]
- captions << caption
- end
- end
+ # TODO
+ def description=(value : String)
+ @description = value
+ end
- return captions
+ def description_html
+ info["descriptionHtml"]?.try &.as_s || ""
+ end
+
+ def description_html=(value : String)
+ info["descriptionHtml"] = JSON::Any.new(value)
end
def short_description
- short_description = self.description_html.gsub(/(<br>)|(<br\/>|"|\n)/, {
- "<br>": " ",
- "<br/>": " ",
- "\"": "&quot;",
- "\n": " ",
- })
- short_description = XML.parse_html(short_description).content[0..200].strip(" ")
-
- if short_description.empty?
- short_description = " "
- end
+ info["shortDescription"]?.try &.as_s || ""
+ end
+
+ def hls_manifest_url : String?
+ info["streamingData"]?.try &.["hlsManifestUrl"]?.try &.as_s
+ end
+
+ def dash_manifest_url
+ info["streamingData"]?.try &.["dashManifestUrl"]?.try &.as_s
+ end
+
+ def genre : String
+ info["genre"]?.try &.as_s || ""
+ end
+
+ def genre_url : String
+ info["genreUcid"]? ? "/channel/#{info["genreUcid"]}" : ""
+ end
+
+ def license : String?
+ info["license"]?.try &.as_s
+ end
+
+ def is_family_friendly : Bool
+ info["microformat"]?.try &.["playerMicroformatRenderer"]["isFamilySafe"]?.try &.as_bool || false
+ end
+
+ def wilson_score : Float64
+ ci_lower_bound(likes, likes + dislikes).round(4)
+ end
- return short_description
+ def engagement : Float64
+ ((likes + dislikes) / views).round(4)
end
- def length_seconds
- player_response["videoDetails"]["lengthSeconds"].as_s.to_i
+ def reason : String?
+ info["reason"]?.try &.as_s
+ end
+
+ def session_token : String?
+ info["sessionToken"]?.try &.as_s?
end
db_mapping({
- id: String,
- info: {
- type: HTTP::Params,
- default: HTTP::Params.parse(""),
- converter: Video::HTTPParamConverter,
- },
- updated: Time,
- title: String,
- views: Int64,
- likes: Int32,
- dislikes: Int32,
- wilson_score: Float64,
- published: Time,
- description: String,
- language: String?,
- author: String,
- ucid: String,
- allowed_regions: Array(String),
- is_family_friendly: Bool,
- genre: String,
- genre_url: String,
- license: String,
- sub_count_text: String,
- author_thumbnail: String,
+ id: String,
+ info: {type: Hash(String, JSON::Any), converter: Video::JSONConverter},
+ updated: Time,
})
+
+ @captions : Array(Caption)?
+ @adaptive_fmts : Array(Hash(String, JSON::Any))?
+ @fmt_stream : Array(Hash(String, JSON::Any))?
end
struct Caption
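
`fmt_stream`, `adaptive_fmts`, and `captions` now memoize into the nilable instance variables declared above, so the format JSON is parsed and decrypted only once per Video. A minimal sketch of the pattern (a plain class stands in for the DB-mapped struct; field shape fabricated):

    require "json"

    # Stand-in for the Video struct's lazy, memoized format getter.
    class FormatCache
      @fmt_stream : Array(Hash(String, JSON::Any))?

      def initialize(@info : Hash(String, JSON::Any))
      end

      def fmt_stream
        return @fmt_stream.as(Array(Hash(String, JSON::Any))) if @fmt_stream
        fmt_stream = @info["streamingData"]?.try &.["formats"]?.try &.as_a.map(&.as_h) || [] of Hash(String, JSON::Any)
        @fmt_stream = fmt_stream
      end
    end

    info = JSON.parse(%({"streamingData": {"formats": [{"itag": 18}]}})).as_h
    cache = FormatCache.new(info)
    puts cache.fmt_stream.size # => 1 (repeat calls return the cached array)
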
@@ -878,121 +781,64 @@ class VideoRedirect < Exception
end
end
-def get_video(id, db, refresh = true, region = nil, force_refresh = false)
- if (video = db.query_one?("SELECT * FROM videos WHERE id = $1", id, as: Video)) && !region
- # If record was last updated over 10 minutes ago, or video has since premiered,
- # refresh (expire param in response lasts for 6 hours)
- if (refresh &&
- (Time.utc - video.updated > 10.minutes) ||
- (video.premiere_timestamp && video.premiere_timestamp.as(Time) < Time.utc)) ||
- force_refresh
- begin
- video = fetch_video(id, region)
- video_array = video.to_a
-
- args = arg_array(video_array[1..-1], 2)
-
- db.exec("UPDATE videos SET (info,updated,title,views,likes,dislikes,wilson_score,\
- published,description,language,author,ucid,allowed_regions,is_family_friendly,\
- genre,genre_url,license,sub_count_text,author_thumbnail)\
- = (#{args}) WHERE id = $1", args: video_array)
- rescue ex
- db.exec("DELETE FROM videos * WHERE id = $1", id)
- raise ex
- end
- end
- else
- video = fetch_video(id, region)
- video_array = video.to_a
-
- args = arg_array(video_array)
-
- if !region
- db.exec("INSERT INTO videos VALUES (#{args}) ON CONFLICT (id) DO NOTHING", args: video_array)
- end
- end
-
- return video
+def parse_related(r : JSON::Any) : JSON::Any?
+ # TODO: r["endScreenPlaylistRenderer"], etc.
+ return if !r["endScreenVideoRenderer"]?
+ r = r["endScreenVideoRenderer"].as_h
+
+ return if !r["lengthInSeconds"]?
+
+ rv = {} of String => JSON::Any
+ rv["author"] = r["shortBylineText"]["runs"][0]?.try &.["text"] || JSON::Any.new("")
+ rv["ucid"] = r["shortBylineText"]["runs"][0]?.try &.["navigationEndpoint"]["browseEndpoint"]["browseId"] || JSON::Any.new("")
+ rv["author_url"] = JSON::Any.new("/channel/#{rv["ucid"]}")
+ rv["length_seconds"] = JSON::Any.new(r["lengthInSeconds"].as_i.to_s)
+ rv["title"] = r["title"]["simpleText"]
+ rv["short_view_count_text"] = JSON::Any.new(r["shortViewCountText"]?.try &.["simpleText"]?.try &.as_s || "")
+ rv["view_count"] = JSON::Any.new(r["title"]["accessibility"]?.try &.["accessibilityData"]["label"].as_s.match(/(?[1-9](\d+,?)*) views/).try &.["views"].gsub(/\D/, "") || "")
+ rv["id"] = r["videoId"]
+ JSON::Any.new(rv)
end
-def extract_recommended(recommended_videos)
- rvs = [] of HTTP::Params
-
- recommended_videos.try &.each do |compact_renderer|
- if compact_renderer["compactRadioRenderer"]? || compact_renderer["compactPlaylistRenderer"]?
- # TODO
- elsif video_renderer = compact_renderer["compactVideoRenderer"]?
- recommended_video = HTTP::Params.new
- recommended_video["id"] = video_renderer["videoId"].as_s
- recommended_video["title"] = video_renderer["title"]["simpleText"].as_s
-
- next if !video_renderer["shortBylineText"]?
-
- recommended_video["author"] = video_renderer["shortBylineText"]["runs"].as_a[0]["text"].as_s
- recommended_video["ucid"] = video_renderer["shortBylineText"]["runs"].as_a[0]["navigationEndpoint"]["browseEndpoint"]["browseId"].as_s
- recommended_video["author_thumbnail"] = video_renderer["channelThumbnail"]["thumbnails"][0]["url"].as_s
-
- if view_count = video_renderer["viewCountText"]?.try { |field| field["simpleText"]?.try &.as_s || field["runs"][0]?.try &.["text"].as_s }.try &.delete(", views watching").to_i64?.try &.to_s
- recommended_video["view_count"] = view_count
- recommended_video["short_view_count_text"] = "#{number_to_short_text(view_count.to_i64)} views"
- end
- recommended_video["length_seconds"] = decode_length_seconds(video_renderer["lengthText"]?.try &.["simpleText"]?.try &.as_s || "0:00").to_s
-
- rvs << recommended_video
- end
+def extract_polymer_config(body)
+ params = {} of String => JSON::Any
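+ # Polymer watch pages inline their data as JS globals, roughly (illustrative):
+ #   <script>window["ytInitialPlayerResponse"] = {...};</script>
+ # ytInitialData is embedded the same way and extracted further below.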
+ player_response = body.match(/window\["ytInitialPlayerResponse"\]\s*=\s*(?<info>.*?);\n/)
+ .try { |r| JSON.parse(r["info"]).as_h }
+
+ if body.includes?("To continue with your YouTube experience, please fill out the form below.") ||
+ body.includes?("https://www.google.com/sorry/index")
+ params["reason"] = JSON::Any.new("Could not extract video info. Instance is likely blocked.")
+ elsif !player_response
+ params["reason"] = JSON::Any.new("Video unavailable.")
+ elsif player_response["playabilityStatus"]?.try &.["status"]?.try &.as_s != "OK"
+ reason = player_response["playabilityStatus"]["errorScreen"]?.try &.["playerErrorMessageRenderer"]?.try &.["subreason"]?.try { |s| s["simpleText"]?.try &.as_s || s["runs"].as_a.map { |r| r["text"] }.join("") } ||
+ player_response["playabilityStatus"]["reason"].as_s
+ params["reason"] = JSON::Any.new(reason)
end
- rvs
-end
-
-def extract_polymer_config(body, html)
- params = HTTP::Params.new
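+ # The XSRF token and og:description are scraped from the raw page body, since
+ # neither lives inside the player response JSON.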
+ params["sessionToken"] = JSON::Any.new(body.match(/"XSRF_TOKEN":"(?[^"]+)"/).try &.["session_token"]?)
+ params["shortDescription"] = JSON::Any.new(body.match(/"og:description" content="(?[^"]+)"/).try &.["description"]?)
- params["session_token"] = body.match(/"XSRF_TOKEN":"(?[A-Za-z0-9\_\-\=]+)"/).try &.["session_token"] || ""
+ return params if !player_response
- html_info = JSON.parse(body.match(/ytplayer\.config = (?<info>.*?);ytplayer\.load/).try &.["info"] || "{}").try &.["args"]?.try &.as_h
-
- if html_info
- html_info.each do |key, value|
- params[key] = value.to_s
- end
+ {"captions", "microformat", "playabilityStatus", "storyboards", "videoDetails"}.each do |f|
+ params[f] = player_response[f] if player_response[f]?
end
- initial_data = extract_initial_data(body)
-
- primary_results = initial_data["contents"]?
- .try &.["twoColumnWatchNextResults"]?
- .try &.["results"]?
- .try &.["results"]?
- .try &.["contents"]?
+ yt_initial_data = body.match(/window\["ytInitialData"\]\s*=\s*(?<info>.*?);\n/)
+ .try { |r| JSON.parse(r["info"]).as_h }
- comment_continuation = primary_results.try &.as_a.select { |object| object["itemSectionRenderer"]? }[0]?
- .try &.["itemSectionRenderer"]?
- .try &.["continuations"]?
- .try &.[0]?
- .try &.["nextContinuationData"]?
-
- params["ctoken"] = comment_continuation.try &.["continuation"]?.try &.as_s || ""
- params["itct"] = comment_continuation.try &.["clickTrackingParams"]?.try &.as_s || ""
-
- rvs = initial_data["contents"]?
- .try &.["twoColumnWatchNextResults"]?
- .try &.["secondaryResults"]?
- .try &.["secondaryResults"]?
- .try &.["results"]?
- .try &.as_a
-
- params["rvs"] = extract_recommended(rvs).join(",")
-
- # TODO: Watching now
- params["views"] = primary_results.try &.as_a.select { |object| object["videoPrimaryInfoRenderer"]? }[0]?
- .try &.["videoPrimaryInfoRenderer"]?
- .try &.["viewCount"]?
- .try &.["videoViewCountRenderer"]?
- .try &.["viewCount"]?
- .try &.["simpleText"]?
- .try &.as_s.gsub(/\D/, "").to_i64.to_s || "0"
+ params["relatedVideos"] = yt_initial_data.try &.["playerOverlays"]?.try &.["playerOverlayRenderer"]?
+ .try &.["endScreen"]?.try &.["watchNextEndScreenRenderer"]?.try &.["results"]?.try &.as_a.compact_map { |r|
+ parse_related r
+ }.try { |a| JSON::Any.new(a) } || yt_initial_data.try &.["webWatchNextResponseExtensionData"]?.try &.["relatedVideoArgs"]?
+ .try &.as_s.split(",").map { |r|
+ r = HTTP::Params.parse(r).to_h
+ JSON::Any.new(Hash.zip(r.keys, r.values.map { |v| JSON::Any.new(v) }))
+ }.try { |a| JSON::Any.new(a) } || JSON::Any.new([] of JSON::Any)
+ primary_results = yt_initial_data.try &.["contents"]?.try &.["twoColumnWatchNextResults"]?.try &.["results"]?
+ .try &.["results"]?.try &.["contents"]?
sentiment_bar = primary_results.try &.as_a.select { |object| object["videoPrimaryInfoRenderer"]? }[0]?
.try &.["videoPrimaryInfoRenderer"]?
.try &.["sentimentBar"]?
@@ -1000,34 +846,13 @@ def extract_polymer_config(body, html)
.try &.["tooltip"]?
.try &.as_s
- likes, dislikes = sentiment_bar.try &.split(" / ").map { |a| a.delete(", ").to_i32 }[0, 2] || {0, 0}
-
- params["likes"] = "#{likes}"
- params["dislikes"] = "#{dislikes}"
-
- published = primary_results.try &.as_a.select { |object| object["videoSecondaryInfoRenderer"]? }[0]?
- .try &.["videoSecondaryInfoRenderer"]?
- .try &.["dateText"]?
- .try &.["simpleText"]?
- .try &.as_s.split(" ")[-3..-1].join(" ")
-
- if published
- params["published"] = Time.parse(published, "%b %-d, %Y", Time::Location.local).to_unix.to_s
- else
- params["published"] = Time.utc(1990, 1, 1).to_unix.to_s
- end
-
- params["description_html"] = ""
+ likes, dislikes = sentiment_bar.try &.split(" / ", 2).map &.gsub(/\D/, "").to_i64 || {0_i64, 0_i64}
+ params["likes"] = JSON::Any.new(likes)
+ params["dislikes"] = JSON::Any.new(dislikes)
- description_html = primary_results.try &.as_a.select { |object| object["videoSecondaryInfoRenderer"]? }[0]?
- .try &.["videoSecondaryInfoRenderer"]?
- .try &.["description"]?
- .try &.["runs"]?
- .try &.as_a
-
- if description_html
- params["description_html"] = content_to_comment_html(description_html)
- end
+ params["descriptionHtml"] = JSON::Any.new(primary_results.try &.as_a.select { |object| object["videoSecondaryInfoRenderer"]? }[0]?
+ .try &.["videoSecondaryInfoRenderer"]?.try &.["description"]?.try &.["runs"]?
+ .try &.as_a.try { |t| content_to_comment_html(t).gsub("\n", "
") } || "")
metadata = primary_results.try &.as_a.select { |object| object["videoSecondaryInfoRenderer"]? }[0]?
.try &.["videoSecondaryInfoRenderer"]?
@@ -1036,10 +861,6 @@ def extract_polymer_config(body, html)
.try &.["rows"]?
.try &.as_a
- params["genre"] = ""
- params["genre_ucid"] = ""
- params["license"] = ""
-
metadata.try &.each do |row|
title = row["metadataRowRenderer"]?.try &.["title"]?.try &.["simpleText"]?.try &.as_s
contents = row["metadataRowRenderer"]?
@@ -1050,220 +871,125 @@ def extract_polymer_config(body, html)
contents = contents.try &.["runs"]?
.try &.as_a[0]?
- params["genre"] = contents.try &.["text"]?
- .try &.as_s || ""
- params["genre_ucid"] = contents.try &.["navigationEndpoint"]?
- .try &.["browseEndpoint"]?
- .try &.["browseId"]?.try &.as_s || ""
+ params["genre"] = JSON::Any.new(contents.try &.["text"]?.try &.as_s || "")
+ params["genreUcid"] = JSON::Any.new(contents.try &.["navigationEndpoint"]?.try &.["browseEndpoint"]?
+ .try &.["browseId"]?.try &.as_s || "")
elsif title.try &.== "License"
contents = contents.try &.["runs"]?
.try &.as_a[0]?
- params["license"] = contents.try &.["text"]?
- .try &.as_s || ""
+ params["license"] = JSON::Any.new(contents.try &.["text"]?.try &.as_s || "")
elsif title.try &.== "Licensed to YouTube by"
- params["license"] = contents.try &.["simpleText"]?
- .try &.as_s || ""
+ params["license"] = JSON::Any.new(contents.try &.["simpleText"]?.try &.as_s || "")
end
end
author_info = primary_results.try &.as_a.select { |object| object["videoSecondaryInfoRenderer"]? }[0]?
- .try &.["videoSecondaryInfoRenderer"]?
- .try &.["owner"]?
- .try &.["videoOwnerRenderer"]?
+ .try &.["videoSecondaryInfoRenderer"]?.try &.["owner"]?.try &.["videoOwnerRenderer"]?
- params["author_thumbnail"] = author_info.try &.["thumbnail"]?
- .try &.["thumbnails"]?
- .try &.as_a[0]?
- .try &.["url"]?
- .try &.as_s || ""
+ params["authorThumbnail"] = JSON::Any.new(author_info.try &.["thumbnail"]?
+ .try &.["thumbnails"]?.try &.as_a[0]?.try &.["url"]?
+ .try &.as_s || "")
- params["sub_count_text"] = author_info.try &.["subscriberCountText"]?
- .try &.["simpleText"]?
- .try &.as_s.gsub(/\D/, "") || "0"
+ params["subCountText"] = JSON::Any.new(author_info.try &.["subscriberCountText"]?
+ .try { |t| t["simpleText"]? || t["runs"]?.try &.[0]?.try &.["text"]? }.try &.as_s.split(" ", 2)[0] || "-")
- return params
-end
+ initial_data = body.match(/ytplayer\.config\s*=\s*(?<info>.*?);ytplayer\.web_player_context_config/)
+ .try { |r| JSON.parse(r["info"]) }.try &.["args"]["player_response"]?
+ .try &.as_s?.try &.try { |r| JSON.parse(r).as_h }
-def extract_player_config(body, html)
- params = HTTP::Params.new
+ return params if !initial_data
- if md = body.match(/'XSRF_TOKEN': "(?<session_token>[A-Za-z0-9\_\-\=]+)"/)
- params["session_token"] = md["session_token"]
+ {"playabilityStatus", "streamingData"}.each do |f|
+ params[f] = initial_data[f] if initial_data[f]?
end
- if md = body.match(/'RELATED_PLAYER_ARGS': (?<json>.*?),\n/)
- recommended_json = JSON.parse(md["json"])
- rvs_params = recommended_json["rvs"].as_s.split(",").map { |params| HTTP::Params.parse(params) }
-
- if watch_next_response = recommended_json["watch_next_response"]?
- watch_next_json = JSON.parse(watch_next_response.as_s)
- rvs = watch_next_json["contents"]?
- .try &.["twoColumnWatchNextResults"]?
- .try &.["secondaryResults"]?
- .try &.["secondaryResults"]?
- .try &.["results"]?
- .try &.as_a
-
- rvs = extract_recommended(rvs).compact_map do |rv|
- if !rv["short_view_count_text"]?
- rv_params = rvs_params.select { |rv_params| rv_params["id"]? == (rv["id"]? || "") }[0]?
+ params
+end
- if rv_params.try &.["short_view_count_text"]?
- rv["short_view_count_text"] = rv_params.not_nil!["short_view_count_text"]
- rv
- else
- nil
- end
- else
- rv
- end
+def get_video(id, db, refresh = true, region = nil, force_refresh = false)
+ if (video = db.query_one?("SELECT * FROM videos WHERE id = $1", id, as: Video)) && !region
+ # If record was last updated over 10 minutes ago, or video has since premiered,
+ # refresh (expire param in response lasts for 6 hours)
+ if (refresh &&
+ (Time.utc - video.updated > 10.minutes) ||
+ (video.premiere_timestamp.try &.< Time.utc)) ||
+ force_refresh
+ begin
+ video = fetch_video(id, region)
+ db.exec("UPDATE videos SET (id, info, updated) = ($1, $2, $3) WHERE id = $1", video.id, video.info.to_json, video.updated)
+ rescue ex
+ db.exec("DELETE FROM videos * WHERE id = $1", id)
+ raise ex
end
- params["rvs"] = (rvs.map &.to_s).join(",")
- end
- end
-
- html_info = body.match(/ytplayer\.config = (?<info>.*?);ytplayer\.load/).try &.["info"]
-
- if html_info
- JSON.parse(html_info)["args"].as_h.each do |key, value|
- params[key] = value.to_s
end
else
- error_message = html.xpath_node(%q(//h1[@id="unavailable-message"]))
- if error_message
- params["reason"] = error_message.content.strip
- elsif body.includes?("To continue with your YouTube experience, please fill out the form below.") ||
- body.includes?("https://www.google.com/sorry/index")
- params["reason"] = "Could not extract video info. Instance is likely blocked."
- else
- params["reason"] = "Video unavailable."
+ video = fetch_video(id, region)
+ if !region
+ db.exec("INSERT INTO videos VALUES ($1, $2, $3) ON CONFLICT (id) DO NOTHING", video.id, video.info.to_json, video.updated)
end
end
- return params
+ return video
end
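+# Usage sketch (hypothetical id; PG_DB is the application's database handle):
+#   video = get_video("dQw4w9WgXcQ", PG_DB)
+#   video.fmt_stream.each { |fmt| puts fmt["url"] }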
def fetch_video(id, region)
- response = YT_POOL.client(region, &.get("/watch?v=#{id}&gl=US&hl=en&disable_polymer=1&has_verified=1&bpctr=9999999999"))
+ response = YT_POOL.client(region, &.get("/watch?v=#{id}&gl=US&hl=en&has_verified=1&bpctr=9999999999"))
if md = response.headers["location"]?.try &.match(/v=(?<id>[a-zA-Z0-9_-]{11})/)
raise VideoRedirect.new(video_id: md["id"])
end
- html = XML.parse_html(response.body)
- info = extract_player_config(response.body, html)
- info["cookie"] = response.cookies.to_h.map { |name, cookie| "#{name}=#{cookie.value}" }.join("; ")
-
- allowed_regions = html.xpath_node(%q(//meta[@itemprop="regionsAllowed"])).try &.["content"].split(",")
- if !allowed_regions || allowed_regions == [""]
- allowed_regions = [] of String
- end
+ info = extract_polymer_config(response.body)
+ info["cookie"] = JSON::Any.new(response.cookies.to_h.transform_values { |v| JSON::Any.new(v.value) })
+ allowed_regions = info["microformat"]?.try &.["playerMicroformatRenderer"]["availableCountries"]?.try &.as_a.map &.as_s || [] of String
# Check for region-blocks
- if info["reason"]? && info["reason"].includes?("your country")
+ if info["reason"]?.try &.as_s.includes?("your country")
bypass_regions = PROXY_LIST.keys & allowed_regions
if !bypass_regions.empty?
region = bypass_regions[rand(bypass_regions.size)]
- response = YT_POOL.client(region, &.get("/watch?v=#{id}&gl=US&hl=en&disable_polymer=1&has_verified=1&bpctr=9999999999"))
-
- html = XML.parse_html(response.body)
- info = extract_player_config(response.body, html)
+ response = YT_POOL.client(region, &.get("/watch?v=#{id}&gl=US&hl=en&has_verified=1&bpctr=9999999999"))
- info["region"] = region if region
- info["cookie"] = response.cookies.to_h.map { |name, cookie| "#{name}=#{cookie.value}" }.join("; ")
+ region_info = extract_polymer_config(response.body)
+ region_info["region"] = JSON::Any.new(region) if region
+ region_info["cookie"] = JSON::Any.new(response.cookies.to_h.transform_values { |v| JSON::Any.new(v.value) })
+ info = region_info if !region_info["reason"]?
end
end
# Try to pull streams from embed URL
if info["reason"]?
embed_page = YT_POOL.client &.get("/embed/#{id}").body
- sts = embed_page.match(/"sts"\s*:\s*(?<sts>\d+)/).try &.["sts"]?
- sts ||= ""
- embed_info = HTTP::Params.parse(YT_POOL.client &.get("/get_video_info?video_id=#{id}&eurl=https://youtube.googleapis.com/v/#{id}&gl=US&hl=en&disable_polymer=1&sts=#{sts}").body)
+ sts = embed_page.match(/"sts"\s*:\s*(?<sts>\d+)/).try &.["sts"]? || ""
+ embed_info = HTTP::Params.parse(YT_POOL.client &.get("/get_video_info?html5=1&video_id=#{id}&eurl=https://youtube.googleapis.com/v/#{id}&gl=US&hl=en&sts=#{sts}").body)
- if !embed_info["reason"]?
- embed_info.each do |key, value|
- info[key] = value.to_s
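+ # get_video_info returns URL-encoded form data; its "player_response" field
+ # is itself a JSON document mirroring the inline player response.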
+ if embed_info["player_response"]?
+ player_response = JSON.parse(embed_info["player_response"])
+ {"captions", "microformat", "playabilityStatus", "streamingData", "videoDetails", "storyboards"}.each do |f|
+ info[f] = player_response[f] if player_response[f]?
end
- else
- raise info["reason"]
end
- end
-
- if info["reason"]? && !info["player_response"]?
- raise info["reason"]
- end
- player_json = JSON.parse(info["player_response"])
- if reason = player_json["playabilityStatus"]?.try &.["reason"]?.try &.as_s
- raise reason
- end
-
- title = player_json["videoDetails"]["title"].as_s
- author = player_json["videoDetails"]["author"]?.try &.as_s || ""
- ucid = player_json["videoDetails"]["channelId"]?.try &.as_s || ""
-
- info["premium"] = html.xpath_node(%q(.//span[text()="Premium"])) ? "true" : "false"
-
- views = html.xpath_node(%q(//meta[@itemprop="interactionCount"]))
- .try &.["content"].to_i64? || 0_i64
-
- likes = html.xpath_node(%q(//button[@title="I like this"]/span))
- .try &.content.delete(",").try &.to_i? || 0
-
- dislikes = html.xpath_node(%q(//button[@title="I dislike this"]/span))
- .try &.content.delete(",").try &.to_i? || 0
-
- avg_rating = (likes.to_f/(likes.to_f + dislikes.to_f) * 4 + 1)
- avg_rating = avg_rating.nan? ? 0.0 : avg_rating
- info["avg_rating"] = "#{avg_rating}"
+ initial_data = JSON.parse(embed_info["watch_next_response"]) if embed_info["watch_next_response"]?
- description_html = html.xpath_node(%q(//p[@id="eow-description"])).try &.to_xml(options: XML::SaveOptions::NO_DECL) || ""
- wilson_score = ci_lower_bound(likes, likes + dislikes)
-
- published = html.xpath_node(%q(//meta[@itemprop="datePublished"])).try &.["content"]
- published ||= Time.utc.to_s("%Y-%m-%d")
- published = Time.parse(published, "%Y-%m-%d", Time::Location.local)
-
- is_family_friendly = html.xpath_node(%q(//meta[@itemprop="isFamilyFriendly"])).try &.["content"] == "True"
- is_family_friendly ||= true
-
- genre = html.xpath_node(%q(//meta[@itemprop="genre"])).try &.["content"]
- genre ||= ""
-
- genre_url = html.xpath_node(%(//ul[contains(@class, "watch-info-tag-list")]/li/a[text()="#{genre}"])).try &.["href"]?
- genre_url ||= ""
-
- # YouTube provides invalid URLs for some genres, so we fix that here
- case genre
- when "Comedy"
- genre_url = "/channel/UCQZ43c4dAA9eXCQuXWu9aTw"
- when "Education"
- genre_url = "/channel/UCdxpofrI-dO6oYfsqHDHphw"
- when "Gaming"
- genre_url = "/channel/UCOpNcN46UbXVtpKMrmU4Abg"
- when "Movies"
- genre_url = "/channel/UClgRkhTL3_hImCAmdLfDE4g"
- when "Nonprofits & Activism"
- genre_url = "/channel/UCfFyYRYslvuhwMDnx6KjUvw"
- when "Trailers"
- genre_url = "/channel/UClgRkhTL3_hImCAmdLfDE4g"
- else nil # Ignore
+ info["relatedVideos"] = initial_data.try &.["playerOverlays"]?.try &.["playerOverlayRenderer"]?
+ .try &.["endScreen"]?.try &.["watchNextEndScreenRenderer"]?.try &.["results"]?.try &.as_a.compact_map { |r|
+ parse_related r
+ }.try { |a| JSON::Any.new(a) } || embed_info["rvs"]?.try &.split(",").map { |r|
+ r = HTTP::Params.parse(r).to_h
+ JSON::Any.new(Hash.zip(r.keys, r.values.map { |v| JSON::Any.new(v) }))
+ }.try { |a| JSON::Any.new(a) } || JSON::Any.new([] of JSON::Any)
end
- license = html.xpath_node(%q(//h4[contains(text(),"License")]/parent::*/ul/li)).try &.content || ""
- sub_count_text = html.xpath_node(%q(//span[contains(@class, "subscriber-count")])).try &.["title"]? || "0"
- author_thumbnail = html.xpath_node(%(//span[@class="yt-thumb-clip"]/img)).try &.["data-thumb"]?.try &.gsub(/^\/\//, "https://") || ""
-
- video = Video.new(id, info, Time.utc, title, views, likes, dislikes, wilson_score, published, description_html,
- nil, author, ucid, allowed_regions, is_family_friendly, genre, genre_url, license, sub_count_text, author_thumbnail)
+ raise info["reason"]?.try &.as_s || "" if !info["videoDetails"]?
+ video = Video.new(id, info, Time.utc)
return video
end
-def itag_to_metadata?(itag : String)
- return VIDEO_FORMATS[itag]?
+def itag_to_metadata?(itag : JSON::Any)
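+ # e.g. itag "22" maps to something along the lines of
+ # {"ext" => "mp4", "height" => 720, ...} (exact shape per VIDEO_FORMATS)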
+ return VIDEO_FORMATS[itag.to_s]?
end
def process_continuation(db, query, plid, id)
diff --git a/src/invidious/views/components/item.ecr b/src/invidious/views/components/item.ecr
index e9baba2c..0c19fc1b 100644
--- a/src/invidious/views/components/item.ecr
+++ b/src/invidious/views/components/item.ecr
@@ -85,7 +85,7 @@
- <% if item.responds_to?(:premiere_timestamp) && item.premiere_timestamp && item.premiere_timestamp.not_nil! > Time.utc %>
+ <% if item.responds_to?(:premiere_timestamp) && item.premiere_timestamp.try &.> Time.utc %>
<%= translate(locale, "Premieres in `x`", recode_date((item.premiere_timestamp.as(Time) - Time.utc).ago, locale)) %>
<% elsif Time.utc - item.published > 1.minute %>
<%= translate(locale, "Shared `x` ago", recode_date(item.published, locale)) %>
@@ -144,7 +144,7 @@
- <% if item.responds_to?(:premiere_timestamp) && item.premiere_timestamp && item.premiere_timestamp.not_nil! > Time.utc %>
+ <% if item.responds_to?(:premiere_timestamp) && item.premiere_timestamp.try &.> Time.utc %>
<%= translate(locale, "Premieres in `x`", recode_date((item.premiere_timestamp.as(Time) - Time.utc).ago, locale)) %>
<% elsif Time.utc - item.published > 1.minute %>
<%= translate(locale, "Shared `x` ago", recode_date(item.published, locale)) %>
diff --git a/src/invidious/views/components/player.ecr b/src/invidious/views/components/player.ecr
index 3c30f69e..6b01d25f 100644
--- a/src/invidious/views/components/player.ecr
+++ b/src/invidious/views/components/player.ecr
@@ -3,23 +3,23 @@
<% if params.autoplay %>autoplay<% end %>
<% if params.video_loop %>loop<% end %>
<% if params.controls %>controls<% end %>>
- <% if hlsvp && !CONFIG.disabled?("livestreams") %>
- <source src="<%= URI.parse(hlsvp).full_path %>" type="application/x-mpegURL" label="livestream">