Pull 'extract_videos' out into separate function

pull/107/head
Omar Roth 6 years ago
parent 2f8716d97f
commit 15c26d022b

@ -1283,23 +1283,31 @@ get "/feed/channel/:ucid" do |env|
if !ucid.match(/UC[a-zA-Z0-9_-]{22}/) if !ucid.match(/UC[a-zA-Z0-9_-]{22}/)
rss = client.get("/feeds/videos.xml?user=#{ucid}") rss = client.get("/feeds/videos.xml?user=#{ucid}")
rss = XML.parse_html(rss.body) rss = XML.parse_html(rss.body)
ucid = rss.xpath_node("//feed/channelid") ucid = rss.xpath_node("//feed/channelid")
if !ucid if !ucid
error_message = "User does not exist." error_message = "User does not exist."
halt env, status_code: 404, response: error_message halt env, status_code: 404, response: error_message
end end
next env.redirect "/channel/#{ucid}" ucid = ucid.content
next env.redirect "/feed/channel/#{ucid}"
end end
url = produce_videos_url(ucid) url = produce_videos_url(ucid)
response = client.get(url) response = client.get(url)
response = JSON.parse(response.body) json = JSON.parse(response.body)
if !response["content_html"]?
if json["content_html"].as_s.empty?
if response.status_code == 500
error_message = "This channel does not exist." error_message = "This channel does not exist."
halt env, status_code: 404, response: error_message halt env, status_code: 404, response: error_message
else
next ""
end end
content_html = response["content_html"].as_s end
content_html = json["content_html"].as_s
document = XML.parse_html(content_html) document = XML.parse_html(content_html)
channel = get_channel(ucid, client, PG_DB, pull_all_videos: false) channel = get_channel(ucid, client, PG_DB, pull_all_videos: false)
@ -1321,7 +1329,8 @@ get "/feed/channel/:ucid" do |env|
xml.element("uri") { xml.text "#{host_url}/channel/#{ucid}" } xml.element("uri") { xml.text "#{host_url}/channel/#{ucid}" }
end end
extract_channel_videos(document, channel.author, ucid).each do |video| nodeset = document.xpath_nodes(%q(//li[contains(@class, "feed-item-container")]))
extract_videos(nodeset).each do |video|
xml.element("entry") do xml.element("entry") do
xml.element("id") { xml.text "yt:video:#{video.id}" } xml.element("id") { xml.text "yt:video:#{video.id}" }
xml.element("yt:videoId") { xml.text video.id } xml.element("yt:videoId") { xml.text video.id }
@ -1480,12 +1489,14 @@ get "/channel/:ucid" do |env|
if !ucid.match(/UC[a-zA-Z0-9_-]{22}/) if !ucid.match(/UC[a-zA-Z0-9_-]{22}/)
rss = client.get("/feeds/videos.xml?user=#{ucid}") rss = client.get("/feeds/videos.xml?user=#{ucid}")
rss = XML.parse_html(rss.body) rss = XML.parse_html(rss.body)
ucid = rss.xpath_node("//feed/channelid") ucid = rss.xpath_node("//feed/channelid")
if !ucid if !ucid
error_message = "User does not exist." error_message = "User does not exist."
next templated "error" next templated "error"
end end
ucid = ucid.content
next env.redirect "/channel/#{ucid}" next env.redirect "/channel/#{ucid}"
end end
@ -1520,7 +1531,7 @@ get "/channel/:ucid" do |env|
id = HTTP::Params.parse(href.query.not_nil!)["v"] id = HTTP::Params.parse(href.query.not_nil!)["v"]
title = node.content title = node.content
videos << ChannelVideo.new(id, title, Time.now, Time.now, ucid, author) videos << ChannelVideo.new(id, title, Time.now, Time.now, "", "")
end end
templated "channel" templated "channel"
@ -2002,54 +2013,24 @@ get "/api/v1/trending" do |env|
trending = XML.parse_html(trending) trending = XML.parse_html(trending)
videos = JSON.build do |json| videos = JSON.build do |json|
json.array do json.array do
trending.xpath_nodes(%q(//ul/li[@class="expanded-shelf-content-item-wrapper"])).each do |node| nodeset = trending.xpath_nodes(%q(//ul/li[@class="expanded-shelf-content-item-wrapper"]))
anchor = node.xpath_node(%q(.//h3/a)).not_nil! extract_videos(nodeset).each do |video|
title = anchor.content
id = anchor["href"].lchop("/watch?v=")
anchor = node.xpath_node(%q(.//div[contains(@class, "yt-lockup-byline")]/a)).not_nil!
author = anchor.content
author_url = anchor["href"]
metadata = node.xpath_nodes(%q(.//div[contains(@class,"yt-lockup-meta")]/ul/li))
if metadata.size == 0
next
elsif metadata.size == 1
view_count = metadata[0].content.rchop(" watching").delete(",").to_i64
published = Time.now
else
published = decode_date(metadata[0].content)
view_count = metadata[1].content.rchop(" views")
if view_count == "No"
view_count = 0_i64
else
view_count = view_count.delete(",").to_i64
end
end
description_html = node.xpath_node(%q(.//div[contains(@class, "yt-lockup-description")]))
description, description_html = html_to_description(description_html)
length_seconds = decode_length_seconds(node.xpath_node(%q(.//span[@class="video-time"])).not_nil!.content)
json.object do json.object do
json.field "title", title json.field "title", video.title
json.field "videoId", id json.field "videoId", video.id
json.field "videoThumbnails" do json.field "videoThumbnails" do
generate_thumbnails(json, id) generate_thumbnails(json, video.id)
end end
json.field "lengthSeconds", length_seconds json.field "lengthSeconds", video.length_seconds
json.field "viewCount", view_count json.field "viewCount", video.views
json.field "author", author json.field "author", video.author
json.field "authorUrl", author_url json.field "authorUrl", "/channel/#{video.ucid}"
json.field "published", published.epoch json.field "published", video.published.epoch
json.field "description", description json.field "description", video.description
json.field "descriptionHtml", description_html json.field "descriptionHtml", video.description_html
end end
end end
end end
@ -2096,16 +2077,17 @@ get "/api/v1/channels/:ucid" do |env|
client = make_client(YT_URL) client = make_client(YT_URL)
if !ucid.match(/UC[a-zA-Z0-9_-]{22}/) if !ucid.match(/UC[a-zA-Z0-9_-]{22}/)
rss = client.get("/feeds/videos.xml?user=#{ucid}").body rss = client.get("/feeds/videos.xml?user=#{ucid}")
rss = XML.parse_html(rss) rss = XML.parse_html(rss.body)
ucid = rss.xpath_node("//feed/channelid") ucid = rss.xpath_node("//feed/channelid")
if ucid if !ucid
ucid = ucid.content
else
env.response.content_type = "application/json" env.response.content_type = "application/json"
next {"error" => "User does not exist"}.to_json next {"error" => "User does not exist"}.to_json
end end
ucid = ucid.content
next env.redirect "/api/v1/channels/#{ucid}"
end end
channel = get_channel(ucid, client, PG_DB, pull_all_videos: false) channel = get_channel(ucid, client, PG_DB, pull_all_videos: false)
@ -2212,25 +2194,36 @@ get "/api/v1/channels/:ucid/videos" do |env|
client = make_client(YT_URL) client = make_client(YT_URL)
if !ucid.match(/UC[a-zA-Z0-9_-]{22}/) if !ucid.match(/UC[a-zA-Z0-9_-]{22}/)
rss = client.get("/feeds/videos.xml?user=#{ucid}").body rss = client.get("/feeds/videos.xml?user=#{ucid}")
rss = XML.parse_html(rss) rss = XML.parse_html(rss.body)
ucid = rss.xpath_node("//feed/channelid") ucid = rss.xpath_node("//feed/channelid")
if ucid if !ucid
ucid = ucid.content
else
env.response.content_type = "application/json" env.response.content_type = "application/json"
next {"error" => "User does not exist"}.to_json next {"error" => "User does not exist"}.to_json
end end
ucid = ucid.content
url = "/api/v1/channels/#{ucid}/videos"
if env.params.query
url += "?#{env.params.query}"
end
next env.redirect url
end end
url = produce_videos_url(ucid, page) url = produce_videos_url(ucid, page)
response = client.get(url) response = client.get(url)
json = JSON.parse(response.body) json = JSON.parse(response.body)
if !json["content_html"]? || json["content_html"].as_s.empty? if !json["content_html"]?
env.response.content_type = "application/json" env.response.content_type = "application/json"
next {"error" => "No videos or nonexistent channel"}.to_json
if response.status_code == 500
response = {"Error" => "Channel does not exist"}.to_json
halt env, status_code: 404, response: response
else
next Array(String).new.to_json
end
end end
content_html = json["content_html"].as_s content_html = json["content_html"].as_s
@ -2242,47 +2235,22 @@ get "/api/v1/channels/:ucid/videos" do |env|
videos = JSON.build do |json| videos = JSON.build do |json|
json.array do json.array do
document.xpath_nodes(%q(//li[contains(@class, "feed-item-container")])).each do |node| nodeset = document.xpath_nodes(%q(//li[contains(@class, "feed-item-container")]))
anchor = node.xpath_node(%q(.//h3[contains(@class,"yt-lockup-title")]/a)).not_nil! extract_videos(nodeset, ucid).each do |video|
title = anchor.content.strip
video_id = anchor["href"].lchop("/watch?v=")
metadata = node.xpath_nodes(%q(.//div[contains(@class,"yt-lockup-meta")]/ul/li))
if metadata.size == 0
next
elsif metadata.size == 1
view_count = metadata[0].content.split(" ")[0].delete(",").to_i64
published = Time.now
else
published = decode_date(metadata[0].content)
view_count = metadata[1].content.split(" ")[0]
if view_count == "No"
view_count = 0_i64
else
view_count = view_count.delete(",").to_i64
end
end
description_html = node.xpath_node(%q(.//div[contains(@class, "yt-lockup-description")]))
description, description_html = html_to_description(description_html)
length_seconds = decode_length_seconds(node.xpath_node(%q(.//span[@class="video-time"])).not_nil!.content)
json.object do json.object do
json.field "title", title json.field "title", video.title
json.field "videoId", video_id json.field "videoId", video.id
json.field "videoThumbnails" do json.field "videoThumbnails" do
generate_thumbnails(json, video_id) generate_thumbnails(json, video.id)
end end
json.field "description", description json.field "description", video.description
json.field "descriptionHtml", description_html json.field "descriptionHtml", video.description_html
json.field "viewCount", view_count json.field "viewCount", video.views
json.field "published", published.epoch json.field "published", video.published.epoch
json.field "lengthSeconds", length_seconds json.field "lengthSeconds", video.length_seconds
end end
end end
end end
@ -2344,7 +2312,7 @@ get "/api/v1/search" do |env|
json.field "description", video.description json.field "description", video.description
json.field "descriptionHtml", video.description_html json.field "descriptionHtml", video.description_html
json.field "viewCount", video.view_count json.field "viewCount", video.views
json.field "published", video.published.epoch json.field "published", video.published.epoch
json.field "lengthSeconds", video.length_seconds json.field "lengthSeconds", video.length_seconds
end end

@ -130,69 +130,3 @@ def fetch_channel(ucid, client, db, pull_all_videos = true)
return channel return channel
end end
# Scrapes video entries out of a parsed channel-page HTML document and
# builds full Video records for them.
#
# document - XML document of a channel's videos tab (feed-item-container rows)
# author   - channel author name stamped onto every resulting Video
# ucid     - channel ID stamped onto every resulting Video
#
# Returns an Array(Video). Rows with no title anchor, ad rows, and rows
# with no metadata list are silently skipped.
def extract_channel_videos(document, author, ucid)
  channel_videos = [] of Video

  document.xpath_nodes(%q(//li[contains(@class, "feed-item-container")])).each do |node|
    anchor = node.xpath_node(%q(.//h3[contains(@class,"yt-lockup-title")]/a))
    if !anchor
      next
    end

    # Sponsored results link out to googleadservices — skip them
    if anchor["href"].starts_with? "https://www.googleadservices.com"
      next
    end

    title = anchor.content.strip
    id = anchor["href"].lchop("/watch?v=")

    metadata = node.xpath_nodes(%q(.//div[contains(@class,"yt-lockup-meta")]/ul/li))
    if metadata.size == 0
      next
    elsif metadata.size == 1
      # Single metadata item: no publish date present (e.g. a live row);
      # first token is the viewer/view count. NOTE(review): assumes the
      # first token is numeric — a "No views"-style row would raise here.
      view_count = metadata[0].content.split(" ")[0].delete(",").to_i64
      published = Time.now
    else
      published = decode_date(metadata[0].content)

      view_count = metadata[1].content.split(" ")[0]
      if view_count == "No"
        # "No views" renders a word where the number would be
        view_count = 0_i64
      else
        view_count = view_count.delete(",").to_i64
      end
    end

    description_html = node.xpath_node(%q(.//div[contains(@class, "yt-lockup-description")]))
    description, description_html = html_to_description(description_html)

    length_seconds = node.xpath_node(%q(.//span[@class="video-time"]))
    if length_seconds
      length_seconds = decode_length_seconds(length_seconds.content)
    else
      # No duration badge on the row — use -1 as a sentinel
      length_seconds = -1
    end

    # Video expects an info params object; only the length is known here
    info = HTTP::Params.parse("length_seconds=#{length_seconds}")

    channel_videos << Video.new(
      id,
      info,
      Time.now,
      title,
      view_count,
      0,    # Like count
      0,    # Dislike count
      0.0,  # Wilson score
      published,
      description,
      "",   # Language,
      author,
      ucid,
      [] of String, # Allowed regions
      true, # Is family friendly
      ""    # Genre
    )
  end

  return channel_videos
end

@ -286,3 +286,91 @@ def html_to_description(description_html)
return description, description_html return description, description_html
end end
# Parses a nodeset of video result rows (feed-item-container / item-section
# list items) into SearchVideo structs. Shared by the search, trending, and
# channel-videos scrapers.
#
# nodeset - XML nodes, one per result row
# ucid    - when given, the caller already knows the channel, so the per-row
#           author scrape is skipped and author/author_id are left blank
#
# Returns an Array(SearchVideo). Ad rows, playlist rows, movie rows, and
# rows without metadata are skipped.
def extract_videos(nodeset, ucid = nil)
  # TODO: Make this a 'common', so it makes more sense to be used here
  videos = [] of SearchVideo

  nodeset.each do |node|
    anchor = node.xpath_node(%q(.//h3[contains(@class,"yt-lockup-title")]/a))
    if !anchor
      next
    end

    # Sponsored results link out to googleadservices — skip them
    if anchor["href"].starts_with? "https://www.googleadservices.com"
      next
    end

    title = anchor.content.strip
    id = anchor["href"].lchop("/watch?v=")

    if ucid
      # Channel pages: author fields are filled in by the caller, not scraped
      author = ""
      author_id = ""
    else
      anchor = node.xpath_node(%q(.//div[contains(@class, "yt-lockup-byline")]/a))
      if !anchor
        next
      end

      author = anchor.content
      # byline href ends in the channel ID — take the last path segment
      author_id = anchor["href"].split("/")[-1]
    end

    # Skip playlists
    if node.xpath_node(%q(.//div[contains(@class, "yt-playlist-renderer")]))
      next
    end

    # Skip movies
    if node.xpath_node(%q(.//div[contains(@class, "yt-lockup-movie-top-content")]))
      next
    end

    metadata = node.xpath_nodes(%q(.//div[contains(@class,"yt-lockup-meta")]/ul/li))
    if metadata.size == 0
      next
    elsif metadata.size == 1
      if metadata[0].content.starts_with? "Starts"
        # Upcoming ("Starts ...") entry: scheduled time is in a data-timestamp
        # attribute; no views yet
        view_count = 0_i64
        published = Time.epoch(metadata[0].xpath_node(%q(.//span)).not_nil!["data-timestamp"].to_i64)
      else
        # Live entry: "Streamed"/watching-style count, no publish date
        view_count = metadata[0].content.lchop("Streamed ").split(" ")[0].delete(",").to_i64
        published = Time.now
      end
    else
      published = decode_date(metadata[0].content)

      view_count = metadata[1].content.split(" ")[0]
      if view_count == "No"
        # "No views" renders a word where the number would be
        view_count = 0_i64
      else
        view_count = view_count.delete(",").to_i64
      end
    end

    description_html = node.xpath_node(%q(.//div[contains(@class, "yt-lockup-description")]))
    description, description_html = html_to_description(description_html)

    length_seconds = node.xpath_node(%q(.//span[@class="video-time"]))
    if length_seconds
      length_seconds = decode_length_seconds(length_seconds.content)
    else
      # No duration badge on the row — use -1 as a sentinel
      length_seconds = -1
    end

    videos << SearchVideo.new(
      title,
      id,
      author,
      author_id,
      published,
      view_count,
      description,
      description_html,
      length_seconds,
    )
  end

  return videos
end

@ -5,7 +5,7 @@ class SearchVideo
author: String, author: String,
ucid: String, ucid: String,
published: Time, published: Time,
view_count: Int64, views: Int64,
description: String, description: String,
description_html: String, description_html: String,
length_seconds: Int32, length_seconds: Int32,
@ -20,90 +20,8 @@ def search(query, page = 1, search_params = build_search_params(content_type: "v
end end
html = XML.parse_html(html) html = XML.parse_html(html)
videos = [] of SearchVideo nodeset = html.xpath_nodes(%q(//ol[@class="item-section"]/li))
videos = extract_videos(nodeset)
html.xpath_nodes(%q(//ol[@class="item-section"]/li)).each do |node|
anchor = node.xpath_node(%q(.//h3[contains(@class,"yt-lockup-title")]/a))
if !anchor
next
end
if anchor["href"].starts_with? "https://www.googleadservices.com"
next
end
title = anchor.content.strip
video_id = anchor["href"].lchop("/watch?v=")
anchor = node.xpath_node(%q(.//div[contains(@class, "yt-lockup-byline")]/a))
if !anchor
next
end
author = anchor.content
author_url = anchor["href"]
ucid = author_url.split("/")[-1]
# Skip playlists
if node.xpath_node(%q(.//ol[contains(@class, "yt-lockup-playlist-items")]))
next
end
metadata = node.xpath_nodes(%q(.//div[contains(@class,"yt-lockup-meta")]/ul/li))
if metadata.size == 0
next
elsif metadata.size == 1
# Skip movies
if metadata[0].content.includes? "·"
next
end
if metadata[0].content.starts_with? "Starts"
view_count = 0_i64
published = Time.epoch(metadata[0].xpath_node(%q(.//span)).not_nil!["data-timestamp"].to_i64)
else
view_count = metadata[0].content.lchop("Streamed ").split(" ")[0].delete(",").to_i64
published = Time.now
end
else
# Skip movies
if metadata[0].content.includes? "·"
next
end
published = decode_date(metadata[0].content)
view_count = metadata[1].content.split(" ")[0]
if view_count == "No"
view_count = 0_i64
else
view_count = view_count.delete(",").to_i64
end
end
description_html = node.xpath_node(%q(.//div[contains(@class, "yt-lockup-description")]))
description, description_html = html_to_description(description_html)
length_seconds = node.xpath_node(%q(.//span[@class="video-time"]))
if length_seconds
length_seconds = decode_length_seconds(length_seconds.content)
else
length_seconds = -1
end
video = SearchVideo.new(
title,
video_id,
author,
ucid,
published,
view_count,
description,
description_html,
length_seconds,
)
videos << video
end
return videos return videos
end end

Loading…
Cancel
Save