From b1fc80b79ae498f7bfd6ad7a32ebee98fabdaf4a Mon Sep 17 00:00:00 2001 From: Omar Roth Date: Thu, 12 Sep 2019 21:09:23 -0400 Subject: [PATCH] Update sub_count extractor --- src/invidious/channels.cr | 33 ++++++++++--------------------- src/invidious/helpers/helpers.cr | 9 ++------- src/invidious/helpers/utils.cr | 2 +- src/invidious/videos.cr | 2 +- src/invidious/views/channel.ecr | 2 +- src/invidious/views/community.ecr | 2 +- src/invidious/views/playlists.ecr | 2 +- 7 files changed, 17 insertions(+), 35 deletions(-) diff --git a/src/invidious/channels.cr b/src/invidious/channels.cr index 3291efbd..d88a8f71 100644 --- a/src/invidious/channels.cr +++ b/src/invidious/channels.cr @@ -118,7 +118,7 @@ struct AboutChannel description_html: String, paid: Bool, total_views: Int64, - sub_count: Int64, + sub_count: Int32, joined: Time, is_family_friendly: Bool, allowed_regions: Array(String), @@ -951,12 +951,6 @@ def get_about_info(ucid, locale) raise error_message end - sub_count = about.xpath_node(%q(//span[contains(text(), "subscribers")])) - if sub_count - sub_count = sub_count.content.delete(", subscribers").to_i? - end - sub_count ||= 0 - author = about.xpath_node(%q(//span[contains(@class,"qualified-channel-title-text")]/a)).not_nil!.content author_url = about.xpath_node(%q(//span[contains(@class,"qualified-channel-title-text")]/a)).not_nil!["href"] author_thumbnail = about.xpath_node(%q(//img[@class="channel-header-profile-image"])).not_nil!["src"] @@ -1000,21 +994,14 @@ def get_about_info(ucid, locale) ) end - total_views = 0_i64 - sub_count = 0_i64 - - joined = Time.unix(0) - metadata = about.xpath_nodes(%q(//span[@class="about-stat"])) - metadata.each do |item| - case item.content - when .includes? "views" - total_views = item.content.gsub(/\D/, "").to_i64 - when .includes? "subscribers" - sub_count = item.content.delete("subscribers").gsub(/\D/, "").to_i64 - when .includes? "Joined" - joined = Time.parse(item.content.lchop("Joined "), "%b %-d, %Y", Time::Location.local) - end - end + joined = about.xpath_node(%q(//span[contains(., "Joined")])) + .try &.content.try { |text| Time.parse(text, "Joined %b %-d, %Y", Time::Location.local) } || Time.unix(0) + + total_views = about.xpath_node(%q(//span[contains(., "views")]/b)) + .try &.content.try &.gsub(/\D/, "").to_i64? || 0_i64 + + sub_count = about.xpath_node(%q(.//span[contains(@class, "subscriber-count")])) + .try &.["title"].try { |text| short_text_to_number(text) } || 0 # Auto-generated channels # https://support.google.com/youtube/answer/2579942 @@ -1026,7 +1013,7 @@ def get_about_info(ucid, locale) tabs = about.xpath_nodes(%q(//ul[@id="channel-navigation-menu"]/li/a/span)).map { |node| node.content.downcase } - return AboutChannel.new( + AboutChannel.new( ucid: ucid, author: author, auto_generated: auto_generated, diff --git a/src/invidious/helpers/helpers.cr b/src/invidious/helpers/helpers.cr index 43ccf0c5..0ec117c1 100644 --- a/src/invidious/helpers/helpers.cr +++ b/src/invidious/helpers/helpers.cr @@ -415,13 +415,8 @@ def extract_items(nodeset, ucid = nil, author_name = nil) author_thumbnail ||= "" - subscriber_count_text = node.xpath_node(%q(.//span[contains(@class, "yt-subscriber-count")])).try &.["title"] - begin - subscriber_count = subscriber_count_text.try { |text| short_text_to_number(text) } - rescue ex - subscriber_count = subscriber_count_text.try &.gsub(/\D/, "").to_i? - end - subscriber_count ||= 0 + subscriber_count = node.xpath_node(%q(.//span[contains(@class, "subscriber-count")])) + .try &.["title"].try { |text| short_text_to_number(text) } || 0 video_count = node.xpath_node(%q(.//ul[@class="yt-lockup-meta-info"]/li)).try &.content.split(" ")[0].gsub(/\D/, "").to_i? diff --git a/src/invidious/helpers/utils.cr b/src/invidious/helpers/utils.cr index 5a813486..e17d58e2 100644 --- a/src/invidious/helpers/utils.cr +++ b/src/invidious/helpers/utils.cr @@ -157,7 +157,7 @@ def number_with_separator(number) number.to_s.reverse.gsub(/(\d{3})(?=\d)/, "\\1,").reverse end -def short_text_to_number(short_text) +def short_text_to_number(short_text : String) : Int32 case short_text when .ends_with? "M" number = short_text.rstrip(" mM").to_f diff --git a/src/invidious/videos.cr b/src/invidious/videos.cr index 40bcc513..829b384b 100644 --- a/src/invidious/videos.cr +++ b/src/invidious/videos.cr @@ -1262,7 +1262,7 @@ def fetch_video(id, region) end license = html.xpath_node(%q(//h4[contains(text(),"License")]/parent::*/ul/li)).try &.content || "" - sub_count_text = html.xpath_node(%q(//span[contains(@class, "yt-subscriber-count")])).try &.["title"]? || "0" + sub_count_text = html.xpath_node(%q(//span[contains(@class, "subscriber-count")])).try &.["title"]? || "0" author_thumbnail = html.xpath_node(%(//span[@class="yt-thumb-clip"]/img)).try &.["data-thumb"]?.try &.gsub(/^\/\//, "https://") || "" video = Video.new(id, info, Time.utc, title, views, likes, dislikes, wilson_score, published, description_html, diff --git a/src/invidious/views/channel.ecr b/src/invidious/views/channel.ecr index 1074598d..b5eb46ea 100644 --- a/src/invidious/views/channel.ecr +++ b/src/invidious/views/channel.ecr @@ -34,7 +34,7 @@
<% ucid = channel.ucid %> <% author = channel.author %> - <% sub_count_text = channel.sub_count.format %> + <% sub_count_text = number_to_short_text(channel.sub_count) %> <%= rendered "components/subscribe_widget" %>
diff --git a/src/invidious/views/community.ecr b/src/invidious/views/community.ecr index 9d086b5d..218cc2d4 100644 --- a/src/invidious/views/community.ecr +++ b/src/invidious/views/community.ecr @@ -33,7 +33,7 @@
<% ucid = channel.ucid %> <% author = channel.author %> - <% sub_count_text = channel.sub_count.format %> + <% sub_count_text = number_to_short_text(channel.sub_count) %> <%= rendered "components/subscribe_widget" %>
diff --git a/src/invidious/views/playlists.ecr b/src/invidious/views/playlists.ecr index 400922ff..a32192b5 100644 --- a/src/invidious/views/playlists.ecr +++ b/src/invidious/views/playlists.ecr @@ -33,7 +33,7 @@
<% ucid = channel.ucid %> <% author = channel.author %> - <% sub_count_text = channel.sub_count.format %> + <% sub_count_text = number_to_short_text(channel.sub_count) %> <%= rendered "components/subscribe_widget" %>