Merge pull request #1490 from tirz/fix-regex_parsing
fix: ytInitialData parsing with regex
This commit is contained in:
commit
99ba9874b1
@@ -775,41 +775,34 @@ def extract_channel_community_cursor(continuation)
|
|||||||
cursor
|
cursor
|
||||||
end
|
end
|
||||||
|
|
||||||
INITDATA_PREQUERY = "window[\"ytInitialData\"] = {"
|
|
||||||
|
|
||||||
def get_about_info(ucid, locale)
|
def get_about_info(ucid, locale)
|
||||||
about = YT_POOL.client &.get("/channel/#{ucid}/about?gl=US&hl=en")
|
result = YT_POOL.client &.get("/channel/#{ucid}/about?gl=US&hl=en")
|
||||||
if about.status_code != 200
|
if result.status_code != 200
|
||||||
about = YT_POOL.client &.get("/user/#{ucid}/about?gl=US&hl=en")
|
result = YT_POOL.client &.get("/user/#{ucid}/about?gl=US&hl=en")
|
||||||
end
|
end
|
||||||
|
|
||||||
if md = about.headers["location"]?.try &.match(/\/channel\/(?<ucid>UC[a-zA-Z0-9_-]{22})/)
|
if md = result.headers["location"]?.try &.match(/\/channel\/(?<ucid>UC[a-zA-Z0-9_-]{22})/)
|
||||||
raise ChannelRedirect.new(channel_id: md["ucid"])
|
raise ChannelRedirect.new(channel_id: md["ucid"])
|
||||||
end
|
end
|
||||||
|
|
||||||
if about.status_code != 200
|
if result.status_code != 200
|
||||||
error_message = translate(locale, "This channel does not exist.")
|
error_message = translate(locale, "This channel does not exist.")
|
||||||
raise error_message
|
raise error_message
|
||||||
end
|
end
|
||||||
|
|
||||||
initdata_pre = about.body.index(INITDATA_PREQUERY)
|
about = XML.parse_html(result.body)
|
||||||
initdata_post = initdata_pre.nil? ? nil : about.body.index("};", initdata_pre)
|
|
||||||
if initdata_post.nil?
|
|
||||||
about = XML.parse_html(about.body)
|
|
||||||
error_message = about.xpath_node(%q(//div[@class="yt-alert-content"])).try &.content.strip
|
|
||||||
error_message ||= translate(locale, "Could not get channel info.")
|
|
||||||
raise error_message
|
|
||||||
end
|
|
||||||
initdata_pre = initdata_pre.not_nil! + INITDATA_PREQUERY.size - 1
|
|
||||||
|
|
||||||
initdata = JSON.parse(about.body[initdata_pre, initdata_post - initdata_pre + 1])
|
|
||||||
about = XML.parse_html(about.body)
|
|
||||||
|
|
||||||
if about.xpath_node(%q(//div[contains(@class, "channel-empty-message")]))
|
if about.xpath_node(%q(//div[contains(@class, "channel-empty-message")]))
|
||||||
error_message = translate(locale, "This channel does not exist.")
|
error_message = translate(locale, "This channel does not exist.")
|
||||||
raise error_message
|
raise error_message
|
||||||
end
|
end
|
||||||
|
|
||||||
|
initdata = extract_initial_data(result.body)
|
||||||
|
if initdata.empty?
|
||||||
|
error_message = about.xpath_node(%q(//div[@class="yt-alert-content"])).try &.content.strip
|
||||||
|
error_message ||= translate(locale, "Could not get channel info.")
|
||||||
|
raise error_message
|
||||||
|
end
|
||||||
|
|
||||||
author = about.xpath_node(%q(//meta[@name="title"])).not_nil!["content"]
|
author = about.xpath_node(%q(//meta[@name="title"])).not_nil!["content"]
|
||||||
author_url = about.xpath_node(%q(//link[@rel="canonical"])).not_nil!["href"]
|
author_url = about.xpath_node(%q(//link[@rel="canonical"])).not_nil!["href"]
|
||||||
author_thumbnail = about.xpath_node(%q(//link[@rel="image_src"])).not_nil!["href"]
|
author_thumbnail = about.xpath_node(%q(//link[@rel="image_src"])).not_nil!["href"]
|
||||||
|
@@ -598,12 +598,7 @@ def create_notification_stream(env, topics, connection_channel)
|
|||||||
end
|
end
|
||||||
|
|
||||||
def extract_initial_data(body) : Hash(String, JSON::Any)
|
def extract_initial_data(body) : Hash(String, JSON::Any)
|
||||||
initial_data = body.match(/(window\["ytInitialData"\]|var\s+ytInitialData)\s*=\s*(?<info>.*?);+\s*\n/).try &.["info"] || "{}"
|
return JSON.parse(body.match(/(window\["ytInitialData"\]|var\s*ytInitialData)\s*=\s*(JSON\.parse\(")?(?<info>\{.*?\})("\))?;/m).try &.["info"] || "{}").as_h
|
||||||
if initial_data.starts_with?("JSON.parse(\"")
|
|
||||||
return JSON.parse(JSON.parse(%({"initial_data":"#{initial_data[12..-3]}"}))["initial_data"].as_s).as_h
|
|
||||||
else
|
|
||||||
return JSON.parse(initial_data).as_h
|
|
||||||
end
|
|
||||||
end
|
end
|
||||||
|
|
||||||
def proxy_file(response, env)
|
def proxy_file(response, env)
|
||||||
|
@@ -839,8 +839,7 @@ def extract_polymer_config(body)
|
|||||||
params[f] = player_response[f] if player_response[f]?
|
params[f] = player_response[f] if player_response[f]?
|
||||||
end
|
end
|
||||||
|
|
||||||
yt_initial_data = body.match(/(window\["ytInitialData"\]|var\s+ytInitialData)\s*=\s*(?<info>.*?);\s*\n/)
|
yt_initial_data = extract_initial_data(body)
|
||||||
.try { |r| JSON.parse(r["info"]).as_h }
|
|
||||||
|
|
||||||
params["relatedVideos"] = yt_initial_data.try &.["playerOverlays"]?.try &.["playerOverlayRenderer"]?
|
params["relatedVideos"] = yt_initial_data.try &.["playerOverlays"]?.try &.["playerOverlayRenderer"]?
|
||||||
.try &.["endScreen"]?.try &.["watchNextEndScreenRenderer"]?.try &.["results"]?.try &.as_a.compact_map { |r|
|
.try &.["endScreen"]?.try &.["watchNextEndScreenRenderer"]?.try &.["results"]?.try &.as_a.compact_map { |r|
|
||||||
|
Loading…
Reference in New Issue
Block a user