extractors: separate 'extract' and 'parse' logic
This commit is contained in:
parent
9588fcb5d1
commit
bdc51cd20f
@ -8,7 +8,7 @@ def fetch_channel_playlists(ucid, author, continuation, sort_by)
|
|||||||
|
|
||||||
items = [] of SearchItem
|
items = [] of SearchItem
|
||||||
continuation_items.as_a.select(&.as_h.has_key?("gridPlaylistRenderer")).each { |item|
|
continuation_items.as_a.select(&.as_h.has_key?("gridPlaylistRenderer")).each { |item|
|
||||||
extract_item(item, author, ucid).try { |t| items << t }
|
parse_item(item, author, ucid).try { |t| items << t }
|
||||||
}
|
}
|
||||||
|
|
||||||
continuation = continuation_items.as_a.last["continuationItemRenderer"]?
|
continuation = continuation_items.as_a.last["continuationItemRenderer"]?
|
||||||
|
@ -37,7 +37,7 @@ module Invidious::Search
|
|||||||
|
|
||||||
items = [] of SearchItem
|
items = [] of SearchItem
|
||||||
continuation_items.as_a.select(&.as_h.has_key?("itemSectionRenderer")).each do |item|
|
continuation_items.as_a.select(&.as_h.has_key?("itemSectionRenderer")).each do |item|
|
||||||
extract_item(item["itemSectionRenderer"]["contents"].as_a[0]).try { |t| items << t }
|
parse_item(item["itemSectionRenderer"]["contents"].as_a[0]).try { |t| items << t }
|
||||||
end
|
end
|
||||||
|
|
||||||
return items
|
return items
|
||||||
|
@ -20,6 +20,8 @@ private ITEM_PARSERS = {
|
|||||||
Parsers::ReelItemRendererParser,
|
Parsers::ReelItemRendererParser,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private alias InitialData = Hash(String, JSON::Any)
|
||||||
|
|
||||||
record AuthorFallback, name : String, id : String
|
record AuthorFallback, name : String, id : String
|
||||||
|
|
||||||
# Namespace for logic relating to parsing InnerTube data into various datastructs.
|
# Namespace for logic relating to parsing InnerTube data into various datastructs.
|
||||||
@ -348,7 +350,7 @@ private module Parsers
|
|||||||
raw_contents = content_container["items"]?.try &.as_a
|
raw_contents = content_container["items"]?.try &.as_a
|
||||||
if !raw_contents.nil?
|
if !raw_contents.nil?
|
||||||
raw_contents.each do |item|
|
raw_contents.each do |item|
|
||||||
result = extract_item(item)
|
result = parse_item(item)
|
||||||
if !result.nil?
|
if !result.nil?
|
||||||
contents << result
|
contents << result
|
||||||
end
|
end
|
||||||
@ -510,7 +512,7 @@ private module Extractors
|
|||||||
# }]
|
# }]
|
||||||
#
|
#
|
||||||
module YouTubeTabs
|
module YouTubeTabs
|
||||||
def self.process(initial_data : Hash(String, JSON::Any))
|
def self.process(initial_data : InitialData)
|
||||||
if target = initial_data["twoColumnBrowseResultsRenderer"]?
|
if target = initial_data["twoColumnBrowseResultsRenderer"]?
|
||||||
self.extract(target)
|
self.extract(target)
|
||||||
end
|
end
|
||||||
@ -575,7 +577,7 @@ private module Extractors
|
|||||||
# }
|
# }
|
||||||
#
|
#
|
||||||
module SearchResults
|
module SearchResults
|
||||||
def self.process(initial_data : Hash(String, JSON::Any))
|
def self.process(initial_data : InitialData)
|
||||||
if target = initial_data["twoColumnSearchResultsRenderer"]?
|
if target = initial_data["twoColumnSearchResultsRenderer"]?
|
||||||
self.extract(target)
|
self.extract(target)
|
||||||
end
|
end
|
||||||
@ -608,8 +610,8 @@ private module Extractors
|
|||||||
# The way they are structured is too varied to be accurately written down here.
|
# The way they are structured is too varied to be accurately written down here.
|
||||||
# However, they all eventually lead to an array of parsable items after traversing
|
# However, they all eventually lead to an array of parsable items after traversing
|
||||||
# through the JSON structure.
|
# through the JSON structure.
|
||||||
module Continuation
|
module ContinuationContent
|
||||||
def self.process(initial_data : Hash(String, JSON::Any))
|
def self.process(initial_data : InitialData)
|
||||||
if target = initial_data["continuationContents"]?
|
if target = initial_data["continuationContents"]?
|
||||||
self.extract(target)
|
self.extract(target)
|
||||||
elsif target = initial_data["appendContinuationItemsAction"]?
|
elsif target = initial_data["appendContinuationItemsAction"]?
|
||||||
@ -691,8 +693,7 @@ end
|
|||||||
|
|
||||||
# Parses an item from YouTube's JSON response into a more usable structure.
|
# Parses an item from YouTube's JSON response into a more usable structure.
|
||||||
# The end result can either be a SearchVideo, SearchPlaylist or SearchChannel.
|
# The end result can either be a SearchVideo, SearchPlaylist or SearchChannel.
|
||||||
def extract_item(item : JSON::Any, author_fallback : String? = "",
|
def parse_item(item : JSON::Any, author_fallback : String? = "", author_id_fallback : String? = "")
|
||||||
author_id_fallback : String? = "")
|
|
||||||
# We "allow" nil values but secretly use empty strings instead. This is to save us the
|
# We "allow" nil values but secretly use empty strings instead. This is to save us the
|
||||||
# hassle of modifying every author_fallback and author_id_fallback arg usage
|
# hassle of modifying every author_fallback and author_id_fallback arg usage
|
||||||
# which is more often than not nil.
|
# which is more often than not nil.
|
||||||
@ -702,24 +703,23 @@ def extract_item(item : JSON::Any, author_fallback : String? = "",
|
|||||||
# Each parser automatically validates the data given to see if the data is
|
# Each parser automatically validates the data given to see if the data is
|
||||||
# applicable to itself. If not, nil is returned and the next parser is attempted.
|
# applicable to itself. If not, nil is returned and the next parser is attempted.
|
||||||
ITEM_PARSERS.each do |parser|
|
ITEM_PARSERS.each do |parser|
|
||||||
LOGGER.trace("extract_item: Attempting to parse item using \"#{parser.parser_name}\" (cycling...)")
|
LOGGER.trace("parse_item: Attempting to parse item using \"#{parser.parser_name}\" (cycling...)")
|
||||||
|
|
||||||
if result = parser.process(item, author_fallback)
|
if result = parser.process(item, author_fallback)
|
||||||
LOGGER.debug("extract_item: Successfully parsed via #{parser.parser_name}")
|
LOGGER.debug("parse_item: Successfully parsed via #{parser.parser_name}")
|
||||||
|
|
||||||
return result
|
return result
|
||||||
else
|
else
|
||||||
LOGGER.trace("extract_item: Parser \"#{parser.parser_name}\" does not apply. Cycling to the next one...")
|
LOGGER.trace("parse_item: Parser \"#{parser.parser_name}\" does not apply. Cycling to the next one...")
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
# Parses multiple items from YouTube's initial JSON response into a more usable structure.
|
# Parses multiple items from YouTube's initial JSON response into a more usable structure.
|
||||||
# The end result is an array of SearchItem.
|
# The end result is an array of SearchItem.
|
||||||
def extract_items(initial_data : Hash(String, JSON::Any), author_fallback : String? = nil,
|
#
|
||||||
author_id_fallback : String? = nil) : Array(SearchItem)
|
# This function yields each item of the container so that items can be parsed separately.
|
||||||
items = [] of SearchItem
|
#
|
||||||
|
def extract_items(initial_data : InitialData, &block)
|
||||||
if unpackaged_data = initial_data["contents"]?.try &.as_h
|
if unpackaged_data = initial_data["contents"]?.try &.as_h
|
||||||
elsif unpackaged_data = initial_data["response"]?.try &.as_h
|
elsif unpackaged_data = initial_data["response"]?.try &.as_h
|
||||||
elsif unpackaged_data = initial_data.dig?("onResponseReceivedActions", 0).try &.as_h
|
elsif unpackaged_data = initial_data.dig?("onResponseReceivedActions", 0).try &.as_h
|
||||||
@ -727,24 +727,32 @@ def extract_items(initial_data : Hash(String, JSON::Any), author_fallback : Stri
|
|||||||
unpackaged_data = initial_data
|
unpackaged_data = initial_data
|
||||||
end
|
end
|
||||||
|
|
||||||
# This is identical to the parser cycling of extract_item().
|
# This is identical to the parser cycling of parse_item().
|
||||||
ITEM_CONTAINER_EXTRACTOR.each do |extractor|
|
ITEM_CONTAINER_EXTRACTOR.each do |extractor|
|
||||||
LOGGER.trace("extract_items: Attempting to extract item container using \"#{extractor.extractor_name}\" (cycling...)")
|
LOGGER.trace("extract_items: Attempting to extract item container using \"#{extractor.extractor_name}\" (cycling...)")
|
||||||
|
|
||||||
if container = extractor.process(unpackaged_data)
|
if container = extractor.process(unpackaged_data)
|
||||||
LOGGER.debug("extract_items: Successfully unpacked container with \"#{extractor.extractor_name}\"")
|
LOGGER.debug("extract_items: Successfully unpacked container with \"#{extractor.extractor_name}\"")
|
||||||
# Extract items in container
|
# Extract items in container
|
||||||
container.each do |item|
|
container.each { |item| yield item }
|
||||||
if parsed_result = extract_item(item, author_fallback, author_id_fallback)
|
|
||||||
items << parsed_result
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
break
|
|
||||||
else
|
else
|
||||||
LOGGER.trace("extract_items: Extractor \"#{extractor.extractor_name}\" does not apply. Cycling to the next one...")
|
LOGGER.trace("extract_items: Extractor \"#{extractor.extractor_name}\" does not apply. Cycling to the next one...")
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
# Wrapper using the block function above
|
||||||
|
def extract_items(
|
||||||
|
initial_data : InitialData,
|
||||||
|
author_fallback : String? = nil,
|
||||||
|
author_id_fallback : String? = nil
|
||||||
|
) : Array(SearchItem)
|
||||||
|
items = [] of SearchItem
|
||||||
|
|
||||||
|
extract_items(initial_data) do |item|
|
||||||
|
parsed = parse_item(item, author_fallback, author_id_fallback)
|
||||||
|
items << parsed if !parsed.nil?
|
||||||
|
end
|
||||||
|
|
||||||
return items
|
return items
|
||||||
end
|
end
|
||||||
|
Loading…
Reference in New Issue
Block a user