Implement "fields" parameter from the YouTube Data API (#429)
* Implement fields handlingpull/466/head
parent
1b78001201
commit
a7723e6ded
@ -0,0 +1,248 @@
|
|||||||
|
module JSONFilter
|
||||||
|
alias BracketIndex = Hash(Int64, Int64)
|
||||||
|
|
||||||
|
alias GroupedFieldsValue = String | Array(GroupedFieldsValue)
|
||||||
|
alias GroupedFieldsList = Array(GroupedFieldsValue)
|
||||||
|
|
||||||
|
class FieldsParser
|
||||||
|
class ParseError < Exception
|
||||||
|
end
|
||||||
|
|
||||||
|
# Returns the `Regex` pattern used to match nest groups
|
||||||
|
def self.nest_group_pattern : Regex
|
||||||
|
# uses a '.' character to match json keys as they are allowed
|
||||||
|
# to contain any unicode codepoint
|
||||||
|
/(?:|,)(?<groupname>[^,\n]*?)\(/
|
||||||
|
end
|
||||||
|
|
||||||
|
# Returns the `Regex` pattern used to check if there are any empty nest groups
|
||||||
|
def self.unnamed_nest_group_pattern : Regex
|
||||||
|
/^\(|\(\(|\/\(/
|
||||||
|
end
|
||||||
|
|
||||||
|
def self.parse_fields(fields_text : String) : Nil
|
||||||
|
if fields_text.empty?
|
||||||
|
raise FieldsParser::ParseError.new "Fields is empty"
|
||||||
|
end
|
||||||
|
|
||||||
|
opening_bracket_count = fields_text.count('(')
|
||||||
|
closing_bracket_count = fields_text.count(')')
|
||||||
|
|
||||||
|
if opening_bracket_count != closing_bracket_count
|
||||||
|
bracket_type = opening_bracket_count > closing_bracket_count ? "opening" : "closing"
|
||||||
|
raise FieldsParser::ParseError.new "There are too many #{bracket_type} brackets (#{opening_bracket_count}:#{closing_bracket_count})"
|
||||||
|
elsif match_result = unnamed_nest_group_pattern.match(fields_text)
|
||||||
|
raise FieldsParser::ParseError.new "Unnamed nest group at position #{match_result.begin}"
|
||||||
|
end
|
||||||
|
|
||||||
|
# first, handle top-level single nested properties: items/id, playlistItems/snippet, etc
|
||||||
|
parse_single_nests(fields_text) { |nest_list| yield nest_list }
|
||||||
|
|
||||||
|
# next, handle nest groups: items(id, etag, etc)
|
||||||
|
parse_nest_groups(fields_text) { |nest_list| yield nest_list }
|
||||||
|
end
|
||||||
|
|
||||||
|
def self.parse_single_nests(fields_text : String) : Nil
|
||||||
|
single_nests = remove_nest_groups(fields_text)
|
||||||
|
|
||||||
|
if !single_nests.empty?
|
||||||
|
property_nests = single_nests.split(',')
|
||||||
|
|
||||||
|
property_nests.each do |nest|
|
||||||
|
nest_list = nest.split('/')
|
||||||
|
if nest_list.includes? ""
|
||||||
|
raise FieldsParser::ParseError.new "Empty key in nest list: #{nest_list}"
|
||||||
|
end
|
||||||
|
yield nest_list
|
||||||
|
end
|
||||||
|
# else
|
||||||
|
# raise FieldsParser::ParseError.new "Empty key in nest list 22: #{fields_text} | #{single_nests}"
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
def self.parse_nest_groups(fields_text : String) : Nil
|
||||||
|
nest_stack = [] of NamedTuple(group_name: String, closing_bracket_index: Int64)
|
||||||
|
bracket_pairs = get_bracket_pairs(fields_text, true)
|
||||||
|
|
||||||
|
text_index = 0
|
||||||
|
regex_index = 0
|
||||||
|
|
||||||
|
while regex_result = self.nest_group_pattern.match(fields_text, regex_index)
|
||||||
|
raw_match = regex_result[0]
|
||||||
|
group_name = regex_result["groupname"]
|
||||||
|
|
||||||
|
text_index = regex_result.begin
|
||||||
|
regex_index = regex_result.end
|
||||||
|
|
||||||
|
if text_index.nil? || regex_index.nil?
|
||||||
|
raise FieldsParser::ParseError.new "Received invalid index while parsing nest groups: text_index: #{text_index} | regex_index: #{regex_index}"
|
||||||
|
end
|
||||||
|
|
||||||
|
offset = raw_match.starts_with?(',') ? 1 : 0
|
||||||
|
|
||||||
|
opening_bracket_index = (text_index + group_name.size) + offset
|
||||||
|
closing_bracket_index = bracket_pairs[opening_bracket_index]
|
||||||
|
content_start = opening_bracket_index + 1
|
||||||
|
|
||||||
|
content = fields_text[content_start...closing_bracket_index]
|
||||||
|
|
||||||
|
if content.empty?
|
||||||
|
raise FieldsParser::ParseError.new "Empty nest group at position #{content_start}"
|
||||||
|
else
|
||||||
|
content = remove_nest_groups(content)
|
||||||
|
end
|
||||||
|
|
||||||
|
while nest_stack.size > 0 && closing_bracket_index > nest_stack[nest_stack.size - 1][:closing_bracket_index]
|
||||||
|
if nest_stack.size
|
||||||
|
nest_stack.pop
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
group_name.split('/').each do |group_name|
|
||||||
|
nest_stack.push({
|
||||||
|
group_name: group_name,
|
||||||
|
closing_bracket_index: closing_bracket_index,
|
||||||
|
})
|
||||||
|
end
|
||||||
|
|
||||||
|
if !content.empty?
|
||||||
|
properties = content.split(',')
|
||||||
|
|
||||||
|
properties.each do |prop|
|
||||||
|
nest_list = nest_stack.map { |nest_prop| nest_prop[:group_name] }
|
||||||
|
|
||||||
|
if !prop.empty?
|
||||||
|
if prop.includes?('/')
|
||||||
|
parse_single_nests(prop) { |list| nest_list += list }
|
||||||
|
else
|
||||||
|
nest_list.push prop
|
||||||
|
end
|
||||||
|
else
|
||||||
|
raise FieldsParser::ParseError.new "Empty key in nest list: #{nest_list << prop}"
|
||||||
|
end
|
||||||
|
|
||||||
|
yield nest_list
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
def self.remove_nest_groups(text : String) : String
|
||||||
|
content_bracket_pairs = get_bracket_pairs(text, false)
|
||||||
|
|
||||||
|
content_bracket_pairs.each_key.to_a.reverse.each do |opening_bracket|
|
||||||
|
closing_bracket = content_bracket_pairs[opening_bracket]
|
||||||
|
last_comma = text.rindex(',', opening_bracket) || 0
|
||||||
|
|
||||||
|
text = text[0...last_comma] + text[closing_bracket + 1...text.size]
|
||||||
|
end
|
||||||
|
|
||||||
|
return text.starts_with?(',') ? text[1...text.size] : text
|
||||||
|
end
|
||||||
|
|
||||||
|
def self.get_bracket_pairs(text : String, recursive = true) : BracketIndex
|
||||||
|
istart = [] of Int64
|
||||||
|
bracket_index = BracketIndex.new
|
||||||
|
|
||||||
|
text.each_char_with_index do |char, index|
|
||||||
|
if char == '('
|
||||||
|
istart.push(index.to_i64)
|
||||||
|
end
|
||||||
|
|
||||||
|
if char == ')'
|
||||||
|
begin
|
||||||
|
opening = istart.pop
|
||||||
|
if recursive || (!recursive && istart.size == 0)
|
||||||
|
bracket_index[opening] = index.to_i64
|
||||||
|
end
|
||||||
|
rescue
|
||||||
|
raise FieldsParser::ParseError.new "No matching opening parenthesis at: #{index}"
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
if istart.size != 0
|
||||||
|
idx = istart.pop
|
||||||
|
raise FieldsParser::ParseError.new "No matching closing parenthesis at: #{idx}"
|
||||||
|
end
|
||||||
|
|
||||||
|
return bracket_index
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
class FieldsGrouper
|
||||||
|
alias SkeletonValue = Hash(String, SkeletonValue)
|
||||||
|
|
||||||
|
def self.create_json_skeleton(fields_text : String) : SkeletonValue
|
||||||
|
root_hash = {} of String => SkeletonValue
|
||||||
|
|
||||||
|
FieldsParser.parse_fields(fields_text) do |nest_list|
|
||||||
|
current_item = root_hash
|
||||||
|
nest_list.each do |key|
|
||||||
|
if current_item[key]?
|
||||||
|
current_item = current_item[key]
|
||||||
|
else
|
||||||
|
current_item[key] = {} of String => SkeletonValue
|
||||||
|
current_item = current_item[key]
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
||||||
|
root_hash
|
||||||
|
end
|
||||||
|
|
||||||
|
def self.create_grouped_fields_list(json_skeleton : SkeletonValue) : GroupedFieldsList
|
||||||
|
grouped_fields_list = GroupedFieldsList.new
|
||||||
|
json_skeleton.each do |key, value|
|
||||||
|
grouped_fields_list.push key
|
||||||
|
|
||||||
|
nested_keys = create_grouped_fields_list(value)
|
||||||
|
grouped_fields_list.push nested_keys unless nested_keys.empty?
|
||||||
|
end
|
||||||
|
return grouped_fields_list
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
class FilterError < Exception
|
||||||
|
end
|
||||||
|
|
||||||
|
def self.filter(item : JSON::Any, fields_text : String, in_place : Bool = true)
|
||||||
|
skeleton = FieldsGrouper.create_json_skeleton(fields_text)
|
||||||
|
grouped_fields_list = FieldsGrouper.create_grouped_fields_list(skeleton)
|
||||||
|
filter(item, grouped_fields_list, in_place)
|
||||||
|
end
|
||||||
|
|
||||||
|
def self.filter(item : JSON::Any, grouped_fields_list : GroupedFieldsList, in_place : Bool = true) : JSON::Any
|
||||||
|
item = item.clone unless in_place
|
||||||
|
|
||||||
|
if !item.as_h? && !item.as_a?
|
||||||
|
raise FilterError.new "Can't filter '#{item}' by #{grouped_fields_list}"
|
||||||
|
end
|
||||||
|
|
||||||
|
top_level_keys = Array(String).new
|
||||||
|
grouped_fields_list.each do |value|
|
||||||
|
if value.is_a? String
|
||||||
|
top_level_keys.push value
|
||||||
|
elsif value.is_a? Array
|
||||||
|
if !top_level_keys.empty?
|
||||||
|
key_to_filter = top_level_keys.last
|
||||||
|
|
||||||
|
if item.as_h?
|
||||||
|
filter(item[key_to_filter], value, in_place: true)
|
||||||
|
elsif item.as_a?
|
||||||
|
item.as_a.each { |arr_item| filter(arr_item[key_to_filter], value, in_place: true) }
|
||||||
|
end
|
||||||
|
else
|
||||||
|
raise FilterError.new "Tried to filter while top level keys list is empty"
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
if item.as_h?
|
||||||
|
item.as_h.select! top_level_keys
|
||||||
|
elsif item.as_a?
|
||||||
|
item.as_a.map { |value| filter(value, top_level_keys, in_place: true) }
|
||||||
|
end
|
||||||
|
|
||||||
|
item
|
||||||
|
end
|
||||||
|
end
|
Loading…
Reference in New Issue