From 20879a973ce12dbd75224ed082bd9727122a9478 Mon Sep 17 00:00:00 2001 From: Ian Ker-Seymer Date: Wed, 15 Nov 2023 12:51:15 -0500 Subject: [PATCH] Memoize regexps for common character classes (#524) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This improves the performance of `Addressable::URI#encode` and `Addressable::URI#encode_component` by memoizing the default character class regexes instead of recompiling them on every call. This results in the following performance improvements: ### `Addressable::URI#encode_component` Addressable::URI#encode_component (old) 95.417k (± 1.4%) i/s - 484.347k in 5.077072s Addressable::URI#encode_component (new) 426.794k (± 1.6%) i/s - 2.134M in 5.001961s ### `Addressable::URI#encode` benchmark Addressable::URI#encode (old) 20.800k (± 1.2%) i/s - 105.213k in 5.058973s Addressable::URI#encode (new) 34.344k (± 2.3%) i/s - 174.794k in 5.092458s --- lib/addressable/uri.rb | 51 +++++++++++++++++++++++++----------------- 1 file changed, 31 insertions(+), 20 deletions(-) diff --git a/lib/addressable/uri.rb b/lib/addressable/uri.rb index 08ee3348..2fc87eec 100644 --- a/lib/addressable/uri.rb +++ b/lib/addressable/uri.rb @@ -50,6 +50,7 @@ module CharacterClasses SUB_DELIMS = "\\!\\$\\&\\'\\(\\)\\*\\+\\,\\;\\=" RESERVED = (GEN_DELIMS + SUB_DELIMS).freeze UNRESERVED = (ALPHA + DIGIT + "\\-\\.\\_\\~").freeze + RESERVED_AND_UNRESERVED = RESERVED + UNRESERVED PCHAR = (UNRESERVED + SUB_DELIMS + "\\:\\@").freeze SCHEME = (ALPHA + DIGIT + "\\-\\+\\.").freeze HOST = (UNRESERVED + SUB_DELIMS + "\\[\\:\\]").freeze @@ -68,6 +69,18 @@ module NormalizeCharacterClasses QUERY = %r{[^a-zA-Z0-9\-\.\_\~\!\$\'\(\)\*\+\,\=\:\@\/\?%]|%(?!2B|2b)} end + module CharacterClassesRegexps + AUTHORITY = /[^#{CharacterClasses::AUTHORITY}]/ + FRAGMENT = /[^#{CharacterClasses::FRAGMENT}]/ + HOST = /[^#{CharacterClasses::HOST}]/ + PATH = /[^#{CharacterClasses::PATH}]/ + QUERY = /[^#{CharacterClasses::QUERY}]/ + RESERVED = /[^#{CharacterClasses::RESERVED}]/ + RESERVED_AND_UNRESERVED = /[^#{CharacterClasses::RESERVED_AND_UNRESERVED}]/ + SCHEME = /[^#{CharacterClasses::SCHEME}]/ + UNRESERVED = /[^#{CharacterClasses::UNRESERVED}]/ + end + SLASH = '/' EMPTY_STR = '' @@ -387,9 +400,7 @@ def self.join(*uris) # "simple/example", Addressable::URI::CharacterClasses::UNRESERVED # ) # => "simple%2Fexample" - def self.encode_component(component, character_class= - CharacterClasses::RESERVED + CharacterClasses::UNRESERVED, - upcase_encoded='') + def self.encode_component(component, character_class=CharacterClassesRegexps::RESERVED_AND_UNRESERVED, upcase_encoded='') return nil if component.nil? begin @@ -539,7 +550,7 @@ class << self # ) # => "one two%2Fthree&four" def self.normalize_component(component, character_class= - CharacterClasses::RESERVED + CharacterClasses::UNRESERVED, + CharacterClassesRegexps::RESERVED_AND_UNRESERVED, leave_encoded='') return nil if component.nil? @@ -619,15 +630,15 @@ def self.encode(uri, return_type=String) uri_object = uri.kind_of?(self) ? uri : self.parse(uri) encoded_uri = Addressable::URI.new( :scheme => self.encode_component(uri_object.scheme, - Addressable::URI::CharacterClasses::SCHEME), + Addressable::URI::CharacterClassesRegexps::SCHEME), :authority => self.encode_component(uri_object.authority, - Addressable::URI::CharacterClasses::AUTHORITY), + Addressable::URI::CharacterClassesRegexps::AUTHORITY), :path => self.encode_component(uri_object.path, - Addressable::URI::CharacterClasses::PATH), + Addressable::URI::CharacterClassesRegexps::PATH), :query => self.encode_component(uri_object.query, - Addressable::URI::CharacterClasses::QUERY), + Addressable::URI::CharacterClassesRegexps::QUERY), :fragment => self.encode_component(uri_object.fragment, - Addressable::URI::CharacterClasses::FRAGMENT) + Addressable::URI::CharacterClassesRegexps::FRAGMENT) ) if return_type == String return encoded_uri.to_s @@ -692,19 +703,19 @@ def self.normalized_encode(uri, return_type=String) end encoded_uri = Addressable::URI.new( :scheme => self.encode_component(components[:scheme], - Addressable::URI::CharacterClasses::SCHEME), + Addressable::URI::CharacterClassesRegexps::SCHEME), :user => self.encode_component(components[:user], - Addressable::URI::CharacterClasses::UNRESERVED), + Addressable::URI::CharacterClassesRegexps::UNRESERVED), :password => self.encode_component(components[:password], - Addressable::URI::CharacterClasses::UNRESERVED), + Addressable::URI::CharacterClassesRegexps::UNRESERVED), :host => components[:host], :port => components[:port], :path => self.encode_component(components[:path], - Addressable::URI::CharacterClasses::PATH), + Addressable::URI::CharacterClassesRegexps::PATH), :query => self.encode_component(components[:query], - Addressable::URI::CharacterClasses::QUERY), + Addressable::URI::CharacterClassesRegexps::QUERY), :fragment => self.encode_component(components[:fragment], - Addressable::URI::CharacterClasses::FRAGMENT) + Addressable::URI::CharacterClassesRegexps::FRAGMENT) ) if return_type == String return encoded_uri.to_s @@ -755,11 +766,11 @@ def self.form_encode(form_values, sort=false) [ self.encode_component( key.gsub(/(\r\n|\n|\r)/, "\r\n"), - CharacterClasses::UNRESERVED + CharacterClassesRegexps::UNRESERVED ).gsub("%20", "+"), self.encode_component( value.gsub(/(\r\n|\n|\r)/, "\r\n"), - CharacterClasses::UNRESERVED + CharacterClassesRegexps::UNRESERVED ).gsub("%20", "+") ] end @@ -1734,20 +1745,20 @@ def query_values=(new_query_values) buffer = "".dup new_query_values.each do |key, value| encoded_key = URI.encode_component( - key, CharacterClasses::UNRESERVED + key, CharacterClassesRegexps::UNRESERVED ) if value == nil buffer << "#{encoded_key}&" elsif value.kind_of?(Array) value.each do |sub_value| encoded_value = URI.encode_component( - sub_value, CharacterClasses::UNRESERVED + sub_value, CharacterClassesRegexps::UNRESERVED ) buffer << "#{encoded_key}=#{encoded_value}&" end else encoded_value = URI.encode_component( - value, CharacterClasses::UNRESERVED + value, CharacterClassesRegexps::UNRESERVED ) buffer << "#{encoded_key}=#{encoded_value}&" end