Skip to content

Commit

Permalink
Memoize regexps for common character classes (#524)
Browse files Browse the repository at this point in the history
This improves the performance of `Addressable::URI.encode` and
`Addressable::URI.encode_component` by memoizing the default character class
regexps instead of recompiling them on every call. This results in the following
performance improvements:

### `Addressable::URI.encode_component` benchmark


    Addressable::URI#encode_component (old)
                             95.417k (± 1.4%) i/s -    484.347k in   5.077072s

    Addressable::URI#encode_component (new)
                            426.794k (± 1.6%) i/s -      2.134M in   5.001961s

### `Addressable::URI.encode` benchmark

    Addressable::URI#encode (old)
                             20.800k (± 1.2%) i/s -    105.213k in   5.058973s

    Addressable::URI#encode (new)
                             34.344k (± 2.3%) i/s -    174.794k in   5.092458s
  • Loading branch information
ianks authored Nov 15, 2023
1 parent 60feb48 commit 20879a9
Showing 1 changed file with 31 additions and 20 deletions.
51 changes: 31 additions & 20 deletions lib/addressable/uri.rb
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ module CharacterClasses
SUB_DELIMS = "\\!\\$\\&\\'\\(\\)\\*\\+\\,\\;\\="
RESERVED = (GEN_DELIMS + SUB_DELIMS).freeze
UNRESERVED = (ALPHA + DIGIT + "\\-\\.\\_\\~").freeze
# Concatenation of the RESERVED and UNRESERVED class strings. String#+ returns
# a new, unfrozen String, so freeze it explicitly to keep the constant
# immutable.
RESERVED_AND_UNRESERVED = (RESERVED + UNRESERVED).freeze
PCHAR = (UNRESERVED + SUB_DELIMS + "\\:\\@").freeze
SCHEME = (ALPHA + DIGIT + "\\-\\+\\.").freeze
HOST = (UNRESERVED + SUB_DELIMS + "\\[\\:\\]").freeze
Expand All @@ -68,6 +69,18 @@ module NormalizeCharacterClasses
QUERY = %r{[^a-zA-Z0-9\-\.\_\~\!\$\'\(\)\*\+\,\=\:\@\/\?%]|%(?!2B|2b)}
end

# Precompiled regexps, one per CharacterClasses string listed below. Each
# one is a negated character class: it matches a single character that is
# NOT a member of the corresponding CharacterClasses constant. They are
# built once when this module body is evaluated, so callers can pass a
# ready-made Regexp instead of a character-class string.
module CharacterClassesRegexps
  AUTHORITY = %r{[^#{CharacterClasses::AUTHORITY}]}
  FRAGMENT = %r{[^#{CharacterClasses::FRAGMENT}]}
  HOST = %r{[^#{CharacterClasses::HOST}]}
  PATH = %r{[^#{CharacterClasses::PATH}]}
  QUERY = %r{[^#{CharacterClasses::QUERY}]}
  RESERVED = %r{[^#{CharacterClasses::RESERVED}]}
  RESERVED_AND_UNRESERVED = %r{[^#{CharacterClasses::RESERVED_AND_UNRESERVED}]}
  SCHEME = %r{[^#{CharacterClasses::SCHEME}]}
  UNRESERVED = %r{[^#{CharacterClasses::UNRESERVED}]}
end

SLASH = '/'
EMPTY_STR = ''

Expand Down Expand Up @@ -387,9 +400,7 @@ def self.join(*uris)
# "simple/example", Addressable::URI::CharacterClasses::UNRESERVED
# )
# => "simple%2Fexample"
def self.encode_component(component, character_class=
CharacterClasses::RESERVED + CharacterClasses::UNRESERVED,
upcase_encoded='')
def self.encode_component(component, character_class=CharacterClassesRegexps::RESERVED_AND_UNRESERVED, upcase_encoded='')
return nil if component.nil?

begin
Expand Down Expand Up @@ -539,7 +550,7 @@ class << self
# )
# => "one two%2Fthree&four"
def self.normalize_component(component, character_class=
CharacterClasses::RESERVED + CharacterClasses::UNRESERVED,
CharacterClassesRegexps::RESERVED_AND_UNRESERVED,
leave_encoded='')
return nil if component.nil?

Expand Down Expand Up @@ -619,15 +630,15 @@ def self.encode(uri, return_type=String)
uri_object = uri.kind_of?(self) ? uri : self.parse(uri)
encoded_uri = Addressable::URI.new(
:scheme => self.encode_component(uri_object.scheme,
Addressable::URI::CharacterClasses::SCHEME),
Addressable::URI::CharacterClassesRegexps::SCHEME),
:authority => self.encode_component(uri_object.authority,
Addressable::URI::CharacterClasses::AUTHORITY),
Addressable::URI::CharacterClassesRegexps::AUTHORITY),
:path => self.encode_component(uri_object.path,
Addressable::URI::CharacterClasses::PATH),
Addressable::URI::CharacterClassesRegexps::PATH),
:query => self.encode_component(uri_object.query,
Addressable::URI::CharacterClasses::QUERY),
Addressable::URI::CharacterClassesRegexps::QUERY),
:fragment => self.encode_component(uri_object.fragment,
Addressable::URI::CharacterClasses::FRAGMENT)
Addressable::URI::CharacterClassesRegexps::FRAGMENT)
)
if return_type == String
return encoded_uri.to_s
Expand Down Expand Up @@ -692,19 +703,19 @@ def self.normalized_encode(uri, return_type=String)
end
encoded_uri = Addressable::URI.new(
:scheme => self.encode_component(components[:scheme],
Addressable::URI::CharacterClasses::SCHEME),
Addressable::URI::CharacterClassesRegexps::SCHEME),
:user => self.encode_component(components[:user],
Addressable::URI::CharacterClasses::UNRESERVED),
Addressable::URI::CharacterClassesRegexps::UNRESERVED),
:password => self.encode_component(components[:password],
Addressable::URI::CharacterClasses::UNRESERVED),
Addressable::URI::CharacterClassesRegexps::UNRESERVED),
:host => components[:host],
:port => components[:port],
:path => self.encode_component(components[:path],
Addressable::URI::CharacterClasses::PATH),
Addressable::URI::CharacterClassesRegexps::PATH),
:query => self.encode_component(components[:query],
Addressable::URI::CharacterClasses::QUERY),
Addressable::URI::CharacterClassesRegexps::QUERY),
:fragment => self.encode_component(components[:fragment],
Addressable::URI::CharacterClasses::FRAGMENT)
Addressable::URI::CharacterClassesRegexps::FRAGMENT)
)
if return_type == String
return encoded_uri.to_s
Expand Down Expand Up @@ -755,11 +766,11 @@ def self.form_encode(form_values, sort=false)
[
self.encode_component(
key.gsub(/(\r\n|\n|\r)/, "\r\n"),
CharacterClasses::UNRESERVED
CharacterClassesRegexps::UNRESERVED
).gsub("%20", "+"),
self.encode_component(
value.gsub(/(\r\n|\n|\r)/, "\r\n"),
CharacterClasses::UNRESERVED
CharacterClassesRegexps::UNRESERVED
).gsub("%20", "+")
]
end
Expand Down Expand Up @@ -1734,20 +1745,20 @@ def query_values=(new_query_values)
buffer = "".dup
new_query_values.each do |key, value|
encoded_key = URI.encode_component(
key, CharacterClasses::UNRESERVED
key, CharacterClassesRegexps::UNRESERVED
)
if value == nil
buffer << "#{encoded_key}&"
elsif value.kind_of?(Array)
value.each do |sub_value|
encoded_value = URI.encode_component(
sub_value, CharacterClasses::UNRESERVED
sub_value, CharacterClassesRegexps::UNRESERVED
)
buffer << "#{encoded_key}=#{encoded_value}&"
end
else
encoded_value = URI.encode_component(
value, CharacterClasses::UNRESERVED
value, CharacterClassesRegexps::UNRESERVED
)
buffer << "#{encoded_key}=#{encoded_value}&"
end
Expand Down

0 comments on commit 20879a9

Please sign in to comment.