From a0dc9c9ce4d229f82f6303c0d8f33dd6de28ec10 Mon Sep 17 00:00:00 2001 From: Jonathan Sutherland Date: Fri, 23 Nov 2018 20:22:32 -0400 Subject: [PATCH 1/2] Use unicode modifier to properly support unicode matches --- lib/slugger.ex | 4 ++-- test/slugger_test.exs | 12 ++++++++++-- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/lib/slugger.ex b/lib/slugger.ex index 2658066..493e142 100644 --- a/lib/slugger.ex +++ b/lib/slugger.ex @@ -39,7 +39,7 @@ defmodule Slugger do text |> handle_possessives |> replace_special_chars - |> remove_unwanted_chars(separator, ~r/([^A-Za-z0-9가-힣])+/) + |> remove_unwanted_chars(separator, ~r/([^A-Za-z0-9가-힣])+/u) end @doc """ @@ -66,7 +66,7 @@ defmodule Slugger do |> handle_possessives |> replace_special_chars |> String.downcase - |> remove_unwanted_chars(separator, ~r/([^a-z0-9가-힣])+/) + |> remove_unwanted_chars(separator, ~r/([^a-z0-9가-힣])+/u) end @spec remove_unwanted_chars(text :: String.t, separator :: char, pattern :: Regex.t) :: String.t diff --git a/test/slugger_test.exs b/test/slugger_test.exs index 628149d..22e9090 100644 --- a/test/slugger_test.exs +++ b/test/slugger_test.exs @@ -28,7 +28,7 @@ defmodule SluggerTest do assert Slugger.slugify(" A B C ") == "A-B-C" assert Slugger.slugify(" A B C ", ?_) == "A_B_C" end - + test "replace multiple seperator inside and outside" do assert Slugger.slugify("--a--b c - - - ") == "a-b-c" end @@ -44,8 +44,12 @@ defmodule SluggerTest do assert Slugger.slugify("Sheep's Milk") == "Sheeps-Milk" end + test "removing unwanted unicode characters" do + assert Slugger.slugify("abc 😀") == "abc" + end + #--- slugify_downcase() - + test "string to lower" do assert Slugger.slugify_downcase("ABC") == "abc" end @@ -71,6 +75,10 @@ defmodule SluggerTest do assert Slugger.slugify_downcase("Sheep's Milk") == "sheeps-milk" end + test "removing unwanted unicode characters lowercase" do + assert Slugger.slugify_downcase("abc 😀") == "abc" + end + #--- Naughty strings test "naughty strings" do From 
86cd0388c84f23921f341a051737384169b7c659 Mon Sep 17 00:00:00 2001 From: Jonathan Sutherland Date: Fri, 23 Nov 2018 20:28:29 -0400 Subject: [PATCH 2/2] Fix test that uses out-of-range characters MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The characters "ㅈㅓㅇㅅㅜㅇㅕㄴ" do not fall between `가-힣` --- test/slugger_test.exs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/slugger_test.exs b/test/slugger_test.exs index 22e9090..e1eaf62 100644 --- a/test/slugger_test.exs +++ b/test/slugger_test.exs @@ -21,7 +21,7 @@ defmodule SluggerTest do end test "removing space at ending and ending with korean chars" do - assert Slugger.slugify(" \n \t \n ㅈㅓㅇㅅㅜㅇㅕㄴ for 정수연 \n \t \n ") == "ㅈㅓㅇㅅㅜㅇㅕㄴ-for-정수연" + assert Slugger.slugify(" \n \t \n 정수연 for 정수연 \n \t \n ") == "정수연-for-정수연" end test "replace whitespace inside with seperator" do