-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
10 changed files
with
328 additions
and
26 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
# Used by "mix format" | ||
[ | ||
inputs: ["{mix,.formatter}.exs", "{config,lib,test}/**/*.{ex,exs}"] | ||
] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,10 +1,26 @@ | ||
/_build | ||
/cover | ||
/deps | ||
/doc | ||
# The directory Mix will write compiled artifacts to. | ||
/_build/ | ||
|
||
# If you run "mix test --cover", coverage assets end up here. | ||
/cover/ | ||
|
||
# The directory Mix downloads your dependencies sources to. | ||
/deps/ | ||
|
||
# Where third-party dependencies like ExDoc output generated docs. | ||
/doc/ | ||
|
||
# Ignore .fetch files in case you like to edit your project deps locally. | ||
/.fetch | ||
|
||
# If the VM crashes, it generates a dump, let's ignore it too. | ||
erl_crash.dump | ||
|
||
# Also ignore archive artifacts (built via "mix archive.build"). | ||
*.ez | ||
*.beam | ||
/config/*.secret.exs | ||
.elixir_ls/ | ||
|
||
# Ignore package tarball (built via "mix hex.build"). | ||
vasov-*.tar | ||
|
||
# Temporary files, for example, from tests. | ||
/tmp/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
# Vasov | ||
|
||
![Elixir CI](https://github.com/Efesto/vasov/workflows/Elixir%20CI/badge.svg) | ||
[![Hex pm](https://img.shields.io/hexpm/v/vasov.svg?style=flat)](https://hex.pm/packages/vasov) | ||
[![Hexdocs.pm](https://img.shields.io/badge/hex-docs-lightgreen.svg)](https://hexdocs.pm/vasov/) | ||
|
||
Transliterates Bulgarian Cyrillic text to the Latin alphabet. | ||
|
||
The name is an homage to [Ivan Vazov (Иван Вазов)](https://en.wikipedia.org/wiki/Ivan_Vazov). | ||
|
||
## Installation | ||
|
||
The package can be installed by adding `vasov` to your list of dependencies in `mix.exs`: | ||
|
||
```elixir | ||
def deps do | ||
[ | ||
{:vasov, "~> 0.1.0"} | ||
] | ||
end | ||
``` | ||
|
||
## Usage | ||
|
||
```elixir | ||
iex> Vasov.transliterate_to_latin("Бургас") | ||
"Burgas" | ||
iex> Vasov.transliterate_to_latin("София") | ||
"Sofia" | ||
iex> Vasov.transliterate_to_latin("България") | ||
"Bulgaria" | ||
``` |
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,122 @@ | ||
defmodule Vasov do
  @moduledoc """
  Transliterates Bulgarian Cyrillic text to the Latin alphabet.

  The transliteration implements the [streamlined system](https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/1105090/ROMANIZATION_OF_BULGARIAN_with_examples.pdf)
  officially adopted in Bulgaria.
  """

  # Letter-by-letter mapping. Characters that are not keys of this map
  # (Latin letters, digits, punctuation, whitespace) are passed through as-is.
  @bg_to_latin %{
    "а" => "a",
    "б" => "b",
    "в" => "v",
    "г" => "g",
    "д" => "d",
    "е" => "e",
    "ж" => "zh",
    "з" => "z",
    "и" => "i",
    "й" => "y",
    "к" => "k",
    "л" => "l",
    "м" => "m",
    "н" => "n",
    "о" => "o",
    "п" => "p",
    "р" => "r",
    "с" => "s",
    "т" => "t",
    "у" => "u",
    "ф" => "f",
    "х" => "h",
    "ц" => "ts",
    "ч" => "ch",
    "ш" => "sh",
    "щ" => "sht",
    "ъ" => "a",
    "ь" => "y",
    "ю" => "yu",
    "я" => "ya",
    "А" => "A",
    "Б" => "B",
    "В" => "V",
    "Г" => "G",
    "Д" => "D",
    "Е" => "E",
    "Ж" => "Zh",
    "З" => "Z",
    "И" => "I",
    "Й" => "Y",
    "К" => "K",
    "Л" => "L",
    "М" => "M",
    "Н" => "N",
    "О" => "O",
    "П" => "P",
    "Р" => "R",
    "С" => "S",
    "Т" => "T",
    "У" => "U",
    "Ф" => "F",
    "Х" => "H",
    "Ц" => "Ts",
    "Ч" => "Ch",
    "Ш" => "Sh",
    "Щ" => "Sht",
    "Ъ" => "A",
    "Ь" => "Y",
    "Ю" => "Yu",
    "Я" => "Ya"
  }

  @doc """
  Transliterates `text` from Bulgarian Cyrillic to the Latin alphabet.

  Two special cases are applied to every word of `text` before the
  letter-by-letter mapping:

    * a word starting with "България" is rendered with "u" ("Bulgaria"),
      not "a";
    * "ия" at the end of a word is rendered as "ia", not "iya".

  Characters without a mapping are returned unchanged.

  ## Examples

      iex> Vasov.transliterate_to_latin("Бургас")
      "Burgas"
      iex> Vasov.transliterate_to_latin("София")
      "Sofia"
      iex> Vasov.transliterate_to_latin("България")
      "Bulgaria"
      iex> Vasov.transliterate_to_latin("София е столицата на България")
      "Sofia e stolitsata na Bulgaria"

  """
  @spec transliterate_to_latin(String.t()) :: String.t()
  def transliterate_to_latin(text) do
    text
    |> normalize()
    |> String.graphemes()
    |> Enum.map_join(&Map.get(@bg_to_latin, &1, &1))
  end

  # Rewrites the special cases into plain Cyrillic spellings that the
  # letter-by-letter mapping then transliterates correctly.
  defp normalize(text) do
    text
    |> normalize_state_name()
    |> normalize_final_ya()
  end

  # Replaces the "ъ" of a word-initial "българия" with "у", preserving the
  # case of the replaced letter. The leading `\b` (instead of `^`) makes the
  # rule apply to every word of the text, not only to the very first one,
  # while still leaving words such as "Сбългария" untouched.
  defp normalize_state_name(text) do
    Regex.replace(~r/\b(б)(ъ)(лгария)/ui, text, fn _match, first, hard_sign, rest ->
      replacement =
        case hard_sign do
          "ъ" -> "у"
          "Ъ" -> "У"
        end

      first <> replacement <> rest
    end)
  end

  # Replaces a word-final "я" that follows "и" with "а", preserving the case
  # of the replaced letter. The pattern is not anchored to the start of the
  # text, so every word ending in "ия" is rewritten rather than only the
  # last one. As before, at least one character must precede the "и".
  defp normalize_final_ya(text) do
    Regex.replace(~r/(.и)(я)\b/ui, text, fn _match, stem, ending ->
      replacement =
        case ending do
          "я" -> "а"
          "Я" -> "А"
        end

      stem <> replacement
    end)
  end
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
defmodule Vasov.MixProject do
  use Mix.Project

  # Shared constants so the project and docs configuration stay in sync.
  @version "0.1.0"
  @source_url "https://github.com/Efesto/vasov"

  # Mix project configuration.
  def project do
    [
      app: :vasov,
      version: @version,
      elixir: "~> 1.14",
      start_permanent: Mix.env() == :prod,
      deps: deps(),
      docs: docs()
    ]
  end

  # OTP application configuration.
  def application, do: [extra_applications: [:logger]]

  # ExDoc options; the README is published as an extra page.
  defp docs do
    [
      main: "Vasov",
      source_ref: "master",
      source_url: @source_url,
      extras: ["README.md"]
    ]
  end

  # All dependencies are development/test tooling only.
  defp deps, do: dev_and_test_deps() ++ dev_only_deps()

  defp dev_and_test_deps do
    [
      {:credo, "~> 1.6", only: [:dev, :test], runtime: false},
      {:mix_audit, "~> 2.0", only: [:dev, :test], runtime: false}
    ]
  end

  defp dev_only_deps do
    [
      {:ex_doc, ">= 0.0.0", only: :dev, runtime: false},
      {:mix_test_interactive, "~> 1.0", only: :dev, runtime: false}
    ]
  end
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
%{ | ||
"bunt": {:hex, :bunt, "0.2.1", "e2d4792f7bc0ced7583ab54922808919518d0e57ee162901a16a1b6664ef3b14", [:mix], [], "hexpm", "a330bfb4245239787b15005e66ae6845c9cd524a288f0d141c148b02603777a5"}, | ||
"credo": {:hex, :credo, "1.6.7", "323f5734350fd23a456f2688b9430e7d517afb313fbd38671b8a4449798a7854", [:mix], [{:bunt, "~> 0.2.1", [hex: :bunt, repo: "hexpm", optional: false]}, {:file_system, "~> 0.2.8", [hex: :file_system, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: false]}], "hexpm", "41e110bfb007f7eda7f897c10bf019ceab9a0b269ce79f015d54b0dcf4fc7dd3"}, | ||
"earmark_parser": {:hex, :earmark_parser, "1.4.30", "0b938aa5b9bafd455056440cdaa2a79197ca5e693830b4a982beada840513c5f", [:mix], [], "hexpm", "3b5385c2d36b0473d0b206927b841343d25adb14f95f0110062506b300cd5a1b"}, | ||
"ex_doc": {:hex, :ex_doc, "0.29.1", "b1c652fa5f92ee9cf15c75271168027f92039b3877094290a75abcaac82a9f77", [:mix], [{:earmark_parser, "~> 1.4.19", [hex: :earmark_parser, repo: "hexpm", optional: false]}, {:makeup_elixir, "~> 0.14", [hex: :makeup_elixir, repo: "hexpm", optional: false]}, {:makeup_erlang, "~> 0.1", [hex: :makeup_erlang, repo: "hexpm", optional: false]}], "hexpm", "b7745fa6374a36daf484e2a2012274950e084815b936b1319aeebcf7809574f6"}, | ||
"file_system": {:hex, :file_system, "0.2.10", "fb082005a9cd1711c05b5248710f8826b02d7d1784e7c3451f9c1231d4fc162d", [:mix], [], "hexpm", "41195edbfb562a593726eda3b3e8b103a309b733ad25f3d642ba49696bf715dc"}, | ||
"jason": {:hex, :jason, "1.4.0", "e855647bc964a44e2f67df589ccf49105ae039d4179db7f6271dfd3843dc27e6", [:mix], [{:decimal, "~> 1.0 or ~> 2.0", [hex: :decimal, repo: "hexpm", optional: true]}], "hexpm", "79a3791085b2a0f743ca04cec0f7be26443738779d09302e01318f97bdb82121"}, | ||
"makeup": {:hex, :makeup, "1.1.0", "6b67c8bc2882a6b6a445859952a602afc1a41c2e08379ca057c0f525366fc3ca", [:mix], [{:nimble_parsec, "~> 1.2.2 or ~> 1.3", [hex: :nimble_parsec, repo: "hexpm", optional: false]}], "hexpm", "0a45ed501f4a8897f580eabf99a2e5234ea3e75a4373c8a52824f6e873be57a6"}, | ||
"makeup_elixir": {:hex, :makeup_elixir, "0.16.0", "f8c570a0d33f8039513fbccaf7108c5d750f47d8defd44088371191b76492b0b", [:mix], [{:makeup, "~> 1.0", [hex: :makeup, repo: "hexpm", optional: false]}, {:nimble_parsec, "~> 1.2.3", [hex: :nimble_parsec, repo: "hexpm", optional: false]}], "hexpm", "28b2cbdc13960a46ae9a8858c4bebdec3c9a6d7b4b9e7f4ed1502f8159f338e7"}, | ||
"makeup_erlang": {:hex, :makeup_erlang, "0.1.1", "3fcb7f09eb9d98dc4d208f49cc955a34218fc41ff6b84df7c75b3e6e533cc65f", [:mix], [{:makeup, "~> 1.0", [hex: :makeup, repo: "hexpm", optional: false]}], "hexpm", "174d0809e98a4ef0b3309256cbf97101c6ec01c4ab0b23e926a9e17df2077cbb"}, | ||
"mix_audit": {:hex, :mix_audit, "2.1.0", "3c0dafb29114dffcdb508164a3d35311a9ac2c5baeba6495c9cd5315c25902b9", [:make, :mix], [{:jason, "~> 1.1", [hex: :jason, repo: "hexpm", optional: false]}, {:yaml_elixir, "~> 2.9", [hex: :yaml_elixir, repo: "hexpm", optional: false]}], "hexpm", "14c57a23e0a5f652c1e7f6e8dab93f166f66d63bd0c85f97278f5972b14e2be0"}, | ||
"mix_test_interactive": {:hex, :mix_test_interactive, "1.2.2", "72f72faa7007d6cb9634ee5f6989b25ee5b194c5729e5e45a962e68b2e217374", [:mix], [{:file_system, "~> 0.2", [hex: :file_system, repo: "hexpm", optional: false]}, {:typed_struct, "~> 0.3.0", [hex: :typed_struct, repo: "hexpm", optional: false]}], "hexpm", "f49f2a70d00aee93418506dde4d95387fe56bdba501ef9d2aa06ea07d4823508"}, | ||
"nimble_parsec": {:hex, :nimble_parsec, "1.2.3", "244836e6e3f1200c7f30cb56733fd808744eca61fd182f731eac4af635cc6d0b", [:mix], [], "hexpm", "c8d789e39b9131acf7b99291e93dae60ab48ef14a7ee9d58c6964f59efb570b0"}, | ||
"typed_struct": {:hex, :typed_struct, "0.3.0", "939789e3c1dca39d7170c87f729127469d1315dcf99fee8e152bb774b17e7ff7", [:mix], [], "hexpm", "c50bd5c3a61fe4e198a8504f939be3d3c85903b382bde4865579bc23111d1b6d"}, | ||
"yamerl": {:hex, :yamerl, "0.10.0", "4ff81fee2f1f6a46f1700c0d880b24d193ddb74bd14ef42cb0bcf46e81ef2f8e", [:rebar3], [], "hexpm", "346adb2963f1051dc837a2364e4acf6eb7d80097c0f53cbdc3046ec8ec4b4e6e"}, | ||
"yaml_elixir": {:hex, :yaml_elixir, "2.9.0", "9a256da867b37b8d2c1ffd5d9de373a4fda77a32a45b452f1708508ba7bbcb53", [:mix], [{:yamerl, "~> 0.10", [hex: :yamerl, repo: "hexpm", optional: false]}], "hexpm", "0cb0e7d4c56f5e99a6253ed1a670ed0e39c13fc45a6da054033928607ac08dfc"}, | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
ExUnit.start() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,89 @@ | ||
defmodule VasovTest do
  # The tests only call a pure function, so they can safely run concurrently.
  use ExUnit.Case, async: true

  doctest Vasov

  describe "transliterate_to_latin/1" do
    test "transliterates from bulgarian cyrillic to latin" do
      assert_transliterations([
        {"основните", "osnovnite"},
        {"Асеновград", "Asenovgrad"},
        {"Враца", "Vratsa"},
        {"Габрово", "Gabrovo"},
        {"Добрич", "Dobrich"},
        {"Елхово", "Elhovo"},
        {"Кърджали", "Kardzhali"},
        {"Златица", "Zlatitsa"},
        {"Исперих", "Isperih"},
        {"Айтос", "Aytos"},
        {"Казанлък", "Kazanlak"},
        {"Ловеч", "Lovech"},
        {"Монтана", "Montana"},
        {"Никопол", "Nikopol"},
        {"Омуртаг", "Omurtag"},
        {"Пловдив", "Plovdiv"},
        {"Разград", "Razgrad"},
        {"Твърдица", "Tvarditsa"},
        {"Угърчин", "Ugarchin"},
        {"Хасково", "Haskovo"},
        {"Царево", "Tsarevo"},
        {"Чирпан", "Chirpan"},
        {"Шумен", "Shumen"},
        {"Търговище", "Targovishte"},
        {"Гълъбово", "Galabovo"},
        {"каньон", "kanyon"},
        {"Кюстендил", "Kyustendil"},
        {"Ямбол", "Yambol"},
        {"мира", "mira"},
        {"Софиа е ного красива", "Sofia e nogo krasiva"},
        {"Аз обичам лутеница", "Az obicham lutenitsa"},
        {"Ти си чувек", "Ti si chuvek"},
        {"хелло Маик, хелло Джое", "hello Maik, hello Dzhoe"}
      ])
    end

    test "when ия is in final position is transliterated with ia" do
      assert_transliterations([
        {"Пияница", "Piyanitsa"},
        {"Катия", "Katia"},
        {"СофИя", "SofIa"},
        {"СофиЯ", "SofiA"},
        {"фият", "fiyat"},
        {"София-Град", "Sofia-Grad"}
      ])
    end

    test "България is always transliterated in Bulgaria" do
      assert_transliterations([
        {"българия", "bulgaria"},
        {"бУлГариЯ", "bUlGariA"},
        {"България-град", "Bulgaria-grad"},
        {"Българияград", "Bulgariyagrad"},
        {"Сбългария", "Sbalgaria"}
      ])
    end

    test "does not transliterate latin letters" do
      assert_transliteration("bulgaria", "bulgaria")
    end
  end

  # Checks every `{cyrillic, expected_latin}` pair in `pairs`.
  defp assert_transliterations(pairs) do
    Enum.each(pairs, fn {word, trans} -> assert_transliteration(trans, word) end)
  end

  # Asserts that `original` transliterates to `expected`. The failure message
  # names the input because a single test iterates over many pairs.
  defp assert_transliteration(expected, original) do
    actual = Vasov.transliterate_to_latin(original)

    assert actual == expected,
           "#{inspect(original)}: expected #{inspect(expected)}, got #{inspect(actual)}"
  end
end