Skip to content

Commit

Permalink
First commit
Browse files Browse the repository at this point in the history
  • Loading branch information
Efesto authored and Marco Polita committed Feb 21, 2023
1 parent d85b24d commit 8133c60
Show file tree
Hide file tree
Showing 10 changed files with 328 additions and 26 deletions.
4 changes: 4 additions & 0 deletions .formatter.exs
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Used by "mix format"
# `inputs` lists the glob patterns of the files the formatter processes:
# the mix.exs and .formatter.exs files themselves, plus every .ex/.exs file
# found under the config, lib and test directories.
[
inputs: ["{mix,.formatter}.exs", "{config,lib,test}/**/*.{ex,exs}"]
]
4 changes: 2 additions & 2 deletions .github/workflows/elixir.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@ jobs:
runs-on: ubuntu-20.04
strategy:
matrix:
otp: ["25.1"]
elixir: ["1.14.2"]
elixir: ["1.14.3"]
otp: ["23.3", "24.3.4", "25.2.3"]
steps:
- uses: actions/checkout@v3
- name: Set up Elixir
Expand Down
30 changes: 23 additions & 7 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,10 +1,26 @@
/_build
/cover
/deps
/doc
# The directory Mix will write compiled artifacts to.
/_build/

# If you run "mix test --cover", coverage assets end up here.
/cover/

# The directory Mix downloads your dependencies sources to.
/deps/

# Where third-party dependencies like ExDoc output generated docs.
/doc/

# Ignore .fetch files in case you like to edit your project deps locally.
/.fetch

# If the VM crashes, it generates a dump, let's ignore it too.
erl_crash.dump

# Also ignore archive artifacts (built via "mix archive.build").
*.ez
*.beam
/config/*.secret.exs
.elixir_ls/

# Ignore package tarball (built via "mix hex.build").
vasov-*.tar

# Temporary files, for example, from tests.
/tmp/
32 changes: 32 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# Vasov

![Elixir CI](https://github.com/Efesto/vasov/workflows/Elixir%20CI/badge.svg)
[![Hex pm](https://img.shields.io/hexpm/v/vasov.svg?style=flat)](https://hex.pm/packages/vasov)
[![Hexdocs.pm](https://img.shields.io/badge/hex-docs-lightgreen.svg)](https://hexdocs.pm/vasov/)

Transliterates Bulgarian Cyrillic text to the Latin alphabet.

The name is an homage to [Ivan Vazov (Иван Вазов)](https://en.wikipedia.org/wiki/Ivan_Vazov).

## Installation

The package can be installed by adding `vasov` to your list of dependencies in `mix.exs`:

```elixir
def deps do
[
{:vasov, "~> 0.1.0"}
]
end
```

## Usage

```Elixir
iex> Vasov.transliterate_to_latin("Бургас")
"Burgas"
iex> Vasov.transliterate_to_latin("София")
"Sofia"
iex> Vasov.transliterate_to_latin("България")
"Bulgaria"
```
17 changes: 0 additions & 17 deletions dev_deps.exs

This file was deleted.

122 changes: 122 additions & 0 deletions lib/vasov.ex
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
defmodule Vasov do
  @moduledoc """
  Transliterates Bulgarian Cyrillic text to the Latin alphabet.

  The transliteration implements the
  [streamlined system](https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/1105090/ROMANIZATION_OF_BULGARIAN_with_examples.pdf)
  officially adopted in Bulgaria.
  """

  # Letter-by-letter mapping of the 30 letters of the Bulgarian alphabet
  # (upper and lower case). Multi-letter results only capitalise their first
  # letter, e.g. "Ш" => "Sh".
  @bg_to_latin %{
    "А" => "A",
    "а" => "a",
    "Б" => "B",
    "б" => "b",
    "В" => "V",
    "в" => "v",
    "Г" => "G",
    "г" => "g",
    "Д" => "D",
    "д" => "d",
    "Е" => "E",
    "е" => "e",
    "Ж" => "Zh",
    "ж" => "zh",
    "З" => "Z",
    "з" => "z",
    "И" => "I",
    "и" => "i",
    "Й" => "Y",
    "й" => "y",
    "К" => "K",
    "к" => "k",
    "Л" => "L",
    "л" => "l",
    "М" => "M",
    "м" => "m",
    "Н" => "N",
    "н" => "n",
    "О" => "O",
    "о" => "o",
    "П" => "P",
    "п" => "p",
    "Р" => "R",
    "р" => "r",
    "С" => "S",
    "с" => "s",
    "Т" => "T",
    "т" => "t",
    "У" => "U",
    "у" => "u",
    "Ф" => "F",
    "ф" => "f",
    "Х" => "H",
    "х" => "h",
    "Ц" => "Ts",
    "ц" => "ts",
    "Ч" => "Ch",
    "ч" => "ch",
    "Ш" => "Sh",
    "ш" => "sh",
    "Щ" => "Sht",
    "щ" => "sht",
    "Ъ" => "A",
    "ъ" => "a",
    "Ь" => "Y",
    "ь" => "y",
    "Ю" => "Yu",
    "ю" => "yu",
    "Я" => "Ya",
    "я" => "ya"
  }

  @doc """
  Transliterates `text` from Bulgarian Cyrillic to the Latin alphabet.

  Each Cyrillic letter is replaced by its Latin counterpart; any other
  character (Latin letters, digits, punctuation, whitespace) is returned
  unchanged. Two word-level exceptions are applied to every word of `text`
  before the letter mapping, preserving the case of the affected letter:

    * a word starting with "България" is written "Bulgaria" (not "Balgaria");
    * "ия" at the end of a word is written "ia" (not "iya").

  ## Examples

      iex> Vasov.transliterate_to_latin("Бургас")
      "Burgas"

      iex> Vasov.transliterate_to_latin("София")
      "Sofia"

      iex> Vasov.transliterate_to_latin("България")
      "Bulgaria"

      iex> Vasov.transliterate_to_latin("София и България")
      "Sofia i Bulgaria"

  """
  @spec transliterate_to_latin(String.t()) :: String.t()
  def transliterate_to_latin(text) when is_binary(text) do
    text
    |> normalize()
    |> String.graphemes()
    |> Enum.map_join(&Map.get(@bg_to_latin, &1, &1))
  end

  # Rewrites the Cyrillic input so that the plain letter mapping yields the
  # spelling required by the two word-level exceptions.
  defp normalize(text) do
    text
    |> normalize_state_name()
    |> normalize_final_ya()
  end

  # Replaces the "ъ" of every word starting with "България" by "у".
  # `\b` (instead of `^`) makes the rule apply to each word of the text, not
  # only to the very first one; a match inside a word ("Сбългария") is still
  # left alone because there is no word boundary before the "б".
  defp normalize_state_name(text) do
    Regex.replace(~r/\b(б)(ъ)(лгария)/ui, text, fn _match, head, er_golyam, tail ->
      head <> u_for(er_golyam) <> tail
    end)
  end

  # Replaces the "я" of every word-final "ия" by "а". The pattern is local to
  # the end of each word, so all matching words in the text are rewritten; the
  # previous greedy, `^`-anchored pattern only reached the last one. At least
  # one word character is required before "ия", so a bare "ия" is left alone.
  defp normalize_final_ya(text) do
    Regex.replace(~r/(\wи)(я)\b/ui, text, fn _match, stem, ya ->
      stem <> a_for(ya)
    end)
  end

  # Case-preserving substitutes for the letters captured by the regexes above
  # (the `i` modifier lets them match in either case).
  defp u_for("ъ"), do: "у"
  defp u_for("Ъ"), do: "У"

  defp a_for("я"), do: "а"
  defp a_for("Я"), do: "А"
end
38 changes: 38 additions & 0 deletions mix.exs
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
defmodule Vasov.MixProject do
  use Mix.Project

  @version "0.1.0"
  @source_url "https://github.com/Efesto/vasov"

  # Project definition read by Mix: application name, version, required
  # Elixir version, dependencies and ExDoc settings.
  def project do
    [
      app: :vasov,
      version: @version,
      elixir: "~> 1.14",
      start_permanent: Mix.env() == :prod,
      deps: deps(),
      docs: docs()
    ]
  end

  # OTP application configuration; only :logger is requested on top of the
  # default applications.
  def application, do: [extra_applications: [:logger]]

  # Options handed to ExDoc when generating the documentation.
  defp docs do
    [
      main: "Vasov",
      source_ref: "master",
      source_url: @source_url,
      extras: ["README.md"]
    ]
  end

  # Tooling-only dependencies; none of them is started at runtime.
  defp deps do
    dev_and_test = [only: [:dev, :test], runtime: false]
    dev_only = [only: :dev, runtime: false]

    [
      {:credo, "~> 1.6", dev_and_test},
      {:mix_audit, "~> 2.0", dev_and_test},
      {:ex_doc, ">= 0.0.0", dev_only},
      {:mix_test_interactive, "~> 1.0", dev_only}
    ]
  end
end
17 changes: 17 additions & 0 deletions mix.lock
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
%{
"bunt": {:hex, :bunt, "0.2.1", "e2d4792f7bc0ced7583ab54922808919518d0e57ee162901a16a1b6664ef3b14", [:mix], [], "hexpm", "a330bfb4245239787b15005e66ae6845c9cd524a288f0d141c148b02603777a5"},
"credo": {:hex, :credo, "1.6.7", "323f5734350fd23a456f2688b9430e7d517afb313fbd38671b8a4449798a7854", [:mix], [{:bunt, "~> 0.2.1", [hex: :bunt, repo: "hexpm", optional: false]}, {:file_system, "~> 0.2.8", [hex: :file_system, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: false]}], "hexpm", "41e110bfb007f7eda7f897c10bf019ceab9a0b269ce79f015d54b0dcf4fc7dd3"},
"earmark_parser": {:hex, :earmark_parser, "1.4.30", "0b938aa5b9bafd455056440cdaa2a79197ca5e693830b4a982beada840513c5f", [:mix], [], "hexpm", "3b5385c2d36b0473d0b206927b841343d25adb14f95f0110062506b300cd5a1b"},
"ex_doc": {:hex, :ex_doc, "0.29.1", "b1c652fa5f92ee9cf15c75271168027f92039b3877094290a75abcaac82a9f77", [:mix], [{:earmark_parser, "~> 1.4.19", [hex: :earmark_parser, repo: "hexpm", optional: false]}, {:makeup_elixir, "~> 0.14", [hex: :makeup_elixir, repo: "hexpm", optional: false]}, {:makeup_erlang, "~> 0.1", [hex: :makeup_erlang, repo: "hexpm", optional: false]}], "hexpm", "b7745fa6374a36daf484e2a2012274950e084815b936b1319aeebcf7809574f6"},
"file_system": {:hex, :file_system, "0.2.10", "fb082005a9cd1711c05b5248710f8826b02d7d1784e7c3451f9c1231d4fc162d", [:mix], [], "hexpm", "41195edbfb562a593726eda3b3e8b103a309b733ad25f3d642ba49696bf715dc"},
"jason": {:hex, :jason, "1.4.0", "e855647bc964a44e2f67df589ccf49105ae039d4179db7f6271dfd3843dc27e6", [:mix], [{:decimal, "~> 1.0 or ~> 2.0", [hex: :decimal, repo: "hexpm", optional: true]}], "hexpm", "79a3791085b2a0f743ca04cec0f7be26443738779d09302e01318f97bdb82121"},
"makeup": {:hex, :makeup, "1.1.0", "6b67c8bc2882a6b6a445859952a602afc1a41c2e08379ca057c0f525366fc3ca", [:mix], [{:nimble_parsec, "~> 1.2.2 or ~> 1.3", [hex: :nimble_parsec, repo: "hexpm", optional: false]}], "hexpm", "0a45ed501f4a8897f580eabf99a2e5234ea3e75a4373c8a52824f6e873be57a6"},
"makeup_elixir": {:hex, :makeup_elixir, "0.16.0", "f8c570a0d33f8039513fbccaf7108c5d750f47d8defd44088371191b76492b0b", [:mix], [{:makeup, "~> 1.0", [hex: :makeup, repo: "hexpm", optional: false]}, {:nimble_parsec, "~> 1.2.3", [hex: :nimble_parsec, repo: "hexpm", optional: false]}], "hexpm", "28b2cbdc13960a46ae9a8858c4bebdec3c9a6d7b4b9e7f4ed1502f8159f338e7"},
"makeup_erlang": {:hex, :makeup_erlang, "0.1.1", "3fcb7f09eb9d98dc4d208f49cc955a34218fc41ff6b84df7c75b3e6e533cc65f", [:mix], [{:makeup, "~> 1.0", [hex: :makeup, repo: "hexpm", optional: false]}], "hexpm", "174d0809e98a4ef0b3309256cbf97101c6ec01c4ab0b23e926a9e17df2077cbb"},
"mix_audit": {:hex, :mix_audit, "2.1.0", "3c0dafb29114dffcdb508164a3d35311a9ac2c5baeba6495c9cd5315c25902b9", [:make, :mix], [{:jason, "~> 1.1", [hex: :jason, repo: "hexpm", optional: false]}, {:yaml_elixir, "~> 2.9", [hex: :yaml_elixir, repo: "hexpm", optional: false]}], "hexpm", "14c57a23e0a5f652c1e7f6e8dab93f166f66d63bd0c85f97278f5972b14e2be0"},
"mix_test_interactive": {:hex, :mix_test_interactive, "1.2.2", "72f72faa7007d6cb9634ee5f6989b25ee5b194c5729e5e45a962e68b2e217374", [:mix], [{:file_system, "~> 0.2", [hex: :file_system, repo: "hexpm", optional: false]}, {:typed_struct, "~> 0.3.0", [hex: :typed_struct, repo: "hexpm", optional: false]}], "hexpm", "f49f2a70d00aee93418506dde4d95387fe56bdba501ef9d2aa06ea07d4823508"},
"nimble_parsec": {:hex, :nimble_parsec, "1.2.3", "244836e6e3f1200c7f30cb56733fd808744eca61fd182f731eac4af635cc6d0b", [:mix], [], "hexpm", "c8d789e39b9131acf7b99291e93dae60ab48ef14a7ee9d58c6964f59efb570b0"},
"typed_struct": {:hex, :typed_struct, "0.3.0", "939789e3c1dca39d7170c87f729127469d1315dcf99fee8e152bb774b17e7ff7", [:mix], [], "hexpm", "c50bd5c3a61fe4e198a8504f939be3d3c85903b382bde4865579bc23111d1b6d"},
"yamerl": {:hex, :yamerl, "0.10.0", "4ff81fee2f1f6a46f1700c0d880b24d193ddb74bd14ef42cb0bcf46e81ef2f8e", [:rebar3], [], "hexpm", "346adb2963f1051dc837a2364e4acf6eb7d80097c0f53cbdc3046ec8ec4b4e6e"},
"yaml_elixir": {:hex, :yaml_elixir, "2.9.0", "9a256da867b37b8d2c1ffd5d9de373a4fda77a32a45b452f1708508ba7bbcb53", [:mix], [{:yamerl, "~> 0.10", [hex: :yamerl, repo: "hexpm", optional: false]}], "hexpm", "0cb0e7d4c56f5e99a6253ed1a670ed0e39c13fc45a6da054033928607ac08dfc"},
}
1 change: 1 addition & 0 deletions test/test_helper.exs
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
ExUnit.start()
89 changes: 89 additions & 0 deletions test/vasov_test.exs
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
defmodule VasovTest do
  use ExUnit.Case

  doctest Vasov

  describe "transliterate_to_latin/1" do
    test "transliterates from bulgarian cyrillic to latin" do
      check_pairs([
        {"основните", "osnovnite"},
        {"Асеновград", "Asenovgrad"},
        {"Враца", "Vratsa"},
        {"Габрово", "Gabrovo"},
        {"Добрич", "Dobrich"},
        {"Елхово", "Elhovo"},
        {"Кърджали", "Kardzhali"},
        {"Златица", "Zlatitsa"},
        {"Исперих", "Isperih"},
        {"Айтос", "Aytos"},
        {"Казанлък", "Kazanlak"},
        {"Ловеч", "Lovech"},
        {"Монтана", "Montana"},
        {"Никопол", "Nikopol"},
        {"Омуртаг", "Omurtag"},
        {"Пловдив", "Plovdiv"},
        {"Разград", "Razgrad"},
        {"Твърдица", "Tvarditsa"},
        {"Угърчин", "Ugarchin"},
        {"Хасково", "Haskovo"},
        {"Царево", "Tsarevo"},
        {"Чирпан", "Chirpan"},
        {"Шумен", "Shumen"},
        {"Търговище", "Targovishte"},
        {"Гълъбово", "Galabovo"},
        {"каньон", "kanyon"},
        {"Кюстендил", "Kyustendil"},
        {"Ямбол", "Yambol"},
        {"мира", "mira"},
        {"Софиа е ного красива", "Sofia e nogo krasiva"},
        {"Аз обичам лутеница", "Az obicham lutenitsa"},
        {"Ти си чувек", "Ti si chuvek"},
        {"хелло Маик, хелло Джое", "hello Maik, hello Dzhoe"}
      ])
    end

    test "when ия is in final position is transliterated with ia" do
      check_pairs([
        {"Пияница", "Piyanitsa"},
        {"Катия", "Katia"},
        {"СофИя", "SofIa"},
        {"СофиЯ", "SofiA"},
        {"фият", "fiyat"},
        {"София-Град", "Sofia-Grad"}
      ])
    end

    test "България is always transliterated in Bulgaria" do
      check_pairs([
        {"българия", "bulgaria"},
        {"бУлГариЯ", "bUlGariA"},
        {"България-град", "Bulgaria-grad"},
        {"Българияград", "Bulgariyagrad"},
        {"Сбългария", "Sbalgaria"}
      ])
    end

    test "does not transliterate latin letters" do
      assert_transliteration("bulgaria", "bulgaria")
    end

    # Asserts every {cyrillic, latin} pair in order, stopping at the first
    # failing assertion.
    defp check_pairs(pairs) do
      for {cyrillic, latin} <- pairs do
        assert_transliteration(latin, cyrillic)
      end
    end

    # Asserts that transliterating `original` yields `expected`.
    defp assert_transliteration(expected, original) do
      assert expected == Vasov.transliterate_to_latin(original)
    end
  end
end

0 comments on commit 8133c60

Please sign in to comment.