This repository has been archived by the owner on Sep 19, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsyllables.py
executable file
·135 lines (115 loc) · 3.22 KB
/
syllables.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
#!/usr/bin/env python
"""
Provides dictionaries of letter types for use in CVC syllable construction [1].
Letter frequency is drawn from 10X the English letter frequencies. The
scaling factor was chosen to differentiate on an integer level between
tenths of percentages. These weightings have been modified slightly based on
experiment results during development from the entries in wikipedia [2].
The overall consonant dict is generated by merging the onset and end
consonant dicts; where a consonant appears in both, the end weight is used.
The consonant and vowel dicts are of the form key: weight and a function
is provided (and used) to generate weighted lists of letters to simplify
selecting from a weighted set.
References:
[1] https://en.wikipedia.org/wiki/Syllable#Grouping_of_components
[2] https://en.wikipedia.org/wiki/Letter_frequency
"""
__author__ = "Hillary Jeffrey"
__copyright__ = "Copyright 2015"
__credits__ = ["Hillary Jeffrey"]
__license__ = "GPL"
__version__ = "1.0"
__maintainer__ = "Hillary Jeffrey"
__email__ = "[email protected]"
__status__ = "Development"
# Replacement spellings used to keep syllables readable/pronounceable
# when they occur at the ends of words
translation = dict(
    aw='augh',
    ou='ow',
    zh='j',
    ng='nge',
    v='ve',
)
# Consonants that may open a CVC syllable
# Each entry is letter(s)=weight, where weight is the English letter
# frequency scaled by 10
onset_consonants = dict(
    # Single letters
    b=47,
    d=27,
    f=38,
    g=20,
    h=72,
    j=6,
    k=6,
    l=27,
    m=43,
    n=24,
    p=25,
    r=17,
    s=78,
    t=167,
    v=6,
    w=68,
    y=16,
    z=3,
    # Digraphs
    th=80,
    ch=25,
    wh=10,
    sh=30,
)
# Consonants that may close a CVC syllable
# Each entry is letter(s)=weight, where weight is the English letter
# frequency scaled by 10
end_consonants = dict(
    # Single letters
    b=15,
    d=43,
    f=22,
    g=20,
    k=8,
    l=4,
    m=24,
    n=67,
    p=19,
    r=60,
    s=63,
    t=91,
    v=10,
    w=24,
    z=1,
    # Digraphs
    ch=10,
    gh=10,
    ng=20,
    sh=10,
    th=10,
)
# Overall consonant table: start from a copy of the onset weights, then
# merge in the end weights. update() overwrites existing keys, so a consonant
# present in both dicts ends up with its end-consonant weight.
consonants = dict(onset_consonants)
consonants.update(end_consonants)
# Vowels form the middle of each syllable; Y is left out because of the
# CVC construction
# Each entry is letter=weight, where weight is the English letter
# frequency scaled by 10
vowels = dict(
    a=82,
    e=127,
    i=70,
    o=75,
    u=28,
)
# Build the sets of weighted distributions
def buildWeightedSets(inputdict):
    """Expand a dict of ``key: weight`` pairs into a flat weighted list.

    Each key is repeated ``weight`` times in the result, so a uniform random
    pick from the list selects keys in proportion to their weights.

    Args:
        inputdict: Mapping of key -> integer weight.

    Returns:
        A list holding every key repeated ``weight`` times, following the
        iteration order of ``inputdict``. An empty dict yields ``[]``.

    Raises:
        ValueError: If the length of the built list differs from the sum of
            the weights (for example when a weight is negative, since a
            negative repeat count contributes no entries).
    """
    # Total weight is used to double check the built list
    totalweight = sum(inputdict.values())

    weightedSet = []
    # items() is available on both Python 2 and Python 3 dicts, whereas
    # iteritems() only exists on Python 2
    for letter, weight in inputdict.items():
        weightedSet.extend([letter] * weight)

    # Check that we got the expected length, otherwise raise an error
    if len(weightedSet) != totalweight:
        raise ValueError("Constructed length (%i) differs from expected (%i)"
                         % (len(weightedSet), totalweight))

    return weightedSet
# Expand each weight dictionary into its weighted list, ready for selection
onset_weighted = buildWeightedSets(onset_consonants)
end_weighted = buildWeightedSets(end_consonants)
consonants_weighted = buildWeightedSets(consonants)
vowels_weighted = buildWeightedSets(vowels)