Skip to content

Commit

Permalink
Merge pull request #2 from Bennycopter/aux-verbs-particles-fix
Browse files (browse the repository at this point in the history)
Use particles instead of aux verbs; ignore some verbs
  • Loading branch information
joshdavham authored Sep 28, 2024
2 parents 46c9799 + 4bd1e3a commit bd8a0db
Showing 1 changed file with 11 additions and 10 deletions.
21 changes: 11 additions & 10 deletions src/jreadability/jreadability.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,36 +58,37 @@ def split_japanese_sentences(doc: List[UnidicNode]) -> List[List[UnidicNode]]:

mean_length_of_sentence = sum(sentence_lengths) / len(sentences)

# next, compute proportion of kango, wago, verbs and auxiliary verbs
# next, compute proportion of kango, wago, verbs and particles
num_kango = 0
num_wago = 0
num_verbs = 0
num_aux_verbs = 0
num_particles = 0
for token in doc:

goshu = token.feature.goshu # goshu (語種) is the word's origin
pos = token.feature.pos1
pos1 = token.feature.pos1
pos2 = token.feature.pos2

if goshu == '漢': # 'kan', meaning Chinese
num_kango += 1
elif goshu == '和': # 'wa', meaning Japanese
num_wago += 1

if pos == '動詞': # 'doushi', meaning verb
if pos1 == "動詞" and pos2 != "非自立可能": # 'doushi', meaning verb; excludes verbs tagged 非自立可能, such as あり in あります
num_verbs += 1
elif pos == '助動詞': # 'jodoushi', meaning auxiliary verb
num_aux_verbs += 1
elif pos1 == "助詞": # 'joshi', meaning particles
num_particles += 1

proportion_of_kango = 100.0 * num_kango / len(doc)
proportion_of_wago = 100.0 * num_wago / len(doc)
proportion_of_verbs = 100.0 * num_verbs / len(doc)
proportion_of_aux_verbs = 100.0 * num_aux_verbs / len(doc)
proportion_of_particles = 100.0 * num_particles / len(doc)

readability_score = mean_length_of_sentence * -0.056 + \
proportion_of_kango * -0.126 + \
proportion_of_wago * -0.042 + \
proportion_of_verbs * -0.145 + \
proportion_of_aux_verbs * -0.044 + \
proportion_of_particles * -0.044 + \
11.724

return readability_score

0 comments on commit bd8a0db

Please sign in to comment.