Skip to content

Commit

Permalink
Merge pull request #390 from neelsmith/dev
Browse files Browse the repository at this point in the history
Add method for `concat_ds` using a `DFParser`
  • Loading branch information
neelsmith authored Dec 3, 2023
2 parents a4bb457 + 3a72334 commit bf32331
Show file tree
Hide file tree
Showing 4 changed files with 37 additions and 3 deletions.
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "Kanones"
uuid = "107500f9-53d4-4696-8485-0747242ad8bc"
authors = ["nsmith"]
version = "0.24.0"
version = "0.24.1"

[deps]
AtticGreek = "330c8319-f7ed-461a-8c52-cee5da4c0892"
Expand Down
33 changes: 33 additions & 0 deletions src/parser/dfparser.jl
Original file line number Diff line number Diff line change
Expand Up @@ -40,4 +40,37 @@ $(SIGNATURES)
"""
function lexemes(dfp::DFParser)
    # Pull out only the `:Lexeme` column of the parser's data frame,
    # convert it to an array of strings, and drop duplicate values.
    lexcolumn = DataFrames.select(dfp.df, :Lexeme)
    return unique(Array{String}(lexcolumn))
end






"""Build a new `DFParser` by adding a further dataset
to an existing parser.

- `dfp` is an existing `DFParser`.
- `rulesds` is the dataset used to build `dfp`
- `newdata` is an additional dataset with any new content (rules or vocab)
- `interval` (keyword, default `100`): a progress message is logged once for
  every `interval` stems of `newdata` that are processed.

Only the stems of `newdata` are analyzed here; they are analyzed against the
combined rules of `rulesds` and `newdata`, and the resulting rows are appended
to the rows already in `dfp`.

$(SIGNATURES)
"""
function concat_ds(
    dfp::DFParser,
    rulesds::FilesDataset,
    newdata::FilesDataset;
    interval = 100,
)
    # New stems must be matched against the old and the new rules together.
    rules_all = vcat(rulesarray(rulesds), rulesarray(newdata))
    stems_new = stemsarray(newdata)

    analyses = []
    for (i, stem) in enumerate(stems_new)
        # Report progress only once per `interval` stems, not for every stem.
        if i % interval == 0
            @info("stem $(i)$(stem)")
        end
        append!(analyses, buildparseable(stem, rules_all, delimiter = ","))
    end

    # Round-trip the new analyses through a delimited file so that they can be
    # read back as a `DFParser`.
    temp_sp = analyses |> StringParser
    temp_f = tempname()
    dfp2 = try
        tofile(temp_sp, temp_f; delimiter = ",")
        dfParser(temp_f)
    finally
        # Always clean up the temporary file, even when writing or reading
        # fails; `force = true` tolerates the file never having been created.
        rm(temp_f; force = true)
    end

    return DFParser(vcat(dfp.df, dfp2.df))
end
2 changes: 0 additions & 2 deletions src/parser/stringparser.jl
Original file line number Diff line number Diff line change
Expand Up @@ -148,8 +148,6 @@ function lexemes(sp::StringParser)
end


#= NO THIS IS ALL WRONG.
=#
"""Build a new `StringParser` by adding a further dataset
to an existing parser.
Expand Down
3 changes: 3 additions & 0 deletions test/assets/tiniest/stems-tables/nouns/onenoun.cex
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
StemUrn|LexicalEntity|Stem|Gender|InflClass|Accent|

nounstems.n72550|lsj.n72550|οἰν|masculine|os_ou|recessive|

0 comments on commit bf32331

Please sign in to comment.