diff --git a/.github/workflows/spellcheck.yml b/.github/workflows/spellcheck.yml
new file mode 100644
index 0000000..7ebc1ff
--- /dev/null
+++ b/.github/workflows/spellcheck.yml
@@ -0,0 +1,23 @@
+# adapted from https://github.com/JuliaDocs/Documenter.jl/blob/master/.github/workflows/SpellCheck.yml
+# see docs at https://github.com/crate-ci/typos
+name: Spell Check
+on: [pull_request]
+
+jobs:
+  typos-check:
+    name: Spell Check with Typos
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout Actions Repository
+        uses: actions/checkout@v4
+      - name: Check spelling
+        uses: crate-ci/typos@master
+        # don't fail on typos in files not impacted by this PR
+        continue-on-error: true
+        with:
+            config: _typos.toml
+            write_changes: true
+      - uses: reviewdog/action-suggester@v1
+        with:
+          tool_name: Typos
+          fail_on_error: true
diff --git a/_quarto.yml b/_quarto.yml
index 33476df..8a5901c 100644
--- a/_quarto.yml
+++ b/_quarto.yml
@@ -48,7 +48,7 @@ format:
     theme:
       light: [sandstone, theme.scss]
       dark: [darkly, theme-dark.scss]
-    # defaults, can be overriden
+    # defaults, can be overridden
     toc: true
     toc-depth: 2
     fig-width: 8
diff --git a/_typos.toml b/_typos.toml
new file mode 100644
index 0000000..8a02277
--- /dev/null
+++ b/_typos.toml
@@ -0,0 +1,36 @@
+# https://github.com/crate-ci/typos#false-positives
+[default]
+ignore-files = true
+
+[files]
+extend-exclude = ["_build/**", "_freeze/**", ".jupyter_cache/**", ".quarto/**", "_extensions/**"]
+
+[default.extend-identifiers]
+Lik = "Lik"
+missings = "missings"
+
+[default.extend-words]
+Lik = "Lik"
+missings = "missings"
+represention = "representation"
+
+[type.qmd]
+extend-glob = ["*.qmd"]
+
+[type.qmd.extend-words]
+multline = "multline"
+
+[type.package_toml]
+# Don't check spellings in these files
+extend-glob = ["Manifest.toml", "Project.toml"]
+check-file = false
+
+[type.bib]
+# contain lots of names, which are a great spot for false positives
+extend-glob = ["*.bib"]
+check-file = false
+
+[type.csl]
+# contains lots of boilerplate in other languages
+extend-glob = ["*.csl"]
+check-file = false
diff --git a/bootstrap.qmd b/bootstrap.qmd
index 54a5163..b443e75 100644
--- a/bootstrap.qmd
+++ b/bootstrap.qmd
@@ -75,7 +75,7 @@ contrasts = Dict(:spkr => EffectsCoding(),
 ```
 
 The `EffectsCoding` contrast is used with these to create a ±1 encoding.
-Furthermore, `Grouping` constrasts are assigned to the `subj` and `item` factors.
+Furthermore, `Grouping` contrasts are assigned to the `subj` and `item` factors.
 This is not a contrast per-se but an indication that these factors will be used as grouping factors for random effects and, therefore, there is no need to create a contrast matrix.
 For large numbers of levels in a grouping factor, an attempt to create a contrast matrix may cause memory overflow.
 
diff --git a/check_emotikon_transform.qmd b/check_emotikon_transform.qmd
index ef7f5e9..8316de8 100644
--- a/check_emotikon_transform.qmd
+++ b/check_emotikon_transform.qmd
@@ -91,7 +91,7 @@ VarCorr(m1x)
   - Command for fitting LMM m1_om = fit(MixedModel, f1, dat_om, contrasts=contr)
   - Minimizing 10502 Time: 0 Time: 2:09:40 ( 0.74  s/it)
   - Store with: julia> saveoptsum("./fits/fggk21_m1_om_optsum.json", m1_om)
-  - Only for short-term and when desparate: julia> serialize("./fits/m1_om.jls", m1_om);
+  - Only for short-term and when desperate: julia> serialize("./fits/m1_om.jls", m1_om);
 
 #### ... restoreoptsum!()
 
diff --git a/contrasts_fggk21.qmd b/contrasts_fggk21.qmd
index ebc4854..8f98798 100644
--- a/contrasts_fggk21.qmd
+++ b/contrasts_fggk21.qmd
@@ -4,7 +4,7 @@ jupyter: julia-1.9
 author: "Reinhold Kliegl"
 ---
 
-Ths script uses a subset of data reported in @Fuehner2021.
+This script uses a subset of data reported in @Fuehner2021.
 
 To circumvent delays associated with model fitting we work with models that are less complex than those in the reference publication.
 All the data to reproduce the models in the publication are used here, too; the script requires only a few changes to specify the more complex models in the paper.
@@ -15,11 +15,11 @@ The script requires only a few changes to specify the more complex models in the
 
 The script is structured in three main sections:
 
- 1. **Setup** with reading and examing the data
+ 1. **Setup** with reading and examining the data
 
  2. **Contrasts coding**
 
-  - Effect and seqential difference contrasts
+  - Effect and sequential difference contrasts
   - Helmert contrast
   - Hypothesis contrast
   - PCA-based contrast
diff --git a/contrasts_kwdyz11.qmd b/contrasts_kwdyz11.qmd
index 80645d0..7f64b88 100644
--- a/contrasts_kwdyz11.qmd
+++ b/contrasts_kwdyz11.qmd
@@ -18,7 +18,7 @@ ProgressMeter.ijulia_behavior(:clear);
 
 # A word of caution {#sec-caution}
 
-For a (quasi-)experimental set of data, there is (or should be) a clear _a priori_ theoretical committment to specific hypotheses about differences between factor levels and how these differences enter in interactions with other factors. This specification should be used in the first LMM and reported, irrespective of the outcome. If alternative theories lead to alternative _a priori_ contrast specifications, both analyses are justified. If the observed means render the specification completely irrelevant, the comparisons originally planned could still be reported in a Supplement).
+For a (quasi-)experimental set of data, there is (or should be) a clear _a priori_ theoretical commitment to specific hypotheses about differences between factor levels and how these differences enter in interactions with other factors. This specification should be used in the first LMM and reported, irrespective of the outcome. If alternative theories lead to alternative _a priori_ contrast specifications, both analyses are justified. If the observed means render the specification completely irrelevant, the comparisons originally planned could still be reported in a Supplement.
 
 In this script, we are working through a large number of different contrasts for the same data. The purpose is to introduce both the preprogrammed (“canned”) and the general options to specify hypotheses about main effects and interactions. Obviously, we do not endorse generating a plot of the means and specifying the contrasts accordingly. This is known as the [Texas sharpshooter](https://www.bayesianspectacles.org/origin-of-the-texas-sharpshooter/) fallacy. The link leads to an illustration and brief historical account by Wagenmakers (2018).
 
@@ -33,7 +33,7 @@ For further readings see “Further Readings” in @Schad2020.
 We take the `KWDYZ` dataset from @Kliegl2011.
 This is an experiment looking at three effects of visual cueing under four different cue-target relations (CTRs).
 Two horizontal rectangles are displayed above and below a central fixation point or they displayed in vertical orientation to the left and right of the fixation point.
-Subjects react to the onset of a small visual target occuring at one of the four ends of the two rectangles.
+Subjects react to the onset of a small visual target occurring at one of the four ends of the two rectangles.
 The target is cued validly on 70% of trials by a brief flash of the corner of the rectangle at which it appears; it is cued invalidly at the three other locations 10% of the trials each.
 
 We specify three contrasts for the four-level factor CTR that are derived from spatial, object-based, and attractor-like features of attention. They map onto sequential differences between appropriately ordered factor levels.
@@ -114,7 +114,7 @@ The difference to the preprogrammed `SeqDiffCoding` is that for the third contra
 
 ## DummyCoding
 
-Thi contrast corresponds to `contr.treatment()` in R
+This contrast corresponds to `contr.treatment()` in R
 
 ```{julia}
 m2 = let
@@ -259,7 +259,7 @@ We specify them with `HypothesisCoding`.
 ### A(2) x B(2)
 
 An A(2) x B(2) design can be recast as an F(4) design with the levels (A1-B1, A1-B2, A2-B1, A2-B2).
-The following contrast specifiction returns estimates for the main effect of A, the main effect of B, and the interaction of A and B.
+The following contrast specification returns estimates for the main effect of A, the main effect of B, and the interaction of A and B.
 In a figure With A on the x-axis and the levels of B shown as two lines, the interaction tests the null hypothesis that the two lines are parallel.
 A positive coefficient implies overadditivity (diverging lines toward the right) and a negative coefficient underadditivity (converging lines).
 
@@ -310,7 +310,7 @@ TO BE DONE
 Nested contrasts are often specified as follow up as post-hoc tests for ANOVA interactions. They are orthogonal. We specify them with `HypothesisCoding`.
 
 An A(2) x B(2) design can be recast as an F(4) design with the levels (A1-B1, A1-B2, A2-B1, A2-B2).
-The following contrast specifiction returns an estimate for the main effect of A and the effects of B nested in the two levels of A.
+The following contrast specification returns an estimate for the main effect of A and the effects of B nested in the two levels of A.
 In a figure With A on the x-axis and the levels of B shown as two lines, the second contrast tests whether A1-B1 is different from A1-B2 and the third contrast tests whether A2-B1 is different from A2-B2.
 
 ```{julia}
@@ -337,7 +337,7 @@ There is no test of the interaction (parallelism).
 
 For factors with more than four levels there are many options for specifying orthogonal contrasts as long as one proceeds in a top-down strictly hiearchical fashion.
 
-Suppose you have a factor with seven levels and let's ignore shifting colummns.
+Suppose you have a factor with seven levels and let's ignore shifting columns.
 In this case, you have six options for the first contrast, that is 6 vs. 1, 5 vs.2 , 4 vs. 3, 3 vs. 4, 2 vs. 5, and 1 vs. 6 levels.
 Then, you specify orthogonal contrasts for partitions with more than 2 elements and so on.
 That is, you don't specify a contrast that crosses an earlier partition line.
diff --git a/fggk21.qmd b/fggk21.qmd
index e5dd69e..c77dd62 100644
--- a/fggk21.qmd
+++ b/fggk21.qmd
@@ -9,7 +9,7 @@ All the data to reproduce the models in the publication are used here, too; the
 
 The script is structured in four main sections:
 
- 1. **Setup** with reading and examing the data, plotting the main results, and specifying the contrasts for the fixed factor `Test`
+ 1. **Setup** with reading and examining the data, plotting the main results, and specifying the contrasts for the fixed factor `Test`
  2. a demonstration of **model complexification** to determine a parsimonious random-effect structure appropriate for and supported by the data, including also a quite elaborate demonstration of **principle component analyses (PCAs)** of levels (scores) and effects,
  3. specification of **nested fixed effects or interactions** in the levels of another, superordinate factors,
  4. a **Glossary of MixedModels.jl commands** to inspect the information generated for a fitted model object.
@@ -385,7 +385,7 @@ In general, smaller deviance, AIC, and BIC indicate an improvement in goodness o
 Usually, χ² should be larger than the associated degrees of freedom; for AIC and BIC the decrease should amount to more than 5, according to some literature.
 Severity of meeting these criteria increases from deviance to AIC to BIC.
 Therefore, it is not always the case that the criteria are unanimous in their verdict.
-Basicly, the more confirmatory the analysis, the more one may go with deviance and AIC; for exploratory analyses the BIC is probably a better guide.
+Basically, the more confirmatory the analysis, the more one may go with deviance and AIC; for exploratory analyses the BIC is probably a better guide.
 There are grey zones here.
 
 ### Comparing fixed effects of `m_ovi`, `m_zcp`, and `m_cpx`
@@ -685,7 +685,7 @@ end
 n, p, q, k = size(m1)  # nobs, fe params, VCs+CPs, re terms
 ```
 
-In prinicple, the models should yield the save deviance.
+In principle, the models should yield the same deviance.
 When models are not supported by the data, that is for singular models, there may be small differences between deviances for these reparameterizations.
 During optimization such models search for the absolute minimum in a very shallow surface and may end up in a local minimum instead.
 
@@ -722,7 +722,7 @@ dof(m1)
 
 # Glossary of _MixedModels.jl_ commands
 
-Here we introduce most of the commands available in the _MixedModels.jl_ package that allow the immediated inspection and analysis of results returned in a fitted _linear_ mixed-effect model.
+Here we introduce most of the commands available in the _MixedModels.jl_ package that allow the immediate inspection and analysis of results returned in a fitted _linear_ mixed-effect model.
 
 Postprocessing related to conditional modes will be dealt with in a different tutorial.
 
@@ -858,7 +858,7 @@ These commands inform us about the model parameters associated with the RES.
 ```
 
 ```{julia}
-issingular(m1) # Test if model is singular for paramter vector m1.theta (default)
+issingular(m1) # Test if model is singular for parameter vector m1.theta (default)
 ```
 
 ```{julia}
diff --git a/glmm.qmd b/glmm.qmd
index 6079a28..c596e1d 100644
--- a/glmm.qmd
+++ b/glmm.qmd
@@ -252,7 +252,7 @@ Notice that the linear term for `age` is not significant but the quadratic term
 We usually retain the lower order term, even if it is not significant, if the higher order term is significant.
 
 Notice also that the parameter estimates for the treatment contrasts for `livch` are similar.
-Thus the distinction of 1, 2, or 3+ childen is not as important as the contrast between having any children and not having any.
+Thus the distinction of 1, 2, or 3+ children is not as important as the contrast between having any children and not having any.
 Those women who already have children are more likely to use artificial contraception.
 
 Furthermore, the women without children have a different probability vs age profile than the women with children.
@@ -375,4 +375,4 @@ preds = effects(design, gm3; invlink=AutoInvLink())
 
   - From the data plot we can see a quadratic trend in the probability by age.
   - The patterns for women with children are similar and we do not need to distinguish between 1, 2, and 3+ children.
-  - We do distinguish between those women who do not have children and those with children.  This shows up in a signficant `age & children` interaction term.
+  - We do distinguish between those women who do not have children and those with children.  This shows up in a significant `age & children` interaction term.
diff --git a/kb07.qmd b/kb07.qmd
index 8fa9e14..f58dd32 100644
--- a/kb07.qmd
+++ b/kb07.qmd
@@ -37,7 +37,7 @@ describe(DataFrame(kb07))
 
 The experimental factors; `spkr`, `prec`, and `load`, are two-level factors.
 The `EffectsCoding` contrast is used with these to create a $\pm1$ encoding.
-Furthermore, `Grouping` constrasts are assigned to the `subj` and `item` factors.
+Furthermore, `Grouping` contrasts are assigned to the `subj` and `item` factors.
 This is not a contrast per-se but an indication that these factors will be used as grouping factors for random effects and, therefore, there is no need to create a contrast matrix.
 For large numbers of levels in a grouping factor, an attempt to create a contrast matrix may cause memory overflow.
 
diff --git a/kkl15.qmd b/kkl15.qmd
index abb7523..b60e7e7 100644
--- a/kkl15.qmd
+++ b/kkl15.qmd
@@ -5,7 +5,7 @@ jupyter: julia-1.9
 
 # Background
 
-@Kliegl2015 is a follow-up to @Kliegl2011 (see also script `kwdyz11.qmd`) from an experiment looking at a variety of effects of visual cueing under four different cue-target relations (CTRs). In this experiment two rectangles are displayed (1) in horizontal orientation , (2) in vertical orientation, (3) in left diagonal orientation, or in (4) right diagonal orientation relative to a central fixation point. Subjects react to the onset of a small or a large visual target occuring at one of the four ends of the two rectangles. The target is cued validly on 70% of trials by a brief flash of the corner of the rectangle at which it appears; it is cued invalidly at the three other locations 10% of the trials each. This implies a latent imbalance in design that is not visible in the repeated-measures ANOVA, but we will show its effect in the random-effect structure and conditional modes.
+@Kliegl2015 is a follow-up to @Kliegl2011 (see also script `kwdyz11.qmd`) from an experiment looking at a variety of effects of visual cueing under four different cue-target relations (CTRs). In this experiment two rectangles are displayed (1) in horizontal orientation, (2) in vertical orientation, (3) in left diagonal orientation, or (4) in right diagonal orientation relative to a central fixation point. Subjects react to the onset of a small or a large visual target occurring at one of the four ends of the two rectangles. The target is cued validly on 70% of trials by a brief flash of the corner of the rectangle at which it appears; it is cued invalidly at the three other locations 10% of the trials each. This implies a latent imbalance in design that is not visible in the repeated-measures ANOVA, but we will show its effect in the random-effect structure and conditional modes.
 
 There are a couple of differences between the first and this follow-up experiment, rendering it more a conceptual than a direct replication. First, the original experiment was carried out at Peking University and this follow-up at Potsdam University. Second, diagonal orientations of rectangles and large target sizes were not part of the design of @Kliegl2011.
 
@@ -19,7 +19,7 @@ The analysis is based on log-transformed reaction times `lrt`, indicated by a _b
 
 In this vignette we focus on the reduction of model complexity. And we start with a quote: 
 
-“Neither the [maximal] nor the [minimal] linear mixed models are appropriate for most repeated measures analysis. Using the [maximal] model is generally wasteful and costly in terms of statiscal power for tesing hypotheses. On the other hand, the [minimal] model fails to account for nontrivial correlation among repeated measurements. This results in inflated [T]ype I error rates when non-negligible correlation does in fact exist. We can usually find middle ground, a covariance model that adequately accounts for correlation but is more parsimonious than the [maximal] model. Doing so allows us full control over [T]ype I error rates without needlessly sacrificing power.”
+“Neither the [maximal] nor the [minimal] linear mixed models are appropriate for most repeated measures analysis. Using the [maximal] model is generally wasteful and costly in terms of statistical power for testing hypotheses. On the other hand, the [minimal] model fails to account for nontrivial correlation among repeated measurements. This results in inflated [T]ype I error rates when non-negligible correlation does in fact exist. We can usually find middle ground, a covariance model that adequately accounts for correlation but is more parsimonious than the [maximal] model. Doing so allows us full control over [T]ype I error rates without needlessly sacrificing power.”
 
 Stroup, W. W. (2012, p. 185). _Generalized linear mixed models: Modern concepts, methods and applica?ons._ CRC Press, Boca Raton.
 
diff --git a/kwdyz11.qmd b/kwdyz11.qmd
index 9739993..cde2e76 100644
--- a/kwdyz11.qmd
+++ b/kwdyz11.qmd
@@ -8,7 +8,7 @@ author: "Reinhold Kliegl"
 
 We take the `kwdyz11` dataset [@Kliegl2011] from an experiment looking at three effects of visual cueing under four different cue-target relations (CTRs).
 Two horizontal rectangles are displayed above and below a central fixation point or they displayed in vertical orientation to the left and right of the fixation point.
-Subjects react to the onset of a small visual target occuring at one of the four ends of the two rectangles.
+Subjects react to the onset of a small visual target occurring at one of the four ends of the two rectangles.
 The target is cued validly on 70% of trials by a brief flash of the corner of the rectangle at which it appears; it is cued invalidly at the three other locations 10% of the trials each.
 
 We specify three contrasts for the four-level factor CTR that are derived from spatial, object-based, and attractor-like features of attention.
@@ -316,7 +316,7 @@ draw(
 ## Fixed effects (w/o GM)
 
 The shortest coverage interval for the `GM` ranges from 376 to 404 ms.
-To keep the plot range small we do not inlcude its density here.
+To keep the plot range small we do not include its density here.
 
 ```{julia}
 #| code-fold: true
diff --git a/largescaledesigned.qmd b/largescaledesigned.qmd
index 15d84b3..7f821d5 100644
--- a/largescaledesigned.qmd
+++ b/largescaledesigned.qmd
@@ -37,14 +37,14 @@ The raw data are available as an [OSF project](https://osf.io/n63s2) as Zip file
 These Zip files contain one data file for each participant, which has a mixture of demographic data, responses on some pre-tests, and the actual trial runs.
 
 Parsing these data files is not fun -- see [this repository](https://github.com/dmbates/EnglishLexicon.jl) for some of the code used to untangle the data.
-(This respository is an unregistered Julia package.)
+(This repository is an unregistered Julia package.)
 
 Some lessons from this:
 
 - When an identifier is described as a "unique subject id", it probably isn't.
 - In a multi-center trial, the coordinating center should assign the range of id's for each satellite site.  Failure of a satellite site to stay within its range should result in banishment to a Siberian work camp.
 - As with all data cleaning, the prevailing attitude should be "trust, but verify".  Just because you are told that the file is in a certain format, doesn't mean it is.  Just because you are told that the identifiers are unique doesn't mean they are, etc.
-- It works best if each file has a well-defined, preferably simple, stucture.  These data files had two different formats mushed together.
+- It works best if each file has a well-defined, preferably simple, structure.  These data files had two different formats mushed together.
 - This is the idea of "tidy data" - each file contains only one type of record along with well-defined rules of how you relate one file to another.
 - If one of the fields is a date, declare the **only** acceptable form of writing a date, preferably `yyyy-mm-dd`.  Anyone getting creative about the format of the dates will be required to write the software to parse that form (and that is usually not an easy task).
 - As you make changes in a file, document them.  If you look at the `EnglishLexicon.jl` repository you will see that it is in the form of scripts that take the original Zip files and produce the Arrow files.  That way, if necessary, the changes can be undone or modified.
@@ -201,7 +201,7 @@ let
 end
 ```
 
-The pattern in the range of word lengths with non-negligible counts (there are points in the plot down to word lengths of 1 and up to word lengths of 21 but these points are very small) is that the accuracy for words is nearly constant at about 84% and the accuracy fof nonwords is slightly higher until lengths of 13, at which point it falls off a bit.
+The pattern in the range of word lengths with non-negligible counts (there are points in the plot down to word lengths of 1 and up to word lengths of 21 but these points are very small) is that the accuracy for words is nearly constant at about 84% and the accuracy for nonwords is slightly higher until lengths of 13, at which point it falls off a bit.
 
 ### Summaries by subject {#sec-elpsumrysubj}
 
diff --git a/mrk17.qmd b/mrk17.qmd
index 233e01f..a67f372 100644
--- a/mrk17.qmd
+++ b/mrk17.qmd
@@ -429,7 +429,7 @@ end
 
 The source of the interaction are trials where the last trial was a word target; there is no evidence for the interaction when the last trial was a nonword target.
 
-The orginal and post-hoc LMM have the same goodness of fit.
+The original and post-hoc LMM have the same goodness of fit.
 
 ```{julia}
 [objective(m_prm3), objective(m_prm3_posthoc)]
diff --git a/sleepstudy_speed.qmd b/sleepstudy_speed.qmd
index deeb538..8dee6f6 100644
--- a/sleepstudy_speed.qmd
+++ b/sleepstudy_speed.qmd
@@ -23,7 +23,7 @@ The authors analyzed response speed, that is (1/RT)*1000 -- completely warranted
 
 ## Current data
 
-The current data distributed with the _RData_ collection is attributed to the 3-hour TIB group, but the means do not agree at all with those reported for this group in [@Belenky2003 Figure 3] where the 3-hour TIB group is also based on only 13 (not 18) subjects. Specifically, the current data show a much smaller slow-down of response speed across E1 to E7 and do not reflect the recovery during R1 to R3. The currrent data also cover only 10 not 11 days, but it looks like only R3 is missing. The closest match of the current means was with the average of the 3-hour and 7-hour TIB groups; if only males were included, this would amount to 18 subjects. (This conjecture is based only on visual inspection of graphs.)
+The current data distributed with the _RData_ collection is attributed to the 3-hour TIB group, but the means do not agree at all with those reported for this group in [@Belenky2003 Figure 3] where the 3-hour TIB group is also based on only 13 (not 18) subjects. Specifically, the current data show a much smaller slow-down of response speed across E1 to E7 and do not reflect the recovery during R1 to R3. The current data also cover only 10 not 11 days, but it looks like only R3 is missing. The closest match of the current means was with the average of the 3-hour and 7-hour TIB groups; if only males were included, this would amount to 18 subjects. (This conjecture is based only on visual inspection of graphs.)
 
 # Setup