-
Notifications
You must be signed in to change notification settings - Fork 2
/
SPC_Guidelines.Rmd
908 lines (643 loc) · 47.9 KB
/
SPC_Guidelines.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
---
output: html_document
---
# Guidance on the Use of Control Charts for Quality Improvement and Monitoring {#top}
### Seattle Children's Hospital
[Dwight Barry, PhD](mailto:[email protected]?subject=SPC%20Guidelines)
*Enterprise Analytics*
[Brendan Bettinger, PhD](mailto:[email protected]?subject=SPC%20Guidelines)
*Infection Prevention*
Version 0.9.2
31 October 2016
<hr/>
[**1. Overview**](#overview)
[Guidelines for interpreting SPC charts](#guidelines)
[Which should I use: a run chart or a control chart?](#which)
[Run charts](#runcharts)
[Control charts](#controlcharts)
[Which control chart should I use?](#whichcontrolchart)
[Tips and tricks for successful control chart use](#tipsandtricks)
[**2. Control Chart Creation Details**](#part2)
[Count, proportion, or rate data](#attribute)
[Continuous data](#numeric)
[**3. Testing assumptions**](#testassumptions)
[Trending](#trending)
[Independence](#independence)
[What happens when you get the mean-variance relationship wrong](#meanvar)
[When to revise control limits](#reviselimits)
[**4. Useful References**](#useful)
[Appendix 1: An R function for SPC plots](#appendix1)
<hr/>
```{r setup, include=FALSE}
# Global chunk options: hide code and suppress warnings/messages in the
# rendered document.
knitr::opts_chunk$set(echo = FALSE, warning = FALSE, message = FALSE)
# Use library() rather than require(): require() returns FALSE on a missing
# package instead of stopping, which would let the render fail later with a
# confusing downstream error.
library(ggExtra)
library(ggplot2)
library(qicharts)
```
``` {r spccode, echo = FALSE, fig.height = 3.5}
spc.plot <- function(subgroup, point, mean, sigma, k = 3,
ucl.show = TRUE, lcl.show = TRUE,
band.show = TRUE, rule.show = TRUE,
ucl.max = Inf, lcl.min = -Inf,
label.x = "Subgroup", label.y = "Value")
{
# Plots a control chart with ggplot2. Blue points are in control, orange
# points are flagged by a run rule, and red points fall outside the
# k-sigma control limits.
#
# Args:
#   subgroup: Subgroup definition (for x-axis)
#   point: Subgroup sample values (for y-axis)
#   mean: Process mean value (for center line)
#   sigma: Process variation value (for control limits); a scalar or a
#     per-subgroup vector (as in the u/p chart examples below)
#   k: Specification for k-sigma limits above and below center line.
#     Default is 3.
#   ucl.show: Visible upper control limit? Default is true.
#   lcl.show: Visible lower control limit? Default is true.
#   band.show: Visible bands between 1-2 sigma limits? Default is true.
#   rule.show: Highlight run rule indicators in orange? Default is true.
#   ucl.max: Maximum feasible value for upper control limit.
#   lcl.min: Minimum feasible value for lower control limit.
#   label.x: Specify x-axis label.
#   label.y: Specify y-axis label.
#
# Returns:
#   A ggplot object (the final expression below).
df <- data.frame(subgroup, point)
# Clamp the limits to their feasible range (e.g., a rate cannot go below 0).
df$ucl <- pmin(ucl.max, mean + k*sigma)
df$lcl <- pmax(lcl.min, mean - k*sigma)
# Returns indices of points that satisfy `rule` and lie inside a window of
# `den` consecutive points in which at least `num` points satisfy it
# (e.g., "4 of 5 beyond 1 sigma" -> num = 4, den = 5).
warn.points <- function(rule, num, den) {
# Every window of `den` consecutive indices, one window per column.
sets <- mapply(seq, 1:(length(subgroup) - (den - 1)),
den:length(subgroup))
# Columns (windows) where the rule fires at least `num` times.
hits <- apply(sets, 2, function(x) sum(rule[x])) >= num
# Flag only the offending points within the triggering windows.
intersect(c(sets[,hits]), which(rule))
}
orange.sigma <- numeric()
p <- ggplot(data = df, aes(x = subgroup)) +
geom_hline(yintercept = mean, col = "gray", size = 1)
if (ucl.show) {
p <- p + geom_line(aes(y = ucl), col = "gray", size = 1)
}
if (lcl.show) {
p <- p + geom_line(aes(y = lcl), col = "gray", size = 1)
}
if (band.show) {
# Shade the 1-2 sigma bands and evaluate the sigma-based run rules.
p <- p +
geom_ribbon(aes(ymin = mean + sigma,
ymax = mean + 2*sigma), alpha = 0.1) +
geom_ribbon(aes(ymin = pmax(lcl.min, mean - 2*sigma),
ymax = mean - sigma), alpha = 0.1)
# 4 of 5 beyond 1 sigma, and 2 of 3 beyond 2 sigma (each side).
orange.sigma <- unique(c(
warn.points(point > mean + sigma, 4, 5),
warn.points(point < mean - sigma, 4, 5),
warn.points(point > mean + 2*sigma, 2, 3),
warn.points(point < mean - 2*sigma, 2, 3)
))
}
df$warn <- "blue"
if (rule.show) {
# Shift-rule run length: log2(n) + 3, where n counts points off the
# center line (matches the run chart guidance in the text above).
shift.n <- round(log(sum(point!=mean), 2) + 3)
orange <- unique(c(orange.sigma,
# 15 consecutive points within 1 sigma (reduced variation) ...
warn.points(point > mean - sigma & point < mean + sigma, 15, 15),
# ... or shift.n consecutive points on one side of the mean (shift).
warn.points(point > mean, shift.n, shift.n),
warn.points(point < mean, shift.n, shift.n)))
df$warn[orange] <- "orange"
}
# Out-of-limit points override any run rule coloring.
df$warn[point > df$ucl | point < df$lcl] <- "red"
p +
geom_line(aes(y = point), col = "royalblue3") +
geom_point(data = df, aes(x = subgroup, y = point, col = warn)) +
scale_color_manual(values = c("blue" = "royalblue3", "orange" = "orangered", "red" = "red3"), guide = FALSE) +
labs(x = label.x, y = label.y) +
theme_bw()
}
```
``` {r sgnlex, echo = FALSE, fig.height = 3.5}
# Example points for signal rules of thumb
# Hand-built 34-point series; each assignment below creates one of the
# patterns annotated on the example charts.
point.signal <- c(0.36, -0.25, 1.07, 0.67, -1.07, 0, 0.72, 1.82, -1.50, -0.99)
# Point 11: out-of-control / "astronomical" value.
point.signal[11] <- 3.4
# Points 12-14: 2 of 3 beyond 2 sigma (2-sigma signal).
point.signal[12:14] <- c(-2.3, -1.5, -2.5)
# Points 15-19: 4 of 5 beyond 1 sigma (1-sigma signal).
point.signal[15:19] <- c(1.4, 1.8, 1.3, 0.2, 1.6)
# Points 20-27: a run below the center line (process shift).
point.signal[20:27] <- c(-0.8, -0.9, -0.2, -1.5, -1.2, -0.3, -1.1, -.5)
# Points 28-34: consecutive increases (possible trend).
point.signal[28:34] <- c(-.3, 0, 0.3, 0.4, 1.1, 1.9, 2.2)
subgroup.signal <- 1:34
# Known process parameters for the illustration: mean 0, sigma 1.
mean.signal <- 0
median.signal <- median(point.signal)
sigma.signal <- rep(1,34)
# Plotmath labels for the sigma-signal annotations.
sig1 <- as.character(expression(1~sigma~signal))
sig2 <- as.character(expression(2~sigma~signal))
# Example control chart with each signal annotated; saved to PNG so the
# guidelines table can embed it.
example_cc = spc.plot(subgroup.signal, point.signal, mean.signal, sigma.signal) +
geom_curve(x = 0.5, xend = 10.5, y = -0.5, yend = -1.5, linetype = 2) +
annotate('text', x = 5, y = -2.3, label = 'Normal variation') +
geom_curve(x = 10, xend = 12, y = 3.2, yend = 3.2, curvature = -1, linetype = 2) +
annotate('text', x = 9.5, y = 3.4, label = 'Out of control', hjust = 1) +
geom_curve(x = 11.5, xend = 14.5, y = -2.5, yend = -2.5, linetype = 2) +
annotate('text', x = 14.5, y = -2.8, parse = TRUE, label = sig2, hjust = 0) +
geom_curve(x = 14.5, xend = 19.5, y = 1.5, yend = 1.5, curvature = -1, linetype = 2) +
annotate('text', x = 17, y = 2.8, parse = TRUE, label = sig1) +
geom_curve(x = 19.5, xend = 27, y = -1, yend = -1, curvature = 0.7, linetype = 2) +
annotate('text', x = 23, y = -2.3, label = 'Process shift') +
geom_curve(x = 26.5, xend = 34, y = -0.5, yend = 2.5, curvature = -0.2, linetype = 2) +
annotate('text', x = 28, y = 1.3, label = 'Trend(?)', angle = 45) +
theme(panel.grid.major.x = element_blank(), panel.grid.minor.x = element_blank()) +
scale_y_continuous(breaks = seq(-3,3))
ggsave("example_control_chart.png")
# Matching run chart: median center line, no control limits or bands.
example_rc = spc.plot(subgroup.signal, point.signal, median.signal, sigma.signal, band.show = FALSE, ucl.show = FALSE, lcl.show = FALSE) +
geom_curve(x = 10, xend = 12, y = 3.2, yend = 3.2, curvature = -1, linetype = 2) +
annotate('text', x = 9.5, y = 3.4, label = 'Astronomical data point', hjust = 1) +
geom_curve(x = 19.5, xend = 27, y = -1, yend = -1, curvature = 0.7, linetype = 2) +
annotate('text', x = 23, y = -2.3, label = 'Process shift') +
geom_curve(x = 26.5, xend = 34, y = -0.5, yend = 2.5, curvature = -0.2, linetype = 2) +
annotate('text', x = 28, y = 1.3, label = 'Trend(?)', angle = 45) +
annotate('text', x= 33, y = -0.5, label = "Median", color = "gray50") +
theme(panel.grid.major.x = element_blank(), panel.grid.minor.x = element_blank()) +
scale_y_continuous(breaks = seq(-3,3))
ggsave("example_run_chart.png")
```
## Overview {#overview}
People are really good at finding patterns that aren't real.
Every process has natural variation---*noise*---included as an inherent part of that process. True signals emerge only when you have properly characterized that variation. Statistical process control (SPC) charts---run charts and control charts---help you characterize and identify non-random patterns that suggest your process has changed.
Control charts are meant to help you identify departures from a **stable** process. Run charts help you monitor any sort of metric, process, or time series data. Each uses a set of guidelines to help you make decisions on whether a process has changed or not.
In many cases, a run chart may be all you need. In *all* cases, you should start with a run chart. If---and only if---you need to characterize the limits of natural variation in a stable process, you can move on to using a control chart.
In addition, *never* rely on a table or year-to-date (YTD) comparisons to evaluate process performance. These approaches ignore the foundational concept of process control: that natural, "common-cause" variation is an essential part of the process, and you can't see natural variation in a table or in YTD comparisons. Tables or YTD values can supplement run or control charts, but should never be used without them.
Above all, remember that the decisions you make in constructing SPC charts *will* impact the interpretation of the results. Bad charts can make for bad decisions.
### Guidelines for interpreting SPC charts {#guidelines}
| Run chart | Control Chart |
| ----------------------------------- | ------------------------------------- |
| ![](example_run_chart.png) | ![](example_control_chart.png) |
| **Identifying possible signals of change in run charts** | **Detecting special cause variation in control charts** |
| *"Astronomical" data point:* a point so different from the rest that anyone would agree that the value is unusual. | *One or more points fall outside the control limit:* if the data are distributed according to the given control chart's assumptions, the probability of seeing a point outside the control limits when the process has not changed is very low. |
| *Process shift:* $log_2(n) + 3$ data points are all above or all below the median line. | *Process shift:* $log_2(n) + 3$ data points are all above or all below the mean line. |
| *Number of crossings:* Too many or too few median line crossings suggest a pattern inconsistent with natural variation. | *Number of crossings:* Too many or too few center line crossings suggest a pattern inconsistent with natural variation. |
| *Trend:* Seven or more consecutive points all increasing or all decreasing (though this is usually ineffective\*). | *Trend:* Seven or more consecutive points all increasing or all decreasing (though this is usually ineffective\*). |
| *Cycles:* There are obvious cycles that are not linked to known causes such as seasonality. | *Cycles:* There are obvious cycles of any sort. |
| | *Reduced variation:* Fifteen or more consecutive points all within 1$\sigma$. |
| | *1$\sigma$ signal:* Four of five consecutive points are more than one standard deviation away from the mean. |
| | *2$\sigma$ signal:* Two of three consecutive points are more than two standard deviations away from the mean. |
\**Note: Although many people use a "trend" test in association with run and control charts, research has shown this test to be ineffective (see [Useful References](#useful)).*
<p align="right"><small>[*back to top*](#top)</small></p>
### Which should I use: a run chart or a control chart? {#which}
- A run chart can monitor a metric or a process, while a control chart monitors a process.
- Always create a run chart. Create a control chart only if you meet the necessary conditions.
- In both cases, the data points must be independent, that is, the position of one point does not influence the position of another point: there is no (serious) autocorrelation. If the data are autocorrelated, the guidelines for testing run or control charts can be invalid, which can lead to poor decision-making.
| Use a run chart if | Use a control chart (only) if |
| ------------------------------------ | ------------------------------------- |
| You may or may not investigate or act when a data point crosses a reference, target, or goal level, or when guidelines suggest a non-random pattern is occurring. | You intend to investigate or act when the process moves outside of control or indicates special cause variation. |
| You have little control over or cannot control the metric (e.g., ED volume/acuity). | You have the potential to control the process driving the metric (e.g., ED wait times). |
| You want to monitor the behavior of individual or groups of data points to a reference, target, or goal level. | You want to monitor the "average" of the system's behavior (i.e., the underlying statistical process)---deviations from expectations. |
| You are monitoring a metric or process that is generally trending or contains seasonality or other cycles of known cause, as long as you are able to calculate an appropriate median line (e.g., via quantile regression or seasonal adjustment). | You are monitoring a *stable* statistical process (there is no trend in the time series, or you have made the appropriate corrections to account for trends or seasonality). |
| You have no expectations that normal day-to-day operations will affect the central tendency. | You expect that normal day-to-day operations will or are meant to keep the process stable within the bounds of common-cause variation. |
| You do not need to account for the inherent natural variation in the system. | You need to understand and account for the inherent natural variation ("noise") in the system. |
| You have at least 12 data points. (Fewer than 12? Just make a line chart, or use an EWMA chart. Run chart guidelines may not be valid.) | You have 20 or more data points that are in a stable statistical process, or you have performed a power analysis that provides the appropriate n for the appropriate time interval. |
| You do not understand one or more of the statistical issues discussed in the control chart column. | You understand the practical trade-offs between the sensitivity and specificity of the control limits relative to your need to investigate or act. |
| | You know which statistical distribution to use to calculate the control limits to ensure you have the proper mean-variance relationship. |
<p align="right"><small>[*back to top*](#top)</small></p>
## Run charts {#runcharts}
Run charts are meant to show a metric of interest over time. They do not rely on parametric statistical theory, so they cannot distinguish between common cause and special cause variation. However, improperly-constructed control charts can't either, so run charts can be more readily used where statistical knowledge is limited. So while run charts are not always as powerful as control charts, they can still provide practical monitoring and useful insights into the process.
Run charts typically employ the median for the reference line. Run charts help you determine whether there are unusual runs in the data, which suggest non-random variation. They tend to be better than control charts in trying to detect moderate (~1.5σ) changes in process than using the control charts' typical 3σ limits rule alone. In other words, a run chart can be more useful when trying to detect improvement while that improvement work is still going on, where a control chart may be inappropriate.
There are basically two "tests" for run charts (an astronomical data point or looking for cycles aren't tests *per se*):
- *Process shift:* A non-random run is a set of $log_2(n) + 3$ data points (rounded to the nearest integer) that are all above or all below the median line, where *n* is the number of points that do *not* fall directly on the median line. For example, if you have 34 points, and 2 fall on the median, you have 32 observations for *n*. Thus in this case, the longest run should be 8 points.
- *Number of crossings:* Too many or too few runs---or more simply, median line crossings---suggest a pattern inconsistent with natural variation. You can use the binomial distribution (`qbinom(0.05, n-1, 0.50)` in R for a 5% false positive rate and because we expect 50% of values to lie on each side of the median) or a table (e.g., [Table 1 in Anhoj & Olesen 2014](http://journals.plos.org/plosone/article?id=10.1371/journal.pone.0113825#pone-0113825-t001)) to find the minimum number of crossings you would expect. Using the same values as above, we would expect the time series to cross the median at least 11 times.
```{r qichart, fig.width=8, fig.height=3.5}
# Run chart of the example series via qicharts::qic; runvals = TRUE prints
# the runs analysis (longest run, number of crossings) on the plot.
qic(point.signal, runvals=TRUE, ylab="Value", main="")
```
<p align="right"><small>[*back to top*](#top)</small></p>
## Control charts {#controlcharts}
### Natural variation and control charts
Statistical theory provides the basis by which we can evaluate processes to more confidently detect changes in process amongst the noise of natural variation.
Understanding natural variation in time series or sequential data is the essential point of quality assurance or process improvement efforts. It's a rookie mistake to use control charts to focus solely on the values themselves or their central tendency. You need to look at all of the elements of a control chart to understand what it's telling you.
This graph shows a process created using random numbers based on a known normal distribution, where the overall distribution is shown in a histogram to the right of the run chart.
```{r ggmarg, fig.height=4}
set.seed(250)
# Simulated stable process: 120 draws from Normal(mean = 18, sd = 1).
# seq_len(120) replaces the redundant seq(1:120) (same values); <- replaces
# = for assignment per R convention.
df <- data.frame(x = seq_len(120), y = 18 + rnorm(120))
nat_var_run_plot <- ggplot(df, aes(x, y)) +
  ylim(14.75, 21.25) +
  geom_hline(aes(yintercept = 18), color = "gray", size = 1) +
  xlab("Subgroup") +
  ylab("Value") +
  geom_line() +
  theme_bw()
# Attach a marginal histogram of y to show the overall distribution.
ggMarginal(nat_var_run_plot, margins = "y", type = "histogram", binwidth = 0.5)
```
<br/>
The next plot shows control limits and 1-2σ bands for comparison. Note that some of the control chart guidelines for detecting special causes suggest that some special cause variation has occurred in this data. Since this data was generated using random numbers from a known normal distribution, these are false positives. It's important to remember that control charts are meant to balance false positives and false negatives, but can never entirely eliminate either.
```{r ggmarg_cc, fig.height=4}
# Same simulated data (`df` from the previous chunk), now overlaid with
# hard-coded 3-sigma limits (red lines) and shaded 1-2 sigma bands.
# NOTE(review): the limits are slightly asymmetric about the mean of 18
# (20.96 vs 15.1) -- confirm these were the intended fitted values.
# Assignment changed from = to <- per R convention; behavior unchanged.
nat_var_cc_plot <- ggplot(df, aes(x, y)) +
  ylim(14.75, 21.25) +
  geom_hline(aes(yintercept = 18), color = "gray", size = 1) +
  geom_hline(aes(yintercept = 20.96), color = "red") +
  geom_hline(aes(yintercept = 15.1), color = "red") +
  geom_ribbon(aes(ymin = 18.98, ymax = 19.96), alpha = 0.2) +
  geom_ribbon(aes(ymin = 16.04, ymax = 17.02), alpha = 0.2) +
  xlab("Subgroup") +
  ylab("Value") +
  geom_line() +
  theme_bw()
# Marginal histogram of y, as in the run chart version above.
ggMarginal(nat_var_cc_plot, margins = "y", type = "histogram", binwidth = 0.5)
```
<p align="right"><small>[*back to top*](#top)</small></p>
### Which control chart should I use? {#whichcontrolchart}
The following flow chart can help you determine which kind of control chart you might want to use. More details and formulas for each control chart type are provided in the [Control Chart Creation Details](#part2) section.
![](https://github.com/Rmadillo/SCH_R_Training/blob/master/images/control_chart_flowchart.png?raw=true)
<p align="right"><small>[*back to top*](#top)</small></p>
## Tips and tricks for successful control chart use {#tipsandtricks}
- The definition of your control limits depends on the trade-off between sensitivity and specificity for the question at hand. Typical control charts are built on 3σ limits, which provides the optimal trade-off between sensitivity and specificity, that is, between under- and over-alerting to an indication of special cause variation. When you need to err on the side of caution---for example, in patient safety applications---2σ limits may be more appropriate, while understanding that false positives will be higher. If you need to err on the side of certainty, 4-6σ limits may be more useful.
- With fewer than 20 observations, there is an increased chance of missing special cause variation. With more than 30 observations, there's an increased chance of detecting special cause variation that is really just chance. Knowing these outcomes are possible is useful to help facilitate careful thinking when control charts indicate special cause variation.
- Ensure your data values and control limits make sense. For example, if you have proportion data and your control limits fall above 100 or below 0, there's clearly an error somewhere. Ditto with negative counts.
- For raw ordinal data (such as likert scores), do not use means or control limits. Just. Don't. If you must plot a single value, convert to a proportion (e.g., "top box scores") first. However, stacked bar or mosaic charts help visualize this kind of data much better, and can be done in the same amount of space.
- Control charts don't measure "statistical significance"---they are meant to reduce the chances of incorrectly deciding whether a process is in (statistical) control or not. Control limits are *not* confidence limits.
- YTD comparisons don't work because they encourage naive, point-to-point comparisons and ignore natural variation---and can encourage inappropriate knee-jerk reactions. There is never useful information about a process in only one or two data points.
- A control chart should measure one defined process, so you may need to create multiple charts stratified by patient population, unit, medical service, time of day etc. to avoid mixture.
- With very large sample or subgroup sizes, control limits will be too small, and the false positive rate will skyrocket. Use prime charts instead.
<p align="right"><small>[*back to top*](#top)</small></p>
<br/>
<br/>
<hr/>
## Control Chart Creation Details {#part2}
### Count, proportion, or rate data {#attribute}
The majority of healthcare metrics of concern are rates, so the most common control chart is the u-chart.
| If your data involve... | use a ... | based on the ... distribution. |
| ----------------------------- | --------- | ----------------------------- |
| Rates (with unequal opportunity/variable denominator) | *u* chart | Poisson |
| Counts (with equal opportunity/constant denominator) | *c* chart | Poisson |
| Proportions (with unequal opportunity/variable denominator) | *p* chart | binomial |
| Proportions (with equal opportunity/constant denominator) | *np* chart | binomial |
| Rare events / infrequent counts (with equal opportunity/constant denominator) | *g* chart | geometric |
| Time between events | *t* chart | Weibull |
- For count, rate, or proportion data, carefully define your numerator and denominator. Evaluate each separately over time to see whether there are any unusual features or patterns. Sometimes patterns can occur in one or the other, then disappear or are obscured when coalesced into a rate or proportion.
- For count data, prefer u-charts to c-charts. In most cases, we do not have a constant denominator, so c-charts would not be appropriate. Even when we do, using a u-chart helps reduce audience confusion because you are explicitly stating the "per *x*".
- For proportion data, prefer p-charts to np-charts. Again, we almost never have a constant denominator, so np-charts would not be appropriate. Even when we do, using a p-chart helps reduce audience confusion by explicitly stating the "per *x*".
#### *u* chart example
**Mean for rates (*u*):** $u = {\frac{c}{n}}$
**3σ control limits for rates (*u*):** $3\sqrt{\frac{u}{n_i}}$
**Mean for counts (*c*)(not shown):** $c$
**3σ control limits for counts (*c*)(not shown):** $3\sqrt{c}$
<br/>
*Infections per 1000 central line days*
``` {r uex, fig.height = 3.5}
# Generate sample data
# NOTE(review): no set.seed() before sampling, so this example is not
# reproducible across renders (the xbar example below does seed).
linedays <- sample(1000:2000,24)
infections <- rpois(24, 4)
dates <- seq(as.Date("2013/10/1"), by = "month", length.out = 24)
# Calculate u chart inputs
subgroup.u <- dates
# Monthly rate per 1000 central line days.
point.u <- infections / linedays * 1000
# Center line: pooled rate across all months.
mean.u <- sum(infections) / sum(linedays) * 1000
# Poisson-based sigma varies with each month's denominator.
sigma.u <- sqrt(mean.u / linedays * 1000)
# Plot u chart (lcl.min = 0: a rate cannot be negative)
spc.plot(subgroup.u, point.u, mean.u, sigma.u, k = 3, lcl.min = 0,
label.x = "Month", label.y = "Infections per 1000 line days")
```
<br/>
#### *p* chart example
**Mean for proportions (*p*):** $p = {\frac{y}{n}}$
**3σ control limits for proportions (*p*):** $3\sqrt{\frac {p (1 - p)}{n_i}}$
<br/>
*Proportion of patients readmitted*
``` {r pex, fig.height = 3.5}
# Generate sample data
# NOTE(review): no set.seed() before sampling, so this example is not
# reproducible across renders.
discharges <- sample(300:500, 24)
readmits <- rbinom(24, discharges, .2)
dates <- seq(as.Date("2013/10/1"), by = "month", length.out = 24)
# Calculate p chart inputs
subgroup.p <- dates
# Monthly readmission proportion.
point.p <- readmits / discharges
# Center line: pooled proportion across all months.
mean.p <- sum(readmits) / sum(discharges)
# Binomial-based sigma varies with each month's denominator.
sigma.p <- sqrt(mean.p*(1 - mean.p) / discharges)
# Plot p chart (k defaults to 3 in spc.plot)
spc.plot(subgroup.p, point.p, mean.p, sigma.p,
label.x = "Month", label.y = "Proportion readmitted")
```
<br/>
#### *g* chart example
**Mean for infrequent counts (*g*):** $g = {\frac{\Sigma{t}}{n}}$
**3σ limits for infrequent counts (*g*):** $3\sqrt{g (g + 1)}$
<br/>
*Days between infections*
``` {r gex, fig.height = 3.5}
# Generate sample data: 24 random event dates over ~3 years.
# NOTE(review): no set.seed() here, so the example is not reproducible.
# The inner sort() of the sample was redundant: adding a constant date
# preserves order, and the outer sort() orders the result regardless.
dates <- sort(as.Date('2013/01/01') + sample(1:1000, 24))
# Calculate g chart inputs
# One subgroup per gap between consecutive events.
subgroup.g <- seq_len(length(dates) - 1)
point.g <- as.double(dates[-1] - head(dates, -1))
mean.g <- mean(point.g)
# g chart sigma: sqrt(g * (g + 1)), constant across subgroups.
sigma.g <- rep(sqrt(mean.g*(mean.g + 1)), length(point.g))
# Plot g chart (no lower limit shown: days between events cannot be < 0)
spc.plot(subgroup.g, point.g, mean.g, sigma.g, lcl.show = FALSE, lcl.min = 0, k = 3,
         label.x = "Infection number", label.y = "Days between infections")
```
<br/>
**Control limits for time between events (*t*)(not shown):** 2.66$MR_{bar}$
*where*
$t$ = time between events, where *t* is always > 0
$y = t^{0.2777}$
$MR_{bar}$ = average moving range of *y*s, excluding those > 3.27$MR_{bar}$
Note: *t* chart mean and limits can be transformed back to the original scale by raising those values to the 3.6 power. In addition, the y axis can be plotted on a log scale to make the display more symmetrical (which can be easier than explaining how the distribution works to a decision maker).
<p align="right"><small>[*back to top*](#top)</small></p>
### Continuous data {#numeric}
| If your data involve... | use a ... | based on the ... distribution. |
| ----------------------------- | --------- | ----------------------------- |
| Individual points and an average (large process shifts) | *I* chart | normal |
| Variable average (large process shifts) | $\bar{x}$ and *s* chart | normal |
| Exponentially weighted moving average (small process shifts) | EWMA chart | normal |
| Cumulative sum (small process shifts) | CUSUM chart | normal |
- For continuous data, the definition of the control limits will depend on your question and the data at hand. To detect small shifts in the mean quickly, an EWMA is probably best, while to understand natural variation and try to detect special cause variation, an $\bar{x}$ and *s* chart will be more useful.
- In the rare cases you may need an individual chart, do *not* use 3*sd* for the control limits; you must use 2.66$MR_{bar}$ instead to ensure the limits are presented correctly.
Note: EWMA and CUSUM charts aren't "standard" control charts in that the only guideline for detecting special cause variation is a point outside the limits. So while they can't detect special cause variation like control charts, they *can* detect shifts in mean with fewer points than a standard control chart.
<br/>
#### *I* chart example
**Mean($\bar{x}$):** $\bar{x} = \frac{\sum_{x_{i}}}{n}$
**Control limits for normal data (*I*):** 2.66$MR_{bar}$
*where*
$MR_{bar}$ = average moving range of *x*s, excluding those > 3.27$MR_{bar}$
<br/>
*Lab results turnaround time*
```{r imrex, fig.height = 3.5}
# Generate sample data: a single-server queue of 24 lab tests with
# exponential inter-arrival times and normal processing times.
# NOTE(review): no set.seed() here, so the example is not reproducible.
arrival <- cumsum(rexp(24, 1/10))
process <- rnorm(24, 5)
exit <- numeric(length(arrival))
exit[1] <- arrival[1] + process[1]
# Each test starts at the later of its own arrival or the previous
# test's exit (tests are processed one at a time, in order).
for (i in seq_along(arrival)[-1]) {
  exit[i] <- max(arrival[i], exit[i - 1]) + process[i]
}
# Calculate control chart inputs
subgroup.i <- seq_along(exit)
subgroup.mr <- seq_len(length(exit) - 1)
# Turnaround time per test.
point.i <- exit - arrival
# Moving range: |x[i+1] - x[i]|. The original loop iterated over
# `1:length(point.i) - 1`, which by operator precedence is 0:(n-1); the
# i = 0 pass was a silent no-op. diff() computes the same values safely.
point.mr <- abs(diff(point.i))
mean.i <- mean(point.i)
mean.mr <- mean(point.mr)
# I chart sigma is based on the average moving range, not sd(point.i).
sigma.i <- rep(mean.mr, length(subgroup.i))
sigma.mr <- rep(mean.mr, length(subgroup.mr))
# Plot MR chart (UCL at 3.27 * MRbar; no lower limit or bands)
spc.plot(subgroup.mr, point.mr, mean.mr, sigma.mr, k = 3.27,
         lcl.show = FALSE, band.show = FALSE,
         label.x = "Test number", label.y = "Turnaround time (moving range)")
# Plot I chart (limits at mean +/- 2.66 * MRbar, floored at 0)
spc.plot(subgroup.i, point.i, mean.i, sigma.i, k = 2.66,
         lcl.min = 0, band.show = FALSE,
         label.x = "Test number", label.y = "Turnaround time")
```
<br/>
#### $\bar{x}$ and *s* chart example
Control limits (3σ) are calculated as follows:
**Variable averages ($\bar{x}$):** $3\frac{\bar{s}}{\sqrt{n_i}}$
**Variable standard deviation (*s*):** $3\bar{s}\sqrt{1-c_4^2}$
*where* $c_4 = \sqrt{\frac{2}{n-1}}\frac{\Gamma(\frac{n}{2})}{\Gamma(\frac{n-1}{2})}$
<br/>
*Patient wait times*
``` {r xex, fig.height = 3.5}
set.seed(777)
# Generate sample data
# 2350 individual wait times over 24 months; the later 650 draws have a
# slightly lower mean (29.5 vs 30), creating a small process shift.
waits <- c(rnorm(1700, 30, 5), rnorm(650, 29.5, 5))
# Random visit dates over two years, bucketed to the first of each month.
months <- strftime(sort(as.Date('2013-10-01') + sample(0:729, length(waits), TRUE)), "%Y-%m-01")
sample.n <- as.numeric(table(months))
dfw <- data.frame(months, waits)
# Calculate control chart inputs
subgroup.x <- as.Date(unique(months))
subgroup.s <- subgroup.x
# Per-month sample means and standard deviations.
point.x <- aggregate(dfw$waits, by = list(months), FUN=mean, na.rm = TRUE)$x
point.s <- aggregate(dfw$waits, by = list(months), FUN=sd, na.rm = TRUE)$x
mean.x <- mean(waits)
# Pooled standard deviation across months, weighted by each month's n - 1.
mean.s <- sqrt(sum((sample.n - 1)*point.s^2) / (sum(sample.n) - length(sample.n)))
# xbar chart sigma shrinks with each month's sample size.
sigma.x <- mean.s / sqrt(sample.n)
# c4: unbiasing constant for the sample standard deviation (see the
# formula in the text above this chunk).
c4 <- sqrt(2 / (sample.n - 1)) * gamma(sample.n / 2) / gamma((sample.n - 1)/2)
sigma.s <- mean.s * sqrt(1 - c4^2)
# Plot s chart
spc.plot(subgroup.s, point.s, mean.s, sigma.s, k = 3,
label.x = "Month", label.y = "Wait times standard deviation (s)")
# Plot xbar chart
spc.plot(subgroup.x, point.x, mean.x, sigma.x, k = 3,
label.x = "Month", label.y = "Wait times average (x)")
```
<br/>
#### EWMA chart example
**Control limits for exponentially weighted moving average (EWMA):** $3\frac{\bar{s}}{\sqrt{n_i}}\sqrt{\frac{\lambda}{2-\lambda}[1 - (1 - \lambda)^{2i}]}$
*where* $\lambda$ is a weight that determines the influence of past observations. If unsure choose $\lambda = 0.2$, but $0.05 \leq \lambda \leq 0.3$ is acceptable (where larger values give stronger weights to past observations).
<br/>
*Patient wait times (continued)*
``` {r ewmaex, fig.height = 3.5}
# Use generated sample data from xbar chart example
# Calculate control chart inputs
subgroup.z <- subgroup.x
lambda <- 0.2  # weight on the current observation; 0.05 <= lambda <= 0.3
point.z <- numeric(length(point.x))
# EWMA recursion: z_i = lambda * x_i + (1 - lambda) * z_{i-1}, starting
# from z_0 = process mean. (The original set z_1 to the mean itself, which
# ignores x_1; the i = 1 limit width implied by the formula above is
# lambda * sigma / sqrt(n), matching this recursion.)
point.z[1] <- lambda * point.x[1] + (1 - lambda) * mean.x
for (i in 2:length(point.z)) {
  point.z[i] <- lambda * point.x[i] + (1 - lambda) * point.z[i - 1]
}
mean.z <- mean.x
# Control limit width. The exponent is 2i, per the formula stated above;
# the original code used i, making the early limits too narrow.
sigma.z <- (mean.s / sqrt(sample.n)) *
  sqrt(lambda / (2 - lambda) * (1 - (1 - lambda)^(2 * seq_along(point.z))))
# Plot EWMA chart
spc.plot(subgroup.z, point.z, mean.z, sigma.z, k = 3, band.show = FALSE, rule.show = FALSE,
         label.x = "Month", label.y = "Wait times moving average")
```
<br/>
#### CUSUM example
Lower and upper cumulative sums are calculated as follows:
$S_{l,i} = -\max{[0, -z_i - k - S_{l,i-1}]},$
$S_{h,i} = \max{[0, z_i -k + S_{h,i-1}]}$
*where* $z_i$ is the standardized normal score for subgroup $i$ and $0.5 \leq k \leq 1$ is a slack value.
It is common to choose "decision limits" of $\pm 4$ or $\pm 5$.
<br/>
``` {r cusumex, fig.height = 3.5}
# Use generated sample data from xbar chart example
# Calculate control chart inputs
subgroup.cusum <- subgroup.x
slack <- 0.5  # slack value k; 0.5 <= k <= 1 is conventional
# Standardized normal score for each subgroup
zscore <- (point.x - mean.x) / sigma.x
# Lower cumulative sum, stored as negative values so it plots below zero:
# magnitude follows T_i = max(0, -z_i - k + T_{i-1}) with T_i = -S_{l,i}
point.cusuml <- numeric(length(zscore))
point.cusuml[1] <- -max(0, -zscore[1] - slack)
for (i in 2:length(point.cusuml)) {
  point.cusuml[i] <- -max(0, -zscore[i] - slack - point.cusuml[i - 1])
}
# Upper cumulative sum: S_h,i = max(0, z_i - k + S_h,i-1).
# Bug fix: the previous sum must be *added* (the original subtracted
# point.cusumh[i-1], which breaks the CUSUM accumulation and contradicts
# the formula stated above).
point.cusumh <- numeric(length(zscore))
point.cusumh[1] <- max(0, zscore[1] - slack)
for (i in 2:length(point.cusumh)) {
  point.cusumh[i] <- max(0, zscore[i] - slack + point.cusumh[i - 1])
}
mean.cusum <- 0
sigma.cusum <- rep(1, length(subgroup.cusum))
# Plot CUSUM chart: lower sums in the base plot, upper sums overlaid
lower.plot <- spc.plot(subgroup.cusum, point.cusuml, mean.cusum, sigma.cusum, k = 5,
                       band.show = FALSE, rule.show = FALSE, label.y = "Cumulative sum")
lower.plot + geom_line(aes(y = point.cusumh), col = "royalblue3") +
  geom_point(aes(y = point.cusumh), col = "royalblue3")
```
<br/>
<br/>
<hr/>
## Testing assumptions {#testassumptions}
### Trending {#trending}
You can test whether a process is trending first by eye: does it look like it's trending over a large span of the time series? Then it probably is. More technically, you can run a simple linear regression---if the coefficient of the slope is significant, your data is trending.
Because trends can sometimes be an indication of special cause variation in a stable process, control limits don't make sense around long-trending data, and calculation of center lines and control limits will be incorrect. Thus, tests for special causes other than trending will also be invalid.
Use a run chart with an appropriate median instead (e.g., via quantile regression), or evaluate it as you would any regression, via residual diagnostics.
Non-linear change is a special case of trending data, and isn't necessarily autocorrelated, though it can be in cases like seasonality. When it *isn't* autocorrelated, advanced techniques like Generalized Additive Models (GAM) can be used in place of a control chart to provide limits that can assist decision-making. If you were using a control chart when the data began to show trending, restart control limits after the trend has stabilized or switch to using a GAM.
<p align="right"><small>[*back to top*](#top)</small></p>
### Independence {#independence}
For either run or control charts, the data points must be independent for the guidelines to be effective. The first test of that is conceptual---do you expect that one value in this series will influence a subsequent value? For example, the incidence of some hospital-acquired infections can be the result of previous infections---say, if one infection happens at the end of March and another caused by the same organism happens at the start of April, you might suspect that the monthly values would not be independent.
A second test is by calculating the autocorrelation function for the time series. Autocorrelation values over 0.50 generally indicate problems. However, any significant autocorrelation should be considered carefully relative to the cost of potential false positive or false negative signals.
When data are autocorrelated, control limits from the above chart types will be *too small*---and thus an increase in *false* signals of special causes should be expected. In addition, none of the tests for special causes remain valid.
Sometimes, autocorrelation can be removed by changing the sampling or metric's time step: for example, you generally wouldn't expect hospital acquired infection rates in one quarter to influence those in the subsequent quarter.
If you can't change the sampling rate or time step, you shouldn't use either run or control charts, and instead use a standard line chart. If you must have limits to help guide decision-making, you'll need a more advanced technique, such as a Generalized Additive Mixed Model (GAMM) or time series models such as ARIMA.
<p align="right"><small>[*back to top*](#top)</small></p>
### What happens when you get the mean-variance relationship wrong {#meanvar}
Although control charts can sometimes work when you misspecify the mean-variance relationship (they are "robust" to some assumption violations), you won't know unless you explore the differences in implications between the data as-is and that same data transformed to become more in line with the appropriate or expected distribution.
For example, if you use standard normal control limits and sd values in an *I* chart on exponentially-distributed data, you get something like this:
```{r skewy, fig.height=3.5}
set.seed(290)
# Exponentially-distributed process data on a baseline of ~17
sim.df <- data.frame(x = seq_len(120), y = 17 + rexp(120))
# Center line, 3-sigma limits, and 1-2 sigma band edges below were
# precomputed as mean(y) and mean(y) +/- k * sd(y) and hard-coded for
# plotting.
exp_nat_var_cc_plot <- ggplot(sim.df, aes(x, y)) +
  ylim(14.75, 24.75) +
  geom_hline(aes(yintercept = 17.88), color = "gray", size = 1) +
  geom_hline(aes(yintercept = 20.87), color = "red") +
  geom_hline(aes(yintercept = 14.88), color = "red") +
  geom_ribbon(aes(ymin = 18.87, ymax = 19.87), alpha = 0.2) +
  geom_ribbon(aes(ymin = 15.88, ymax = 16.88), alpha = 0.2) +
  xlab("Subgroup") +
  ylab("Value") +
  geom_line() +
  theme_bw()
ggMarginal(exp_nat_var_cc_plot, margins = "y", type = "histogram", binwidth = 0.25)
# Box-Cox transformation to symmetrize the skewed data; the 10^19 factor
# rescales the tiny transformed values to a plottable magnitude
bc.fit <- data.frame(MASS::boxcox(sim.df$y ~ 1, lambda = seq(-20, 5, 0.5), plotit = FALSE))
bc.lambda <- bc.fit[which.max(bc.fit[, 2]), 1]
sim.df$y2 <- (sim.df$y ^ bc.lambda) * 10^19
# Same chart on the transformed scale, with limits precomputed from the
# transformed values.
exp_xform_nat_var_cc_plot <- ggplot(sim.df, aes(x, y2)) +
  ylim(-0.06, 0.31) +
  geom_hline(aes(yintercept = 0.128), color = "gray", size = 1) +
  geom_hline(aes(yintercept = 0.302), color = "red") +
  geom_hline(aes(yintercept = -0.046), color = "red") +
  geom_ribbon(aes(ymin = 0.186, ymax = 0.244), alpha = 0.2) +
  geom_ribbon(aes(ymin = 0.012, ymax = 0.070), alpha = 0.2) +
  xlab("Subgroup") +
  ylab("Transformed Value") +
  geom_line() +
  theme_bw()
```
Clearly something is weird when no points even go below 1 standard deviation. But more importantly, do the points above the upper control limit represent *real* anomalous data points, or are they the result of an improper mean-variance relationship?
Using a Box-Cox transformation (not shown) to make the distribution more symmetrical, we can see that those seemingly out-of-control points are actually well within both control limits, and the variation we see is more in line with (statistical) expectation.
```{r unskewy, fig.height=3.5}
# Marginal histogram of the Box-Cox-transformed chart built in the previous
# chunk; the distribution is now roughly symmetrical
ggMarginal(exp_xform_nat_var_cc_plot, margins="y", type = "histogram", binwidth=0.025)
```
<p align="right"><small>[*back to top*](#top)</small></p>
### When to revise control limits {#reviselimits}
If you need to determine whether an intervention might have worked soon after or even during the improvement process, you shouldn't be using a standard control chart at all. Use a run chart or an EWMA or CUSUM chart to try to detect early shifts.
When you have enough data points after the intervention (12-20), with no other changes to the process, you can "freeze" the median, and/or mean+control limits at the intervention point and recalculate the median and/or mean+limits on the subsequent data. However, by doing so you are *already assuming* that the intervention changed the process. Sometimes that evidence will be there, such as through a variety of signals of special cause variation; other times, that evidence will not be there. If there is no evidence of special cause variation for a while after the intervention, you shouldn't freeze/restart the SPC chart values.
SPC charts are blunt instruments, and are meant to detect obvious *step* changes in a process as simply as possible. Where there is no evidence in SPC charts for a change, more advanced techniques---such as ARIMA models or intervention/changepoint analysis---can be used to assess whether there was a change in the statistical process at or near the intervention point.
Use common sense and avoid the urge to change medians or means+limits for every intervention unless evidence is clear that it worked.
<p align="right"><small>[*back to top*](#top)</small></p>
## Useful References {#useful}
- For more information, a good overview of run charts can be found in Perla et al. 2011, [*The run chart: a simple analytical tool for learning from variation in healthcare processes*](http://www.med.unc.edu/cce/files/education-training/The%20run%20chart%20a%20simple%20analytical%20tool.pdf), BMJ Quality & Safety 20:46-51.
- A straight-to-the-point reference/tool for doing run charts in R is Anhøj 2016, [*Run charts with R*](https://cran.r-project.org/web/packages/qicharts/vignettes/runcharts.html).
- Some good overview papers on control charts include Benneyan et al. 2003, [*Statistical process control as a tool for research and healthcare improvement*](http://qualitysafety.bmj.com/content/12/6/458.full.pdf), BMJ Quality & Safety 12:458-464, and Mohammed et al. 2008, [*Plotting basic control charts: tutorial notes for healthcare practitioners*](https://www.researchgate.net/profile/William_Woodall/publication/5468089_Plotting_control_charts_Tutorial_notes_for_healthcare_practitioners/links/00b49521d1165f1f49000000.pdf), BMJ Quality & Safety 17:137-145. [Wheeler 2010](http://www.qualitydigest.com/inside/quality-insider-column/individual-charts-done-right-and-wrong.html) covers why you shouldn't use 3*sd* for control limits in *I* charts.
- A straight-to-the-point reference/tool for doing control charts in R is Anhøj 2016, [*Control Charts with qicharts for R*](https://cran.r-project.org/web/packages/qicharts/vignettes/controlcharts.html).
- A good basic overview book is Carey and Lloyd 2001, [*Measuring Quality Improvement in Healthcare*](https://www.amazon.com/Measuring-Quality-Improvement-Healthcare-Applications/dp/0527762938/), American Society for Quality.
- A good book that covers both basic and advanced topics is Provost and Murray 2011, [*The Health Care Data Guide*](https://www.amazon.com/Health-Care-Data-Guide-Improvement/dp/0470902582/), Jossey-Bass.
- The papers that discuss the uselessness of the trend test in run and control charts include Davis & Woodall 1988, [*Performance of the control chart trend rule under linear shift*](http://asq.org/qic/display-item/index.pl?item=5597), Journal of Quality Technology 20:260-262, and Anhøj & Olesen 2014, [*Run charts revisited: A simulation study of run chart rules for detection of non-random variation in health care processes*](http://journals.plos.org/plosone/article?id=10.1371/journal.pone.0113825), PLOS One 9(11): e113825.
- Finally, some important warnings about when control charts fail (and a useful alternative, GAMs) can be found in Morton et al. 2009, [*Hospital adverse events and control charts: the need for a new paradigm*](http://www.journalofhospitalinfection.com/article/S0195-6701(09)00340-5/abstract), Journal of Hospital Infection 73(3):225–231, as well as in Morton et al. 2007, [*New control chart methods for monitoring MROs in Hospitals*](https://www.researchgate.net/profile/Edward_Tong2/publication/43477704_New_control_chart_methods_for_monitoring_MROs_in_hospitals/links/5600d22e08aec948c4fa93cd.pdf), Australian Infection Control 12(1):14-18.
- Wikipedia is a good place to start learning about probability distributions and their mean-variance relationships, e.g.,
- [Poisson](https://en.wikipedia.org/wiki/Poisson_distribution)
- [binomial](https://en.wikipedia.org/wiki/Binomial_distribution)
- [normal](https://en.wikipedia.org/wiki/Normal_distribution)
- [geometric](https://en.wikipedia.org/wiki/Geometric_distribution)
- [Weibull](https://en.wikipedia.org/wiki/Weibull_distribution)
- [A gallery of distributions (NIST)](http://www.itl.nist.gov/div898/handbook/eda/section3/eda366.htm)
- [Common probability distributions: the data scientist’s crib sheet (Cloudera)](http://blog.cloudera.com/blog/2015/12/common-probability-distributions-the-data-scientists-crib-sheet/)
<p align="right"><small>[*back to top*](#top)</small></p>
<br/>
<br/>
<hr/>
## Appendix 1: An R function for SPC plots {#appendix1}
The R packages `qcc` and `qicharts` can create ready-to-go control charts from a single line of code. However, neither provides a combination of simplicity and visual cues of special cause variation, so we've written our own function. This function created the graphs in the [examples section](#part2), above.
```{r spccode_display, echo=TRUE, eval=FALSE}
require(ggplot2)
spc.plot <- function(subgroup, point, mean, sigma, k = 3,
                     ucl.show = TRUE, lcl.show = TRUE,
                     band.show = TRUE, rule.show = TRUE,
                     ucl.max = Inf, lcl.min = -Inf,
                     label.x = "Subgroup", label.y = "Value")
{
  # Plots control chart with ggplot
  #
  # Args:
  #   subgroup: Subgroup definition (for x-axis)
  #   point: Subgroup sample values (for y-axis)
  #   mean: Process mean value (for center line). Note: this argument
  #         masks base::mean inside the function body.
  #   sigma: Process variation value (for control limits); a scalar or a
  #          vector with one value per subgroup.
  #   k: Specification for k-sigma limits above and below center line.
  #      Default is 3.
  #   ucl.show: Visible upper control limit? Default is true.
  #   lcl.show: Visible lower control limit? Default is true.
  #   band.show: Visible bands between 1-2 sigma limits? Default is true.
  #   rule.show: Highlight run rule indicators in orange? Default is true.
  #   ucl.max: Maximum feasible value for upper control limit.
  #   lcl.min: Minimum feasible value for lower control limit.
  #   label.x: Specify x-axis label.
  #   label.y: Specify y-axis label.
  #
  # Returns:
  #   A ggplot object. Points beyond the control limits are drawn red,
  #   run-rule signals orange, and all other points blue.
  df <- data.frame(subgroup, point)
  df$ucl <- pmin(ucl.max, mean + k*sigma)
  df$lcl <- pmax(lcl.min, mean - k*sigma)
  # Indices of points where `rule` holds in at least `num` of any `den`
  # consecutive subgroups. Guard: with fewer than `den` points the original
  # expression 1:(length(subgroup) - (den - 1)) counted *downward* and built
  # bogus windows; return no signals instead.
  warn.points <- function(rule, num, den) {
    if (length(subgroup) < den) {
      return(integer(0))
    }
    sets <- mapply(seq, 1:(length(subgroup) - (den - 1)),
                   den:length(subgroup))
    hits <- apply(sets, 2, function(x) sum(rule[x])) >= num
    intersect(c(sets[,hits]), which(rule))
  }
  orange.sigma <- numeric()
  p <- ggplot(data = df, aes(x = subgroup)) +
    geom_hline(yintercept = mean, col = "gray", size = 1)
  if (ucl.show) {
    p <- p + geom_line(aes(y = ucl), col = "gray", size = 1)
  }
  if (lcl.show) {
    p <- p + geom_line(aes(y = lcl), col = "gray", size = 1)
  }
  if (band.show) {
    p <- p +
      geom_ribbon(aes(ymin = mean + sigma,
                      ymax = mean + 2*sigma), alpha = 0.1) +
      geom_ribbon(aes(ymin = pmax(lcl.min, mean - 2*sigma),
                      ymax = mean - sigma), alpha = 0.1)
    # Zone rules: 4 of 5 beyond 1 sigma, 2 of 3 beyond 2 sigma
    orange.sigma <- unique(c(
      warn.points(point > mean + sigma, 4, 5),
      warn.points(point < mean - sigma, 4, 5),
      warn.points(point > mean + 2*sigma, 2, 3),
      warn.points(point < mean - 2*sigma, 2, 3)
    ))
  }
  df$warn <- "blue"
  if (rule.show) {
    # 15 consecutive points hugging the center line (within 1 sigma)
    orange <- c(orange.sigma,
                warn.points(point > mean - sigma & point < mean + sigma, 15, 15))
    # Shift-rule run length scales with the number of points off the center
    # line. Guard: if every point equals the mean, log(0, 2) = -Inf and the
    # original code errored; skip the shift rules in that degenerate case.
    if (sum(point != mean) > 0) {
      shift.n <- round(log(sum(point != mean), 2) + 3)
      orange <- c(orange,
                  warn.points(point > mean, shift.n, shift.n),
                  warn.points(point < mean, shift.n, shift.n))
    }
    df$warn[unique(orange)] <- "orange"
  }
  df$warn[point > df$ucl | point < df$lcl] <- "red"
  p +
    geom_line(aes(y = point), col = "royalblue3") +
    geom_point(data = df, aes(x = subgroup, y = point, col = warn)) +
    scale_color_manual(values = c("blue" = "royalblue3", "orange" = "orangered", "red" = "red3"), guide = FALSE) +
    labs(x = label.x, y = label.y) +
    theme_bw()
}
#### Example
# Generate sample data: 24 months of central line days and infection counts
n.months <- 24
linedays <- sample(1000:2000, n.months)
infections <- rpois(n.months, 4)
dates <- seq(as.Date("2013/10/1"), by = "month", length.out = n.months)
# Calculate u chart inputs (infection rate per 1000 line days)
per.days <- 1000
subgroup.u <- dates
point.u <- infections / linedays * per.days
mean.u <- sum(infections) / sum(linedays) * per.days
sigma.u <- sqrt(mean.u / linedays * per.days)
# Plot u chart
spc.plot(subgroup.u, point.u, mean.u, sigma.u, k = 3, lcl.min = 0,
         label.x = "Month", label.y = "Infections per 1000 line days")
```
<p align="right"><small>[*back to top*](#top)</small></p>
##### End of File #####