-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathAlt Ed 3 - Follow-up dataset construction.sql
1887 lines (1722 loc) · 58.4 KB
/
Alt Ed 3 - Follow-up dataset construction.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
/*
Alt ed evaluation
This code takes the sandpit tables generated by the R PSM process as inputs, and adds on later life outcomes for each person in our sample.
The result is a series of panel tables saved back to the sandpit, which are then analysed in the fourth piece of R code.
Note that this references tables created in steps 1 and 2 (some sandpit tables and some temp tables saved as '##name'), and so those pieces of code need to be run first.
*/
/***********************************************************************************************************
***************************** Stuart McNaughton robustness
***********************************************************************************************************/
/* This is a method for robustness suggested by Stuart McNaughton (education Chief Scientific Advisor).
We might be unintentionally pulling our comparison group from a pool of people who are systematically
less disadvantaged than the treatment group. Ther logic of this method is to make another draw and observe
outcomes for all three groups (treatment T, control draw 1 C1 and control draw 2 C2):
-If C1 outcomes are equivalent to C2 outcomes (and so implied effect sizes are the same), this would point towards
both C1 and C2 being robust comparisons - we should expect no difference between C1 and C2, so if we find no difference,
this gives us a good placebo test.
-If C2 outcomes are systematically better than C1 outcomes, then there is evidence of omitted variable bias between
C1 and C2, and so we should expect there to also be omitted variable bias between C1 and T.
*/
/* Finding all the people who are in (any of) the control groups */
drop table if exists #c1_controls
select distinct snz_uid into #c1_controls from
(select * from [IDI_Sandpit].[DL-MAA2021-60].[ae_202210_psm_age12_output] where alted_during=0
union
select * from [IDI_Sandpit].[DL-MAA2021-60].[ae_202210_psm_age13_output] where alted_during=0
union
select * from [IDI_Sandpit].[DL-MAA2021-60].[ae_202210_psm_age14_output] where alted_during=0
union
select * from [IDI_Sandpit].[DL-MAA2021-60].[ae_202210_psm_age15_output] where alted_during=0) a
/* Reconstructing tables for matching that exclude all of the C1 students */
drop table if exists [IDI_Sandpit].[DL-MAA2021-60].[ae_202210_psm_age12_sm_robust]
select a.*
into [IDI_Sandpit].[DL-MAA2021-60].[ae_202210_psm_age12_sm_robust]
from [IDI_Sandpit].[DL-MAA2021-60].[ae_202210_psm_age12_input] a
left join #c1_controls b on a.snz_uid=b.snz_uid
where b.snz_uid is null
drop table if exists [IDI_Sandpit].[DL-MAA2021-60].[ae_202210_psm_age13_sm_robust]
select a.*
into [IDI_Sandpit].[DL-MAA2021-60].[ae_202210_psm_age13_sm_robust]
from [IDI_Sandpit].[DL-MAA2021-60].[ae_202210_psm_age13_input] a
left join #c1_controls b on a.snz_uid=b.snz_uid
where b.snz_uid is null
drop table if exists [IDI_Sandpit].[DL-MAA2021-60].[ae_202210_psm_age14_sm_robust]
select a.*
into [IDI_Sandpit].[DL-MAA2021-60].[ae_202210_psm_age14_sm_robust]
from [IDI_Sandpit].[DL-MAA2021-60].[ae_202210_psm_age14_input] a
left join #c1_controls b on a.snz_uid=b.snz_uid
where b.snz_uid is null
drop table if exists [IDI_Sandpit].[DL-MAA2021-60].[ae_202210_psm_age15_sm_robust]
select a.*
into [IDI_Sandpit].[DL-MAA2021-60].[ae_202210_psm_age15_sm_robust]
from [IDI_Sandpit].[DL-MAA2021-60].[ae_202210_psm_age15_input] a
left join #c1_controls b on a.snz_uid=b.snz_uid
where b.snz_uid is null
/* Repeat process for attendance matching process */
drop table if exists #c1_controls_attendance
select distinct snz_uid into #c1_controls_attendance from
(select * from [IDI_Sandpit].[DL-MAA2021-60].[ae_202210_psm_age12_output_attendance] where alted_during=0
union
select * from [IDI_Sandpit].[DL-MAA2021-60].[ae_202210_psm_age13_output_attendance] where alted_during=0
union
select * from [IDI_Sandpit].[DL-MAA2021-60].[ae_202210_psm_age14_output_attendance] where alted_during=0
union
select * from [IDI_Sandpit].[DL-MAA2021-60].[ae_202210_psm_age15_output_attendance] where alted_during=0) a
/* Reconstructing tables for matching that exclude all of the C1 students */
drop table if exists [IDI_Sandpit].[DL-MAA2021-60].[ae_202210_psm_age12_sm_attendance]
select a.*
into [IDI_Sandpit].[DL-MAA2021-60].[ae_202210_psm_age12_sm_attendance]
from [IDI_Sandpit].[DL-MAA2021-60].[ae_202210_psm_age12_input] a
left join #c1_controls_attendance b on a.snz_uid=b.snz_uid
where b.snz_uid is null
drop table if exists [IDI_Sandpit].[DL-MAA2021-60].[ae_202210_psm_age13_sm_attendance]
select a.*
into [IDI_Sandpit].[DL-MAA2021-60].[ae_202210_psm_age13_sm_attendance]
from [IDI_Sandpit].[DL-MAA2021-60].[ae_202210_psm_age13_input] a
left join #c1_controls_attendance b on a.snz_uid=b.snz_uid
where b.snz_uid is null
drop table if exists [IDI_Sandpit].[DL-MAA2021-60].[ae_202210_psm_age14_sm_attendance]
select a.*
into [IDI_Sandpit].[DL-MAA2021-60].[ae_202210_psm_age14_sm_attendance]
from [IDI_Sandpit].[DL-MAA2021-60].[ae_202210_psm_age14_input] a
left join #c1_controls_attendance b on a.snz_uid=b.snz_uid
where b.snz_uid is null
drop table if exists [IDI_Sandpit].[DL-MAA2021-60].[ae_202210_psm_age15_sm_attendance]
select a.*
into [IDI_Sandpit].[DL-MAA2021-60].[ae_202210_psm_age15_sm_attendance]
from [IDI_Sandpit].[DL-MAA2021-60].[ae_202210_psm_age15_input] a
left join #c1_controls_attendance b on a.snz_uid=b.snz_uid
where b.snz_uid is null
/***********************************************************************************************************
***************************** Construct follow-up tables
***********************************************************************************************************/
/* Create structure for follow up table - by person by year, for each year of life 17-30
NB many of these rows will be in the future - will truncate later */
IF OBJECT_ID('tempdb..#pop_age_blank') IS NOT NULL DROP TABLE #pop_age_blank
create table #pop_age_blank (
snz_uid int
,birth_year int
,age int
,year int)
GO
CREATE PROCEDURE #construct
AS
BEGIN
declare @start_age int =17
declare @end_age int=30
while @start_age <= @end_age
begin
insert into #pop_age_blank(snz_uid,birth_year,age,year)
select a.snz_uid
, a.birth_year
, @start_age as age
, @start_age+a.birth_year as year
from [IDI_Sandpit].[DL-MAA2021-60].[ae_202210_pop] a
order by a.snz_uid
-- back to month loop
set @start_age =@start_age +1
print @start_age
-- back to year loop
end
/* end of loop*/
END
GO
EXECUTE #construct
GO
DROP PROCEDURE #construct
GO
/* Extract school leavers data with row number so I can later take the latest entry per student */
drop table if exists #leavers
select snz_uid
, moe_sl_leaver_year as school_leaving_year
, moe_sl_leaver_age as school_leaving_age
, moe_sl_leaving_yr_lvl
, moe_sl_provider_code as school_leaving_providerid
, moe_sl_highest_attain_code as school_leaving_attainment
, moe_sl_ue_entrance_code as ue
, row_number() over(partition by snz_uid order by moe_sl_leaver_year desc) as rn
into #leavers
from [IDI_Clean_202210].[moe_clean].[student_leavers]
/* Identify last school enrolment per person so we can top up school leaver data where missing, if necessary */
drop table if exists #last_enrolment_spell
select a.*
, b.ReasonForLeavingDescription as leave_reason
, c.PostSchoolActivityDescription as post_school_activity
into #last_enrolment_spell
from ##school_spells a
left join [IDI_Metadata_202210].[moe_school].[rsn_for_leaving_code] b
on a.moe_esi_leave_rsn_code=b.ReasonForLeavingId
left join [IDI_Metadata_202210].[moe_school].[postschool_activity_code] c
on a.moe_esi_post_sch_act_code=c.PostSchoolActivityId
where a.rn_reversed=1
drop table if exists #followup_1
select a.*
, case when b.death_year is not null then 1 else 0 end as died
, case when c.snz_uid is not null then 1 else 0 end as in_nz
into #followup_1
from #pop_age_blank a
left join [IDI_Sandpit].[DL-MAA2021-60].[ae_202210_pop] b
on a.snz_uid=b.snz_uid and b.death_year<=a.year
left join [IDI_Clean_202210].[data].[apc_time_series] c
on a.snz_uid=c.snz_uid and a.year=c.apc_ref_year_nbr
drop table if exists #followup_2
select a.snz_uid
, a.year
, d.school_leaving_year
, d.school_leaving_age
, d.school_leaving_providerid
, d.school_leaving_attainment
, d.ue
, year(p.moe_esi_end_date) as enrol_school_leaving_year
, p.leave_reason as enrol_leave_reason
, p.post_school_activity as enrol_post_school_activity
, p.moe_esi_provider_code as enrol_providerid
into #followup_2
from #pop_age_blank a
left join (select * from #leavers where rn=1) d
on a.snz_uid=d.snz_uid and d.school_leaving_year<=a.year
left join #last_enrolment_spell p
on a.snz_uid=p.snz_uid and year(p.moe_esi_end_date)<=a.year
select year, case when school_leaving_year is null then 0 else 1 end as leavers, case when enrol_school_leaving_year is null then 0 else 1 end as enrol, count(distinct snz_uid)
from #followup_2
group by year, case when school_leaving_year is null then 0 else 1 end, case when enrol_school_leaving_year is null then 0 else 1 end
drop table if exists #followup_3
select a.snz_uid
, a.year
, max(case when e.snz_uid is not null then 1 else 0 end) as tertiary_enrolment
, max(e.moe_enr_provider_code) as tertiary_providerid
, max(case when f.snz_uid is not null then 1 else 0 end) as ito_enrolment
, max(f.moe_itl_ito_edumis_id_code) as ito_providerid
, max(g.qual) as max_qual_tertiary
into #followup_3
from #pop_age_blank a
left join [IDI_Clean_202210].[moe_clean].[enrolment] e -- David Earle @ MoE advises this is the best table for tertiary enrolments (non industry training)
on a.snz_uid=e.snz_uid and a.year=e.moe_enr_year_nbr
left join [IDI_Clean_202210].[moe_clean].[tec_it_learner] f -- Adding on industry training
on a.snz_uid=f.snz_uid and year(f.moe_itl_start_date)<=a.year and year(f.moe_itl_end_date)>=a.year
left join ##parent_quals g
on a.snz_uid=g.snz_uid and g.year<=a.year
where e.moe_enr_qual_type_code='D' --Specifying this enrolment type takes out learning that does not lead to formal quals, as advised by David Earle @ MoE
group by a.snz_uid, a.year
drop table if exists #followup_4
select a.snz_uid
, a.year
, cast(h.[inc_cal_yr_sum_all_srces_tot_amt]/h.cpi_2017*1000 as bigint) as income_total
, cast(h.[inc_cal_yr_sum_WAS_tot_amt]/h.cpi_2017*1000 as bigint) as income_wages
, cast(h.[inc_cal_yr_sum_BEN_tot_amt]/h.cpi_2017*1000 as bigint) as income_benefit
into #followup_4
from #pop_age_blank a
left join #parent_income h
on a.snz_uid=h.snz_uid and a.year=h.year
drop table if exists #followup_5
select a.snz_uid
, a.year
, max(case when i.snz_uid is not null then 1 else 0 end) as offender_any
, max(case when left(i.[pol_poo_anzsoc_offence_code],2) in ('01','02','03','04','05','06') then 1 else 0 end) as offender_violent
, max(case when j.snz_uid is not null then 1 else 0 end) as victim_any
, max(case when left(j.[pol_pov_anzsoc_offence_code],2) in ('01','02','03','04','05','06') then 1 else 0 end) as victim_violent
, max(case when k.cor_rommp_directive_type in ('Imprisonment','Remand','Home Detention') then 1 else 0 end) as custodial_sentence
, max(case when k.cor_rommp_directive_type in ('Extended Supervision Order','Parole','Released on Conditions','Post Detention Conditions','Intensive Supervision'
,'Community Detention','Supervision','Community Work','Periodic Detention','Community Programme','Community Service'
,'Electronically Monitored Bail','Extended Supervision Order (Interim)','Returning Offender Order') then 1 else 0 end) as community_sentence
into #followup_5
from #pop_age_blank a
left join [IDI_Clean_202210].[pol_clean].[post_count_offenders] i
on a.snz_uid=i.snz_uid and i.pol_poo_year_nbr=a.year
left join [IDI_Clean_202210].[pol_clean].[post_count_victimisations] j
on a.snz_uid=j.snz_uid and j.pol_pov_year_nbr=a.year
left join [IDI_Clean_202210].[cor_clean].[ra_ofndr_major_mgmt_period_a] k
on a.snz_uid=k.snz_uid and year(k.cor_rommp_period_start_date)<=a.year and year(k.cor_rommp_period_end_date)>=a.year
group by a.snz_uid, a.year
drop table if exists #followup_6
select a.snz_uid
, a.year
, l.pho as pho_enrolment
, m.ed as ed_admissions_n
, n.gpc as gp_contacts_n
, o.ash as ash_n
into #followup_6
from #pop_age_blank a
left join [IDI_Sandpit].[DL-MAA2021-60].[ae_202210_pho_enrolment] l
on a.snz_uid=l.snz_uid and a.year=l.year
left join [IDI_Sandpit].[DL-MAA2021-60].[ae_202210_ed_attendance] m
on a.snz_uid=m.snz_uid and a.year=m.year
left join [IDI_Sandpit].[DL-MAA2021-60].[ae_202210_gpc] n
on a.snz_uid=n.snz_uid and a.year=n.year
left join [IDI_Sandpit].[DL-MAA2021-60].ae_202210_ash_hosps o
on a.snz_uid=o.snz_uid and a.year=o.year
/* Identifying the school enrolled in at age 15. This is so we can split treatment effects by region */
drop table if exists #school_at_15
select a.snz_uid
, max(b.rn) as enrol_rn
, min(c.srr_providernumber) as srr_providernumber
into #school_at_15
from [IDI_Sandpit].[DL-MAA2021-60].[ae_202210_pop] a
left join ##school_spells b
on a.snz_uid=b.snz_uid and year(b.moe_esi_start_date)<=a.birth_year+15 and year(b.moe_esi_end_date)>=a.birth_year+15
left join ##srr c
on a.snz_moe_uid=c.snz_moe_uid and c.year=a.birth_year+15
group by a.snz_uid
drop table if exists #school_at_15_2
select a.snz_uid
, case when a.srr_providernumber is not null then a.srr_providernumber
when a.enrol_rn is not null then b.moe_esi_provider_code
else null end as provider_at_15
into #school_at_15_2
from #school_at_15 a
left join ##school_spells b
on a.snz_uid=b.snz_uid and a.enrol_rn=b.rn
drop table if exists #school_at_15_3
select a.snz_uid
, a.provider_at_15
, b.providertypeid as provider_at_15_typeid
, c.ProviderType as provider_at_15_type
, b.decilecode as provider_at_15_decile
, b.schoolregionid as provider_at_15_regionid
, d.SchoolRegion as provider_at_15_region
into #school_at_15_3
from #school_at_15_2 a
left join [IDI_Metadata].[clean_read_CLASSIFICATIONS].[moe_Provider_Profile_20190830] b
on a.provider_at_15=b.ProviderNumber
left join [IDI_Metadata_202210].[moe_school].[provider_type_code] c
on b.ProviderTypeId=c.ProviderTypeId
left join [IDI_Metadata_202210].[moe_school].[sch_region_code] d
on b.SchoolRegionId=d.SchoolRegionID
drop table if exists #followup_combined
select a.*
, f.provider_at_15
, f.provider_at_15_type
, f.provider_at_15_decile
, f.provider_at_15_region
, b.school_leaving_year
, b.school_leaving_age
, b.school_leaving_attainment
, case when b.school_leaving_attainment in (43,40) then 4
when b.school_leaving_attainment in (37,36,35,34,33,62,72,82,92) then 3
when b.school_leaving_attainment in (56,27,26,25,24,4,61,71,81,91) then 2
when b.school_leaving_attainment in (55,17,16,15,14,13,60,70,80,90) then 1
when b.school_leaving_attainment is null then null
else 0 end as ncea_equivalent
, case when b.school_leaving_attainment in (43,40,37,36,35,34,33,62,72,82,92,56,27,26,25,24,4,61,71,81,91,55,17,16,15,14,13,60,70,80,90) then 1
when b.school_leaving_attainment is null then null
else 0 end as ncea_1
, case when b.school_leaving_attainment in (43,40,37,36,35,34,33,62,72,82,92,56,27,26,25,24,4,61,71,81,91) then 1
when b.school_leaving_attainment is null then null
else 0 end as ncea_2
, case when b.school_leaving_attainment in (43,40,37,36,35,34,33,62,72,82,92) then 1
when b.school_leaving_attainment is null then null
else 0 end as ncea_3
, case when b.ue='Y' then 1
when b.ue='N' then 0 else null end as ue
, b.school_leaving_providerid
, b.enrol_school_leaving_year
, b.enrol_leave_reason
, b.enrol_post_school_activity
, b.enrol_providerid
, case when b.school_leaving_year is null then
case when b.enrol_school_leaving_year is null then 1 else 0 end else 0 end as at_school
, case when b.school_leaving_year is null then b.enrol_school_leaving_year else b.school_leaving_year end as combined_school_leaving_year
, case when b.school_leaving_year is null then b.enrol_providerid else b.school_leaving_providerid end as combined_providerid
, c.tertiary_enrolment
, c.tertiary_providerid
, c.ito_enrolment
, c.ito_providerid
, case when c.tertiary_enrolment=1 or c.ito_enrolment=1 then 1 else 0 end as any_tertiary_enrolment
, case when c.tertiary_enrolment=1 then c.tertiary_providerid else c.ito_providerid end as any_tertiary_providerid
, c.max_qual_tertiary
, d.income_benefit
, d.income_wages
, d.income_total
, case when d.income_wages is not null and d.income_wages>0 then 1 else 0 end as any_wages
, case when d.income_benefit is not null and d.income_benefit>0 then 1 else 0 end as any_benefit
, e.offender_any
, e.offender_violent
, e.victim_any
, e.victim_violent
, e.community_sentence
, e.custodial_sentence
, g.pho_enrolment
, g.ed_admissions_n
, g.gp_contacts_n
, g.ash_n
into #followup_combined
from #followup_1 a
left join #followup_2 b
on a.snz_uid=b.snz_uid and a.year=b.year
left join #followup_3 c
on a.snz_uid=c.snz_uid and a.year=c.year
left join #followup_4 d
on a.snz_uid=d.snz_uid and a.year=d.year
left join #followup_5 e
on a.snz_uid=e.snz_uid and a.year=e.year
left join #school_at_15_3 f
on a.snz_uid=f.snz_uid
left join #followup_6 g
on a.snz_uid=g.snz_uid and a.year=g.year
where a.year<=2021 and a.year>=2009
/* Make sure within each year, there are not duplicate learners */
select year, count(snz_uid) as n_rows, count(distinct snz_uid) as n_learners from #followup_combined
group by year order by year
/* Save follow-up table (for total population) to sandpit to allow for comparison to the whole population */
drop table if exists [IDI_Sandpit].[DL-MAA2021-60].[ae_202210_followup_totalpop]
select * into [IDI_Sandpit].[DL-MAA2021-60].[ae_202210_followup_totalpop] from #followup_combined
/***********************************************************************************************************
***************************** Subset tables and save for matched learners only
***********************************************************************************************************/
drop table if exists #followup_matched_12
select a.*
, case when a.distance<0.1 then 'phat lt 0.1'
when a.distance>=0.1 and a.distance<0.3 then 'phat 0.1_0.3'
when a.distance>=0.3 and a.distance<0.5 then 'phat 0.3_0.5'
when a.distance>=0.5 and a.distance<0.7 then 'phat 0.5_0.7'
when a.distance>=0.7 then 'phat gt 0.7'
else 'Unknown' end as propensity_bin
, b.year
, b.age
, b.died
, b.in_nz
, b.provider_at_15
, b.provider_at_15_type
, b.provider_at_15_decile
, case when b.provider_at_15_region is null then 'Unknown' else b.provider_at_15_region end as provider_at_15_region
, b.school_leaving_year
, b.school_leaving_age
, b.school_leaving_attainment
, b.ncea_equivalent
, b.ncea_1
, b.ncea_2
, b.ncea_3
, b.ue
, b.school_leaving_providerid
, b.enrol_school_leaving_year
, b.enrol_leave_reason
, b.enrol_post_school_activity
, b.enrol_providerid
, b.at_school
, b.combined_school_leaving_year
, b.combined_providerid
, b.tertiary_enrolment
, b.tertiary_providerid
, b.ito_enrolment
, b.ito_providerid
, b.any_tertiary_enrolment
, b.any_tertiary_providerid
, b.max_qual_tertiary
, b.income_benefit
, b.income_wages
, b.income_total
, b.any_wages
, b.any_benefit
, b.offender_any
, b.offender_violent
, b.victim_any
, b.victim_violent
, b.community_sentence
, b.custodial_sentence
, b.pho_enrolment
, b.ed_admissions_n
, b.gp_contacts_n
, b.ash_n
into #followup_matched_12
from [IDI_Sandpit].[DL-MAA2021-60].[ae_202210_psm_age12_output] a
inner join #followup_combined b
on a.snz_uid=b.snz_uid
drop table if exists #followup_matched_13
select a.*
, case when a.distance<0.1 then 'phat lt 0.1'
when a.distance>=0.1 and a.distance<0.3 then 'phat 0.1_0.3'
when a.distance>=0.3 and a.distance<0.5 then 'phat 0.3_0.5'
when a.distance>=0.5 and a.distance<0.7 then 'phat 0.5_0.7'
when a.distance>=0.7 then 'phat gt 0.7'
else 'Unknown' end as propensity_bin
, b.year
, b.age
, b.died
, b.in_nz
, b.provider_at_15
, b.provider_at_15_type
, b.provider_at_15_decile
, case when b.provider_at_15_region is null then 'Unknown' else b.provider_at_15_region end as provider_at_15_region
, b.school_leaving_year
, b.school_leaving_age
, b.school_leaving_attainment
, b.ncea_equivalent
, b.ncea_1
, b.ncea_2
, b.ncea_3
, b.ue
, b.school_leaving_providerid
, b.enrol_school_leaving_year
, b.enrol_leave_reason
, b.enrol_post_school_activity
, b.enrol_providerid
, b.at_school
, b.combined_school_leaving_year
, b.combined_providerid
, b.tertiary_enrolment
, b.tertiary_providerid
, b.ito_enrolment
, b.ito_providerid
, b.any_tertiary_enrolment
, b.any_tertiary_providerid
, b.max_qual_tertiary
, b.income_benefit
, b.income_wages
, b.income_total
, b.any_wages
, b.any_benefit
, b.offender_any
, b.offender_violent
, b.victim_any
, b.victim_violent
, b.community_sentence
, b.custodial_sentence
, b.pho_enrolment
, b.ed_admissions_n
, b.gp_contacts_n
, b.ash_n
into #followup_matched_13
from [IDI_Sandpit].[DL-MAA2021-60].[ae_202210_psm_age13_output] a
inner join #followup_combined b
on a.snz_uid=b.snz_uid
drop table if exists #followup_matched_14
select a.*
, case when a.distance<0.1 then 'phat lt 0.1'
when a.distance>=0.1 and a.distance<0.3 then 'phat 0.1_0.3'
when a.distance>=0.3 and a.distance<0.5 then 'phat 0.3_0.5'
when a.distance>=0.5 and a.distance<0.7 then 'phat 0.5_0.7'
when a.distance>=0.7 then 'phat gt 0.7'
else 'Unknown' end as propensity_bin
, b.year
, b.age
, b.died
, b.in_nz
, b.provider_at_15
, b.provider_at_15_type
, b.provider_at_15_decile
, case when b.provider_at_15_region is null then 'Unknown' else b.provider_at_15_region end as provider_at_15_region
, b.school_leaving_year
, b.school_leaving_age
, b.school_leaving_attainment
, b.ncea_equivalent
, b.ncea_1
, b.ncea_2
, b.ncea_3
, b.ue
, b.school_leaving_providerid
, b.enrol_school_leaving_year
, b.enrol_leave_reason
, b.enrol_post_school_activity
, b.enrol_providerid
, b.at_school
, b.combined_school_leaving_year
, b.combined_providerid
, b.tertiary_enrolment
, b.tertiary_providerid
, b.ito_enrolment
, b.ito_providerid
, b.any_tertiary_enrolment
, b.any_tertiary_providerid
, b.max_qual_tertiary
, b.income_benefit
, b.income_wages
, b.income_total
, b.any_wages
, b.any_benefit
, b.offender_any
, b.offender_violent
, b.victim_any
, b.victim_violent
, b.community_sentence
, b.custodial_sentence
, b.pho_enrolment
, b.ed_admissions_n
, b.gp_contacts_n
, b.ash_n
into #followup_matched_14
from [IDI_Sandpit].[DL-MAA2021-60].[ae_202210_psm_age14_output] a
inner join #followup_combined b
on a.snz_uid=b.snz_uid
drop table if exists #followup_matched_15
select a.*
, case when a.distance<0.1 then 'phat lt 0.1'
when a.distance>=0.1 and a.distance<0.3 then 'phat 0.1_0.3'
when a.distance>=0.3 and a.distance<0.5 then 'phat 0.3_0.5'
when a.distance>=0.5 and a.distance<0.7 then 'phat 0.5_0.7'
when a.distance>=0.7 then 'phat gt 0.7'
else 'Unknown' end as propensity_bin
, b.year
, b.age
, b.died
, b.in_nz
, b.provider_at_15
, b.provider_at_15_type
, b.provider_at_15_decile
, case when b.provider_at_15_region is null then 'Unknown' else b.provider_at_15_region end as provider_at_15_region
, b.school_leaving_year
, b.school_leaving_age
, b.school_leaving_attainment
, b.ncea_equivalent
, b.ncea_1
, b.ncea_2
, b.ncea_3
, b.ue
, b.school_leaving_providerid
, b.enrol_school_leaving_year
, b.enrol_leave_reason
, b.enrol_post_school_activity
, b.enrol_providerid
, b.at_school
, b.combined_school_leaving_year
, b.combined_providerid
, b.tertiary_enrolment
, b.tertiary_providerid
, b.ito_enrolment
, b.ito_providerid
, b.any_tertiary_enrolment
, b.any_tertiary_providerid
, b.max_qual_tertiary
, b.income_benefit
, b.income_wages
, b.income_total
, b.any_wages
, b.any_benefit
, b.offender_any
, b.offender_violent
, b.victim_any
, b.victim_violent
, b.community_sentence
, b.custodial_sentence
, b.pho_enrolment
, b.ed_admissions_n
, b.gp_contacts_n
, b.ash_n
into #followup_matched_15
from [IDI_Sandpit].[DL-MAA2021-60].[ae_202210_psm_age15_output] a
inner join #followup_combined b
on a.snz_uid=b.snz_uid
/* Combining follow up tables
Note that because some learners are shared controls between multiple models, they will show up several times in the combined table */
drop table if exists #followup_matched_combined
select * into #followup_matched_combined
from
(select 'Age 12' as matching_model, * from #followup_matched_12
union
select 'Age 13' as matching_model, * from #followup_matched_13
union
select 'Age 14' as matching_model, * from #followup_matched_14
union
select 'Age 15' as matching_model, * from #followup_matched_15) a
/* Repeat process for sensitivity analysis datasets - _attendance and _exact
See R code for explanations of the differences between these */
drop table if exists #followup_matched_12_attendance
select a.*
, case when a.distance<0.1 then 'phat lt 0.1'
when a.distance>=0.1 and a.distance<0.3 then 'phat 0.1_0.3'
when a.distance>=0.3 and a.distance<0.5 then 'phat 0.3_0.5'
when a.distance>=0.5 and a.distance<0.7 then 'phat 0.5_0.7'
when a.distance>=0.7 then 'phat gt 0.7'
else 'Unknown' end as propensity_bin
, b.year
, b.age
, b.died
, b.in_nz
, b.provider_at_15
, b.provider_at_15_type
, b.provider_at_15_decile
, b.provider_at_15_region
, b.school_leaving_year
, b.school_leaving_age
, b.school_leaving_attainment
, b.ncea_equivalent
, b.ncea_1
, b.ncea_2
, b.ncea_3
, b.ue
, b.school_leaving_providerid
, b.enrol_school_leaving_year
, b.enrol_leave_reason
, b.enrol_post_school_activity
, b.enrol_providerid
, b.at_school
, b.combined_school_leaving_year
, b.combined_providerid
, b.tertiary_enrolment
, b.tertiary_providerid
, b.ito_enrolment
, b.ito_providerid
, b.any_tertiary_enrolment
, b.any_tertiary_providerid
, b.max_qual_tertiary
, b.income_benefit
, b.income_wages
, b.income_total
, b.any_wages
, b.any_benefit
, b.offender_any
, b.offender_violent
, b.victim_any
, b.victim_violent
, b.community_sentence
, b.custodial_sentence
, b.pho_enrolment
, b.ed_admissions_n
, b.gp_contacts_n
, b.ash_n
into #followup_matched_12_attendance
from [IDI_Sandpit].[DL-MAA2021-60].[ae_202210_psm_age12_output_attendance] a
inner join #followup_combined b
on a.snz_uid=b.snz_uid
drop table if exists #followup_matched_13_attendance
select a.*
, case when a.distance<0.1 then 'phat lt 0.1'
when a.distance>=0.1 and a.distance<0.3 then 'phat 0.1_0.3'
when a.distance>=0.3 and a.distance<0.5 then 'phat 0.3_0.5'
when a.distance>=0.5 and a.distance<0.7 then 'phat 0.5_0.7'
when a.distance>=0.7 then 'phat gt 0.7'
else 'Unknown' end as propensity_bin
, b.year
, b.age
, b.died
, b.in_nz
, b.provider_at_15
, b.provider_at_15_type
, b.provider_at_15_decile
, b.provider_at_15_region
, b.school_leaving_year
, b.school_leaving_age
, b.school_leaving_attainment
, b.ncea_equivalent
, b.ncea_1
, b.ncea_2
, b.ncea_3
, b.ue
, b.school_leaving_providerid
, b.enrol_school_leaving_year
, b.enrol_leave_reason
, b.enrol_post_school_activity
, b.enrol_providerid
, b.at_school
, b.combined_school_leaving_year
, b.combined_providerid
, b.tertiary_enrolment
, b.tertiary_providerid
, b.ito_enrolment
, b.ito_providerid
, b.any_tertiary_enrolment
, b.any_tertiary_providerid
, b.max_qual_tertiary
, b.income_benefit
, b.income_wages
, b.income_total
, b.any_wages
, b.any_benefit
, b.offender_any
, b.offender_violent
, b.victim_any
, b.victim_violent
, b.community_sentence
, b.custodial_sentence
, b.pho_enrolment
, b.ed_admissions_n
, b.gp_contacts_n
, b.ash_n
into #followup_matched_13_attendance
from [IDI_Sandpit].[DL-MAA2021-60].[ae_202210_psm_age13_output_attendance] a
inner join #followup_combined b
on a.snz_uid=b.snz_uid
drop table if exists #followup_matched_14_attendance
select a.*
, case when a.distance<0.1 then 'phat lt 0.1'
when a.distance>=0.1 and a.distance<0.3 then 'phat 0.1_0.3'
when a.distance>=0.3 and a.distance<0.5 then 'phat 0.3_0.5'
when a.distance>=0.5 and a.distance<0.7 then 'phat 0.5_0.7'
when a.distance>=0.7 then 'phat gt 0.7'
else 'Unknown' end as propensity_bin
, b.year
, b.age
, b.died
, b.in_nz
, b.provider_at_15
, b.provider_at_15_type
, b.provider_at_15_decile
, b.provider_at_15_region
, b.school_leaving_year
, b.school_leaving_age
, b.school_leaving_attainment
, b.ncea_equivalent
, b.ncea_1
, b.ncea_2
, b.ncea_3
, b.ue
, b.school_leaving_providerid
, b.enrol_school_leaving_year
, b.enrol_leave_reason
, b.enrol_post_school_activity
, b.enrol_providerid
, b.at_school
, b.combined_school_leaving_year
, b.combined_providerid
, b.tertiary_enrolment
, b.tertiary_providerid
, b.ito_enrolment
, b.ito_providerid
, b.any_tertiary_enrolment
, b.any_tertiary_providerid
, b.max_qual_tertiary
, b.income_benefit
, b.income_wages
, b.income_total
, b.any_wages
, b.any_benefit
, b.offender_any
, b.offender_violent
, b.victim_any
, b.victim_violent
, b.community_sentence
, b.custodial_sentence
, b.pho_enrolment
, b.ed_admissions_n
, b.gp_contacts_n
, b.ash_n
into #followup_matched_14_attendance
from [IDI_Sandpit].[DL-MAA2021-60].[ae_202210_psm_age14_output_attendance] a
inner join #followup_combined b
on a.snz_uid=b.snz_uid
drop table if exists #followup_matched_15_attendance
select a.*
, case when a.distance<0.1 then 'phat lt 0.1'
when a.distance>=0.1 and a.distance<0.3 then 'phat 0.1_0.3'
when a.distance>=0.3 and a.distance<0.5 then 'phat 0.3_0.5'
when a.distance>=0.5 and a.distance<0.7 then 'phat 0.5_0.7'
when a.distance>=0.7 then 'phat gt 0.7'
else 'Unknown' end as propensity_bin
, b.year
, b.age
, b.died
, b.in_nz
, b.provider_at_15
, b.provider_at_15_type
, b.provider_at_15_decile
, b.provider_at_15_region
, b.school_leaving_year
, b.school_leaving_age
, b.school_leaving_attainment
, b.ncea_equivalent
, b.ncea_1
, b.ncea_2
, b.ncea_3
, b.ue
, b.school_leaving_providerid
, b.enrol_school_leaving_year
, b.enrol_leave_reason
, b.enrol_post_school_activity
, b.enrol_providerid
, b.at_school
, b.combined_school_leaving_year
, b.combined_providerid
, b.tertiary_enrolment
, b.tertiary_providerid
, b.ito_enrolment
, b.ito_providerid
, b.any_tertiary_enrolment
, b.any_tertiary_providerid
, b.max_qual_tertiary
, b.income_benefit
, b.income_wages
, b.income_total
, b.any_wages
, b.any_benefit
, b.offender_any
, b.offender_violent
, b.victim_any
, b.victim_violent
, b.community_sentence
, b.custodial_sentence
, b.pho_enrolment
, b.ed_admissions_n
, b.gp_contacts_n
, b.ash_n
into #followup_matched_15_attendance
from [IDI_Sandpit].[DL-MAA2021-60].[ae_202210_psm_age15_output_attendance] a
inner join #followup_combined b
on a.snz_uid=b.snz_uid
drop table if exists #followup_matched_combined_attendance
select * into #followup_matched_combined_attendance
from
(select 'Age 12' as matching_model, * from #followup_matched_12_attendance
union
select 'Age 13' as matching_model, * from #followup_matched_13_attendance
union
select 'Age 14' as matching_model, * from #followup_matched_14_attendance
union
select 'Age 15' as matching_model, * from #followup_matched_15_attendance) a
drop table if exists #followup_matched_12_exact
select a.*
, case when a.distance<0.1 then 'phat lt 0.1'
when a.distance>=0.1 and a.distance<0.3 then 'phat 0.1_0.3'
when a.distance>=0.3 and a.distance<0.5 then 'phat 0.3_0.5'
when a.distance>=0.5 and a.distance<0.7 then 'phat 0.5_0.7'
when a.distance>=0.7 then 'phat gt 0.7'
else 'Unknown' end as propensity_bin
, b.year
, b.age
, b.died
, b.in_nz
, b.provider_at_15
, b.provider_at_15_type
, b.provider_at_15_decile
, b.provider_at_15_region
, b.school_leaving_year
, b.school_leaving_age
, b.school_leaving_attainment
, b.ncea_equivalent
, b.ncea_1
, b.ncea_2
, b.ncea_3
, b.ue
, b.school_leaving_providerid
, b.enrol_school_leaving_year
, b.enrol_leave_reason
, b.enrol_post_school_activity
, b.enrol_providerid
, b.at_school
, b.combined_school_leaving_year
, b.combined_providerid
, b.tertiary_enrolment
, b.tertiary_providerid
, b.ito_enrolment
, b.ito_providerid
, b.any_tertiary_enrolment
, b.any_tertiary_providerid
, b.max_qual_tertiary
, b.income_benefit
, b.income_wages
, b.income_total
, b.any_wages
, b.any_benefit
, b.offender_any
, b.offender_violent
, b.victim_any
, b.victim_violent
, b.community_sentence
, b.custodial_sentence
, b.pho_enrolment
, b.ed_admissions_n
, b.gp_contacts_n
, b.ash_n
into #followup_matched_12_exact
from [IDI_Sandpit].[DL-MAA2021-60].[ae_202210_psm_age12_output_exact] a
inner join #followup_combined b
on a.snz_uid=b.snz_uid