-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathimage_model.Rmd
executable file
·659 lines (519 loc) · 25.8 KB
/
image_model.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
---
title: "Image Level Analysis"
author: "Simon Vandekar"
date: "2/24/2021"
output: html_document
---
```{r setup, include=FALSE}
knitr::knit_hooks$set(GPs=function(before, options, envir){
if (before){
cex=1.5
par(mgp=c(1.7,.7,0), lwd=1.5, lend=2,
cex.lab=0.8*cex, cex.axis=0.8*cex, cex.main=1*cex,
mar=c(2.8,1.8,2,0), bty='l', oma=c(0,0,2,0))}
})
knitr::opts_chunk$set(echo = TRUE, fig.height = 5, fig.width = 5, GPs=TRUE, cache=TRUE, cache.lazy = FALSE)
library(raster)
library(png)
library(parallel)
library(class)
library(fda)
library(lme4)
library(lmerTest)
library(emmeans)
library(sjPlot)
set.seed(1234)
```
## Functions
```{r functions, echo=FALSE}
# need to check that this does the same thing as Moran function when NAs are present
# reference: https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0126158
moran = function (x, y=NULL, atlas, w = matrix(c(1, 1, 1, 1, 0, 1, 1, 1, 1), 3, 3))
{
if(is.null(y)) y = x
if(is.character(x)) x = raster(x)
if(is.character(y)) y = raster(y)
if(is.character(atlas)) atlas = raster(atlas)
z <- x - cellStats(x, mean)
z2 <- y - cellStats(y, mean)
wZiZj <- focal(z2, w = w, fun = "sum", na.rm = TRUE, pad = TRUE)
wZiZj <- wZiZj * z
wZiZj <- c(by(values(wZiZj), values(atlas), sum))
zsq <- sqrt(c(by(values(z^2), values(atlas), sum)))
# ineffecient if x=y
z2sq <- sqrt(c(by(values(z2^2), values(atlas), sum)))
n <- table(values(atlas)) - c(by(values(is.na(z) | is.na(z2)), values(atlas), sum)) # ncell(z) - cellStats(is.na(z) | is.na(z2), "sum")
values(z) = as.numeric(!is.na(values(z)) & !is.na(values(z2)))
W <- c(by(values(focal(z, w = w, fun = "sum", na.rm = TRUE, pad = TRUE)), values(atlas), sum)) #cellStats(focal(z, w = w, fun = "sum", na.rm = TRUE, pad = TRUE), sum)
NS0 <- n/W
mI <- NS0 * wZiZj/zsq/z2sq
return(mI)
}
# @param imgnames tif image names for markers used in the clustering
# @param centers kmeans centroid coordinates, with columns in the same order as imgnames
# @param weights Multiplicative weights applied in model fitting
# @param outname filename for output image
labelImage = function(imgnames, centers, weights, mask, maskThr=0, outname, transform = log10, offset=1/2){
imgs = stack(imgnames)
mask = raster(mask)
values(imgs)[ values(mask)<=maskThr ] = NA
#mins = apply(values(imgs), 2, function(v){min(v[v>0], na.rm=TRUE)})
#values(imgs) = sweep(values(imgs), 2, mins, FUN="+")
values(imgs) = sweep(values(imgs), 2, colMeans(values(imgs), na.rm=TRUE), FUN='/') + offset
imgout = calc(imgs, fun = function(vec){
nas = apply(is.na(vec), 1, any)
ans = rep(0, nrow(vec))
vec = transform(vec[!nas,])
if(nrow(vec)!=0){
ans[!nas] = as.numeric(as.character(c(knn1(centers, test=sweep(vec, 2, weights, '*'), cl=1:nrow(centers)) ) ))
}
return(ans)
}, filename=outname, overwrite=TRUE)
return(outname)
}
snr = function(imgname, mask, maskThr=0, outname, transform = log10, offset=1/2){
imgs = raster(imgname)
mask = raster(mask)
values(imgs)[ values(mask)<=maskThr ] = NA
mu = cellStats(imgs, 'mean')
sigma = cellStats(imgs, stat='sd')
#values(imgs) = transform(values(imgs)/mu+offset)
#snrTransform = cellStats(imgs, 'mean')/cellStats(imgs, 'sd')
c(mu=mu, sigma=sigma)
}
weighted.mean.fd = function (x, w, ...)
{
if (!inherits(x, "fd"))
stop("'x' is not of class 'fd'")
coef <- x$coefs
coefd <- dim(coef)
ndim <- length(coefd)
basisobj <- x$basis
nbasis <- basisobj$nbasis
dropind <- basisobj$dropind
ndropind <- length(dropind)
if (ndim == 2) {
coefmean <- matrix(apply(coef, 1, weighted.mean, w=w), nbasis - ndropind,
1)
coefnames <- list(dimnames(coef)[[1]], "Mean")
}
else {
nvar <- coefd[3]
coefmean <- array(0, c(coefd[1], 1, nvar))
for (j in 1:nvar) coefmean[, 1, j] <- apply(coef[, ,
j], 1, weighted.mean, w=w)
coefnames <- list(dimnames(coef)[[1]], "Mean", dimnames(coef)[[3]])
}
fdnames <- x$fdnames
fdnames[[2]] <- "mean"
fdnames[[3]] <- paste("mean", fdnames[[3]])
meanfd <- fd(coefmean, basisobj, fdnames)
meanfd
}
# Function to get subset of data from the images
#' @param imgs character vector, matrix, or data frame of images to load. Rows are slides/regions/subjects; columns are images.
#' @param masks character vector of masks defining where there is tissue in the image
#' @param maskThr numeric noninclusive lower threshold for mask
#' @param subsamp integer specifying the factor to subsample each dimension of the image.
#' @return A data frame with the subsample of imaging data for all of the images. Images are identified by the rownames of the imgs variable.
subsampImgs = function(imgs, masks, maskThr=0, subsamp=4, transform=log10, offset=1/2 ){
imgs = as.data.frame(imgs)
ss = 1:nrow(imgs)
imgnames = names(imgs)
# samples every 4th downsampled voxel from the DAPI
subsamp = rep(c(FALSE, TRUE), c(subsamp-1, 1))
locData = do.call(rbind, lapply(ss, function(rind){
message(rownames(imgs)[rind])
# get DAPI for this slideRegion
img = raster(masks[rind])
arrIndsImg = img
values(arrIndsImg) = outer(rep(subsamp, length.out=nrow(img) ), rep(subsamp, length.out=ncol(img) ), FUN = "&")
# indsImg selects the pixels that will be used to build the model
indsImg = (img>maskThr & arrIndsImg)
rm(arrIndsImg, img)
# for channel
names(imgnames) = imgnames
res = as.data.frame(do.call(cbind, lapply(imgnames, function(marker){
# gets row for this subject
img = raster(imgs[rind, marker])
# select target voxels
res = values(img)[ values(indsImg) ]
res = transform(res + offset) #/mean(res)
} )))
# set NAs
res[ , imgnames[which(!imgnames %in% names(res)) ] ] = NA
# reorder them to be the original
res = res[ ,imgnames]
# return result for this slideRegion
res[,'ID'] = rownames(imgs)[rind]
res
}) ) # combine them all into one data frame
}
#' Function to estimate warping functions for normalization
#' @param data data frame or matrix where rows are multiple observations from each subject
#' @param subjectID vector indicating subjectID assignment for data in data
#' @param nbasisHist integer dimension of basis to approximate the histogram
#' @param nbasisNorm integer dimension of basis for the normalization function
#' @param norderHist integer order of basis for approximating the histogram
#' @param norderNorm integer order of basis for the normalization function
#' @param len argument to density
#' @return A list of register.fd outputs, one for each channel
#' @import fda
fdaEstimate = function(data, subjectID,
nbasisHist = 20,
norderHist=4,
nbasisNorm=2,
norderNorm=2,
offset=0.0001,
transform = log10,
rang=transform(c(0,255)+offset),
len=512){
regDenss=lapply(colnames(data), function(var){
x = split(data[,var],subjectID) ## split data by slide
#rang = range(data[ which(data[,var]!=0),var]) ## get range of values != 0
#if(rang[1] < 0){rang[1] = 0}
argvals = seq(rang[1],rang[2],len=len) ## get evenly spaced values for computation later
w = sapply(x, function(y) sum(y>transform(offset)) )
## ---- density of var
densY = sapply(1:length(x), function(i){ ## calculate density for each nonzero value across slides
density(x[[i]][ which(x[[i]]>rang[1]) ],
from=rang[1],
to=rang[2],
n=len,
na.rm=TRUE)$y
})
## ---- setup basis functions
fdobj_basis = create.bspline.basis(rangeval = rang, norder = norderHist,nbasis=nbasisHist) ## create bspline basis with cubic splines (approx hist)
wbasis = create.bspline.basis(rangeval = rang, norder = norderNorm,nbasis=nbasisNorm) ## create bspline basis with linear (transform data)
Wfd0 <- fd(matrix(0,wbasis$nbasis,1),wbasis) ## setup `fda` object
WfdPar <- fdPar(Wfd0, Lfdobj = int2Lfd(0),lambda = 0) ## setup roughness for `fda` object
## ---- initial registration (warp functions)
fdobj <- smooth.basis(argvals, densY, fdobj_basis, fdnames = c("x", "samples", "density"))$fd ## estimates the densities using bsplines
#regDens = register.fd(yfd=fdobj, WfdParobj = WfdPar,dbglev = 0,crit=1) ## register densities using roughness penalties
## ---- reverse registration (inverse warp functions)
# y0s = mean.fd(fdobj) ## get mean curve from registration
y0s = weighted.mean.fd(fdobj, w=w)
y0s$coefs = do.call(cbind, rep(list(y0s$coefs), ncol(fdobj$coefs)))
## crit=2: broken in internal fda code
regDens = register.fd(y0fd = fdobj, yfd=y0s,WfdParobj = WfdPar, dbglev = 0,crit=1) ## register to get actual warping functions back to real data
colnames(regDens$warpfd$coefs) = names(x)
regDens
} )
names(regDenss) = colnames(data)
return(regDenss)
}
#' Function to normalize the imaging data
#' @param data data frame or matrix where rows are multiple observations from each subject
#' @param subjectID vector indicating subjectID assignment for data in data
#' @param nbasisHist integer dimension of basis to approximate the histogram
#' @param nbasisNorm integer dimension of basis for the normalization function
#' @param norderHist integer order of basis for approximating the histogram
#' @param norderNorm integer order of basis for the normalization function
#' @param len argument to density
#' @param ... optional named arguments passed to fdaEstimate
#' @return A list of register.fd outputs, one for each channel
fdaNormalize = function(data, dataSubjectID, imgs, imgSubjectID, outimgs, masks, maskThr=0, nit=1, transform=log10, untransform=function(y) 10^(y), offset=0.0001, ...){
nchannel = ncol(data)
nsubjects = nrow(imgs)
if(length(imgSubjectID)!=nrow(imgs) | nchannel!=ncol(imgs))
stop('number of subjects and channels in imgs must match subjectID and data.')
for(it in 1:nit){
regFuncs = fdaEstimate(data, dataSubjectID, offset=offset, ...)
normed = do.call(cbind, lapply(1:nchannel, function(var){
x = split(data[,var], dataSubjectID)
regDens = regFuncs[[var]]
# apply registration to the data
xp = lapply(1:length(x), ## register each slide using inverse warp functions
function(ind){
out = rep(transform(offset), length(x[[ind]]))
out[ x[[ind]]>transform(offset) ] = eval.fd(x[[ind]][x[[ind]]>transform(offset)], regDens$warpfd[ind]);
out})
out = data.frame(unlist(xp))
names(out) = colnames(data)[var]
out}
) )
# apply registration to the images
capture.output(lapply(1:nchannel, function(var){
regDens = regFuncs[[var]]
subjectIDs = colnames(regDens$warpfd$coefs)
# apply registration to the images
xp = lapply(1:nrow(imgs), ## register each slide using inverse warp functions
function(ind, regDense){
IDind = which(subjectIDs %in% imgSubjectID[ind])
img = raster(imgs[ind,var])
outimg = outimgs[ind, var]
mask = raster(masks[ind])
values(img)[ values(mask)<=maskThr ] = NA
#values(img)[!is.na(values(img)) & values(img)>0] = eval.fd(transform(values(img)[!is.na(values(img)) & values(img)>0]), regDense$warpfd[ind])
#writeRaster(img, filename=outimg)
imgout = calc(img,
# applies the warping function to the image in chunks
fun = function(vec){
nas = is.na(vec) | vec==0
ans = rep(0, length(vec))
vec = transform(vec[!nas]+ offset)
if(length(vec)!=0){
ans[!nas] = untransform(eval.fd(vec, regDense$warpfd[IDind]))-offset
ans[!nas & ans<0] = 0
}
return(ans)
}, filename=outimg, overwrite=TRUE)
}, regDense=regDens)
}
))
# update variables
imgs = outimgs
data = normed
}
return(list(normedData = normed, normedImages=outimgs))
}
```
## Overview
```{r dataSetup, echo=FALSE}
allMarkers = c("DAPI", "VIMENTIN","SMA","CD3D","CD4","CD8","CD68","CD11B","PANCK","NAKATPASE", 'COLLAGEN', 'PCNA', 'SOX9')
locationMarkers = c("DAPI", "VIMENTIN","SMA","PANCK","NAKATPASE", 'PCNA', 'SOX9')
immuneMarkers = c("CD3D","CD4","CD8","CD68","CD11B")
devtools::install_github('statimagcoll/atlasAnalysis')
sc = atlasAnalysis::loadQuantifiedPCA()
sr = atlasAnalysis::loadImagingData(unique(sc$Slide_Region), allMarkers)
# add tissue category
sr$broadTissue = factor(sc$broadTissue[ match(sr$slide_region, sc$Slide_Region)])
# name for downsampled images
sr[,paste(allMarkers, 'd', sep='_')] = apply(sr[,allMarkers], 2, function(x) gsub('\\.jpg$', '_downsampled.tif', x) )
# name for normalized images
sr[,paste(allMarkers, 'dn', sep='_')] = apply(sr[,allMarkers], 2, function(x) gsub('\\.jpg$', '_downsampled_normed.tif', x) )
# name for normalized images
nslocationMarkers = paste(allMarkers, 'dns', sep='_')
sr[,nslocationMarkers] = apply(sr[,allMarkers], 2, function(x) gsub('\\.jpg$', '_downsampled_normed_smoothed.tif', x) )
```
### Markers
We will use the markers `r paste(locationMarkers, collapse=', ')`, as input to the NMF to define tissue classes and use the immune markers, `r paste(immuneMarkers, collapse=', ')`, to model infiltration into different compartments of the tissue.
## Image processing
```{r preprocessParameters, eval=TRUE}
# This is the percentage of pixels to retain in downsampling
downsamplePerc=12.5
# This is the threshold to define a mask from the DAPI image
dapiThr=2
# this is the sd of the Gaussian smooth in pixel units of the downsampled image
sigma=5
```
Here, we apply image processing steps using c3d to all of file directories. This includes downsampling by a factor of `r downsamplePerc`, masking, and normalization by mean division. The mask is created crudely from the dapi image by thresholding it at `r dapiThr`.
```{r preprocess, eval=FALSE}
dnsImgs = grep('_dns$', names(sr), value=TRUE)
mclapply(1:nrow(sr), function(ind){
if(!all(file.exists(unlist(sr[ind,dnsImgs])))){
message('processing directory', dirname(sr$DAPI[ind]))
system(paste('/media/disk2/atlas_mxif/atlasAnalysis/preprocess.sh', dirname(sr$DAPI[ind]), downsamplePerc, dapiThr, sigma)) # , ignore.stdout=TRUE, ignore.stderr = TRUE
}
}, mc.cores = 30)
```
## Compute SNR
```{r, eval=FALSE}
for(marker in allMarkers[-1]){
result = t(mcmapply(snr, sr[,marker], mask=sr[,'DAPI'], maskThr=5, mc.cores=4))
colnames(result) = paste(marker, colnames(result), sep="_")
sr[,colnames(result)] = result
}
```
## Spatial Clusters
### Kmeans
```{r kmeansParameters, eval=TRUE}
# output file for the kmeans clusters
sr$locationLabel = gsub('DAPI', 'location_label', sr[,'DAPI_d'])
sr$locationLabelUntransformed = gsub('DAPI', 'location_label_untransformed', sr[,'DAPI_d'])
# parameters for k-means
fact = 8
offset=1/2
dapiThr = 2
```
```{r kmeans, eval=FALSE}
# extract the smooth un-normalized data from the images
kMeansMarkers = slocationMarkers
invisible(kmeansData <- subsampImgs(sr[ss, kMeansMarkers], sr[ss, 'mask'], maskThr = dapiThr, subsamp = fact))
set.seed(1234)
# fit the kmeans model
X = kmeansData[,kMeansMarkers]
X = apply(X, 2, function(x) ifelse(is.na(x), min(x, na.rm=TRUE), x))
sds = apply(X, 2, sd)
X = sweep(X, 2, sds, '/')
kmeansMod = kmeans(X, k, iter.max = 200, algorithm = 'MacQueen')
### ASSIGN LABELS
outimgs = sapply(ss, function(rind){ message(sr$slideRegion[rind]); labelImage(unlist(sr[rind,kMeansMarkers]), centers=kmeansMod$centers[,kMeansMarkers], weights=1/sds, mask=sr[rind, 'mask'], maskThr=dapiThr, outname=sr[rind,'locationLabel']) } )
```
```{r kmeansUntransformed, eval=FALSE}
# extract the smooth un-normalized data from the images
kmeansUntransformedData = subsampImgs(sr[ss, kMeansMarkers], sr[ss, 'mask'], maskThr = dapiThr, subsamp = fact, offset=0, transform = function(x) x)
set.seed(1234)
# fit the kmeans model
X = kmeansUntransformedData[,kMeansMarkers]
X = apply(X, 2, function(x) ifelse(is.na(x), min(x, na.rm=TRUE), x))
sds = apply(X, 2, sd)
X = sweep(X, 2, sds, '/')
kmeansMod = kmeans(X, k, iter.max = 200, algorithm = 'MacQueen')
### ASSIGN LABELS
outimgs = sapply(ss, function(rind){ message(sr$slideRegion[rind]); labelImage(unlist(sr[rind,kMeansMarkers]), centers=kmeansMod$centers[,kMeansMarkers], weights=1/sds, mask=sr[rind, 'mask'], maskThr=dapiThr, offset=0, transform=function(x) x, outname=sr[rind,'locationLabelUntransformed']) } )
```
```{r kmeansNormed, cache=TRUE, message=FALSE}
# extract the smoothed normalized data from the images
kMeansNormedMarkers = nslocationMarkers
ss = which(apply(apply(sr[,kMeansNormedMarkers], 2, file.exists), 1, all))
kmeansNormedData = subsampImgs(sr[ss, kMeansNormedMarkers], sr[ss, 'DAPI_d'], maskThr = dapiThr, subsamp = fact)
set.seed(1234)
# fit the kmeans model to the normalized data
Xnormed = kmeansNormedData[,kMeansNormedMarkers]
Xnormed = apply(Xnormed, 2, function(x) ifelse(is.na(x), min(x, na.rm=TRUE), x))
sdsnormed = apply(Xnormed, 2, sd)
Xnormed = sweep(Xnormed, 2, sdsnormed, '/')
elbow = data.frame(ks = 4:15, SS=NA)
for(k in elbow$ks){
kmeansFit = kmeans(Xnormed, k, iter.max = 200, algorithm = 'MacQueen')
elbow$SS[elbow$ks==k] = kmeansFit$tot.withinss
}
# k = 5, 7, or 13?
plot(elbow, type='b')
#kmedMod = pam(log10(locData[,kMeansMarkers]+1), 10)
k=5
kmeansModNormed = kmeans(Xnormed, k, iter.max = 200, algorithm = 'MacQueen')
```
```{r assignLabels}
sr$locationLabelNormed = gsub('DAPI', 'location_label_normed', sr[,'DAPI_d'])
outimgs = sapply(ss, function(rind){ message(sr$slideRegion[rind]); labelImage(unlist(sr[rind,kMeansNormedMarkers]), centers=kmeansModNormed$centers[,kMeansNormedMarkers], weights=1/sdsnormed, mask=sr[rind, 'DAPI_d'], maskThr=dapiThr, outname=sr[rind,'locationLabelNormed']) } )
tab = sweep(kmeansModNormed$centers, MARGIN = 2, STATS = sdsnormed, FUN = '*')
knitr::kable(round(tab, 2))
```
```{r, fig.width=7, fig.height=7, results='asis'}
ctab <- sample(rainbow(256))
ctab[1:13] = c('#000000', RColorBrewer::brewer.pal(12, 'Set3'))
for(img in sr$locationLabelNormed[ss]){
cat('\n####', basename(dirname(img)), '\n')
ras = raster(img)
plot(ras, col=ctab[1:(k+1)],
axes = FALSE,
main = basename(dirname(img)), bty='n')
#plot(ras, main=basename(dirname(img)), mar=c(0,0,1,0), bty='l', oma=c(0,0,0,0),mgp=c(0,.7,0))
}
```
## Quantification
### Compute proportion of each tissue component
Quantification of proportions of pixels within each tissue compartment.
```{r proportionQuantification}
vars = c('locationLabel')
# need to be resorted to match between the two normalization methods
for(var in vars){
propCols = paste0(var, 0:k, '_proportion')
pixelCols = paste0(var, 0:k, '_pixel')
logitCols = gsub('proportion', 'logit', propCols)
sr[,propCols] = NA
sr[,pixelCols] = NA
sr[,logitCols] = NA
# Further break these proportions by tumor region or not
sr[ss,pixelCols] = do.call(rbind, mapply(function(x, mask) table(factor(values(raster(x) * raster(mask)), 0:k) ) + 1/2, sr[ss,var], sr[ss, 'tumorMask'], SIMPLIFY = FALSE))
sr$npixel = rowSums(sr[,pixelCols[-1]])
sr[,propCols[-1]] = sweep(sr[,pixelCols[-1]], 1, sr$npixel, '/')
sr[,logitCols[-1]] = log( sr[, propCols[-1]]/(sr[,propCols[2]]) )
### PROPORTION OF CD3D POSITIVE IN EACH TISSUE
cd3pixelCols = paste0('CD3D', 0:k, '_pixel')
cd3propCols = paste0('CD3D', 0:k, '_proportion')
cd3logitCols = gsub('proportion', 'logit', cd3propCols)
sr[,cd3propCols] = NA
sr[,cd3pixelCols] = NA
sr[,cd3logitCols] = NA
sr[ss,cd3pixelCols] = do.call(rbind, mapply(function(x, mask, cd3) table(factor(values(raster(x) * raster(mask) * raster(cd3)), 0:k) ) + 1/2, sr[ss,var], sr[ss, 'tumorMask'], sr[ss,'CD3Dpos'], SIMPLIFY = FALSE))
# CD3 positive pixels dividing by total number of pixels
sr[,cd3propCols[-1]] = sr[,cd3pixelCols[-1]]/ sr[,pixelCols[-1]]
sr[,cd3logitCols[-1]] = log( sr[, cd3propCols[-1]]/(1-sr[, cd3propCols[-1]]) )
}
```
### Moran's index
Quantification of Moran's index for different immune marker types. Moran's index quantifies spatial heterogeneity and takes values in $$[-1,1]$$. Regions where a marker aggregates close to itself will have larger values, where as if the marker is dispersed throughout the tissue (but still variable), then it will have smaller values.
```{r, moranQuantification}
vars = 'CD3Dpos' # immunePosMarkers
# weights for computing Moran's index
# units are in original pixel coordinates
# one cell is about XX pixels across
weight = focalWeight(raster(sr$DAPI[1]), 8*12)
size = dim(weight)[1]
middle = ceiling(size/2)
# sets middle to zeros
weight[rowSums((apply(expand.grid(1:size, 1:size), 2, as.numeric)-middle)^2) < (5/2)^2] = 0
weight = weight/max(weight)
#moran(sr$CD8[1], atlas=sr$locationLabel[1], w = weight)
# need to be resorted to match between the two normalization methods
# x=sr[9,var]; atlas = sr[9,'locationLabel']; tumorMask=sr[9, 'tumorMask']; w=weight
# for(var in vars){
# moranCols = c(paste0(var, '_moran', 0:k), paste0(var, '_moranTotal'))
# sr[,moranCols] = NA
# # column zero
# morans = mapply(function(x, atlas, tumorMask, w){
# atlas = raster(atlas) * raster(tumorMask)
# compartments = moran(x, atlas=atlas)
# total = moran(x, atlas=tumorMask)
# out = c(compartments, total[2])
# }, sr[ss,var], atlas=sr$locationLabel[ss], tumorMask=sr[ss,'tumorMask'], MoreArgs = list(w=weight))
# # check if all the output is there
# failed = sapply(morans, length)
# correctn = max(failed)
# failed = which(failed<correctn)
# morans[failed] = list(rep(NA, correctn))
# sr[ss,moranCols] = do.call(rbind, morans)
# }
for(var in vars){
moranCols = c(paste0(var, '_moran_', c('epi', 'str')), paste0(var, '_moranTotal'))
sr[,moranCols] = NA
# column zero
# x=sr[2,var]; epi=sr$epiMask[2]; str=sr$strMask[2]; tumorMask=sr[2,'tumorMask']
morans = mapply(function(x, epi, str, tumorMask, w){
cat(x)
epi = raster(epi) * raster(tumorMask)
epi = moran(x, atlas=epi)[2]
str = raster(str) * raster(tumorMask)
str = moran(x, atlas=str)[2]
total = moran(x, atlas=tumorMask)
out = c(epi, str, total[2])
}, x=sr[ss,var], epi=sr$epiMask[ss], str=sr$strMask[ss], tumorMask=sr[ss,'tumorMask'], MoreArgs = list(w=weight))
sr[ss,moranCols] = t(morans)
}
```
### Moran's cross correlation
Moran's cross-correlation (https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0126158) quantifies the spatial association between two markers. If one marker aggregates around the other, then the spatial association is high, where as if they repel each other then the spatial association is low.
Like Moran's index, this does not consider a small window at the center of the weight function, so does not correspond to co-expression.
## Models
Outcomes:
1. Proportion of pixels within each tissue class. Did certain tissue classes differ between tumor types?
2. Proportion of CD8 positive pixels within each tissue class.
3. Spatial correlation of immune markers within each tissue class. Do immune cells clump together more in particular tissues?
4. Spatial correlation between immune and other markers. Do immune markers clump around particular types of cells?
```{r, fig.width=15, result='asis'}
srl = reshape(sr, direction='long', varying=c(list(propCols, pixelCols, logitCols, cd3logitCols)), v.names=c('prop', 'pixel', 'logit', 'cd3logit'), ids=sr$slideRegion, timevar='tissueClass', times=0:k)
# fit the model
srlss = srl[!is.na(srl$cd3logit) & !is.infinite(srl$cd3logit) & !is.na(srl$adenSSLcrc),]
srlss$tissueClass = factor(srlss$tissueClass, levels = 1:4, labels = c('Unspecified', 'Boundary', 'Epithelium', 'Stroma') )
srlss$adenSSLcrc = factor(srlss$adenSSLcrc)
srlss = srlss[order(srlss$Slide),]
#testModelAll = geepack::geeglm(logit ~ adenSSLcrc * tissueClass, data=srlss, corstr='exchangeable', id=as.factor(paste0(srlss$Slide)))
testModelAll = lmer(cd3logit ~ adenSSLcrc * tissueClass + (1|Slide), data=srlss)
print(sjPlot::tab_model(testModelAll))
# Things that are interesting here... Within each tissue class, is there an effect of adenSSLcrc, Is there an effect in tumor regions, but not adjacent normal regions?
boxplot(cd3logit ~ adenSSLcrc * tissueClass ,data=srlss)
as.data.frame(summary(pairs(emmeans(testModelAll, ~adenSSLcrc | tissueClass, type='response'))))
image(raster(sr$tumorMask[which.min(sr$CD3Dpos_moranTotal)]), col=gray.colors(12))
```
```{r, fig.width=15, result='asis'}
morans = lapply(paste0(vars, '_moran_'), grep, x=names(sr), value=TRUE)
srl = reshape(sr, direction='long', varying=c(morans), v.names=c(paste0(vars, '_moran')), ids=sr$slideRegion, timevar='tissueClass', times=c('epi', 'stroma'))
sr = sr[order(sr$Slide),]
srss = sr[!is.na(sr$adenSSLcrc) & !is.na(sr$CD3Dpos_moranTotal),]
testModel = geepack::geeglm(CD3Dpos_moranTotal ~ adenSSLcrc, data=srss, corstr='exchangeable', id=as.factor(paste0(srss$Slide)))
plot(as.factor(srss$adenSSLcrc), srss$CD3Dpos_moranTotal)
# fit the model
srlss = srl[!is.na(srl$CD3Dpos_moran) & !is.na(srl$adenSSLcrc),]
srlss$tissueClass = as.factor(srlss$tissueClass)
srlss$adenSSLcrc = factor(srlss$adenSSLcrc)
srlss = srlss[order(srlss$Slide),]
testModelAll = lme4::lmer(atanh(CD3Dpos_moran) ~ adenSSLcrc * tissueClass + (1|Slide), data=srlss)
# Things that are interesting here... Within each tissue class, is there an effect of adenSSLcrc, Is there an effect in tumor regions, but not adjacent normal regions?
boxplot(atanh(CD3Dpos_moran) ~ adenSSLcrc * tissueClass ,data=srlss)
library(emmeans)
as.data.frame(summary(pairs(emmeans(testModelAll, ~adenSSLcrc | tissueClass, type='response'))))
image(raster(sr$tumorMask[which.min(sr$CD3Dpos_moranTotal)]), col=gray.colors(12))
```