-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
f4e3b64
commit 88cd9f5
Showing
5 changed files
with
232 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
%choose a marker and datatable to compute N/N_eff, as well as predict | ||
%N/N_eff using the exponential correlation function | ||
|
||
marker = 'Keratinp'; | ||
data = dataTNP097; | ||
|
||
xydata = data{:,{'Xt','Yt'}}; | ||
marker_val = data{:,marker}; %if marker is continuous, use apply log(x+1) | ||
|
||
%% | ||
tic | ||
[corrfunc,radius,c_0,L] = Fig2_autocorrelation(xydata,marker_val); | ||
parms = [c_0,-1/L]; | ||
[rand_means,tma_means,N,N_eff] = tma_compare(data,marker,parms); | ||
Nred_obs = (std(tma_means)/std(rand_means))^2; | ||
Nred_pred = N/N_eff; | ||
toc | ||
text(mean(xlim),mean(ylim),{[marker ' N_{eff}/N = ' num2str(Nred_obs)]; ['Predicted N_{eff}/N = ' num2str(Nred_pred)]}) | ||
|
||
|
||
function [rand_means,tma_means,N,N_eff] = tma_compare(data,marker,expparms) | ||
trials = 10^3; %number of TMA cores to estimate N_eff empirically | ||
tma_means = zeros(trials,1); | ||
rand_means = zeros(trials,1); | ||
n_size = zeros(trials,1); | ||
eff_sample_size = zeros(trials,1); | ||
|
||
for i = 1:trials | ||
subset_id = tma_select(data); | ||
n_size(i) = sum(subset_id); | ||
rand_id = subset_id(randperm(size(subset_id,1))); | ||
tma_means(i) = mean(data{(subset_id),marker}); | ||
rand_means(i) = mean(data{(rand_id),marker}); | ||
|
||
distmat = squareform(pdist(data{subset_id,{'Xt','Yt'}})); | ||
corrmat = expparms(1)*exp(expparms(2)*distmat); | ||
corrmat(logical(eye(n_size(i)))) = 1; | ||
eff_sample_size(i) = n_size(i)^2/sum(corrmat(:)); | ||
end | ||
N = mean(n_size); | ||
N_eff = mean(eff_sample_size); | ||
end | ||
|
||
function subset_id = tma_select(data) | ||
radius = 500; %vTMA radius | ||
center_id = randi(size(data,1)); | ||
center_xy = data{center_id,{'Xt','Yt'}}; | ||
subset_id = pdist2(data{:,{'Xt','Yt'}},center_xy)<radius; | ||
end | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
%give the xy-positions of cells as "xydata", and the variable of interest | ||
%(e.g. marker intensity, or binary state of cell-type calls) as | ||
%"marker_val" | ||
|
||
function [corrfunc, radius, c_0, L] = Fig2_autocorrelation(xydata, marker_val) | ||
kmax = 300; | ||
[corrfunc,radius] = corr_knn(xydata,zscore(marker_val),zscore(marker_val),kmax); | ||
|
||
fit_start_rad = 5; | ||
fit_end_rad = 200; | ||
|
||
expfit = fit(radius(fit_start_rad:fit_end_rad)',corrfunc(fit_start_rad:fit_end_rad)'/corrfunc(1),'exp1'); | ||
parms = coeffvalues(expfit); | ||
c_0 = parms(1); | ||
L = -1./parms(2); | ||
|
||
figure() | ||
plot(radius,corrfunc); hold on | ||
fplot(@(r) c_0*exp(-r/L), [radius(5),radius(200)],'--') | ||
legend('Empirical', 'Exponential Fit') | ||
xlabel('Distance') | ||
ylabel('Correlation') | ||
title('Autocorrelation function of chosen marker') | ||
end | ||
|
||
|
||
function [corrfunc,rad_approx] = corr_knn(xydata,magvalA,magvalB,k_val) | ||
|
||
[idx,dist] = knnsearch(xydata,xydata,'k',k_val,'NSMethod','kdtree'); | ||
rad_approx = mean(dist); %i.e. the k'th neighbor is on average this distance | ||
|
||
magdots = magvalA(idx).*magvalB; | ||
corrfunc = mean(magdots); | ||
end | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
%give the xy-positions of cells as "xydata", and the variable of interest | ||
%(e.g. marker intensity, or binary state of cell-type calls) as | ||
%"marker_val" | ||
|
||
function [cross_corrfunc, radius] = Fig2_crosscorrelation(xydata, marker_val_A, marker_val_B) | ||
kmax = 200; | ||
[cross_corrfunc,radius] = corr_knn(xydata,zscore(marker_val_A),zscore(marker_val_B),kmax); | ||
|
||
figure() | ||
plot(sqrt(1:kmax),cross_corrfunc); hold on | ||
xlabel('Nearest Neighbor index'); | ||
set(gca,'xtick',1:floor(sqrt(kmax))); set(gca,'xticklabel', [1:floor(sqrt(kmax))].^2); | ||
ylabel('Correlation') | ||
title('Cross-correlation function of two markers') | ||
end | ||
|
||
|
||
function [corrfunc,rad_approx] = corr_knn(xydata,magvalA,magvalB,k_val) | ||
|
||
[idx,dist] = knnsearch(xydata,xydata,'k',k_val,'NSMethod','kdtree'); | ||
rad_approx = mean(dist); %i.e. the k'th neighbor is on average this distance | ||
|
||
magdots = magvalA(idx).*magvalB; | ||
corrfunc = mean(magdots); | ||
end | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,81 @@ | ||
%load matlab-TNP_P2reg-20200802.mat | ||
|
||
manual_prior = 1/6*ones(1,6); %[1/4, 1/12, 1/12, 1/12, 1/4, 1/4]; %equal priors on the 4 Keratin+ categories | ||
logdnacut = 8; %manual gate on Hoechst | ||
tum_mark_44 = 'Keratin'; tum_mark_45 = 'Cy3_3'; tum_mark_46 = 'FITC_4'; tum_mark_47 = 'FITC_9'; | ||
|
||
totalcycle = [10,14,14,14]; | ||
lastcycle = [10,11,12,14]; %last cycle to consider; additional cycles lose too many cells | ||
tum_marks = {tum_mark_44,tum_mark_45,tum_mark_46,tum_mark_47}; %best tumor marker in panel | ||
pathology_KNNclassifiers = cell(4,1); %the KNN-classifiers for each section | ||
tumorids = cell(4,1); | ||
nullids = cell(4,1); | ||
labelscores = cell(4,1); | ||
suffices = {'TNP044','MAL_45reg','MAL_46reg','MAL_47reg'}; | ||
|
||
man_mark_44 = [11 14 18:21 23 24 28 29 31 37 38]; %[11 13:20 21 23:30 31 33:40]; | ||
man_mark_45 = [16,18:25,30:39,44:53]; | ||
man_mark_46 = [18:20 23 30:31 33:34 36 46:48 54]; %[16:26,30:31,33:40,44:54]; | ||
man_mark_47 = [16:24,27:29,32:55]; | ||
|
||
man_markers = {man_mark_44,man_mark_45,man_mark_46,man_mark_47}; | ||
|
||
|
||
for i = 1:4 | ||
data = eval(['data' suffices{i}]); | ||
nullids{i} = any(log(data{:,1:lastcycle(i)})<logdnacut,2); | ||
tempgmm = fitgmdist(log(data{:,tum_marks{i}}),2); | ||
[~,tumcomp] = max(tempgmm.mu); | ||
tumorids{i} = cluster(tempgmm,log(data{:,tum_marks{i}}))==tumcomp; | ||
markers = [totalcycle(i)+(1:lastcycle(i)),2*totalcycle(i)+(1:lastcycle(i)),3*totalcycle(i)+(1:lastcycle(i))]; | ||
|
||
selectid = tumorids{i}&~nullids{i}; | ||
|
||
tumordata = data(selectid,man_markers{i}); | ||
tumorROI = data.ROI(selectid); | ||
|
||
splitid = rand(numel(tumorROI),1)<0.5; %splits labeled cells into training and valid | ||
labeled_id = tumorROI~=0; | ||
|
||
traindata = log(tumordata{labeled_id&splitid,:}); | ||
trainlabels = tumorROI(labeled_id&splitid,:); | ||
|
||
validdata = log(tumordata{labeled_id&~splitid,:}); | ||
validlabels{i} = tumorROI(labeled_id&~splitid,:); | ||
|
||
tic | ||
pathology_KNNclassifiers{i} = fitcknn(traindata,trainlabels,'NumNeighbors',40,'Prior',manual_prior); | ||
[~,validscores{i},~] = predict(pathology_KNNclassifiers{i},validdata); | ||
[~,labelscores{i},~] = predict(pathology_KNNclassifiers{i},log(tumordata{:,:})); | ||
toc | ||
|
||
classvec = labelscores{i}*[1 0 0 0 0 0 ;0 1 1 1 0 0 ; 0 0 0 0 1 0 ; 0 0 0 0 0 1]'; | ||
end | ||
|
||
|
||
%% | ||
i = 4; %choose which section's results to show | ||
selectid = tumorids{i}&~nullids{i}; | ||
class_prob_id = 2; %choose which kNN-class' probability to display | ||
|
||
entropy_vals = shannon_entropy(colorvec); | ||
|
||
figure() | ||
scatter(data.Xt(~tumorids{i}),-data.Yt(~tumorids{i}),1,0.5*[1,1,1],'filled') | ||
hold on | ||
scatter(data.Xt(selectid),-data.Yt(selectid),1,colorvec(:,class_prob_id)','filled') | ||
colormap(parula) | ||
title(['kNN-postprob for class ' num2str(class_prob_id) ' of ' suffices{i}]) | ||
|
||
figure() | ||
scatter(data.Xt(selectid),-data.Yt(selectid),1,entropy_vals,'filled') | ||
colormap(bluewhitered) | ||
set(gca,'Color',[0.5 0.5 0.5]) | ||
title(['kNN-classification entropy of ' suffices{i}]) | ||
|
||
function shan_entropy = shannon_entropy(probs) | ||
individual_terms = probs.*(log2(probs)); | ||
nanterms = isnan(individual_terms); | ||
individual_terms(nanterms) = 0; | ||
shan_entropy = -sum(individual_terms,2); | ||
end |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
% choose a datatable representing a tissue section | ||
data = dataTNP097; | ||
xypoints = data{:,{'Xt','Yt'}}; | ||
tic | ||
adjmat = delaunaygraph(xypoints,50); | ||
toc | ||
|
||
% define the subset of cells (e.g. tumor) that you want to compute adjacency for | ||
boxoutid = data.Yt<10^4 & data.Xt<10^4; | ||
tumor_id = data.Keratinp & data.ROI~=1 & ~boxoutid; %remove normal epithelial (ROI=1) and abnormal corner of tissue | ||
|
||
% compute the connected components (i.e. Delaunay clusters) of the cell-type | ||
[bins,binsizes] = conncomp(graph(adjmat(tumor_id,tumor_id))); | ||
|
||
|
||
figure('units','normalized','outerposition',[0 0 1 1]) | ||
scatter(xypoints(~tumor_id,1),-xypoints(~tumor_id,2),0.5,[0.3 0.3 0.3],'filled') | ||
hold on | ||
scatter(xypoints(tumor_id,1),-xypoints(tumor_id,2),3,-log2(binsizes(bins)),'filled') | ||
colormap(hsv) | ||
caxis([-18 0]) | ||
set(gca,'Color','k') | ||
colorbar(); title('Keratin+ cells colored by Log2-Cluster-size') | ||
daspect([1 1 1]) | ||
|
||
%% | ||
function adjmat = delaunaygraph(xypoints,cutoff) | ||
%compute adjacency matrix of points based on Delaunay triangulation | ||
|
||
tri = delaunay(xypoints(:,1),xypoints(:,2)); | ||
trilist = tri(:); | ||
trinext = reshape(tri(:,[2 3 1]),size(trilist)); | ||
tridist = sqrt(sum((xypoints(trilist,:) - xypoints(trinext,:)).^2,2)); | ||
g = digraph(trilist,trinext,tridist); | ||
A = adjacency(g,'weighted'); | ||
A = (A + A')/2; | ||
A(A>cutoff)=0; | ||
|
||
adjmat = A; | ||
end |