Skip to content

Commit

Permalink
Add files via upload
Browse files Browse the repository at this point in the history
  • Loading branch information
shuwang543 authored Oct 4, 2022
1 parent f4e3b64 commit 88cd9f5
Show file tree
Hide file tree
Showing 5 changed files with 232 additions and 0 deletions.
50 changes: 50 additions & 0 deletions Fig2G_predic_Neff.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
%choose a marker and datatable to compute N/N_eff, as well as predict
%N/N_eff using the exponential correlation function

marker = 'Keratinp';
data = dataTNP097;

xydata = data{:,{'Xt','Yt'}};
marker_val = data{:,marker}; %if marker is continuous, use apply log(x+1)

%%
tic
[corrfunc,radius,c_0,L] = Fig2_autocorrelation(xydata,marker_val);
parms = [c_0,-1/L];
[rand_means,tma_means,N,N_eff] = tma_compare(data,marker,parms);
Nred_obs = (std(tma_means)/std(rand_means))^2;
Nred_pred = N/N_eff;
toc
text(mean(xlim),mean(ylim),{[marker ' N_{eff}/N = ' num2str(Nred_obs)]; ['Predicted N_{eff}/N = ' num2str(Nred_pred)]})


function [rand_means,tma_means,N,N_eff] = tma_compare(data,marker,expparms)
trials = 10^3; %number of TMA cores to estimate N_eff empirically
tma_means = zeros(trials,1);
rand_means = zeros(trials,1);
n_size = zeros(trials,1);
eff_sample_size = zeros(trials,1);

for i = 1:trials
subset_id = tma_select(data);
n_size(i) = sum(subset_id);
rand_id = subset_id(randperm(size(subset_id,1)));
tma_means(i) = mean(data{(subset_id),marker});
rand_means(i) = mean(data{(rand_id),marker});

distmat = squareform(pdist(data{subset_id,{'Xt','Yt'}}));
corrmat = expparms(1)*exp(expparms(2)*distmat);
corrmat(logical(eye(n_size(i)))) = 1;
eff_sample_size(i) = n_size(i)^2/sum(corrmat(:));
end
N = mean(n_size);
N_eff = mean(eff_sample_size);
end

function subset_id = tma_select(data)
radius = 500; %vTMA radius
center_id = randi(size(data,1));
center_xy = data{center_id,{'Xt','Yt'}};
subset_id = pdist2(data{:,{'Xt','Yt'}},center_xy)<radius;
end

35 changes: 35 additions & 0 deletions Fig2_autocorrelation.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
%give the xy-positions of cells as "xydata", and the variable of interest
%(e.g. marker intensity, or binary state of cell-type calls) as
%"marker_val"

function [corrfunc, radius, c_0, L] = Fig2_autocorrelation(xydata, marker_val)
kmax = 300;
[corrfunc,radius] = corr_knn(xydata,zscore(marker_val),zscore(marker_val),kmax);

fit_start_rad = 5;
fit_end_rad = 200;

expfit = fit(radius(fit_start_rad:fit_end_rad)',corrfunc(fit_start_rad:fit_end_rad)'/corrfunc(1),'exp1');
parms = coeffvalues(expfit);
c_0 = parms(1);
L = -1./parms(2);

figure()
plot(radius,corrfunc); hold on
fplot(@(r) c_0*exp(-r/L), [radius(5),radius(200)],'--')
legend('Empirical', 'Exponential Fit')
xlabel('Distance')
ylabel('Correlation')
title('Autocorrelation function of chosen marker')
end


function [corrfunc,rad_approx] = corr_knn(xydata,magvalA,magvalB,k_val)

[idx,dist] = knnsearch(xydata,xydata,'k',k_val,'NSMethod','kdtree');
rad_approx = mean(dist); %i.e. the k'th neighbor is on average this distance

magdots = magvalA(idx).*magvalB;
corrfunc = mean(magdots);
end

26 changes: 26 additions & 0 deletions Fig2_crosscorrelation.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
%give the xy-positions of cells as "xydata", and the variable of interest
%(e.g. marker intensity, or binary state of cell-type calls) as
%"marker_val"

function [cross_corrfunc, radius] = Fig2_crosscorrelation(xydata, marker_val_A, marker_val_B)
kmax = 200;
[cross_corrfunc,radius] = corr_knn(xydata,zscore(marker_val_A),zscore(marker_val_B),kmax);

figure()
plot(sqrt(1:kmax),cross_corrfunc); hold on
xlabel('Nearest Neighbor index');
set(gca,'xtick',1:floor(sqrt(kmax))); set(gca,'xticklabel', [1:floor(sqrt(kmax))].^2);
ylabel('Correlation')
title('Cross-correlation function of two markers')
end


function [corrfunc,rad_approx] = corr_knn(xydata,magvalA,magvalB,k_val)

[idx,dist] = knnsearch(xydata,xydata,'k',k_val,'NSMethod','kdtree');
rad_approx = mean(dist); %i.e. the k'th neighbor is on average this distance

magdots = magvalA(idx).*magvalB;
corrfunc = mean(magdots);
end

81 changes: 81 additions & 0 deletions Fig3_kNN_classification.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
%load matlab-TNP_P2reg-20200802.mat

manual_prior = 1/6*ones(1,6); %[1/4, 1/12, 1/12, 1/12, 1/4, 1/4]; %equal priors on the 4 Keratin+ categories
logdnacut = 8; %manual gate on Hoechst
tum_mark_44 = 'Keratin'; tum_mark_45 = 'Cy3_3'; tum_mark_46 = 'FITC_4'; tum_mark_47 = 'FITC_9';

totalcycle = [10,14,14,14];
lastcycle = [10,11,12,14]; %last cycle to consider; additional cycles lose too many cells
tum_marks = {tum_mark_44,tum_mark_45,tum_mark_46,tum_mark_47}; %best tumor marker in panel
pathology_KNNclassifiers = cell(4,1); %the KNN-classifiers for each section
tumorids = cell(4,1);
nullids = cell(4,1);
labelscores = cell(4,1);
suffices = {'TNP044','MAL_45reg','MAL_46reg','MAL_47reg'};

man_mark_44 = [11 14 18:21 23 24 28 29 31 37 38]; %[11 13:20 21 23:30 31 33:40];
man_mark_45 = [16,18:25,30:39,44:53];
man_mark_46 = [18:20 23 30:31 33:34 36 46:48 54]; %[16:26,30:31,33:40,44:54];
man_mark_47 = [16:24,27:29,32:55];

man_markers = {man_mark_44,man_mark_45,man_mark_46,man_mark_47};


for i = 1:4
data = eval(['data' suffices{i}]);
nullids{i} = any(log(data{:,1:lastcycle(i)})<logdnacut,2);
tempgmm = fitgmdist(log(data{:,tum_marks{i}}),2);
[~,tumcomp] = max(tempgmm.mu);
tumorids{i} = cluster(tempgmm,log(data{:,tum_marks{i}}))==tumcomp;
markers = [totalcycle(i)+(1:lastcycle(i)),2*totalcycle(i)+(1:lastcycle(i)),3*totalcycle(i)+(1:lastcycle(i))];

selectid = tumorids{i}&~nullids{i};

tumordata = data(selectid,man_markers{i});
tumorROI = data.ROI(selectid);

splitid = rand(numel(tumorROI),1)<0.5; %splits labeled cells into training and valid
labeled_id = tumorROI~=0;

traindata = log(tumordata{labeled_id&splitid,:});
trainlabels = tumorROI(labeled_id&splitid,:);

validdata = log(tumordata{labeled_id&~splitid,:});
validlabels{i} = tumorROI(labeled_id&~splitid,:);

tic
pathology_KNNclassifiers{i} = fitcknn(traindata,trainlabels,'NumNeighbors',40,'Prior',manual_prior);
[~,validscores{i},~] = predict(pathology_KNNclassifiers{i},validdata);
[~,labelscores{i},~] = predict(pathology_KNNclassifiers{i},log(tumordata{:,:}));
toc

classvec = labelscores{i}*[1 0 0 0 0 0 ;0 1 1 1 0 0 ; 0 0 0 0 1 0 ; 0 0 0 0 0 1]';
end


%%
i = 4; %choose which section's results to show
selectid = tumorids{i}&~nullids{i};
class_prob_id = 2; %choose which kNN-class' probability to display

entropy_vals = shannon_entropy(colorvec);

figure()
scatter(data.Xt(~tumorids{i}),-data.Yt(~tumorids{i}),1,0.5*[1,1,1],'filled')
hold on
scatter(data.Xt(selectid),-data.Yt(selectid),1,colorvec(:,class_prob_id)','filled')
colormap(parula)
title(['kNN-postprob for class ' num2str(class_prob_id) ' of ' suffices{i}])

figure()
scatter(data.Xt(selectid),-data.Yt(selectid),1,entropy_vals,'filled')
colormap(bluewhitered)
set(gca,'Color',[0.5 0.5 0.5])
title(['kNN-classification entropy of ' suffices{i}])

function shan_entropy = shannon_entropy(probs)
individual_terms = probs.*(log2(probs));
nanterms = isnan(individual_terms);
individual_terms(nanterms) = 0;
shan_entropy = -sum(individual_terms,2);
end
40 changes: 40 additions & 0 deletions Fig4E_DelaunayClusters.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
% choose a datatable representing a tissue section
data = dataTNP097;
xypoints = data{:,{'Xt','Yt'}};
tic
adjmat = delaunaygraph(xypoints,50);
toc

% define the subset of cells (e.g. tumor) that you want to compute adjacency for
boxoutid = data.Yt<10^4 & data.Xt<10^4;
tumor_id = data.Keratinp & data.ROI~=1 & ~boxoutid; %remove normal epithelial (ROI=1) and abnormal corner of tissue

% compute the connected components (i.e. Delaunay clusters) of the cell-type
[bins,binsizes] = conncomp(graph(adjmat(tumor_id,tumor_id)));


figure('units','normalized','outerposition',[0 0 1 1])
scatter(xypoints(~tumor_id,1),-xypoints(~tumor_id,2),0.5,[0.3 0.3 0.3],'filled')
hold on
scatter(xypoints(tumor_id,1),-xypoints(tumor_id,2),3,-log2(binsizes(bins)),'filled')
colormap(hsv)
caxis([-18 0])
set(gca,'Color','k')
colorbar(); title('Keratin+ cells colored by Log2-Cluster-size')
daspect([1 1 1])

%%
function adjmat = delaunaygraph(xypoints,cutoff)
%compute adjacency matrix of points based on Delaunay triangulation

tri = delaunay(xypoints(:,1),xypoints(:,2));
trilist = tri(:);
trinext = reshape(tri(:,[2 3 1]),size(trilist));
tridist = sqrt(sum((xypoints(trilist,:) - xypoints(trinext,:)).^2,2));
g = digraph(trilist,trinext,tridist);
A = adjacency(g,'weighted');
A = (A + A')/2;
A(A>cutoff)=0;

adjmat = A;
end

0 comments on commit 88cd9f5

Please sign in to comment.