Add files via upload

labsyspharm · Oct 4, 2022 · 88cd9f5 · 88cd9f5
1 parent f4e3b64
commit 88cd9f5
Show file tree

Hide file tree

Showing 5 changed files with 232 additions and 0 deletions.
diff --git a/Fig2G_predic_Neff.m b/Fig2G_predic_Neff.m
@@ -0,0 +1,50 @@
+%choose a marker and datatable to compute N/N_eff, as well as predict
+%N/N_eff using the exponential correlation function 
+
+marker = 'Keratinp';
+data = dataTNP097;
+
+xydata = data{:,{'Xt','Yt'}};
+marker_val = data{:,marker}; %if marker is continuous, use apply log(x+1)
+
+%%
+tic
+    [corrfunc,radius,c_0,L] = Fig2_autocorrelation(xydata,marker_val);
+    parms = [c_0,-1/L];
+    [rand_means,tma_means,N,N_eff] = tma_compare(data,marker,parms);
+    Nred_obs = (std(tma_means)/std(rand_means))^2;
+    Nred_pred = N/N_eff;
+toc
+text(mean(xlim),mean(ylim),{[marker ' N_{eff}/N = ' num2str(Nred_obs)]; ['Predicted N_{eff}/N = ' num2str(Nred_pred)]})
+
+
+function [rand_means,tma_means,N,N_eff] = tma_compare(data,marker,expparms)
+    trials = 10^3; %number of TMA cores to estimate N_eff empirically
+    tma_means = zeros(trials,1);
+    rand_means = zeros(trials,1);
+    n_size = zeros(trials,1);
+    eff_sample_size = zeros(trials,1);
+
+    for i = 1:trials
+        subset_id = tma_select(data);
+        n_size(i) = sum(subset_id);
+        rand_id = subset_id(randperm(size(subset_id,1)));
+        tma_means(i) = mean(data{(subset_id),marker});
+        rand_means(i) = mean(data{(rand_id),marker});
+
+        distmat = squareform(pdist(data{subset_id,{'Xt','Yt'}}));
+        corrmat = expparms(1)*exp(expparms(2)*distmat);
+        corrmat(logical(eye(n_size(i)))) = 1;
+        eff_sample_size(i) = n_size(i)^2/sum(corrmat(:));
+    end
+    N = mean(n_size);
+    N_eff = mean(eff_sample_size);
+end
+
+function subset_id = tma_select(data)
+    radius = 500; %vTMA radius
+    center_id = randi(size(data,1));
+    center_xy = data{center_id,{'Xt','Yt'}};
+    subset_id = pdist2(data{:,{'Xt','Yt'}},center_xy)<radius;
+end
+
diff --git a/Fig2_autocorrelation.m b/Fig2_autocorrelation.m
@@ -0,0 +1,35 @@
+%give the xy-positions of cells as "xydata", and the variable of interest
+%(e.g. marker intensity, or binary state of cell-type calls) as
+%"marker_val"
+
+function [corrfunc, radius, c_0, L] = Fig2_autocorrelation(xydata, marker_val)
+    kmax = 300;
+    [corrfunc,radius] = corr_knn(xydata,zscore(marker_val),zscore(marker_val),kmax);
+
+    fit_start_rad = 5;
+    fit_end_rad = 200;
+
+    expfit = fit(radius(fit_start_rad:fit_end_rad)',corrfunc(fit_start_rad:fit_end_rad)'/corrfunc(1),'exp1');
+    parms = coeffvalues(expfit);
+    c_0 = parms(1);
+    L = -1./parms(2);
+
+    figure()
+    plot(radius,corrfunc); hold on
+    fplot(@(r) c_0*exp(-r/L), [radius(5),radius(200)],'--')
+    legend('Empirical', 'Exponential Fit')
+    xlabel('Distance')
+    ylabel('Correlation')
+    title('Autocorrelation function of chosen marker')
+end
+
+
+function [corrfunc,rad_approx] = corr_knn(xydata,magvalA,magvalB,k_val)    
+
+    [idx,dist] = knnsearch(xydata,xydata,'k',k_val,'NSMethod','kdtree');
+    rad_approx = mean(dist); %i.e. the k'th neighbor is on average this distance
+
+    magdots = magvalA(idx).*magvalB;
+    corrfunc = mean(magdots);
+end
+
diff --git a/Fig2_crosscorrelation.m b/Fig2_crosscorrelation.m
@@ -0,0 +1,26 @@
+%give the xy-positions of cells as "xydata", and the variable of interest
+%(e.g. marker intensity, or binary state of cell-type calls) as
+%"marker_val"
+
+function [cross_corrfunc, radius] = Fig2_crosscorrelation(xydata, marker_val_A, marker_val_B)
+    kmax = 200;
+    [cross_corrfunc,radius] = corr_knn(xydata,zscore(marker_val_A),zscore(marker_val_B),kmax); 
+
+    figure()
+    plot(sqrt(1:kmax),cross_corrfunc); hold on
+    xlabel('Nearest Neighbor index');
+    set(gca,'xtick',1:floor(sqrt(kmax))); set(gca,'xticklabel', [1:floor(sqrt(kmax))].^2);
+    ylabel('Correlation')
+    title('Cross-correlation function of two markers')
+end
+
+
+function [corrfunc,rad_approx] = corr_knn(xydata,magvalA,magvalB,k_val)    
+
+    [idx,dist] = knnsearch(xydata,xydata,'k',k_val,'NSMethod','kdtree');
+    rad_approx = mean(dist); %i.e. the k'th neighbor is on average this distance
+
+    magdots = magvalA(idx).*magvalB;
+    corrfunc = mean(magdots);
+end
+
diff --git a/Fig3_kNN_classification.m b/Fig3_kNN_classification.m
@@ -0,0 +1,81 @@
+%load matlab-TNP_P2reg-20200802.mat
+
+manual_prior = 1/6*ones(1,6); %[1/4, 1/12, 1/12, 1/12, 1/4, 1/4]; %equal priors on the 4 Keratin+ categories
+logdnacut = 8; %manual gate on Hoechst
+tum_mark_44 = 'Keratin'; tum_mark_45 = 'Cy3_3'; tum_mark_46 = 'FITC_4'; tum_mark_47 = 'FITC_9';
+
+totalcycle = [10,14,14,14];
+lastcycle = [10,11,12,14]; %last cycle to consider; additional cycles lose too many cells
+tum_marks = {tum_mark_44,tum_mark_45,tum_mark_46,tum_mark_47}; %best tumor marker in panel
+pathology_KNNclassifiers = cell(4,1); %the KNN-classifiers for each section
+tumorids = cell(4,1);
+nullids = cell(4,1);
+labelscores = cell(4,1);
+suffices = {'TNP044','MAL_45reg','MAL_46reg','MAL_47reg'};
+
+man_mark_44 = [11 14 18:21 23 24 28 29 31 37 38]; %[11 13:20 21 23:30 31 33:40];
+man_mark_45 = [16,18:25,30:39,44:53];
+man_mark_46 = [18:20 23 30:31 33:34 36 46:48 54]; %[16:26,30:31,33:40,44:54];
+man_mark_47 = [16:24,27:29,32:55];
+
+man_markers = {man_mark_44,man_mark_45,man_mark_46,man_mark_47};
+
+
+for i = 1:4
+    data = eval(['data' suffices{i}]);
+    nullids{i} = any(log(data{:,1:lastcycle(i)})<logdnacut,2);
+    tempgmm = fitgmdist(log(data{:,tum_marks{i}}),2);
+    [~,tumcomp] = max(tempgmm.mu);
+    tumorids{i} = cluster(tempgmm,log(data{:,tum_marks{i}}))==tumcomp;
+    markers = [totalcycle(i)+(1:lastcycle(i)),2*totalcycle(i)+(1:lastcycle(i)),3*totalcycle(i)+(1:lastcycle(i))];
+
+    selectid = tumorids{i}&~nullids{i};
+
+    tumordata = data(selectid,man_markers{i});
+    tumorROI = data.ROI(selectid);
+
+    splitid = rand(numel(tumorROI),1)<0.5; %splits labeled cells into training and valid
+    labeled_id = tumorROI~=0;
+
+    traindata = log(tumordata{labeled_id&splitid,:});
+    trainlabels = tumorROI(labeled_id&splitid,:);
+
+    validdata = log(tumordata{labeled_id&~splitid,:});
+    validlabels{i} = tumorROI(labeled_id&~splitid,:);
+
+    tic
+    pathology_KNNclassifiers{i} = fitcknn(traindata,trainlabels,'NumNeighbors',40,'Prior',manual_prior);
+    [~,validscores{i},~] = predict(pathology_KNNclassifiers{i},validdata); 
+    [~,labelscores{i},~] = predict(pathology_KNNclassifiers{i},log(tumordata{:,:})); 
+    toc
+
+    classvec = labelscores{i}*[1 0 0 0 0 0 ;0 1 1 1 0 0 ; 0 0 0 0 1 0 ; 0 0 0 0 0 1]';
+end
+
+
+%%
+i = 4; %choose which section's results to show
+selectid = tumorids{i}&~nullids{i};
+class_prob_id = 2; %choose which kNN-class' probability to display
+
+entropy_vals = shannon_entropy(colorvec);
+
+figure()
+    scatter(data.Xt(~tumorids{i}),-data.Yt(~tumorids{i}),1,0.5*[1,1,1],'filled')
+    hold on
+    scatter(data.Xt(selectid),-data.Yt(selectid),1,colorvec(:,class_prob_id)','filled')
+    colormap(parula)
+    title(['kNN-postprob for class ' num2str(class_prob_id) ' of ' suffices{i}])
+
+figure()
+    scatter(data.Xt(selectid),-data.Yt(selectid),1,entropy_vals,'filled')
+    colormap(bluewhitered)
+    set(gca,'Color',[0.5 0.5 0.5])
+    title(['kNN-classification entropy of ' suffices{i}])
+
+function shan_entropy = shannon_entropy(probs)
+    individual_terms = probs.*(log2(probs));
+    nanterms = isnan(individual_terms);
+    individual_terms(nanterms) = 0;
+    shan_entropy = -sum(individual_terms,2);
+end
diff --git a/Fig4E_DelaunayClusters.m b/Fig4E_DelaunayClusters.m
@@ -0,0 +1,40 @@
+% choose a datatable representing a tissue section
+data = dataTNP097;
+xypoints = data{:,{'Xt','Yt'}};
+tic
+adjmat = delaunaygraph(xypoints,50);
+toc
+
+% define the subset of cells (e.g. tumor) that you want to compute adjacency for 
+boxoutid = data.Yt<10^4 & data.Xt<10^4;
+tumor_id = data.Keratinp & data.ROI~=1 & ~boxoutid; %remove normal epithelial (ROI=1) and abnormal corner of tissue
+
+% compute the connected components (i.e. Delaunay clusters) of the cell-type
+[bins,binsizes] = conncomp(graph(adjmat(tumor_id,tumor_id)));
+
+
+figure('units','normalized','outerposition',[0 0 1 1])
+scatter(xypoints(~tumor_id,1),-xypoints(~tumor_id,2),0.5,[0.3 0.3 0.3],'filled')
+hold on
+scatter(xypoints(tumor_id,1),-xypoints(tumor_id,2),3,-log2(binsizes(bins)),'filled')
+colormap(hsv)
+caxis([-18 0])
+set(gca,'Color','k')
+colorbar(); title('Keratin+ cells colored by Log2-Cluster-size')
+daspect([1 1 1])   
+
+%%
+function adjmat = delaunaygraph(xypoints,cutoff)
+%compute adjacency matrix of points based on Delaunay triangulation
+
+tri = delaunay(xypoints(:,1),xypoints(:,2));
+trilist = tri(:);
+trinext = reshape(tri(:,[2 3 1]),size(trilist));
+tridist = sqrt(sum((xypoints(trilist,:) - xypoints(trinext,:)).^2,2));
+g = digraph(trilist,trinext,tridist);
+A = adjacency(g,'weighted');
+A = (A + A')/2;
+A(A>cutoff)=0;
+
+adjmat = A;
+end