diff --git a/assets/logos/kana-small-cropped.png b/assets/logos/kana-small-cropped.png new file mode 100644 index 00000000..c22c1d5a Binary files /dev/null and b/assets/logos/kana-small-cropped.png differ diff --git a/package.json b/package.json index 20c990d8..7e9ede55 100644 --- a/package.json +++ b/package.json @@ -1,7 +1,7 @@ { "name": "kana", "description": "Single cell data analysis in the browser", - "version": "1.2.0", + "version": "2.0.0", "private": true, "author": { "name": "Jayaram Kancherla", @@ -32,7 +32,7 @@ "@testing-library/jest-dom": "^5.11.4", "@testing-library/react": "^11.1.0", "@testing-library/user-event": "^12.1.10", - "bakana": "^0.2.6", + "bakana": "^0.3.3", "d3": "^7.1.1", "d3-dsv": "^3.0.1", "d3-scale": "^4.0.2", diff --git a/src/App.js b/src/App.js index 752721f4..252b6d10 100644 --- a/src/App.js +++ b/src/App.js @@ -49,6 +49,8 @@ const App = () => { const [initDims, setInitDims] = useState(null); const [qcDims, setQcDims] = useState(null); + const [inputData, setInputData] = useState(null); + // loaders for UI components const [showDimPlotLoader, setShowDimPlotLoader] = useState(true); const [showMarkerLoader, setShowMarkerLoader] = useState(true); @@ -92,7 +94,7 @@ const App = () => { const [colorByAnnotation, setColorByAnnotation] = useState("clusters"); // PCA - const [pcaVarExp, setPcaVarExp] = useState(null); + const [pcaVarExp, setPcaVarExp] = useState({}); // Cluster Data // which cluster is selected @@ -133,6 +135,10 @@ const App = () => { // request annotation column const [reqAnnotation, setReqAnnotation] = useState(null); + // modality + const [modality, setModality] = useState(null); + const [selectedModality, setSelectedModality] = useState(null); + // props for dialogs const loadingProps = { autoFocus: true, @@ -150,7 +156,7 @@ const App = () => { setGeneColSel, setLoadParams, setInitLoadState, inputFiles, annotationCols, setAnnotationCols, annotationObj, setAnnotationObj, preInputFiles, - setPreInputFilesStatus } = 
useContext(AppContext); + setPreInputFilesStatus, geneColSel } = useContext(AppContext); // initializes various things on the worker side useEffect(() => { @@ -180,13 +186,14 @@ const App = () => { // request worker for new markers // if either the cluster or the ranking changes useEffect(() => { + if (selectedCluster !== null && selectedModality != null) { - if (selectedCluster !== null) { let type = String(selectedCluster).startsWith("cs") ? "getMarkersForSelection" : "getMarkersForCluster"; scranWorker.postMessage({ "type": type, "payload": { + "modality": selectedModality, "cluster": selectedCluster, "rank_type": clusterRank, } @@ -194,7 +201,7 @@ const App = () => { add_to_logs("info", `--- ${type} sent ---`); } - }, [selectedCluster, clusterRank]); + }, [selectedCluster, clusterRank, selectedModality]); // compute markers in the worker // when a new custom selection of cells is made through the UI @@ -233,17 +240,18 @@ const App = () => { // get expression for a gene from worker useEffect(() => { - if (reqGene) { + if (reqGene != null && selectedModality != null) { scranWorker.postMessage({ "type": "getGeneExpression", "payload": { - "gene": reqGene + "gene": reqGene, + "modality": selectedModality } }); add_to_logs("info", `--- Request gene expression for gene:${reqGene} sent ---`); } - }, [reqGene]); + }, [reqGene, selectedModality]); useEffect(() => { @@ -390,6 +398,17 @@ const App = () => { } }, [preInputFiles, wasmInitialized]); + useEffect(() => { + if (selectedModality) { + setGenesInfo(inputData.genes[selectedModality]); + if (geneColSel[selectedModality] == null) { + let tmp = geneColSel; + tmp[selectedModality] = Object.keys(inputData.genes[selectedModality])[0] + setGeneColSel(tmp); + } + } + }, [selectedModality]); + // callback for all responses from workers // all interactions are logged and shown on the UI scranWorker.onmessage = (msg) => { @@ -429,13 +448,25 @@ const App = () => { } setIndexedDBState(false); } else if (payload.type === 
"inputs_DATA") { - setInitDims(`${payload.resp.dimensions.num_genes} genes, ${payload.resp.dimensions.num_cells} cells`); - setGenesInfo(payload.resp.genes); - setGeneColSel(Object.keys(payload.resp.genes)[0]); + var info = []; + if ("RNA" in payload.resp.num_genes) { + info.push(`${payload.resp.num_genes.RNA} genes`); + } + if ("ADT" in payload.resp.num_genes) { + info.push(`${payload.resp.num_genes.ADT} ADTs`); + } + info.push(`${payload.resp.num_cells} cells`); + + setInitDims(info.join(", ")); + setInputData(payload.resp); if (payload.resp?.annotations) { setAnnotationCols(Object.values(payload.resp.annotations)); } + + let pmods = Object.keys(payload.resp.genes); + setModality(pmods); + } else if (payload.type === "quality_control_DATA") { const { resp } = payload; @@ -453,14 +484,52 @@ const App = () => { resp["ranges"] = ranges; setQcData(resp); - setQcDims(`${resp.retained}`); setShowQCLoader(false); + } else if (payload.type === "adt_quality_control_DATA") { + const { resp } = payload; + + if (resp) { + var ranges = {}, data = resp["data"], all = {}; + + for (const [group, gvals] of Object.entries(data)) { + for (const [key, val] of Object.entries(gvals)) { + if (!all[key]) all[key] = [Infinity, -Infinity]; + let [min, max] = getMinMax(val); + if (min < all[key][0]) all[key][0] = min; + if (max > all[key][1]) all[key][1] = max; + } + ranges[group] = all; + } + + resp["ranges"] = ranges; + + let prevQC = {...qcData}; + for (const key in data) { + prevQC["data"][`adt_${key}`] = data[key]; + let tval = resp["thresholds"][key]; + if (key === "sums") { + tval = null; + } + prevQC["thresholds"][`adt_${key}`] = tval; + prevQC["ranges"][`adt_${key}`] = ranges[key]; + } + + setQcData(prevQC); + } + + setShowQCLoader(false); + } else if (payload.type === "cell_filtering_DATA") { + setQcDims(`${payload.resp.retained}`); } else if (payload.type === "feature_selection_DATA") { const { resp } = payload; setFSelectionData(resp); } else if (payload.type === "pca_DATA") { 
const { resp } = payload; - setPcaVarExp(resp); + setPcaVarExp({RNA: resp["var_exp"]}); + setShowPCALoader(false); + } else if (payload.type === "adt_pca_DATA") { + const { resp } = payload; + setPcaVarExp({...pcaVarExp, ADT: resp["var_exp"]}); setShowPCALoader(false); } else if (payload.type === "choose_clustering_DATA") { const { resp } = payload; @@ -499,20 +568,25 @@ const App = () => { } else if (payload.type === "marker_detection_DATA") { if (!selectedCluster) { // show markers for the first cluster + if (selectedModality == null) { + setSelectedModality(modality[0]); + } setSelectedCluster(0); } } else if (payload.type === "tsne_DATA") { const { resp } = payload; setTsneData(resp); - let tmp = [...redDims]; - tmp.push("TSNE"); + if (redDims.indexOf("TSNE") == -1) { + let tmp = [...redDims]; + tmp.push("TSNE"); + setRedDims(tmp); + } // once t-SNE is available, set this as the default display if (!defaultRedDims) { setDefaultRedDims("TSNE"); } - setRedDims(tmp); // also don't show the pong game anymore setShowGame(false); setShowAnimation(false); @@ -525,9 +599,11 @@ const App = () => { setUmapData(resp); // enable UMAP selection - let tmp = [...redDims]; - tmp.push("UMAP"); - setRedDims(tmp); + if (redDims.indexOf("UMAP") == -1) { + let tmp = [...redDims]; + tmp.push("UMAP"); + setRedDims(tmp); + } setShowGame(false); setShowAnimation(false); @@ -575,8 +651,15 @@ const App = () => { setIndexedDBState(false); } else if (payload.type === "loadedParameters") { const { resp } = payload; + + resp["ann"] = {}; + resp["ann"]["approximate"] = resp["batch_correction"]["approximate"]; setLoadParams(resp); + if (!resp?.combine_embeddings?.weights) { + resp["combine_embeddings"]["weights"] = {}; + } + if (resp?.custom_selections?.selections) { let cluster_count = clusterColors.length + Object.keys(resp?.custom_selections?.selections).length; let cluster_colors = null; @@ -606,6 +689,10 @@ const App = () => { setShowCellLabelLoader(false); } else if (payload.type === 
"PREFLIGHT_INPUT_DATA") { const { resp } = payload; + if (resp.details.features) { + let pmods = Object.keys(resp.details.features); + setModality(pmods); + } setPreInputFilesStatus(resp.details); } else if (payload.type === "custom_selections_DATA") { } else if (payload.type === "tsne_CACHE" || payload.type === "umap_CACHE") { @@ -685,6 +772,7 @@ const App = () => { setClusHighlightLabel={setClusHighlightLabel} colorByAnnotation={colorByAnnotation} setColorByAnnotation={setColorByAnnotation} + selectedModality={selectedModality} /> : showGame ?
+ Build the index for the nearest neighbor search. + This is used for a variety of steps including the graph-based clustering, t-SNE and UMAP. +
++ Approximate: + Use an approximate neighbor search algorithm - in this case, the Annoy method. + This sacrifices some search accuracy for speed, which is usually acceptable for single-cell applications. + Otherwise, an exact algorithm is used. +
++ Remove batch effects between cells from different samples. + This places all cells in a common coordinate space for consistent clustering and visualization. + Otherwise, the interpretation of downstream analysis results may be complicated by large sample-sample differences, + obscuring the heterogeneity within samples that is usually of interest. +
++ Correction method: + Which correction method to use - no correction, linear regression or mutual nearest neighbor (MNN) correction. + MNN correction is the default and handles situations with differences in cell type composition across samples. + Linear regression is simpler but assumes that all samples have the same proportions of cell types, with a consistent batch effect in each cell type. + Users may also choose not to correct if, e.g., the sample-sample differences are interesting. +
++ Number of neighbors: + Number of neighbors to use to identify MNN pairs. + Using larger values will yield a more stable correction but also increases the risk of incorrectly merging unrelated populations across samples. +
++ Remove low-quality cells based on the ADT counts. + This uses the number of detected features and, if available, the total count for isotype (IgG) controls. + Cells with few detected features or high isotype counts are filtered out; + this is combined with the RNA-based filters to ensure that cells are only retained if they are informative in both modalities. + We again use an outlier-based approach to define the filter threshold for each metric. +
++ Number of MADs: + Number of median absolute deviations (MADs) from the median, + used to define a filter threshold in the appropriate direction for each QC metric. + Increasing this value will reduce the stringency of the filtering. +
++ Isotype prefix: + Prefix to use to identify features in the dataset that are isotype controls. + This is not case-sensitive. +
++ Log-normalize the ADT count data. + This involves some more work than the RNA counterpart as the composition biases can be much stronger in ADT data. + We use a simple approach where we cluster cells based on their ADT counts, + normalize for composition biases between clusters using a median-based method, + normalize for library size differences between cells within clusters, + and then combine both to obtain per-cell factors. +
++ Number of clusters: + Number of clusters to use in the initial k-means clustering. + This clustering will not be used in any downstream steps; it is only used here to separate major subpopulations with strong DE. + Any rough clustering is fine and it should not be necessary to spend much time fine-tuning this parameter. + Overclustering is acceptable - and possibly even desirable - provided that each cluster still contains enough cells for stable median calculations. +
++ Number of PCs: + Number of principal components to use for the clustering. + We perform a PCA to compress the data for faster clustering - this has no bearing on later choices of the number of PCs. + Again, as long as a reasonable clustering is obtained, it should not be necessary to spend much time fine-tuning this parameter. + In fact, if the requested number of PCs is greater than the number of ADTs, this parameter will have no effect. +
++ Perform a principal components analysis (PCA) on the log-normalized ADT matrix. + As for RNA, the PCA is used for compression and denoising prior to downstream steps like clustering and visualization. + However, unlike RNA, no feature selection is performed here as there are relatively few ADTs in the first place. +
++ Number of PCs: + Number of principal components with the highest variance to retain in downstream analyses. + Larger values will capture more biological signal at the cost of increasing noise and computational work. + If more PCs are requested than ADTs are available, the number of ADTs is used instead. +
++ Combine PC embeddings from multiple modalities. + This yields a single matrix that can be used in downstream analyses like clustering, + allowing us to incorporate information from multiple modalities. + By default, each modality is given equal weight in the combined matrix. +
++ Modality weights: + Weight for each modality. + A larger value indicates that the corresponding modality will contribute more to the population heterogeneity in the combined embedding. + A value of zero indicates that the corresponding modality should be ignored in downstream analysis. +
+