diff --git a/assets/logos/kana-small-cropped.png b/assets/logos/kana-small-cropped.png new file mode 100644 index 00000000..c22c1d5a Binary files /dev/null and b/assets/logos/kana-small-cropped.png differ diff --git a/package.json b/package.json index 20c990d8..7e9ede55 100644 --- a/package.json +++ b/package.json @@ -1,7 +1,7 @@ { "name": "kana", "description": "Single cell data analysis in the browser", - "version": "1.2.0", + "version": "2.0.0", "private": true, "author": { "name": "Jayaram Kancherla", @@ -32,7 +32,7 @@ "@testing-library/jest-dom": "^5.11.4", "@testing-library/react": "^11.1.0", "@testing-library/user-event": "^12.1.10", - "bakana": "^0.2.6", + "bakana": "^0.3.3", "d3": "^7.1.1", "d3-dsv": "^3.0.1", "d3-scale": "^4.0.2", diff --git a/src/App.js b/src/App.js index 752721f4..252b6d10 100644 --- a/src/App.js +++ b/src/App.js @@ -49,6 +49,8 @@ const App = () => { const [initDims, setInitDims] = useState(null); const [qcDims, setQcDims] = useState(null); + const [inputData, setInputData] = useState(null); + // loaders for UI components const [showDimPlotLoader, setShowDimPlotLoader] = useState(true); const [showMarkerLoader, setShowMarkerLoader] = useState(true); @@ -92,7 +94,7 @@ const App = () => { const [colorByAnnotation, setColorByAnnotation] = useState("clusters"); // PCA - const [pcaVarExp, setPcaVarExp] = useState(null); + const [pcaVarExp, setPcaVarExp] = useState({}); // Cluster Data // which cluster is selected @@ -133,6 +135,10 @@ const App = () => { // request annotation column const [reqAnnotation, setReqAnnotation] = useState(null); + // modality + const [modality, setModality] = useState(null); + const [selectedModality, setSelectedModality] = useState(null); + // props for dialogs const loadingProps = { autoFocus: true, @@ -150,7 +156,7 @@ const App = () => { setGeneColSel, setLoadParams, setInitLoadState, inputFiles, annotationCols, setAnnotationCols, annotationObj, setAnnotationObj, preInputFiles, - setPreInputFilesStatus } = 
useContext(AppContext); + setPreInputFilesStatus, geneColSel } = useContext(AppContext); // initializes various things on the worker side useEffect(() => { @@ -180,13 +186,14 @@ const App = () => { // request worker for new markers // if either the cluster or the ranking changes useEffect(() => { + if (selectedCluster !== null && selectedModality != null) { - if (selectedCluster !== null) { let type = String(selectedCluster).startsWith("cs") ? "getMarkersForSelection" : "getMarkersForCluster"; scranWorker.postMessage({ "type": type, "payload": { + "modality": selectedModality, "cluster": selectedCluster, "rank_type": clusterRank, } @@ -194,7 +201,7 @@ const App = () => { add_to_logs("info", `--- ${type} sent ---`); } - }, [selectedCluster, clusterRank]); + }, [selectedCluster, clusterRank, selectedModality]); // compute markers in the worker // when a new custom selection of cells is made through the UI @@ -233,17 +240,18 @@ const App = () => { // get expression for a gene from worker useEffect(() => { - if (reqGene) { + if (reqGene != null && selectedModality != null) { scranWorker.postMessage({ "type": "getGeneExpression", "payload": { - "gene": reqGene + "gene": reqGene, + "modality": selectedModality } }); add_to_logs("info", `--- Request gene expression for gene:${reqGene} sent ---`); } - }, [reqGene]); + }, [reqGene, selectedModality]); useEffect(() => { @@ -390,6 +398,17 @@ const App = () => { } }, [preInputFiles, wasmInitialized]); + useEffect(() => { + if (selectedModality) { + setGenesInfo(inputData.genes[selectedModality]); + if (geneColSel[selectedModality] == null) { + let tmp = geneColSel; + tmp[selectedModality] = Object.keys(inputData.genes[selectedModality])[0] + setGeneColSel(tmp); + } + } + }, [selectedModality]); + // callback for all responses from workers // all interactions are logged and shown on the UI scranWorker.onmessage = (msg) => { @@ -429,13 +448,25 @@ const App = () => { } setIndexedDBState(false); } else if (payload.type === 
"inputs_DATA") { - setInitDims(`${payload.resp.dimensions.num_genes} genes, ${payload.resp.dimensions.num_cells} cells`); - setGenesInfo(payload.resp.genes); - setGeneColSel(Object.keys(payload.resp.genes)[0]); + var info = []; + if ("RNA" in payload.resp.num_genes) { + info.push(`${payload.resp.num_genes.RNA} genes`); + } + if ("ADT" in payload.resp.num_genes) { + info.push(`${payload.resp.num_genes.ADT} ADTs`); + } + info.push(`${payload.resp.num_cells} cells`); + + setInitDims(info.join(", ")); + setInputData(payload.resp); if (payload.resp?.annotations) { setAnnotationCols(Object.values(payload.resp.annotations)); } + + let pmods = Object.keys(payload.resp.genes); + setModality(pmods); + } else if (payload.type === "quality_control_DATA") { const { resp } = payload; @@ -453,14 +484,52 @@ const App = () => { resp["ranges"] = ranges; setQcData(resp); - setQcDims(`${resp.retained}`); setShowQCLoader(false); + } else if (payload.type === "adt_quality_control_DATA") { + const { resp } = payload; + + if (resp) { + var ranges = {}, data = resp["data"], all = {}; + + for (const [group, gvals] of Object.entries(data)) { + for (const [key, val] of Object.entries(gvals)) { + if (!all[key]) all[key] = [Infinity, -Infinity]; + let [min, max] = getMinMax(val); + if (min < all[key][0]) all[key][0] = min; + if (max > all[key][1]) all[key][1] = max; + } + ranges[group] = all; + } + + resp["ranges"] = ranges; + + let prevQC = {...qcData}; + for (const key in data) { + prevQC["data"][`adt_${key}`] = data[key]; + let tval = resp["thresholds"][key]; + if (key === "sums") { + tval = null; + } + prevQC["thresholds"][`adt_${key}`] = tval; + prevQC["ranges"][`adt_${key}`] = ranges[key]; + } + + setQcData(prevQC); + } + + setShowQCLoader(false); + } else if (payload.type === "cell_filtering_DATA") { + setQcDims(`${payload.resp.retained}`); } else if (payload.type === "feature_selection_DATA") { const { resp } = payload; setFSelectionData(resp); } else if (payload.type === "pca_DATA") { 
const { resp } = payload; - setPcaVarExp(resp); + setPcaVarExp({RNA: resp["var_exp"]}); + setShowPCALoader(false); + } else if (payload.type === "adt_pca_DATA") { + const { resp } = payload; + setPcaVarExp({...pcaVarExp, ADT: resp["var_exp"]}); setShowPCALoader(false); } else if (payload.type === "choose_clustering_DATA") { const { resp } = payload; @@ -499,20 +568,25 @@ const App = () => { } else if (payload.type === "marker_detection_DATA") { if (!selectedCluster) { // show markers for the first cluster + if (selectedModality == null) { + setSelectedModality(modality[0]); + } setSelectedCluster(0); } } else if (payload.type === "tsne_DATA") { const { resp } = payload; setTsneData(resp); - let tmp = [...redDims]; - tmp.push("TSNE"); + if (redDims.indexOf("TSNE") == -1) { + let tmp = [...redDims]; + tmp.push("TSNE"); + setRedDims(tmp); + } // once t-SNE is available, set this as the default display if (!defaultRedDims) { setDefaultRedDims("TSNE"); } - setRedDims(tmp); // also don't show the pong game anymore setShowGame(false); setShowAnimation(false); @@ -525,9 +599,11 @@ const App = () => { setUmapData(resp); // enable UMAP selection - let tmp = [...redDims]; - tmp.push("UMAP"); - setRedDims(tmp); + if (redDims.indexOf("UMAP") == -1) { + let tmp = [...redDims]; + tmp.push("UMAP"); + setRedDims(tmp); + } setShowGame(false); setShowAnimation(false); @@ -575,8 +651,15 @@ const App = () => { setIndexedDBState(false); } else if (payload.type === "loadedParameters") { const { resp } = payload; + + resp["ann"] = {}; + resp["ann"]["approximate"] = resp["batch_correction"]["approximate"]; setLoadParams(resp); + if (!resp?.combine_embeddings?.weights) { + resp["combine_embeddings"]["weights"] = {}; + } + if (resp?.custom_selections?.selections) { let cluster_count = clusterColors.length + Object.keys(resp?.custom_selections?.selections).length; let cluster_colors = null; @@ -606,6 +689,10 @@ const App = () => { setShowCellLabelLoader(false); } else if (payload.type === 
"PREFLIGHT_INPUT_DATA") { const { resp } = payload; + if (resp.details.features) { + let pmods = Object.keys(resp.details.features); + setModality(pmods); + } setPreInputFilesStatus(resp.details); } else if (payload.type === "custom_selections_DATA") { } else if (payload.type === "tsne_CACHE" || payload.type === "umap_CACHE") { @@ -685,6 +772,7 @@ const App = () => { setClusHighlightLabel={setClusHighlightLabel} colorByAnnotation={colorByAnnotation} setColorByAnnotation={setColorByAnnotation} + selectedModality={selectedModality} /> : showGame ?
{ gene={gene} clusterColors={clusterColors} setReqGene={setReqGene} + modality={modality} + selectedModality={selectedModality} + setSelectedModality={setSelectedModality} /> :
{ if (loadParams && tabSelected === "load") { setTmpInputParams(loadParams); + console.log(loadParams); } }, [loadParams]); @@ -161,87 +163,98 @@ const AnalysisDialog = ({ useEffect(() => { if (tmpInputFiles) { - if (tabSelected === "new") { - let all_valid = true; - // tmpInputFiles.forEach((x, ix) => - for (let ix = 0; ix < tmpInputFiles.length; ix++) { - let x = tmpInputFiles[ix]; - if ( - (x?.mtx && !(inputText[ix]?.mtx.toLowerCase().endsWith("mtx") || - inputText[ix]?.mtx.toLowerCase().endsWith("mtx.gz") - )) || - (x?.genes && !(inputText[ix]?.genes.toLowerCase().endsWith("tsv") || - inputText[ix]?.genes.toLowerCase().endsWith("tsv.gz") - )) || - (x?.annotations && !(inputText[ix]?.annotations.toLowerCase().endsWith("tsv") || - inputText[ix]?.annotations.toLowerCase().endsWith("tsv.gz") - )) - ) { - all_valid = false; - } - if ( - x?.h5 && !( - inputText[ix]?.h5.toLowerCase().endsWith("hdf5") || - inputText[ix]?.h5.toLowerCase().endsWith("h5") || - inputText[ix]?.h5.toLowerCase().endsWith("h5ad") - ) - ) { + if(tmpInputFiles.length == 0) { + setTmpInputValid(false); + } else { + if (tabSelected === "new") { + let all_valid = true; + + // tmpInputFiles.forEach((x, ix) => + for (let ix = 0; ix < tmpInputFiles.length; ix++) { + let x = tmpInputFiles[ix]; + + if ( + (x?.mtx && !(inputText[ix]?.mtx.toLowerCase().endsWith("mtx") || + inputText[ix]?.mtx.toLowerCase().endsWith("mtx.gz") + )) || + (x?.genes && !(inputText[ix]?.genes.toLowerCase().endsWith("tsv") || + inputText[ix]?.genes.toLowerCase().endsWith("tsv.gz") + )) || + (x?.annotations && !(inputText[ix]?.annotations.toLowerCase().endsWith("tsv") || + inputText[ix]?.annotations.toLowerCase().endsWith("tsv.gz") + )) + ) { + all_valid = false; + } + + if ( + x?.h5 && !( + inputText[ix]?.h5.toLowerCase().endsWith("hdf5") || + inputText[ix]?.h5.toLowerCase().endsWith("h5") || + inputText[ix]?.h5.toLowerCase().endsWith("h5ad") + ) + ) { + all_valid = false; + } + + if (x.format === "MatrixMarket") { + if (!x.mtx) 
all_valid = false; + } else { + if (!x.h5) all_valid = false; + } + }; + + let tnames = tmpInputFiles.map(x => x.name); + if ([...new Set(tnames)].length != tmpInputFiles.length) { all_valid = false; } - - if (x.format === "MatrixMarket") { - if (!x.mtx) all_valid = false; - } else { - if (!x.h5) all_valid = false; - } - }; - - let tnames = tmpInputFiles.map(x => x.name); - if ([...new Set(tnames)].length != tmpInputFiles.length) { - all_valid = false; - } - - setTmpInputValid(all_valid); - - if (all_valid && tmpInputFiles.length > 0) { - let mapFiles = {}; - for (const f of tmpInputFiles) { - mapFiles[f.name] = f + + setTmpInputValid(all_valid); + + if (all_valid && tmpInputFiles.length > 0) { + let mapFiles = {}; + for (const f of tmpInputFiles) { + mapFiles[f.name] = f + } + + setPreInputFiles({ + "files": mapFiles, + }); } - - setPreInputFiles({ - "files": mapFiles, - }); - } - - } else if (tabSelected === "load") { - - if (inputText?.[0]?.file == null) { - setTmpInputValid(true); - } else { - if (!tmpInputFiles?.[0]?.file) { - setTmpInputValid(false); + + } else if (tabSelected === "load") { + + if (inputText?.[0]?.file == null) { + setTmpInputValid(true); } else { - if (loadImportFormat === "kana" && - inputText?.[0]?.file != null && !(inputText?.[0]?.file.toLowerCase().endsWith("kana") - ) - ) { - setTmpInputValid(false); - } else if (loadImportFormat === "kanadb" && tmpInputFiles?.[0]?.file === null) { + if (!tmpInputFiles?.[0]?.file) { setTmpInputValid(false); } else { - setTmpInputValid(true); + if (loadImportFormat === "kana" && + inputText?.[0]?.file != null && !(inputText?.[0]?.file.toLowerCase().endsWith("kana") + ) + ) { + setTmpInputValid(false); + } else if (loadImportFormat === "kanadb" && tmpInputFiles?.[0]?.file === null) { + setTmpInputValid(false); + } else { + setTmpInputValid(true); + } } } } } - if (!loadParams && (tmpInputFiles[0]?.batch !== undefined && tmpInputFiles[0]?.batch !== "none") || (tmpInputFiles.length > 1)) { - 
setTmpInputParams({ - ...tmpInputParams, - "pca": { ...tmpInputParams["pca"], "pca-correction": "mnn" } - }) + if(tmpInputFiles.length > 1 || + (tmpInputFiles.length == 1 && (tmpInputFiles[0]?.batch && tmpInputFiles[0]?.batch.toLowerCase() != "none")) + ) { + setTmpInputParams({ ...tmpInputParams, + "batch_correction": { + ...tmpInputParams["batch_correction"], + "method": "mnn" + } + }); } } }, [tmpInputFiles]); @@ -266,7 +279,7 @@ const AnalysisDialog = ({ all_valid = false; } - if (!x.mtx) all_valid = false; + if (!x.mtx && (sinputText?.mtx !== "Choose Matrix Market file")) all_valid = false; } else if (x.format === "10X") { if (x?.h5 && !( @@ -277,7 +290,7 @@ const AnalysisDialog = ({ all_valid = false; } - if (!x.h5) all_valid = false; + if (!x.h5 && (sinputText?.file !== "Choose file...")) all_valid = false; } else if ( x.format === "H5AD") { @@ -288,7 +301,7 @@ const AnalysisDialog = ({ all_valid = false; } - if (!x.h5) all_valid = false; + if (!x.h5 && (sinputText?.file !== "Choose file...")) all_valid = false; } // setTmpInputValid(all_valid); @@ -510,6 +523,121 @@ const AnalysisDialog = ({

} + {showStepHelper === 9 && + +

+ Build the index for the nearest neighbor search. + This is used for a variety of steps including the graph-based clustering, t-SNE and UMAP. +

+

+ Approximate: + Use an approximate neighbor search algorithm - in this case, the Annoy method. + This sacrifices some search accuracy for speed, which is usually acceptable for single-cell applications. + Otherwise, an exact algorithm is used. +

+
+ } + {showStepHelper === 10 && + +

+ Remove batch effects between cells from different samples. + This places all cells in a common coordinate space for consistent clustering and visualization. + Otherwise, the interpretation of downstream analysis results may be complicated by large sample-sample differences, + obscuring the heterogeneity within samples that is usually of interest. +

+

+ Correction method: + Which correction method to use - no correction, linear regression or mutual nearest neighbor (MNN) correction. + MNN correction is the default and handles situations with differences in cell type composition across samples. + Linear regression is simpler but assumes that all samples have the same proportions of cell types, with a consistent batch effect in each cell type. + Users may also choose not to correct if, e.g., the sample-sample differences are interesting. +

+

+ Number of neighbors: + Number of neighbors to use to identify MNN pairs. + Using larger values will yield a more stable correction but will also increase the risk of incorrectly merging unrelated populations across samples. 

+
+ } + {showStepHelper === 11 && + +

+ Remove low-quality cells based on the ADT counts. + This uses the number of detected features and, if available, the total count for isotype (IgG) controls. + Cells with few detected features or high isotype counts are filtered out; + this is combined with the RNA-based filters to ensure that cells are only retained if they are informative in both modalities. + We again use an outlier-based approach to define the filter threshold for each metric. +

+

+ Number of MADs: + Number of median absolute deviations (MADs) from the median, + used to define a filter threshold in the appropriate direction for each QC metric. + Increasing this value will reduce the stringency of the filtering. +

+

+ Isotype prefix: + Prefix to use to identify features in the dataset that are isotype controls. + This is not case-sensitive. +

+
+ } + {showStepHelper === 12 && + +

+ Log-normalize the ADT count data. + This involves some more work than the RNA counterpart as the composition biases can be much stronger in ADT data. + We use a simple approach where we cluster cells based on their ADT counts, + normalize for composition biases between clusters using a median-based method, + normalize for library size differences between cells within clusters, + and then combine both to obtain per-cell factors. 

+

+ Number of clusters: + Number of clusters to use in the initial k-means clustering. + This clustering will not be used in any downstream steps; it is only used here to separate major subpopulations with strong DE. + Any rough clustering is fine and it should not be necessary to spend much time fine-tuning this parameter. + Overclustering is acceptable - and possibly even desirable - provided that each cluster still contains enough cells for stable median calculations. +

+

+ Number of PCs: + Number of principal components to use for the clustering. + We perform a PCA to compress the data for faster clustering - this has no bearing on later choices of the number of PCs. + Again, as long as a reasonable clustering is obtained, it should not be necessary to spend much time fine-tuning this parameter. + In fact, if the requested number of PCs is greater than the number of ADTs, this parameter will have no effect. +

+
+ } + {showStepHelper === 13 && + +

+ Perform a principal components analysis (PCA) on the log-normalized ADT matrix. + As for RNA, the PCA is used for compression and denoising prior to downstream steps like clustering and visualization. + However, unlike RNA, no feature selection is performed here as there are relatively few ADTs in the first place. +

+

+ Number of PCs: + Number of principal components with the highest variance to retain in downstream analyses. + Larger values will capture more biological signal at the cost of increasing noise and computational work. + If more PCs are requested than there are ADTs, the number of ADTs is used instead. 

+
+ } + {showStepHelper === 14 && + +

+ Combine PC embeddings from multiple modalities. + This yields a single matrix that can be used in downstream analyses like clustering, + allowing us to incorporate information from multiple modalities. + By default, each modality is given equal weight in the combined matrix. +

+

+ Modality weights: + Weight for each modality. + A larger value indicates that the corresponding modality will contribute more to the population heterogeneity in the combined embedding. + A value of zero indicates that the corresponding modality should be ignored in downstream analysis. +

+
+ } ) } @@ -518,10 +646,10 @@ const AnalysisDialog = ({ return (
-
2 +
setShowStepHelper(2)}> - Quality control + Quality control (RNA)
@@ -571,10 +699,10 @@ const AnalysisDialog = ({ return (
-
3 +
setShowStepHelper(3)}> - Feature Selection + Feature selection (RNA)
@@ -602,10 +730,10 @@ const AnalysisDialog = ({ return (
-
4 +
setShowStepHelper(4)}> - Principal components analysis + Principal components analysis (RNA)
@@ -631,25 +759,6 @@ const AnalysisDialog = ({ placeholder="25" value={tmpInputParams["pca"]["pca-npc"]} onValueChange={(nval, val) => { setTmpInputParams({ ...tmpInputParams, "pca": { ...tmpInputParams["pca"], "pca-npc": nval } }) }} /> - { - (tmpInputFiles.length > 1 || (tmpInputFiles.length == 1 && tmpInputFiles[0]?.batch && tmpInputFiles[0]?.batch.toLowerCase() != "none") - || (loadParams && loadParamsFor === loadImportFormat)) && - }
@@ -660,7 +769,7 @@ const AnalysisDialog = ({ return (
-
5 +
setShowStepHelper(5)}> Clustering @@ -707,17 +816,6 @@ const AnalysisDialog = ({ placeholder="10" value={tmpInputParams["cluster"]["clus-k"]} onValueChange={(nval, val) => { setTmpInputParams({ ...tmpInputParams, "cluster": { ...tmpInputParams["cluster"], "clus-k": nval } }) }} /> -