% references.bib — biblatex bibliography database.
% (Removed scraped GitHub page furniture and bare line-number residue that
%  preceded the first entry; BibTeX ignores text outside @entries, but the
%  junk served no purpose and obscured the file.)
@inproceedings{affeldt2020EnsembleBlockCoclusteringb,
  author      = {Affeldt, Séverine and Labiod, Lazhar and Nadif, Mohamed},
  title       = {Ensemble Block Co-Clustering: A Unified Framework for Text Data},
  shorttitle  = {Ensemble {{Block Co-clustering}}},
  booktitle   = {Proceedings of the 29th {{ACM International Conference}} on {{Information}} \& {{Knowledge Management}}},
  eventtitle  = {{{CIKM}} '20: {{The}} 29th {{ACM International Conference}} on {{Information}} and {{Knowledge Management}}},
  date        = {2020-10-19},
  pages       = {5--14},
  publisher   = {ACM},
  location    = {Virtual Event Ireland},
  doi         = {10.1145/3340531.3412058},
  url         = {https://dl.acm.org/doi/10.1145/3340531.3412058},
  urldate     = {2023-12-11},
  isbn        = {978-1-4503-6859-9},
  langid      = {english},
  keywords    = {co-clustering,ensemble method,information retrieval,text mining},
  annotation  = {CCF: B},
  file        = {/Volumes/Mac_Ext/Zotero/storage/6FD43Q78/Affeldt et al. - 2020 - Ensemble Block Co-clustering A Unified Framework for Text Data.pdf}
}
@inproceedings{arthur2007KmeansAdvantagesCareful,
  title = {K-Means++: The Advantages of Careful Seeding},
  shorttitle = {K-Means++},
  booktitle = {Proceedings of the Eighteenth Annual {{ACM-SIAM}} Symposium on {{Discrete}} Algorithms},
  author = {Arthur, David and Vassilvitskii, Sergei},
  date = {2007-01-07},
  series = {{{SODA}} '07},
  pages = {1027--1035},
  publisher = {Society for Industrial and Applied Mathematics},
  location = {USA},
  abstract = {The k-means method is a widely used clustering technique that seeks to minimize the average squared distance between points in the same cluster. Although it offers no accuracy guarantees, its simplicity and speed are very appealing in practice. By augmenting k-means with a very simple, randomized seeding technique, we obtain an algorithm that is Θ(logk)-competitive with the optimal clustering. Preliminary experiments show that our augmentation improves both the speed and the accuracy of k-means, often quite dramatically.},
  isbn = {978-0-89871-624-5},
  langid = {english},
  keywords = {k-means,k-means ++},
  annotation = {CCF: A},
  file = {/Volumes/Mac_Ext/Zotero/storage/3Q2QGULP/Arthur和Vassilvitskii - 2007 - k-means++ the advantages of careful seeding.pdf;/Volumes/Mac_Ext/Zotero/storage/9RPUG37X/Arthur and Vassilvitskii - k-means++ The Advantages of Careful Seeding.pdf}
}
@article{bertsimas2020InterpretableClusteringOptimization,
  title = {Interpretable Clustering: {{An}} Optimization Approach},
  shorttitle = {Interpretable Clustering},
  author = {Bertsimas, Dimitris and Orfanoudaki, Agni and Wiberg, Holly},
  date = {2020},
  journaltitle = {Machine Learning},
  shortjournal = {Mach. Learn.},
  volume = {110},
  pages = {89--138},
  doi = {10.1007/s10994-020-05896-2},
  url = {https://consensus.app/papers/clustering-optimization-approach-bertsimas/cde03daad6565269840347520cf6c0d4/},
  urldate = {2024-05-27},
  abstract = {State-of-the-art clustering algorithms provide little insight into the rationale for cluster membership, limiting their interpretability. In complex real-world applications, the latter poses a barrier to machine learning adoption when experts are asked to provide detailed explanations of their algorithms’ recommendations. We present a new unsupervised learning method that leverages Mixed Integer Optimization techniques to generate interpretable tree-based clustering models. Utilizing a flexible optimization-driven framework, our algorithm approximates the globally optimal solution leading to high quality partitions of the feature space. We propose a novel method which can optimize for various clustering internal validation metrics and naturally determines the optimal number of clusters. It successfully addresses the challenge of mixed numerical and categorical data and achieves comparable or superior performance to other clustering methods on both synthetic and real-world datasets while offering significantly higher interpretability.},
  langid = {english},
  annotation = {36 citations (Crossref/DOI) [2024-05-27]\\
影响因子: 7.5\\
CCF: B},
  file = {/Volumes/Mac_Ext/Zotero/storage/C2DHT4PI/Bertsimas et al. - 2020 - Interpretable clustering an optimization approach.pdf;/Volumes/Mac_Ext/Zotero/storage/F6Y5LQBB/cde03daad6565269840347520cf6c0d4.html}
}
@incollection{bouchareb2019ModelBasedCoclustering,
  title = {Model Based Co-Clustering of Mixed Numerical and Binary Data},
  booktitle = {Advances in {{Knowledge Discovery}} and {{Management}}},
  author = {Bouchareb, Aichetou and Boullé, Marc and Clérot, Fabrice and Rossi, Fabrice},
  date = {2019},
  pages = {3--22},
  publisher = {{Springer, Cham}},
  issn = {1860-9503},
  doi = {10.1007/978-3-030-18129-1_1},
  url = {https://link.springer.com/chapter/10.1007/978-3-030-18129-1_1},
  urldate = {2024-02-09},
  abstract = {Co-clustering is a data mining technique used to extract the underlying block structureBouchareb, Aichetou\&\#160;between the rows and columns of a data matrix. Many approaches have been studied and have shown their capacity to extractBoull\&\#233;, Marc such structures...},
  isbn = {978-3-030-18129-1},
  langid = {english},
  file = {D\:\\zihan\\Zotero\\storage\\B6BTC8D7\\Bouchareb 等 - 2019 - Model based co-clustering of mixed numerical and binary data.pdf}
}
@article{busygin2008BiclusteringDataMining,
  title = {Biclustering in Data Mining},
  author = {Busygin, Stanislav and Prokopyev, Oleg and Pardalos, Panos M.},
  date = {2008-09},
  journaltitle = {Computers \& Operations Research},
  shortjournal = {Comput. Oper. Res.},
  series = {Part {{Special Issue}}: {{Bio-inspired Methods}} in {{Combinatorial Optimization}}},
  volume = {35},
  number = {9},
  pages = {2964--2987},
  issn = {0305-0548},
  doi = {10.1016/j.cor.2007.01.005},
  url = {https://linkinghub.elsevier.com/retrieve/pii/S0305054807000159},
  urldate = {2022-10-17},
  abstract = {Biclustering consists in simultaneous partitioning of the set of samples and the set of their attributes (features) into subsets (classes). Samples and features classified together are supposed to have a high relevance to each other. In this paper we review the most widely used and successful biclustering techniques and their related applications. This survey is written from a theoretical viewpoint emphasizing mathematical concepts that can be met in existing biclustering techniques.},
  langid = {english},
  keywords = {Biclustering,Classification,Clustering,Data mining,Survey,to follow},
  annotation = {171 citations (Crossref) [2022-10-21] JCR分区: Q1 中科院分区升级版: 工程技术2区 影响因子: 5.16 5年影响因子: 5.211 EI: 是 AJG: 3 FMS: B JCI: 0.99 南农核心: 无 南农高质量: 无},
  file = {D\:\\zihan\\Zotero\\storage\\P7W66SUZ\\Biclustering_in_data_mining_Busygin_et_al_2008.pdf;D\:\\zihan\\Zotero\\storage\\MGCVQUZP\\S0305054807000159.html}
}
@article{chen2010NonnegativeMatrixFactorization,
  title = {Non-Negative Matrix Factorization for Semisupervised Heterogeneous Data Coclustering},
  author = {Chen, Yanhua and Wang, Lijun and Dong, Ming},
  date = {2010-10},
  journaltitle = {IEEE Transactions on Knowledge and Data Engineering},
  shortjournal = {IEEE Trans. Knowl. Data Eng.},
  volume = {22},
  number = {10},
  pages = {1459--1474},
  issn = {1041-4347},
  doi = {10.1109/TKDE.2009.169},
  url = {http://ieeexplore.ieee.org/document/5184838/},
  urldate = {2023-12-12},
  langid = {english},
  keywords = {Bioinformatics,text mining},
  annotation = {影响因子: 8.9\\
CCF: A\\
5年影响因子: 8.9\\
JCR分区: Q1},
  file = {/Volumes/Mac_Ext/Zotero/storage/YS5MWQ4F/Chen et al. - 2010 - Non-negative matrix factorization for semisupervised heterogeneous data coclustering.pdf}
}
@inproceedings{Chen2021Multimodal,
  title = {Multimodal Clustering Networks for Self-Supervised Learning from Unlabeled Videos},
  booktitle = {2021 {{IEEE}}/{{CVF International Conference}} on {{Computer Vision}} ({{ICCV}})},
  author = {Chen, Brian and Rouditchenko, Andrew and Duarte, Kevin and Kuehne, Hilde and Thomas, Samuel and Boggust, Angie and Panda, R. and Kingsbury, Brian and Feris, R. and Harwath, David F. and Glass, James R. and Picheny, M. and Chang, Shih-Fu},
  date = {2021},
  pages = {7992--8001},
  doi = {10.1109/ICCV48922.2021.00791},
  langid = {english},
  annotation = {30 citations (Crossref/DOI) [2024-05-27]\\
CCF: A}
}
@article{chen2023FastFlexibleBipartite,
  title = {Fast Flexible Bipartite Graph Model for Co-Clustering},
  author = {Chen, Wei and Wang, Hongjun and Long, Zhiguo and Li, Tianrui},
  date = {2023-07},
  journaltitle = {IEEE Transactions on Knowledge and Data Engineering},
  shortjournal = {IEEE Trans. Knowl. Data Eng.},
  volume = {35},
  number = {7},
  pages = {6930--6940},
  issn = {1558-2191},
  doi = {10.1109/TKDE.2022.3194275},
  url = {https://ieeexplore.ieee.org/abstract/document/9842309},
  urldate = {2024-02-09},
  abstract = {Co-clustering methods make use of the correlation between samples and attributes to explore the co-occurrence structure in data. These methods have played a significant role in gene expression analysis, image segmentation, and document clustering. In bipartite graph partition-based co-clustering methods, the relationship between samples and attributes is described by constructing a diagonal symmetric bipartite graph matrix, which is clustered by the philosophy of spectral clustering. However, this not only has high time complexity but also the same number of row and column clusters. In fact, the number of categories of rows and columns often changes in the real world. To address these problems, this paper proposes a novel fast flexible bipartite graph model for the co-clustering method (FBGPC) that directly uses the original matrix to construct the bipartite graph. Then, it uses the inflation operation to partition the bipartite graph in order to learn the co-occurrence structure of the original data matrix based on the inherent relationship between bipartite graph partitioning and co-clustering. Finally, hierarchical clustering is used to obtain the clustering results according to the set relationship of the co-occurrence structure. Extensive empirical results show the effectiveness of our proposed model and verify the faster performance, generality, and flexibility of our model.},
  eventtitle = {{{IEEE Transactions}} on {{Knowledge}} and {{Data Engineering}}},
  langid = {english},
  keywords = {Bipartite graph,bipartite graph partition,Clustering algorithms,Clustering methods,Co-clustering,Computational modeling,Data models,faster performance,flexibility,Partitioning algorithms,Time complexity},
  file = {D\:\\zihan\\Zotero\\storage\\IQIEQB6I\\Chen 等 - 2023 - Fast flexible bipartite graph model for co-clustering.pdf;D\:\\zihan\\Zotero\\storage\\KED2JCCS\\9842309.html}
}
@article{chen2023FastFlexibleBipartitea,
  title = {Fast {{Flexible Bipartite Graph Model}} for {{Co-Clustering}}},
  author = {Chen, Wei and Wang, Hongjun and Long, Zhiguo and Li, Tianrui},
  date = {2023-07},
  journaltitle = {IEEE Transactions on Knowledge and Data Engineering},
  volume = {35},
  number = {7},
  pages = {6930--6940},
  issn = {1558-2191},
  doi = {10.1109/TKDE.2022.3194275},
  url = {https://ieeexplore.ieee.org/document/9842309},
  urldate = {2024-03-27},
  abstract = {Co-clustering methods make use of the correlation between samples and attributes to explore the co-occurrence structure in data. These methods have played a significant role in gene expression analysis, image segmentation, and document clustering. In bipartite graph partition-based co-clustering methods, the relationship between samples and attributes is described by constructing a diagonal symmetric bipartite graph matrix, which is clustered by the philosophy of spectral clustering. However, this not only has high time complexity but also the same number of row and column clusters. In fact, the number of categories of rows and columns often changes in the real world. To address these problems, this paper proposes a novel fast flexible bipartite graph model for the co-clustering method (FBGPC) that directly uses the original matrix to construct the bipartite graph. Then, it uses the inflation operation to partition the bipartite graph in order to learn the co-occurrence structure of the original data matrix based on the inherent relationship between bipartite graph partitioning and co-clustering. Finally, hierarchical clustering is used to obtain the clustering results according to the set relationship of the co-occurrence structure. Extensive empirical results show the effectiveness of our proposed model and verify the faster performance, generality, and flexibility of our model.},
  eventtitle = {{{IEEE Transactions}} on {{Knowledge}} and {{Data Engineering}}},
  keywords = {Bipartite graph,bipartite graph partition,Clustering algorithms,Clustering methods,Co-clustering,Computational modeling,Data models,faster performance,flexibility,Partitioning algorithms,Time complexity},
  internal-note = {NOTE(review): duplicate of chen2023FastFlexibleBipartite (same DOI 10.1109/TKDE.2022.3194275); merge citations onto a single key and remove one entry},
  file = {/Volumes/Mac_Ext/Zotero/storage/KSAR6TWQ/Chen 等 - 2023 - Fast Flexible Bipartite Graph Model for Co-Clustering.pdf;/Volumes/Mac_Ext/Zotero/storage/SJBTPX4P/9842309.html}
}
@article{chen2023ParallelNonNegativeMatrix,
  author       = {Chen, Yufu and Lei, Zhiqi and Rao, Yanghui and Xie, Haoran and Wang, Fu Lee and Yin, Jian and Li, Qing},
  title        = {Parallel Non-Negative Matrix Tri-Factorization for Text Data Co-Clustering},
  journaltitle = {IEEE Transactions on Knowledge and Data Engineering},
  shortjournal = {IEEE Trans. Knowl. Data Eng.},
  date         = {2023-05},
  volume       = {35},
  number       = {5},
  pages        = {5132--5146},
  issn         = {1558-2191},
  doi          = {10.1109/TKDE.2022.3145489},
  eventtitle   = {{{IEEE Transactions}} on {{Knowledge}} and {{Data Engineering}}},
  abstract     = {As a novel paradigm for data mining and dimensionality reduction, Non-negative Matrix Tri-Factorization (NMTF) has attracted much attention due to its notable performance and elegant mathematical derivation, and it has been applied to a plethora of real-world applications, such as text data co-clustering. However, the existing NMTF-based methods usually involve intensive matrix multiplications, which exhibits a major limitation of high computational complexity. With the explosion at both the size and the feature dimension of texts, there is a growing need to develop a parallel and scalable NMTF-based algorithm for text data co-clustering. To this end, we first show in this paper how to theoretically derive the original optimization problem of NMTF by introducing the Lagrangian multipliers. Then, we propose to solve the Lagrange dual objective function in parallel through an efficient distributed implementation. Extensive experiments on five benchmark corpora validate the effectiveness, efficiency, and scalability of our distributed parallel update algorithm for an NMTF-based text data co-clustering method.}
}
@inproceedings{cheng2000BiclusteringExpressionData,
  title = {Biclustering of {{Expression Data}}},
  author = {Cheng, Yizong and Church, George M.},
  date = {2000},
  booktitle = {Proceedings. International Conference on Intelligent Systems for Molecular Biology},
  eventtitle = {International Conference on Intelligent Systems for Molecular Biology},
  pages = {93--103},
  url = {https://www.cs.princeton.edu/courses/archive/fall03/cs597F/Articles/biclustering_of_expression_data.pdf},
  urldate = {2023-04-18},
  abstract = {An efficient node-deletion algorithm is introduced to find submatrices in expression data that have low mean squared residue scores and it is shown to perform well in finding co-regulation patterns in yeast and human. This introduces ``biclustering'', or simultaneous clustering of both genes and conditions, to knowledge discovery from expression data. This approach overcomes some problems associated with traditional clustering methods, by allowing automatic discovery of similarity based on a subset of attributes, simultaneous clustering of genes and conditions, and overlapped grouping that provides a better representation for genes with multiple functions or regulated by many factors.},
  langid = {english}
}
@article{cheng2015CoClusterDDistributedFramework,
  title = {Co-{{ClusterD}}: {{A}} Distributed Framework for Data Co-Clustering with Sequential Updates},
  shorttitle = {Co-{{ClusterD}}},
  author = {Cheng, Xiang and Su, Sen and Gao, Lixin and Yin, Jiangtao},
  date = {2015-12},
  journaltitle = {IEEE Transactions on Knowledge and Data Engineering},
  volume = {27},
  number = {12},
  pages = {3231--3244},
  issn = {1558-2191},
  doi = {10.1109/TKDE.2015.2451634},
  url = {https://ieeexplore.ieee.org/abstract/document/7145441},
  urldate = {2024-03-27},
  abstract = {Co-clustering has emerged to be a powerful data mining tool for two-dimensional co-occurrence and dyadic data. However, co-clustering algorithms often require significant computational resources and have been dismissed as impractical for large data sets. Existing studies have provided strong empirical evidence that expectation-maximization (EM) algorithms (e.g., k-means algorithm) with sequential updates can significantly reduce the computational cost without degrading the resulting solution. Motivated by this observation, we introduce sequential updates for alternate minimization co-clustering (AMCC) algorithms which are variants of EM algorithms, and also show that AMCC algorithms with sequential updates converge. We then propose two approaches to parallelize AMCC algorithms with sequential updates in a distributed environment. Both approaches are proved to maintain the convergence properties of AMCC algorithms. Based on these two approaches, we present a new distributed framework, Co-ClusterD, which supports efficient implementations of AMCC algorithms with sequential updates. We design and implement Co-ClusterD, and show its efficiency through two AMCC algorithms: fast nonnegative matrix tri-factorization (FNMTF) and information theoretic co-clustering (ITCC). We evaluate our framework on both a local cluster of machines and the Amazon EC2 cloud. Empirical results show that AMCC algorithms implemented in Co-ClusterD can achieve a much faster convergence and often obtain better results than their traditional concurrent counterparts.},
  eventtitle = {{{IEEE Transactions}} on {{Knowledge}} and {{Data Engineering}}},
  keywords = {Algorithm design and analysis,Approximation algorithms,cloud computing,Clustering algorithms,co-clustering,concurrent updates,Convergence,distributed framework,Linear programming,Minimization,Prototypes,sequential updates},
  file = {/Volumes/Mac_Ext/Zotero/storage/INTX2GR8/Cheng 等 - 2015 - Co-ClusterD A distributed framework for data co-clustering with sequential updates.pdf;/Volumes/Mac_Ext/Zotero/storage/WXFGE5JN/7145441.html}
}
@article{chi2020ProvableConvexCoclustering,
  author       = {Chi, Eric C and Gaines, Brian R and Sun, Will Wei and Zhou, Hua and Yang, Jian},
  title        = {Provable {{Convex Co-Clustering}} of {{Tensors}}},
  journaltitle = {The Journal of Machine Learning Research},
  shortjournal = {J. Mach. Learn. Res.},
  date         = {2020},
  volume       = {21},
  number       = {1},
  pages        = {8792--8849},
  langid       = {english},
  abstract     = {Cluster analysis is a fundamental tool for pattern discovery of complex heterogeneous data. Prevalent clustering methods mainly focus on vector or matrix-variate data and are not applicable to general-order tensors, which arise frequently in modern scientific and business applications. Moreover, there is a gap between statistical guarantees and computational efficiency for existing tensor clustering solutions due to the nature of their non-convex formulations. In this work, we bridge this gap by developing a provable convex formulation of tensor co-clustering. Our convex co-clustering (CoCo) estimator enjoys stability guarantees and its computational and storage costs are polynomial in the size of the data. We further establish a non-asymptotic error bound for the CoCo estimator, which reveals a surprising ``blessing of dimensionality'' phenomenon that does not exist in vector or matrix-variate cluster analysis. Our theoretical findings are supported by extensive simulated studies. Finally, we apply the CoCo estimator to the cluster analysis of advertisement click tensor data from a major online company. Our clustering results provide meaningful business insights to improve advertising effectiveness.}
}
@book{coello2007EvolutionaryAlgorithmsSolving,
  title = {Evolutionary {{Algorithms}} for {{Solving Multi-Objective Problems}} ({{Genetic}} and {{Evolutionary Computation}})},
  author = {Coello Coello, Carlos A. and Lamont, Gary B. and Van Veldhuizen, David A.},
  date = {2007-09-07},
  publisher = {Springer},
  isbn = {978-0-387-33254-3},
  langid = {english},
  pagetotal = {800},
  file = {/Volumes/Mac_Ext/Zotero/storage/NXDQQ8YL/978-0-387-36797-2.pdf}
}
@inproceedings{daruru2009PervasiveParallelismData,
  title = {Pervasive Parallelism in Data Mining: Dataflow Solution to Co-Clustering Large and Sparse {{Netflix}} Data},
  shorttitle = {Pervasive Parallelism in Data Mining},
  booktitle = {Proceedings of the 15th {{ACM SIGKDD}} International Conference on {{Knowledge}} Discovery and Data Mining},
  author = {Daruru, Srivatsava and Marin, Nena M. and Walker, Matt and Ghosh, Joydeep},
  date = {2009-06-28},
  series = {{{KDD}} '09},
  pages = {1115--1124},
  publisher = {{Association for Computing Machinery}},
  location = {{New York, NY, USA}},
  doi = {10.1145/1557019.1557140},
  abstract = {All Netflix Prize algorithms proposed so far are prohibitively costly for large-scale production systems. In this paper, we describe an efficient dataflow implementation of a collaborative filtering (CF) solution to the Netflix Prize problem [1] based on weighted coclustering [5]. The dataflow library we use facilitates the development of sophisticated parallel programs designed to fully utilize commodity multicore hardware, while hiding traditional difficulties such as queuing, threading, memory management, and deadlocks. The dataflow CF implementation first compresses the large, sparse training dataset into co-clusters. Then it generates recommendations by combining the average ratings of the co-clusters with the biases of the users and movies. When configured to identify 20x20 co-clusters in the Netflix training dataset, the implementation predicted over 100 million ratings in 16.31 minutes and achieved an RMSE of 0.88846 without any fine-tuning or domain knowledge. This is an effective real-time prediction runtime of 9.7 us per rating which is far superior to previously reported results. Moreover, the implemented co-clustering framework supports a wide variety of other large-scale data mining applications and forms the basis for predictive modeling on large, dyadic datasets [4, 7].},
  eventtitle = {{{ACM Knowledge Discovery}} and {{Data Mining}}},
  isbn = {978-1-60558-495-9},
  langid = {english}
}
@article{dempster1977MaximumLikelihoodIncomplete,
  title = {Maximum Likelihood from Incomplete Data via the {{EM}} Algorithm},
  author = {Dempster, A. P. and Laird, N. M. and Rubin, D. B.},
  date = {1977},
  journaltitle = {Journal of the Royal Statistical Society. Series B (Methodological)},
  shortjournal = {J. Roy. Stat. Soc. B. Met.},
  volume = {39},
  number = {1},
  pages = {1--22},
  issn = {2517-6161},
  doi = {10.1111/j.2517-6161.1977.tb01600.x},
  url = {https://onlinelibrary.wiley.com/doi/abs/10.1111/j.2517-6161.1977.tb01600.x},
  urldate = {2024-02-09},
  abstract = {A broadly applicable algorithm for computing maximum likelihood estimates from incomplete data is presented at various levels of generality. Theory showing the monotone behaviour of the likelihood and convergence of the algorithm is derived. Many examples are sketched, including missing value situations, applications to grouped, censored or truncated data, finite mixture models, variance component estimation, hyperparameter estimation, iteratively reweighted least squares and factor analysis.},
  langid = {english},
  keywords = {em algorithm,incomplete data,maximum likelihood,posterior mode},
  file = {D\:\\zihan\\Zotero\\storage\\XRZKRJTD\\j.2517-6161.1977.tb01600.html}
}
@inproceedings{dhillon2001CoclusteringDocumentsWords,
  title = {Co-Clustering Documents and Words Using Bipartite Spectral Graph Partitioning},
  booktitle = {Proceedings of the Seventh {{ACM SIGKDD}} International Conference on {{Knowledge}} Discovery and Data Mining},
  author = {Dhillon, Inderjit S.},
  date = {2001-08-26},
  pages = {269--274},
  publisher = {ACM},
  location = {{San Francisco California}},
  doi = {10.1145/502512.502550},
  url = {https://dl.acm.org/doi/10.1145/502512.502550},
  urldate = {2024-02-08},
  abstract = {Both document clustering and word clustering are well studied problems. Most existing algorithms cluster documents and words separately but not simultaneously. In this paper we present the novel idea of modeling the document collection as a bipartite graph between documents and words, using which the simultaneous clustering problem can be posed as a bipartite graph partitioning problem. To solve the partitioning problem, we use a new spectral co-clustering algorithm that uses the second left and right singular vectors of an appropriately scaled word-document matrix to yield good bipartitionings. The spectral algorithm enjoys some optimality properties; it can be shown that the singular vectors solve a real relaxation to the NP-complete graph bipartitioning problem. We present experimental results to verify that the resulting co-clustering algorithm works well in practice.},
  eventtitle = {{{KDD01}}: {{ACM SIGKDD International Conference}} on {{Knowledge Discovery}} and {{Data Mining}}},
  isbn = {978-1-58113-391-2},
  langid = {english},
  keywords = {good cocluster},
  file = {D\:\\zihan\\Zotero\\storage\\VIX9F4QT\\Dhillon - 2001 - Co-clustering documents and words using bipartite .pdf;D\:\\zihan\\Zotero\\storage\\Z6QS73SY\\Dhillon - 2001 - Co-clustering documents and words using bipartite spectral graph partitioning.pdf}
}
@inproceedings{dhillon2003InformationtheoreticCoclustering,
  author     = {Dhillon, Inderjit S. and Mallela, Subramanyam and Modha, Dharmendra S.},
  title      = {Information-Theoretic Co-Clustering},
  booktitle  = {Proceedings of the Ninth {{ACM SIGKDD}} International Conference on {{Knowledge}} Discovery and Data Mining - {{KDD}} '03},
  eventtitle = {{{ACM Knowledge Discovery}} and {{Data Mining}}},
  series     = {{{KDD}} '03},
  date       = {2003-08-24},
  pages      = {89--98},
  publisher  = {{Association for Computing Machinery}},
  location   = {{New York, NY, USA}},
  doi        = {10.1145/956750.956764},
  isbn       = {978-1-58113-737-8},
  langid     = {english},
  abstract   = {Two-dimensional contingency or co-occurrence tables arise frequently in important applications such as text, web-log and market-basket data analysis. A basic problem in contingency table analysis is co-clustering: simultaneous clustering of the rows and columns. A novel theoretical formulation views the contingency table as an empirical joint probability distribution of two discrete random variables and poses the co-clustering problem as an optimization problem in information theory---the optimal co-clustering maximizes the mutual information between the clustered random variables subject to constraints on the number of row and column clusters. We present an innovative co-clustering algorithm that monotonically increases the preserved mutual information by intertwining both the row and column clusterings at all stages. Using the practical example of simultaneous word-document clustering, we demonstrate that our algorithm works well in practice, especially in the presence of sparsity and high-dimensionality.}
}
@article{dhillon2007WeightedGraphCuts,
title = {Weighted Graph Cuts without Eigenvectors a Multilevel Approach},
author = {Dhillon, Inderjit S. and Guan, Yuqiang and Kulis, Brian},
date = {2007-11},
journaltitle = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
shortjournal = {IEEE Trans. Pattern Anal. Mach. Intell.},
volume = {29},
number = {11},
pages = {1944--1957},
issn = {1939-3539},
doi = {10.1109/TPAMI.2007.1115},
url = {https://ieeexplore.ieee.org/abstract/document/4302760},
urldate = {2024-02-08},
abstract = {A variety of clustering algorithms have recently been proposed to handle data that is not linearly separable; spectral clustering and kernel k-means are two of the main methods. In this paper, we discuss an equivalence between the objective functions used in these seemingly different methods - in particular, a general weighted kernel k-means objective is mathematically equivalent to a weighted graph clustering objective. We exploit this equivalence to develop a fast high-quality multilevel algorithm that directly optimizes various weighted graph clustering objectives, such as the popular ratio cut, normalized cut, and ratio association criteria. This eliminates the need for any eigenvector computation for graph clustering problems, which can be prohibitive for very large graphs. Previous multilevel graph partitioning methods such as Metis have suffered from the restriction of equal-sized clusters; our multilevel algorithm removes this restriction by using kernel k-means to optimize weighted graph cuts. Experimental results show that our multilevel algorithm outperforms a state-of-the-art spectral clustering algorithm in terms of speed, memory usage, and quality. We demonstrate that our algorithm is applicable to large-scale clustering tasks such as image segmentation, social network analysis, and gene network analysis.},
eventtitle = {{{IEEE Transactions}} on {{Pattern Analysis}} and {{Machine Intelligence}}},
langid = {english},
keywords = {Algorithm design and analysis,Clustering,Clustering algorithms,Data mining,Data Mining,Graph Partitioning,Image analysis,Image segmentation,k-means,Kernel,Large-scale systems,Optimization methods,Partitioning algorithms,Segmentation,Social network services,Spectral Clustering},
file = {D\:\\zihan\\Zotero\\storage\\JGZ4PCF7\\Dhillon 等 - 2007 - Weighted graph cuts without eigenvectors a multilevel approach.pdf;D\:\\zihan\\Zotero\\storage\\96LC4NSW\\4302760.html}
}
@inproceedings{ding2006OrthogonalNonnegativeMatrix,
title = {Orthogonal Nonnegative Matrix T-Factorizations for Clustering},
booktitle = {Proceedings of the 12th {{ACM SIGKDD}} International Conference on {{Knowledge}} Discovery and Data Mining},
author = {Ding, Chris and Li, Tao and Peng, Wei and Park, Haesun},
date = {2006-08-20},
series = {{{KDD}} '06},
pages = {126--135},
publisher = {{Association for Computing Machinery}},
location = {{New York, NY, USA}},
doi = {10.1145/1150402.1150420},
abstract = {Currently, most research on nonnegative matrix factorization (NMF)focus on 2-factor \$X=FG\^T\$ factorization. We provide a systematicanalysis of 3-factor \$X=FSG\^T\$ NMF. While it unconstrained 3-factor NMF is equivalent to it unconstrained 2-factor NMF, itconstrained 3-factor NMF brings new features to it constrained 2-factor NMF. We study the orthogonality constraint because it leadsto rigorous clustering interpretation. We provide new rules for updating \$F,S, G\$ and prove the convergenceof these algorithms. Experiments on 5 datasets and a real world casestudy are performed to show the capability of bi-orthogonal 3-factorNMF on simultaneously clustering rows and columns of the input datamatrix. We provide a new approach of evaluating the quality ofclustering on words using class aggregate distribution andmulti-peak distribution. We also provide an overview of various NMF extensions andexamine their relationships.目前,大多数关于非负矩阵分解(NMF)的研究都集中在2-factor \$X=FG\^T\$ factorization上。我们提供了 3 因子 \$X=FSG\^T\$ NMF 的系统分析。虽然无约束 3 因子 NMF 等同于无约束 2 因子 NMF,但有约束 3 因子 NMF 为其有约束 2 因子 NMF 带来了新特征。我们研究正交性约束,因为它会导致严格的聚类解释。我们提供了更新\$F、S、G\$的新规则,并证明了这些算法的收敛性。对 5 个数据集和真实世界的案例研究进行了实验,以显示双正交 3 因子 NMF 在同时聚类输入数据矩阵的行和列上的能力。我们提供了一种使用类聚合分布和多峰分布来评估词聚类质量的新方法。我们还提供了各种 NMF 扩展的概述并检查了它们的关系。},
eventtitle = {{{ACM Knowledge Discovery}} and {{Data Mining}}},
isbn = {978-1-59593-339-3},
langid = {english}
}
@inproceedings{dongkuanxu2019DeepCoClustering,
title = {Deep Co-Clustering},
booktitle = {Proceedings of the 2019 {{SIAM International Conference}} on {{Data Mining}}},
author = {Xu, Dongkuan and Cheng, Wei and Zong, Bo and Ni, Jingchao and Song, Dongjin and Yu, Wenchao and Chen, Yuncong and Chen, Haifeng and Zhang, Xiang},
date = {2019},
pages = {414--422},
publisher = {SIAM}
}
@article{eckart1936ApproximationOneMatrix,
title = {The approximation of one matrix by another of lower rank},
author = {Eckart, Carl and Young, Gale},
date = {1936-09},
journaltitle = {Psychometrika},
shortjournal = {Psychometrika},
volume = {1},
number = {3},
pages = {211--218},
issn = {0033-3123, 1860-0980},
doi = {10.1007/BF02288367},
url = {https://www.cambridge.org/core/product/identifier/S0033312300051085/type/journal_article},
urldate = {2025-02-07},
abstract = {The mathematical problem of approximating one matrix by another of lower rank is closely related to the fundamental postulate of factor-theory. When formulated as a least-squares problem, the normal equations cannot be immediately written down, since the elements of the approximate matrix are not independent of one another. The solution of the problem is simplified by first expressing the matrices in a canonic form. It is found that the problem always has a solution which is usually unique. Several conclusions can be drawn from the form of this solution. A hypothetical interpretation of the canonic components of a score matrix is discussed.},
langid = {english}
}
@article{fettal2024BoostingSubspaceCoclustering,
title = {Boosting Subspace Co-Clustering via Bilateral Graph Convolution},
author = {Fettal, Chakib and Labiod, Lazhar and Nadif, Mohamed},
date = {2024-03},
journaltitle = {IEEE Transactions on Knowledge and Data Engineering},
shortjournal = {IEEE Trans. Knowl. Data Eng.},
volume = {36},
number = {3},
pages = {960--971},
issn = {1558-2191},
doi = {10.1109/TKDE.2023.3300814},
url = {https://ieeexplore.ieee.org/document/10207697},
urldate = {2024-04-22},
abstract = {Subspace clustering seeks to cluster high-dimensional data lying in a union of low-dimensional subspaces. It has achieved state-of-the-art results in image clustering, but text clustering of document-term matrices, has proved more impervious to advances with this approach, even though text data satisfies the assumptions of subspace clustering. We hypothesize that this is because such matrices are generally sparser and higher-dimensional than images. This, combined with the complexity of subspace clustering, which is generally cubic in the number of inputs, makes its use impractical in the context of text. Here we address these issues with a view to leveraging subspace clustering for networked (or not) text data. We first extend the concept of subspace clustering to co-clustering, which is suitable to deal with document-term matrices because of the interplay engendered between the document and word representations. We then address the sparsity problem through bilateral graph convolution, which promotes the grouping effect that has been credited for the effectiveness of some subspace clustering models. The proposed formulation results in an algorithm that is computationally/spatially efficient. Experiments using real-world datasets demonstrate the superior performance, in terms of document clustering, word clustering, and computational efficiency, of our proposed approach over the baselines and comparable methods.},
eventtitle = {{{IEEE Transactions}} on {{Knowledge}} and {{Data Engineering}}},
keywords = {attributed graphs,Clustering algorithms,Co-clustering,Complexity theory,Computational modeling,Convolution,Data models,Matrix decomposition,subspace clustering,Task analysis,text mining},
annotation = {0 citations (Crossref/DOI) [2024-04-22]\\
JCR分区: Q1\\
影响因子: 8.9\\
CCF: A},
file = {/Volumes/Mac_Ext/Zotero/storage/HE3I7AJG/Fettal 等 - 2024 - Boosting subspace co-clustering via bilateral graph convolution.pdf;/Volumes/Mac_Ext/Zotero/storage/IWI2TK8Z/10207697.html}
}
@article{ghimatgar2018ImprovedFeatureSelection,
title = {An Improved Feature Selection Algorithm Based on Graph Clustering and Ant Colony Optimization},
author = {Ghimatgar, Hojat and Kazemi, K. and Helfroush, M. and Aarabi, A.},
date = {2018},
journaltitle = {Knowledge-Based Systems},
shortjournal = {Knowl. Based Syst.},
volume = {159},
pages = {270--285},
doi = {10.1016/j.knosys.2018.06.025},
url = {https://consensus.app/papers/feature-selection-algorithm-based-graph-clustering-ghimatgar/a401aa844bba5730b7e43f90f6870322/},
urldate = {2024-05-27},
abstract = {Abstract Dimensionality reduction is an important preprocessing step to improve the performance of machine learning algorithms. Feature selection methods can efficiently speed up the learning process and improve the overall classification accuracy by reducing the computational complexity. Among the feature selection methods, multivariate methods are more effective in removing irrelevant and redundant features. An efficient multivariate feature selection method, optimization method, called ‘graph clustering based ant colony optimization (GCACO)’ has been recently introduced and shown to outperform other well-known feature selection methods. In the GCACO, features are divided into communities (clusters) in the entire feature space represented as a graph by an efficient community detection algorithm. An ACO-based search strategy is then used to select an optimal feature subset from the initial set of features. In this paper, a modified GCACO algorithm called MGCACO is presented to significantly improve the performance of the GCACO. Performance of the MGCACO algorithm was assessed by testing it on several standard benchmark datasets and sleep EEG data. The performance of the MGCACO was compared to those obtained using the original GCACO and other well-known filtering methods available in the literature. The MGCACO achieved superior performance over the GCACO and other univariate and multivariate algorithms with up to 10\%. The MGCACO also exhibited higher efficiency in reducing the number of features all by keeping the classification accuracy maximum.},
langid = {english},
keywords = {/unread},
annotation = {66 citations (Crossref/DOI) [2024-05-27]\\
影响因子: 8.8\\
CCF: C},
file = {/Volumes/Mac_Ext/Zotero/storage/TLVFGQRC/Ghimatgar et al. - 2018 - An improved feature selection algorithm based on graph clustering and ant colony optimization.pdf;/Volumes/Mac_Ext/Zotero/storage/NIIH5F8W/a401aa844bba5730b7e43f90f6870322.html}
}
@inproceedings{golchev2015BiclusteringAnalysisGene,
title = {Biclustering Analysis of Gene Expression Data Using Multi-Objective Evolutionary Algorithms},
booktitle = {2015 International Conference on Machine Learning and Cybernetics ({{ICMLC}})},
author = {Golchev, Maryam and Davarpanah, S. H. and Liew, Alan Wee-Chung},
date = {2015},
volume = {2},
pages = {505--510},
doi = {10.1109/ICMLC.2015.7340608},
url = {https://www.semanticscholar.org/paper/eb6ff576bec9920a8e39f5b13c1814c76f804794},
abstract = {Clustering is an unsupervised learning technique that groups data into clusters using the entire conditions. However, sometimes, data is similar only under a subset of conditions. Biclustering allows clustering of rows and columns of a dataset simultaneously. It extracts more accurate information from sparse datasets. In recent years, biclustering has found many useful applications in different fields and many biclustering algorithms have been proposed. Using both row and column information of data, biclustering requires the optimization of two conflicting objectives. In this study, a new multi-objective evolutionary biclustering framework using SPEA2 is proposed. A heuristic local search based on the gene and condition deletion and addition is added into SPEA2 and the best bicluster is selected using a new quantitative measure that considers both its coherence and size. The performance of our algorithm is evaluated using simulated and gene expression data and compared with several well-known biclustering methods. The experimental results demonstrate better performance with respect to the size and MSR of detected biclusters and significant enrichment of detected genes.},
langid = {english},
keywords = {/unread,⛔ No INSPIRE recid found},
annotation = {2 citations (Crossref) [2023-04-27] EI: 是 南农核心: 无 南农高质量: 无},
file = {D\:\\zihan\\Zotero\\storage\\4T2KKGML\\Golchev et al. - 2015 - Biclustering analysis of gene expression data usin.pdf}
}
@article{hartigan1972DirectClusteringData,
title = {Direct Clustering of a Data Matrix},
author = {Hartigan, J. A.},
date = {1972-03-01},
journaltitle = {Journal of the American Statistical Association},
volume = {67},
number = {337},
pages = {123--129},
publisher = {Taylor \& Francis},
issn = {0162-1459},
doi = {10.1080/01621459.1972.10481214},
url = {https://www.tandfonline.com/doi/abs/10.1080/01621459.1972.10481214},
urldate = {2024-03-27},
abstract = {Clustering algorithms are now in widespread use for sorting heterogeneous data into homogeneous blocks. If the data consist of a number of variables taking values over a number of cases, these algorithms may be used either to construct clusters of variables (using, say, correlation as a measure of distance between variables) or clusters of cases. This article presents a model, and a technique, for clustering cases and variables simultaneously. The principal advantage in this approach is the direct interpretation of the clusters on the data.},
file = {/Volumes/Mac_Ext/Zotero/storage/88UXVDR6/Hartigan - 1972 - Direct clustering of a data matrix.pdf}
}
@inproceedings{he2024CoclusteringFederatedRecommender,
title = {Co-Clustering for Federated Recommender System},
booktitle = {Proceedings of the {{ACM}} on {{Web Conference}} 2024},
author = {He, Xinrui and Liu, Shuo and Keung, Jacky and He, Jingrui},
date = {2024-05-13},
series = {{{WWW}} '24},
pages = {3821--3832},
publisher = {Association for Computing Machinery},
location = {New York, NY, USA},
doi = {10.1145/3589334.3645626},
url = {https://dl.acm.org/doi/10.1145/3589334.3645626},
urldate = {2024-05-26},
abstract = {As data privacy and security attract increasing attention, Federated Recommender System (FRS) offers a solution that strikes a balance between providing high-quality recommendations and preserving user privacy. However, the presence of statistical heterogeneity in FRS, commonly observed due to personalized decision-making patterns, can pose challenges. To address this issue and maximize the benefit of collaborative filtering (CF) in FRS, it is intuitive to consider clustering clients (users) as well as items into different groups and learning group-specific models. Existing methods either resort to client clustering via user representations-risking privacy leakage, or employ classical clustering strategies on item embeddings or gradients, which we found are plagued by the curse of dimensionality. In this paper, we delve into the inefficiencies of the K-Means method in client grouping, attributing failures due to the high dimensionality as well as data sparsity occurring in FRS, and propose CoFedRec, a novel Co-clustering Federated Recommendation mechanism, to address clients heterogeneity and enhance the collaborative filtering within the federated framework. Specifically, the server initially formulates an item membership from the client-provided item networks. Subsequently, clients are grouped regarding a specific item category picked from the item membership during each communication round, resulting in an intelligently aggregated group model. Meanwhile, to comprehensively capture the global inter-relationships among items, we incorporate an additional supervised contrastive learning term based on the server-side generated item membership into the local training phase for each client. Extensive experiments on four datasets are provided, which verify the effectiveness of the proposed CoFedRec.},
isbn = {9798400701719},
keywords = {/unread,co-clustering,federated recommendation,recommender system,supervised contrastive learning},
annotation = {0 citations (Crossref/DOI) [2024-05-27]\\
CCF: A},
file = {/Volumes/Mac_Ext/Zotero/storage/M47RZX4H/He et al. - 2024 - Co-clustering for federated recommender system.pdf}
}
@article{higham2007SpectralClusteringIts,
title = {Spectral Clustering and Its Use in Bioinformatics},
author = {Higham, Desmond J. and Kalna, Gabriela and Kibble, Milla},
date = {2007-07-01},
journaltitle = {Journal of Computational and Applied Mathematics},
shortjournal = {J. Comput. Appl. Math.},
series = {Special Issue Dedicated to {{Professor Shinnosuke Oharu}} on the Occasion of His 65th Birthday},
volume = {204},
number = {1},
pages = {25--37},
issn = {0377-0427},
doi = {10.1016/j.cam.2006.04.026},
url = {https://www.sciencedirect.com/science/article/pii/S0377042706002366},
urldate = {2024-02-09},
abstract = {We formulate a discrete optimization problem that leads to a simple and informative derivation of a widely used class of spectral clustering algorithms. Regarding the algorithms as attempting to bi-partition a weighted graph with N vertices, our derivation indicates that they are inherently tuned to tolerate all partitions into two non-empty sets, independently of the cardinality of the two sets. This approach also helps to explain the difference in behaviour observed between methods based on the unnormalized and normalized graph Laplacian. We also give a direct explanation of why Laplacian eigenvectors beyond the Fiedler vector may contain fine-detail information of relevance to clustering. We show numerical results on synthetic data to support the analysis. Further, we provide examples where normalized and unnormalized spectral clustering is applied to microarray data—here the graph summarizes similarity of gene activity across different tissue samples, and accurate clustering of samples is a key task in bioinformatics.},
langid = {english},
keywords = {Balancing threshold,Fiedler vector,Gene expression,Graph Laplacian,Maximum likelihood,Microarray,Partitioning,Random graph,Rayleigh–Ritz Theorem,Scaling},
file = {D\:\\zihan\\Zotero\\storage\\9CYFGIJN\\Higham 等 - 2007 - Spectral clustering and its use in bioinformatics.pdf;D\:\\zihan\\Zotero\\storage\\6WUE95ZK\\S0377042706002366.html}
}
@book{horn1985MatrixAnalysis,
title = {Matrix analysis},
author = {Horn, Roger A. and Johnson, Charles R.},
date = {1985},
publisher = {Cambridge University Press},
location = {Cambridge},
doi = {10.1017/CBO9780511810817},
url = {https://www.cambridge.org/core/books/matrix-analysis/9CF2CB491C9E97948B15FAD835EF9A8B},
urldate = {2025-01-21},
abstract = {In this book the authors present classical and recent results for matrix analysis that have proved to be important to applied mathematics. Facts about matrices, beyond those found in an elementary linear algebra course, are needed to understand virtually any area of mathematics, and the necessary material has only occurred sporadically in the literature and university curricula. As the interest in applied mathematics has grown, the need for a text and a reference work offering a broad selection of topics has become apparent, and this book aims to meet that need. This book will be welcomed as an undergraduate or graduate textbook for students studying matrix analysis. The authors assume a background in elementary linear algebra and knowledge of rudimentary analytical concepts. They begin with a review and discussion of eigenvalues and eigenvectors. The following chapters each treat a major topic in depth. This volume should be useful not only as a text, but also as a self-contained reference work to a variety of audiences in other scientific fields.},
langid = {english}
}
@article{huang2020CombiningBiclusteringMining,
title = {On Combining Biclustering Mining and {{AdaBoost}} for Breast Tumor Classification},
author = {Huang, Qinghua and Chen, Yongdong and Liu, Longzhong and Tao, Dacheng and Li, Xuelong},
date = {2020-04-01},
journaltitle = {IEEE Transactions on Knowledge and Data Engineering},
shortjournal = {IEEE Trans. Knowl. Data Eng.},
volume = {32},
number = {4},
pages = {728--738},
publisher = {IEEE},
issn = {1041-4347, 1558-2191, 2326-3865},
doi = {10.1109/TKDE.2019.2891622},
url = {https://ieeexplore.ieee.org/document/8611096/},
langid = {english},
keywords = {/unread,⛔ No INSPIRE recid found,Bioinformatics},
annotation = {JCR分区: Q1\\
中科院分区升级版: 计算机科学2区\\
影响因子: 9.24\\
5年影响因子: 8.032\\
EI: 是\\
CCF: A\\
FMS: B\\
JCI: 1.80\\
南农核心: 无\\
南农高质量: 无},
file = {/Volumes/Mac_Ext/Zotero/storage/K4H3GSXD/Huang et al_2020_On Combining Biclustering Mining and AdaBoost for Breast Tumor Classification.pdf;/Volumes/Mac_Ext/Zotero/storage/W727QLUX/8611096.html}
}
@article{huang2024EnergyAwareIntegratedNeural,
title = {Energy-Aware Integrated Neural Architecture Search and Partitioning for Distributed Internet of Things ({{IoT}})},
author = {Huang, Baichuan and Abtahi, Azra and Aminifar, Amir},
date = {2024-12},
journaltitle = {IEEE Transactions on Circuits and Systems for Artificial Intelligence},
volume = {1},
number = {2},
pages = {257--271},
issn = {2996-6647},
doi = {10.1109/TCASAI.2024.3493036},
urldate = {2025-02-02},
abstract = {Internet of Things (IoT) are one of the key enablers of personalized health. However, IoT devices often have stringent constraints in terms of resources, e.g., energy budget, and, therefore, limited possibilities to exploit the state-of-the-art Deep Neural Networks (DNNs). Energy-aware Neural Architecture Search (NAS) is proposed to tackle this challenge, by exploring lightweight DNN (DNN) architectures on a single IoT device, but not leveraging the inherently distributed nature of IoT systems. As a result, the joint optimization of DNN architectures and DNN computation partitioning/offloading has not been addressed to date. In this paper, we propose an energy-aware NAS framework for distributed IoT, aiming to search for distributed Deep Neural Networks (DNNs) to maximize prediction performance subjected to Flash Memory (Flash), Random Access Memory (RAM), and energy constraints. Our framework searches for lightweight DNN architecture with optimized prediction performance and its corresponding optimal computation partitioning to offload the partial DNN from edge to fog in a joint optimization. We evaluate our framework in the context of two common health applications, namely, seizure detection and arrhythmia classification, and demonstrate the effectiveness of our proposed joint optimization framework compared to NAS benchmarks.},
langid = {american},
keywords = {/unread,and energy optimization,Arrhythmia,Artificial intelligence,Artificial neural networks,battery-powered Internet of Things (IoT),computation offloading,distributed computing,Distributed computing,Energy consumption,Energy-aware neural architecture search (NAS),Internet of Things,Low power electronics,low-power IoT,low-power wearables,mobile edge computing,Mobile handsets,Multi-access edge computing,Wearable devices}
}
@inproceedings{jia2021ScalingVisualVisionlanguage,
title = {Scaling up Visual and Vision-Language Representation Learning with Noisy Text Supervision},
booktitle = {Proceedings of the 38th {{International Conference}} on {{Machine Learning}}},
author = {Jia, Chao and Yang, Yinfei and Xia, Ye and Chen, Yi-Ting and Parekh, Zarana and Pham, Hieu and Le, Quoc and Sung, Yun-Hsuan and Li, Zhen and Duerig, Tom},
date = {2021-07-01},
pages = {4904--4916},
publisher = {PMLR},
issn = {2640-3498},
url = {https://proceedings.mlr.press/v139/jia21b.html},
urldate = {2024-05-26},
abstract = {Pre-trained representations are becoming crucial for many NLP and perception tasks. While representation learning in NLP has transitioned to training on raw text without human annotations, visual and vision-language representations still rely heavily on curated training datasets that are expensive or require expert knowledge. For vision applications, representations are mostly learned using datasets with explicit class labels such as ImageNet or OpenImages. For vision-language, popular datasets like Conceptual Captions, MSCOCO, or CLIP all involve a non-trivial data collection (and cleaning) process. This costly curation process limits the size of datasets and hence hinders the scaling of trained models. In this paper, we leverage a noisy dataset of over one billion image alt-text pairs, obtained without expensive filtering or post-processing steps in the Conceptual Captions dataset. A simple dual-encoder architecture learns to align visual and language representations of the image and text pairs using a contrastive loss. We show that the scale of our corpus can make up for its noise and leads to state-of-the-art representations even with such a simple learning scheme. Our visual representation achieves strong performance when transferred to classification tasks such as ImageNet and VTAB. The aligned visual and language representations enables zero-shot image classification and also set new state-of-the-art results on Flickr30K and MSCOCO image-text retrieval benchmarks, even when compared with more sophisticated cross-attention models. The representations also enable cross-modality search with complex text and text + image queries.},
eventtitle = {International {{Conference}} on {{Machine Learning}}},
langid = {english},
keywords = {/unread},
file = {/Volumes/Mac_Ext/Zotero/storage/6W6X95BL/Jia et al. - 2021 - Scaling up visual and vision-language representation learning with noisy text supervision.pdf;/Volumes/Mac_Ext/Zotero/storage/UY2AYTRD/Jia et al. - 2021 - Scaling up visual and vision-language representation learning with noisy text supervision.pdf}
}
@inproceedings{junweihan2017BilateralKMeansAlgorithm,
title = {Bilateral K-{{Means Algorithm}} for {{Fast Co-Clustering}}},
booktitle = {Proceedings of the Thirty-First {{AAAI}} Conference on Artificial Intelligence},
author = {Han, Junwei and Song, Kun and Nie, Feiping and Li, Xuelong},
date = {2017},
abstract = {With the development of the information technology, the amount of data, e.g. text, image and video, has been increased rapidly. Efficiently clustering those large scale data sets is a challenge. To address this problem, this paper proposes a novel co-clustering method named bilateral k-means algorithm (BKM) for fast co-clustering. Different from traditional k-means algorithms, the proposed method has two indicator matrices P and Q and a diagonal matrix S to be solved, which represent the cluster memberships of samples and features, and the co-cluster centres, respectively. Therefore, it could implement different clustering tasks on the samples and features simultaneously. We also introduce an effective approach to solve the proposed method, which involves less multiplication. The computational complexity is analyzed. Extensive experiments on various types of data sets are conducted. Compared with the state-of-the-art clustering methods, the proposed BKM not only has faster computational speed, but also achieves promising clustering results.}
}
@inproceedings{keshet2016prediction,
title = {Prediction-Based, Prioritized Market-Share Insight Extraction},
author = {Keshet, Renato and Maor, Alina and Kour, George},
booktitle = {Advanced Data Mining and Applications: 12th International Conference, ADMA 2016, Gold Coast, QLD, Australia, December 12-15, 2016, Proceedings 12},
pages = {81--94},
year = {2016},
organization = {Springer}
}
@article{khan2020CoClusteringRevealSalient,
title = {Co-{{Clustering}} to {{Reveal Salient Facial Features}} for {{Expression Recognition}}},
author = {Khan, Sheheryar and Chen, Lijiang and Yan, Hong},
date = {2020-04-01},
journaltitle = {IEEE Transactions on Affective Computing},
shortjournal = {IEEE Trans. Affective Comput.},
volume = {11},
number = {2},
pages = {348--360},
issn = {1949-3045, 2371-9850},
doi = {10.1109/TAFFC.2017.2780838},
abstract = {Facial expressions are a strong visual intimation of gestural behaviors. The intelligent ability to learn these non-verbal cues of the humans is the key characteristic to develop efficient human computer interaction systems. Extracting an effective representation from facial expression images is a crucial step that impacts the recognition accuracy. In this paper, we propose a novel feature selection strategy using singular value decomposition (SVD) based co-clustering to search for the most salient regions in terms of facial features that possess a high discriminating ability among all expressions. To the best of our knowledge, this is the first known attempt to explicitly perform co-clustering in the facial expression recognition domain. In our method, Gabor filters are used to extract local features from an image and then discriminant features are selected based on the class membership in co-clusters. Experiments demonstrate that co-clustering localizes the salient regions of the face image. Not only does the procedure reduce the dimensionality but also improves the recognition accuracy. Experiments on CK plus, JAFFE and MMI databases validate the existence and effectiveness of these learned facial features.},
eventtitle = {{{IEEE Transactions}} on {{Affective Computing}}},
langid = {english}
}
@article{kim2011FastNonnegativeMatrix,
title = {Fast Nonnegative Matrix Factorization: An Active-Set-like Method and Comparisons},
shorttitle = {Fast {{Nonnegative Matrix Factorization}}},
author = {Kim, Jingu and Park, Haesun},
date = {2011-01},
journaltitle = {SIAM Journal on Scientific Computing},
shortjournal = {SIAM J. Sci. Comput.},
volume = {33},
number = {6},
pages = {3261--3281},
issn = {1064-8275, 1095-7197},
doi = {10.1137/110821172},
url = {http://epubs.siam.org/doi/10.1137/110821172},
urldate = {2023-12-12},
langid = {english},
keywords = {/unread},
annotation = {影响因子: 3.1\\
5年影响因子: 3.2},
file = {/Volumes/Mac_Ext/Zotero/storage/N6BRGSTM/Kim and Park - 2011 - Fast Nonnegative Matrix Factorization An Active-Set-Like Method and Comparisons.pdf}
}
@article{kim2022ABCAttributedBipartite,
title = {{{ABC}}: {{Attributed}} Bipartite Co-Clustering},
shorttitle = {{{ABC}}},
author = {Kim, Junghoon and Feng, Kaiyu and Cong, Gao and Zhu, Diwen and Yu, Wenyuan and Miao, Chunyan},
date = {2022-06-01},
journaltitle = {Proceedings of the VLDB Endowment},
shortjournal = {Proceedings of the VLDB Endowment},
volume = {15},
number = {10},
pages = {2134--2147},
issn = {2150-8097},
doi = {10.14778/3547305.3547318},
abstract = {Finding a set of co-clusters in a bipartite network is a fundamental and important problem. In this paper, we present the Attributed Bipartite Co-clustering (ABC) problem which unifies two main concepts: (i) bipartite modularity optimization, and (ii) attribute cohesiveness. To the best of our knowledge, this is the first work to find co-clusters while considering the attribute cohesiveness. We prove that ABC is NP-hard and is not in APX, unless P=NP. We propose three algorithms: (1) a top-down algorithm; (2) a bottom-up algorithm; (3) a group matching algorithm. Extensive experimental results on real-world attributed bipartite networks demonstrate the efficiency and effectiveness of our algorithms.}
}
@article{kluger2003SpectralBiclusteringMicroarray,
title = {Spectral {{Biclustering}} of {{Microarray Data}}: {{Coclustering Genes}} and {{Conditions}}},
shorttitle = {Spectral {{Biclustering}} of {{Microarray Data}}},
author = {Kluger, Yuval and Basri, Ronen and Chang, Joseph T. and Gerstein, Mark},
date = {2003-04-01},
journaltitle = {Genome Research},
shortjournal = {Genome Res.},
volume = {13},
number = {4},
eprint = {12671006},
eprinttype = {pmid},
pages = {703--716},
publisher = {{Cold Spring Harbor Lab}},
issn = {1088-9051, 1549-5469},
doi = {10.1101/gr.648603},
abstract = {Global analyses of RNA expression levels are useful for classifying genes and overall phenotypes. Often these classification problems are linked, and one wants to find ``marker genes'' that are differentially expressed in particular sets of ``conditions.'' We have developed a method that simultaneously clusters genes and conditions, finding distinctive ``checkerboard'' patterns in matrices of gene expression data, if they exist. In a cancer context, these checkerboards correspond to genes that are markedly up- or downregulated in patients with particular types of tumors. Our method, spectral biclustering, is based on the observation that checkerboard structures in matrices of expression data can be found in eigenvectors corresponding to characteristic expression patterns across genes or conditions. In addition, these eigenvectors can be readily identified by commonly used linear algebra approaches, in particular the singular value decomposition (SVD), coupled with closely integrated normalization steps. We present a number of variants of the approach, depending on whether the normalization over genes and conditions is done independently or in a coupled fashion. We then apply spectral biclustering to a selection of publicly available cancer expression data sets, and examine the degree to which the approach is able to identify checkerboard structures. Furthermore, we compare the performance of our biclustering methods against a number of reasonable benchmarks (e.g., direct application of SVD or normalized cuts to raw data).},
langid = {english}
}
@inproceedings{kumar2023CoclusteringBasedMethods,
title = {Co-Clustering Based Methods and Their Significance for Recommender Systems},
booktitle = {Multi-Disciplinary Trends in Artificial Intelligence},
author = {Kumar, Naresh and Sheeba, Merlin},
editor = {Morusupalli, Raghava and Dandibhotla, Teja Santosh and Atluri, Vani Vathsala and Windridge, David and Lingras, Pawan and Komati, Venkateswara Rao},
date = {2023},
series = {Lecture {{Notes}} in {{Computer Science}}},
volume = {14078},
pages = {513--522},
publisher = {{Springer Nature Switzerland}},
location = {{Cham}},
doi = {10.1007/978-3-031-36402-0_48},
url = {https://link.springer.com/10.1007/978-3-031-36402-0_48},
abstract = {In the contemporary era, businesses are driven by Internet based web or mobile applications. In every conceivable area of research, it is indispensable for such applications to have a recommender system to expedite the interactions between customers and business entity for faster convergence. Provided this fact, there is need for leveraging such systems as they have unprecedented impact on businesses across the globe. In this regard, identification of merits and demerits in the existing methods used to realize recommender systems is to be given paramount importance. In this paper, we review literature to ascertain useful facts pertaining to different approaches to make recommender systems. Since recommender systems lubricate the process of commercial or otherwise interactions with consumers, for business entities it is imperative to have applications with built-in recommender system. The literature review made in this paper provides different aspects of recommender systems such as datasets, methods and their utility in the current business scenarios. It throws light into the research gaps that help in further research and improvement based on novel data mining approaches.},
isbn = {978-3-031-36402-0},
langid = {english},
keywords = {/unread,co-clustering 协同聚类,collaborative filtering methods 协同过滤方法,content based filtering methods 基于内容的过滤方法,recommendation system},
file = {D:\zihan\Zotero\storage\4UE7FRTS\Kumar and Sheeba - 2023 - Co-clustering Based Methods and Their Significance for Recommender Systems.pdf}
}
@inproceedings{lee2014BigDataMatrix,
title = {Big Data Matrix Singular Value Decomposition Based on Low-Rank Tensor Train Decomposition},
booktitle = {Advances in {{Neural Networks}} – {{ISNN}} 2014},
author = {Lee, Namgil and Cichocki, Andrzej},
editor = {Zeng, Zhigang and Li, Yangmin and King, Irwin},
date = {2014},
volume = {8866},
pages = {121--130},
publisher = {Springer International Publishing},
location = {Cham},
doi = {10.1007/978-3-319-12436-0_14},
url = {https://link.springer.com/10.1007/978-3-319-12436-0_14},
urldate = {2024-06-08},
abstract = {We propose singular value decomposition (SVD) algorithms for very large-scale matrices based on a low-rank tensor decomposition technique called the tensor train (TT) format. By using the proposed algorithms, we can compute several dominant singular values and corresponding singular vectors of large-scale structured matrices given in a low-rank TT format. We propose a large-scale trace optimization problem, and in the proposed methods, the large-scale optimization problem is reduced to sequential small-scale optimization problems. We show that the computational complexity of the proposed algorithms scales logarithmically with the matrix size if the TT-ranks are bounded. Numerical simulations based on very large-scale Hilbert matrix demonstrate the effectiveness of the proposed methods.},
eventtitle = {Advances in {{Neural Networks}} – {{ISNN}} 2014},
isbn = {978-3-319-12435-3 978-3-319-12436-0},
langid = {english},
keywords = {/unread},
annotation = {3 citations (Crossref/DOI) [2024-06-08]},
file = {/Volumes/Mac_Ext/Zotero/storage/PWNEQZD4/Lee和Cichocki - 2014 - Big Data Matrix Singular Value Decomposition Based on Low-Rank Tensor Train Decomposition.pdf}
}
@inproceedings{leung2011CLRCollaborativeLocation,
title = {{{CLR}}: A Collaborative Location Recommendation Framework Based on Co-Clustering},
shorttitle = {{{CLR}}},
booktitle = {Proceedings of the 34th International {{ACM SIGIR}} Conference on {{Research}} and Development in {{Information Retrieval}}},
author = {Leung, Kenneth Wai-Ting and Lee, Dik Lun and Lee, Wang-Chien},
date = {2011},
series = {{{SIGIR}} '11},
pages = {305--314},
publisher = {Association for Computing Machinery},
location = {New York, NY, USA},
doi = {10.1145/2009916.2009960},
url = {https://dl.acm.org/doi/10.1145/2009916.2009960},
urldate = {2024-05-20},
abstract = {GPS data tracked on mobile devices contains rich information about human activities and preferences. In this paper, GPS data is used in location-based services (LBSs) to provide collaborative location recommendations. We observe that most existing LBSs provide location recommendations by clustering the User-Location matrix. Since the User-Location matrix created based on GPS data is huge, there are two major problems with these methods. First, the number of similar locations that need to be considered in computing the recommendations can be numerous. As a result, the identification of truly relevant locations from numerous candidates is challenging. Second, the clustering process on large matrix is time consuming. Thus, when new GPS data arrives, complete re-clustering of the whole matrix is infeasible. To tackle these two problems, we propose the Collaborative Location Recommendation (CLR) framework for location recommendation. By considering activities (i.e., temporal preferences) and different user classes (i.e., Pattern Users, Normal Users, and Travelers) in the recommendation process, CLR is capable of generating more precise and refined recommendations to the users compared to the existing methods. Moreover, CLR employs a dynamic clustering algorithm CADC to cluster the trajectory data into groups of similar users, similar activities and similar locations efficiently by supporting incremental update of the groups when new GPS trajectory data arrives. 
We evaluate CLR with a real-world GPS dataset, and confirm that the CLR framework provides more accurate location recommendations compared to the existing methods.移动设备上跟踪的 GPS 数据包含有关人类活动和偏好的丰富信息。在本文中,GPS 数据用于基于位置的服务(LBS)以提供协作位置建议。我们观察到大多数现有的 LBS 通过对用户位置矩阵进行聚类来提供位置推荐。由于基于GPS数据创建的用户位置矩阵巨大,这些方法存在两个主要问题。首先,在计算推荐时需要考虑的相似位置的数量可能很多。因此,从众多候选者中识别真正相关的位置具有挑战性。其次,大矩阵上的聚类过程非常耗时。因此,当新的 GPS 数据到达时,对整个矩阵进行完全重新聚类是不可行的。为了解决这两个问题,我们提出了用于位置推荐的协作位置推荐(CLR)框架。通过在推荐过程中考虑活动(即时间偏好)和不同的用户类别(即模式用户、普通用户和旅行者),与现有方法相比,CLR 能够为用户生成更精确、更精细的推荐。此外,CLR 采用动态聚类算法 CADC,通过在新的 GPS 轨迹数据到达时支持增量更新,将轨迹数据有效地聚类为相似用户、相似活动和相似位置的组。我们使用真实世界的 GPS 数据集评估 CLR,并确认与现有方法相比,CLR 框架提供了更准确的位置建议。},
isbn = {978-1-4503-0757-4},
langid = {english},
keywords = {/unread,co-clustering,collaborative filtering,location recommendation},
annotation = {91 citations (Crossref/DOI) [2024-05-21]},
file = {/Volumes/Mac_Ext/Zotero/storage/DRP4WVCU/Leung et al. - 2011 - CLR A collaborative location recommendation framework based on co-clustering.pdf}
}
@article{lewis2004Rcv1NewBenchmark,
title = {{{RCV1}}: A New Benchmark Collection for Text Categorization Research},
shorttitle = {{{RCV1}}},
author = {Lewis, David D. and Yang, Yiming and Russell-Rose, Tony and Li, Fan},
date = {2004},
journaltitle = {Journal of Machine Learning Research},
volume = {5},
pages = {361--397},
publisher = {JMLR.org},
url = {https://www.jmlr.org/papers/volume5/lewis04a/lewis04a.pdf},
urldate = {2024-06-06},
issue = {Apr},
keywords = {/unread},
file = {/Volumes/Mac_Ext/Zotero/storage/4KAJY5IQ/Lewis 等 - 2004 - Rcv1 A new benchmark collection for text categorization research.pdf}
}
@article{li2014ClusteringguidedSparseStructural,
title = {Clustering-Guided Sparse Structural Learning for Unsupervised Feature Selection},
author = {Li, Zechao and Liu, Jing and Yang, Yi and Zhou, Xiaofang and Lu, Hanqing},
date = {2014},
journaltitle = {IEEE Transactions on Knowledge and Data Engineering},
shortjournal = {IEEE Trans. Knowl. Data Eng.},
volume = {26},
pages = {2138--2150},
doi = {10.1109/TKDE.2013.65},
url = {https://consensus.app/papers/sparse-structural-learning-unsupervised-feature-li/4ddcb83e61ee5f2393e00164fabc2f48/},
urldate = {2024-05-27},
abstract = {Many pattern analysis and data mining problems have witnessed high-dimensional data represented by a large number of features, which are often redundant and noisy. Feature selection is one main technique for dimensionality reduction that involves identifying a subset of the most useful features. In this paper, a novel unsupervised feature selection algorithm, named clustering-guided sparse structural learning (CGSSL), is proposed by integrating cluster analysis and sparse structural analysis into a joint framework and experimentally evaluated. Nonnegative spectral clustering is developed to learn more accurate cluster labels of the input samples, which guide feature selection simultaneously. Meanwhile, the cluster labels are also predicted by exploiting the hidden structure shared by different features, which can uncover feature correlations to make the results more reliable. Row-wise sparse models are leveraged to make the proposed model suitable for feature selection. To optimize the proposed formulation, we propose an efficient iterative algorithm. Finally, extensive experiments are conducted on 12 diverse benchmarks, including face data, handwritten digit data, document data, and biomedical data. The encouraging experimental results in comparison with several representative algorithms and the theoretical analysis demonstrate the efficiency and effectiveness of the proposed algorithm for feature selection.},
langid = {english},
keywords = {/unread},
annotation = {212 citations (Crossref/DOI) [2024-05-27]\\
影响因子: 8.9\\
CCF: A},
file = {/Volumes/Mac_Ext/Zotero/storage/BSI3J87R/Li et al. - 2014 - Clustering-guided sparse structural learning for unsupervised feature selection.pdf;/Volumes/Mac_Ext/Zotero/storage/WKJPGJGS/4ddcb83e61ee5f2393e00164fabc2f48.html}
}
@article{li2021NovelCollaborativeFiltering,
title = {A Novel Collaborative Filtering Recommendation Approach Based on Soft Co-Clustering},
author = {Li, Man and Wen, Luosheng and Chen, Feiyu},
date = {2021-01-01},
journaltitle = {Physica A: Statistical Mechanics and its Applications},
shortjournal = {Physica A},
volume = {561},
pages = {125140},
issn = {0378-4371},
doi = {10.1016/j.physa.2020.125140},
url = {https://www.sciencedirect.com/science/article/pii/S0378437120305963},
urldate = {2024-05-21},
abstract = {Collaborative Filtering (CF) recommendation algorithm has been widely applied into recommender systems. Many CF algorithms associate a user/an item with one of subgroups by explicit or implicit features. However, considering that users may have multiple personalities and items may have diverse attributes, it is more reasonable to associate a user/an item with more than one group. In this paper, we propose the Soft K-indicators Alternative Projection (SKAP) algorithm, which can efficiently resolve soft clustering problem with high dimensions, to generate a sparse partition matrix and further a Top-N recommendation list is given. Unlike fuzzy C-means clustering, the SKAP algorithm is independent on the selection of initial values. In addition to that, we integrate the item type information into recommender systems to improve recommendation accuracy. Experimental results show that the proposed approach behaves superior performance in Top-N recommendation in terms of classical metrics and further show that multi-label classification framework is a better description than classical Co-Clustering framework.},
langid = {english},
keywords = {/unread,Collaborative Filtering,Partition matrix,Recommender systems,Soft Co-Clustering (SCoC),Soft K-indicators Alternative Projection (SKAP) algorithm},
file = {/Volumes/Mac_Ext/Zotero/storage/A6RK7M8L/Li et al. - 2021 - A novel collaborative filtering recommendation approach based on soft co-clustering.pdf;/Volumes/Mac_Ext/Zotero/storage/7UX45TIE/S0378437120305963.html}
}
@inproceedings{li2022BLIPBootstrappingLanguageimage,
title = {{{BLIP}}: Bootstrapping Language-Image Pre-Training for Unified Vision-Language Understanding and Generation},
shorttitle = {{{BLIP}}},
booktitle = {Proceedings of the 39th {{International Conference}} on {{Machine Learning}}},
author = {Li, Junnan and Li, Dongxu and Xiong, Caiming and Hoi, Steven},
date = {2022-06-28},
pages = {12888--12900},
publisher = {PMLR},
issn = {2640-3498},
url = {https://proceedings.mlr.press/v162/li22n.html},
urldate = {2024-05-26},
abstract = {Vision-Language Pre-training (VLP) has advanced the performance for many vision-language tasks. However, most existing pre-trained models only excel in either understanding-based tasks or generation-based tasks. Furthermore, performance improvement has been largely achieved by scaling up the dataset with noisy image-text pairs collected from the web, which is a suboptimal source of supervision. In this paper, we propose BLIP, a new VLP framework which transfers flexibly to both vision-language understanding and generation tasks. BLIP effectively utilizes the noisy web data by bootstrapping the captions, where a captioner generates synthetic captions and a filter removes the noisy ones. We achieve state-of-the-art results on a wide range of vision-language tasks, such as image-text retrieval (+2.7\% in average recall@1), image captioning (+2.8\% in CIDEr), and VQA (+1.6\% in VQA score). BLIP also demonstrates strong generalization ability when directly transferred to video-language tasks in a zero-shot manner. Code and models are available at https://github.com/salesforce/BLIP.},
eventtitle = {International {{Conference}} on {{Machine Learning}}},
langid = {english},
keywords = {/unread},
file = {/Volumes/Mac_Ext/Zotero/storage/PNURRJ85/Li et al. - 2022 - BLIP bootstrapping language-image pre-training for unified vision-language understanding and genera.pdf}
}
@article{li2023DistributedClusteringCooperative,
title = {Distributed Clustering for Cooperative Multi-Task Learning Networks},
author = {Li, Jiani and Wang, Weihan and Abbas, Waseem and Koutsoukos, Xenofon},
date = {2023},
journaltitle = {IEEE Transactions on Network Science and Engineering},
shortjournal = {IEEE Trans. Netw. Sci. Eng.},
volume = {10},
pages = {1--10},
issn = {2327-4697, 2334-329X},
doi = {10.1109/TNSE.2023.3276854},
url = {https://ieeexplore.ieee.org/document/10125058/},
urldate = {2024-05-27},
abstract = {Distributed learning enables collaborative training of machine learning models across multiple agents by exchanging model parameters without sharing local data. Each agent generates data from distinct but related distributions, and multi-task learning can be effectively used to model related tasks. This article focuses on clustered multi-task learning, where agents are partitioned into clusters with distinct objectives, and agents in the same cluster share the same objective. The structure of such clusters is unknown apriori. Cooperation with the agents in the same cluster is beneficial and improves the overall learning performance. However, indiscriminate cooperation of agents with different objectives leads to undesired outcomes. Accurately capturing the clustering structure benefits the cooperation and offers many practical benefits; for instance, it helps advertising companies better target their ads. This article proposes an adaptive clustering method that allows distributed agents to learn the most appropriate neighbors to collaborate with and form clusters. We prove the convergence of every agent towards its objective and analyze the network learning performance using the proposed clustering method. Further, we present a method of computing combination weights that approximately optimizes the network's learning performance to determine how one should aggregate the neighbors' model parameters after the clustering step. The theoretical analysis is well-validated by the evaluation results using target localization and digits classification, showing that the proposed clustering method outperforms existing distributed clustering methods as well as the case where agents do not cooperate.},
langid = {english},
keywords = {/unread},
annotation = {0 citations (Crossref/DOI) [2024-05-27]\\
影响因子: 6.6},
file = {/Volumes/Mac_Ext/Zotero/storage/EDJX4GP3/25729236ece75a478aac6932900bb161.html}
}
@inproceedings{lian2014GeoMFJointGeographical,
title = {{{GeoMF}}: {{Joint}} Geographical Modeling and Matrix Factorization for Point-of-Interest Recommendation},
shorttitle = {{{GeoMF}}},
booktitle = {Proceedings of the 20th {{ACM SIGKDD}} International Conference on {{Knowledge}} Discovery and Data Mining},
author = {Lian, Defu and Zhao, Cong and Xie, Xing and Sun, Guangzhong and Chen, Enhong and Rui, Yong},
date = {2014-08-24},
series = {{{KDD}} '14},
pages = {831--840},
publisher = {Association for Computing Machinery},
location = {New York, NY, USA},
doi = {10.1145/2623330.2623638},
url = {https://dl.acm.org/doi/10.1145/2623330.2623638},
urldate = {2024-05-26},
abstract = {Point-of-Interest (POI) recommendation has become an important means to help people discover attractive locations. However, extreme sparsity of user-POI matrices creates a severe challenge. To cope with this challenge, viewing mobility records on location-based social networks (LBSNs) as implicit feedback for POI recommendation, we first propose to exploit weighted matrix factorization for this task since it usually serves collaborative filtering with implicit feedback better. Besides, researchers have recently discovered a spatial clustering phenomenon in human mobility behavior on the LBSNs, i.e., individual visiting locations tend to cluster together, and also demonstrated its effectiveness in POI recommendation, thus we incorporate it into the factorization model. Particularly, we augment users' and POIs' latent factors in the factorization model with activity area vectors of users and influence area vectors of POIs, respectively. Based on such an augmented model, we not only capture the spatial clustering phenomenon in terms of two-dimensional kernel density estimation, but we also explain why the introduction of such a phenomenon into matrix factorization helps to deal with the challenge from matrix sparsity. We then evaluate the proposed algorithm on a large-scale LBSN dataset. The results indicate that weighted matrix factorization is superior to other forms of factorization models and that incorporating the spatial clustering phenomenon into matrix factorization improves recommendation performance.},
isbn = {978-1-4503-2956-9},
keywords = {/unread,kernel density estimation,location recommendation,location-based social network,weighted matrix factorization},
annotation = {426 citations (Crossref/DOI) [2024-05-26]},
file = {/Volumes/Mac_Ext/Zotero/storage/N8JQVDXP/Lian et al. - 2014 - GeoMF Joint geographical modeling and matrix factorization for point-of-interest recommendation.pdf}
}
@article{lin2019OverviewCoClusteringMatrix,
title = {An Overview of Co-Clustering via Matrix Factorization},
author = {Lin, Renjie and Wang, Shiping and Guo, Wenzhong},
date = {2019},
journaltitle = {IEEE Access},
shortjournal = {IEEE Access},
volume = {7},
pages = {33481--33493},
issn = {2169-3536},
doi = {10.1109/ACCESS.2019.2904314},
abstract = {Co-clustering algorithms have been widely used for text clustering and gene expression through matrix factorization. In recent years, diverse co-clustering algorithms which group data points and features synchronously have shown their advantages over traditional one-side clustering. In order to solve the co-clustering problems, most existing methods relaxed constraints via matrix factorization. In this paper, we provide a detailed understanding of six co-clustering algorithms with different performance and robustness. We conduct comprehensive experiments in eight real-world datasets to compare and evaluate these co-clustering methods based on four evaluation metrics including clustering accuracy, normalized mutual information, adjusted rand index, and purity. Our findings demonstrate the strengths and weaknesses of these methods and provide insights to motivate further exploration of co-clustering methods and matrix factorization.},
eventtitle = {{{IEEE Access}}}
}
@article{lloyd1982LeastSquaresQuantization,
title = {Least Squares Quantization in {{PCM}}},
author = {Lloyd, Stuart P.},
date = {1982-03},
journaltitle = {IEEE Transactions on Information Theory},
shortjournal = {IEEE Trans. Inf. Theory},
volume = {28},
number = {2},
pages = {129--137},
issn = {1557-9654},
doi = {10.1109/TIT.1982.1056489},
url = {https://ieeexplore.ieee.org/abstract/document/1056489?casa_token=kwi9HKhZpa4AAAAA:XAz-8p-AnmEMY7QTsU6zifNgg9FlV6r0P9lyerWMwFcXGL7KI9Q0L3sILu4fFYupB0_clS_KmRU},
urldate = {2024-02-09},
abstract = {It has long been realized that in pulse-code modulation (PCM), with a given ensemble of signals to handle, the quantum values should be spaced more closely in the voltage regions where the signal amplitude is more likely to fall. It has been shown by Panter and Dite that, in the limit as the number of quanta becomes infinite, the asymptotic fractional density of quanta per unit voltage should vary as the one-third power of the probability density per unit voltage of signal amplitudes. In this paper the corresponding result for any finite number of quanta is derived; that is, necessary conditions are found that the quanta and associated quantization intervals of an optimum finite quantization scheme must satisfy. The optimization criterion used is that the average quantization noise power be a minimum. It is shown that the result obtained here goes over into the Panter and Dite result as the number of quanta become large. The optimum quautization schemes for2\^bquanta,b=1,2, \textbackslash cdots, 7, are given numerically for Gaussian and for Laplacian distribution of signal amplitudes.},
eventtitle = {IEEE Transactions on Information Theory},
langid = {english},
file = {D:\zihan\Zotero\storage\9KQS869D\1056489.html}
}
@inproceedings{long2005CoclusteringBlockValue,
title = {Co-Clustering by Block Value Decomposition},
booktitle = {Proceedings of the Eleventh {{ACM SIGKDD}} International Conference on {{Knowledge}} Discovery in Data Mining},
author = {Long, Bo and Zhang, Zhongfei and Yu, Philip S.},
date = {2005},
pages = {635--640},
publisher = {Association for Computing Machinery},
location = {New York, NY, USA},
doi = {10.1145/1081870.1081949}
}
@article{luo2024CLEARClusterenhancedContrast,
title = {{{CLEAR}}: {{Cluster-enhanced}} Contrast for Self-Supervised Graph Representation Learning},
shorttitle = {{{CLEAR}}},
author = {Luo, Xiao and Ju, Wei and Qu, Meng and Gu, Yiyang and Chen, Chong and Deng, Minghua and Hua, Xian-Sheng and Zhang, Ming},
date = {2024-01},
journaltitle = {IEEE Transactions on Neural Networks and Learning Systems},
volume = {35},
number = {1},
pages = {899--912},
issn = {2162-2388},
doi = {10.1109/TNNLS.2022.3177775},
url = {https://ieeexplore.ieee.org/abstract/document/9791433},
urldate = {2024-05-26},
abstract = {This article studies self-supervised graph representation learning, which is critical to various tasks, such as protein property prediction. Existing methods typically aggregate representations of each individual node as graph representations, but fail to comprehensively explore local substructures (i.e., motifs and subgraphs), which also play important roles in many graph mining tasks. In this article, we propose a self-supervised graph representation learning framework named cluster-enhanced Contrast (CLEAR) that models the structural semantics of a graph from graph-level and substructure-level granularities, i.e., global semantics and local semantics, respectively. Specifically, we use graph-level augmentation strategies followed by a graph neural network-based encoder to explore global semantics. As for local semantics, we first use graph clustering techniques to partition each whole graph into several subgraphs while preserving as much semantic information as possible. We further employ a self-attention interaction module to aggregate the semantics of all subgraphs into a local-view graph representation. Moreover, we integrate both global semantics and local semantics into a multiview graph contrastive learning framework, enhancing the semantic-discriminative ability of graph representations. Extensive experiments on various real-world benchmarks demonstrate the efficacy of the proposed CLEAR over current graph self-supervised representation learning approaches on both graph classification and transfer learning tasks.},
eventtitle = {{{IEEE Transactions}} on {{Neural Networks}} and {{Learning Systems}}},
keywords = {/unread,Aggregates,Clustering algorithms,Contrastive learning (CL),graph clustering,graph representation learning,Partitioning algorithms,Proteins,Representation learning,self-supervised learning,Semantics,Task analysis},
annotation = {8 citations (Crossref/DOI) [2024-05-26]},
file = {/Volumes/Mac_Ext/Zotero/storage/YPVZLYLN/9791433.html}
}
@incollection{macqueen1967MethodsClassificationAnalysis,
title = {Some Methods for Classification and Analysis of Multivariate Observations},
booktitle = {Proceedings of the {{Fifth Berkeley Symposium}} on {{Mathematical Statistics}} and {{Probability}}, {{Volume}} 1: {{Statistics}}},
author = {MacQueen, J.},
date = {1967-01-01},
volume = {5.1},
pages = {281--298},
publisher = {{University of California Press}},
url = {https://projecteuclid.org/ebooks/berkeley-symposium-on-mathematical-statistics-and-probability/Proceedings-of-the-Fifth-Berkeley-Symposium-on-Mathematical-Statistics-and/chapter/Some-methods-for-classification-and-analysis-of-multivariate-observations/bsmsp/1200512992},
urldate = {2024-02-09},
langid = {english},
file = {D:\zihan\Zotero\storage\G68JERKL\MacQueen - 1967 - Some methods for classification and analysis of multivariate observations.pdf}
}
@article{madeira2004BiclusteringAlgorithmsBiological,
title = {Biclustering Algorithms for Biological Data Analysis: {{A}} Survey},
author = {Madeira, Sara C. and Oliveira, Arlindo L.},
date = {2004},
journaltitle = {IEEE/ACM Transactions on Computational Biology and Bioinformatics},
shortjournal = {IEEE/ACM Trans. Comput. Biol. Bioinformatics},
volume = {1},
number = {1},
pages = {24--45},
eprint = {17048406},
eprinttype = {pmid},
doi = {10.1109/tcbb.2004.2},
abstract = {A large number of clustering approaches have been proposed for the analysis of gene expression data obtained from microarray experiments. However, the results from the application of standard clustering methods to genes are limited. This limitation is imposed by the existence of a number of experimental conditions where the activity of genes is uncorrelated. A similar limitation exists when clustering of conditions is performed. For this reason, a number of algorithms that perform simultaneous clustering on the row and column dimensions of the data matrix has been proposed. The goal is to find submatrices, that is, subgroups of genes and subgroups of conditions, where the genes exhibit highly correlated activities for every condition. In this paper, we refer to this class of algorithms as biclustering. Biclustering is also referred in the literature as coclustering and direct clustering, among others names, and has also been used in fields such as information retrieval and data mining. In this comprehensive survey, we analyze a large number of existing approaches to biclustering, and classify them in accordance with the type of biclusters they can find, the patterns of biclusters that are discovered, the methods used to perform the search, the approaches used to evaluate the solution, and the target applications.},
mag_id = {2144544802}
}
@book{mclachlan1987MixtureModelsInference,
title = {Mixture Models: Inference and Applications to Clustering},
shorttitle = {Mixture Models},
author = {McLachlan, Geoffrey J. and Basford, Kaye E.},
date = {1987-09-28},
publisher = {Marcel Dekker Inc},
location = {New York, N.Y},
isbn = {978-0-8247-7691-6},
langid = {english},
pagetotal = {272},
keywords = {/unread}
}
@inproceedings{mu2022LearningHybridBehavior,
title = {Learning {{Hybrid Behavior Patterns}} for {{Multimedia Recommendation}}},
booktitle = {Proceedings of the 30th {{ACM International Conference}} on {{Multimedia}}},
author = {Mu, Zongshen and Zhuang, Yueting and Tan, Jie and Xiao, Jun and Tang, Siliang},
date = {2022-10-10},
series = {{{MM}} '22},
pages = {376--384},
publisher = {Association for Computing Machinery},
location = {New York, NY, USA},
doi = {10.1145/3503161.3548119},
url = {https://dl.acm.org/doi/10.1145/3503161.3548119},
urldate = {2024-06-05},
abstract = {Multimedia recommendation aims to predict user preferences where users interact with multimodal items. Collaborative filtering based on graph convolutional networks manifests impressive performance gains in multimedia recommendation. This is attributed to the capability of learning good user and item embeddings by aggregating the collaborative signals from high-order neighbors. However, previous researches [37,38] fail to explicitly mine different behavior patterns (i.e., item categories, common user interests) by exploiting user-item and item-item graphs simultaneously, which plays an important role in modeling user preferences. And it is the lack of different behavior pattern constraints and multimodal feature reconciliations that results in performance degradation. Towards this end, We propose a Hybrid Clustering Graph Convolutional Network (HCGCN) for multimedia recommendation. We perform high-order graph convolutions inside user-item clusters and item-item clusters to capture various user behavior patterns. Meanwhile, we design corresponding clustering losses to enhance user-item preference feedback and multimodal representation learning constraint to adjust the modality importance, making more accurate recommendations. Experimental results on three real-world multimedia datasets not only demonstrate the significant improvement of our model over the state-of-the-art methods, but also validate the effectiveness of integrating hybrid user behavior patterns for multimedia recommendation.},
isbn = {978-1-4503-9203-7},
keywords = {/unread,graph clustering,graph-based collaborative filtering,multimedia recommendation},
annotation = {11 citations (Crossref/DOI) [2024-06-05]},
file = {/Volumes/Mac_Ext/Zotero/storage/K5X9DU7B/Mu 等 - 2022 - Learning Hybrid Behavior Patterns for Multimedia Recommendation.pdf}
}
@inproceedings{ni2019JustifyingRecommendationsUsing,
  author     = {Ni, Jianmo and Li, Jiacheng and McAuley, Julian},
  title      = {Justifying {Recommendations} Using {Distantly-Labeled Reviews} and {Fine-Grained Aspects}},
  booktitle  = {Proceedings of the 2019 {Conference} on {Empirical Methods} in {Natural Language Processing} and the 9th {International Joint Conference} on {Natural Language Processing} ({EMNLP-IJCNLP})},
  eventtitle = {Proceedings of the 2019 {Conference} on {Empirical Methods} in {Natural Language Processing} and the 9th {International Joint Conference} on {Natural Language Processing} ({EMNLP-IJCNLP})},
  date       = {2019},
  pages      = {188--197},
  publisher  = {Association for Computational Linguistics},
  location   = {Hong Kong, China},
  doi        = {10.18653/v1/D19-1018},
  url        = {https://www.aclweb.org/anthology/D19-1018},
  urldate    = {2024-06-06},
  langid     = {english},
  abstract   = {Several recent works have considered the problem of generating reviews (or ‘tips’) as a form of explanation as to why a recommendation might match a user’s interests. While promising, we demonstrate that existing approaches struggle (in terms of both quality and content) to generate justifications that are relevant to users’ decision-making process. We seek to introduce new datasets and methods to address this recommendation justification task. In terms of data, we first propose an ‘extractive’ approach to identify review segments which justify users’ intentions; this approach is then used to distantly label massive review corpora and construct largescale personalized recommendation justification datasets. In terms of generation, we design two personalized generation models with this data: (1) a reference-based Seq2Seq model with aspect-planning which can generate justifications covering different aspects, and (2) an aspect-conditional masked language model which can generate diverse justifications based on templates extracted from justification histories. We conduct experiments on two real-world datasets which show that our model is capable of generating convincing and diverse justifications.},
  keywords   = {/unread},
  annotation = {384 citations (Crossref/DOI) [2024-06-06]},
  file       = {/Volumes/Mac_Ext/Zotero/storage/DRPUMQIP/emnlp19a.pdf}
}
@inproceedings{nie2017LearningStructuredOptimal,
title = {Learning a Structured Optimal Bipartite Graph for Co-Clustering},
booktitle = {Advances in {{Neural Information Processing Systems}}},
author = {Nie, Feiping and Wang, Xiaoqian and Deng, Cheng and Huang, Heng},
editor = {Guyon, Isabelle and von Luxburg, Ulrike and Bengio, Samy and Wallach, Hanna and Fergus, Rob and Vishwanathan, S. V. N. and Garnett, Roman},
date = {2017},
volume = {30},
publisher = {Curran Associates, Inc.},
url = {https://proceedings.neurips.cc/paper_files/paper/2017/file/00a03ec6533ca7f5c644d198d815329c-Paper.pdf},
langid = {english},
keywords = {/unread,⛔ No DOI found,nips,to follow,TODO},
internal-note = {NOTE(review): fixed editor list -- original had "Luxburg, U. Von", which misplaces the von particle into the given name (wrong sorting/rendering); also expanded editor initials to full names per NeurIPS 2017 front matter},
file = {/Volumes/Mac_Ext/Zotero/storage/BVLTH3XJ/Nie et al. - 2017 - Learning a structured optimal bipartite graph for co-clustering.pdf}
}
@inproceedings{pmlr-v139-radford21a,
internal-note = {NOTE(review): apparent duplicate of entry radford2021LearningTransferableVisual (same CLIP paper, PMLR v139) -- consolidate citations to a single key; this entry is the more complete one (has series/volume)},
title = {Learning Transferable Visual Models from Natural Language Supervision},
booktitle = {Proceedings of the 38th International Conference on Machine Learning},
author = {Radford, Alec and Kim, Jong Wook and Hallacy, Chris and Ramesh, Aditya and Goh, Gabriel and Agarwal, Sandhini and Sastry, Girish and Askell, Amanda and Mishkin, Pamela and Clark, Jack and Krueger, Gretchen and Sutskever, Ilya},
editor = {Meila, Marina and Zhang, Tong},
date = {2021-07-18/2021-07-24},
series = {Proceedings of Machine Learning Research},
volume = {139},
pages = {8748--8763},
publisher = {PMLR},
url = {https://proceedings.mlr.press/v139/radford21a.html},
abstract = {State-of-the-art computer vision systems are trained to predict a fixed set of predetermined object categories. This restricted form of supervision limits their generality and usability since additional labeled data is needed to specify any other visual concept. Learning directly from raw text about images is a promising alternative which leverages a much broader source of supervision. We demonstrate that the simple pre-training task of predicting which caption goes with which image is an efficient and scalable way to learn SOTA image representations from scratch on a dataset of 400 million (image, text) pairs collected from the internet. After pre-training, natural language is used to reference learned visual concepts (or describe new ones) enabling zero-shot transfer of the model to downstream tasks. We study the performance of this approach by benchmarking on over 30 different existing computer vision datasets, spanning tasks such as OCR, action recognition in videos, geo-localization, and many types of fine-grained object classification. The model transfers non-trivially to most tasks and is often competitive with a fully supervised baseline without the need for any dataset specific training. For instance, we match the accuracy of the original ResNet-50 on ImageNet zero-shot without needing to use any of the 1.28 million training examples it was trained on.},
keywords = {/unread},
file = {/Volumes/Mac_Ext/Zotero/storage/ULXZK8P6/Radford et al. - 2021 - Learning transferable visual models from natural language supervision.pdf}
}
@article{purwar2015HybridPredictionModel,
title = {Hybrid Prediction Model with Missing Value Imputation for Medical Data},
author = {Purwar, Archana and Singh, Sandeep Kumar},
date = {2015-08-01},
journaltitle = {Expert Systems with Applications},
shortjournal = {Expert Syst. Appl.},
internal-note = {NOTE(review): shortjournal was identical to journaltitle, defeating its purpose; replaced with the ISO-4 abbreviation},
volume = {42},
number = {13},
pages = {5621--5631},
issn = {0957-4174},
doi = {10.1016/j.eswa.2015.02.050},
url = {https://www.sciencedirect.com/science/article/pii/S0957417415001578},
urldate = {2024-05-26},
abstract = {Accurate prediction in the presence of large number of missing values in the data set has always been a challenging problem. Most of hybrid models to address this challenge have either deleted the missing instances from the data set (popularly known as case deletion) or have used some default way to fill the missing values. This paper, presents a novel hybrid prediction model with missing value imputation (HPM-MI) that analyze various imputation techniques using simple K-means clustering and apply the best one to a data set. The proposed hybrid model is the first one to use combination of K-means clustering with Multilayer Perceptron. K-means clustering is also used to validate class labels of given data (incorrectly classified instances are deleted i.e. pattern extracted from original data) before applying classifier. The proposed system has significantly improved data quality by use of best imputation technique after quantitative analysis of eleven imputation approaches. The efficiency of proposed model as predictive classification system is investigated on three benchmark medical data sets namely Pima Indians Diabetes, Wisconsin Breast Cancer, and Hepatitis from the UCI Repository of Machine Learning. In addition to accuracy, sensitivity, specificity; kappa statistics and the area under ROC are also computed. The experimental results show HPM-MI has produced accuracy, sensitivity, specificity, kappa and ROC as 99.82\%, 100\%, 99.74\%, 0.996 and 1.0 respectively for Pima Indian Diabetes data set, 99.39\%, 99.31\%, 99.54\%, 0.986, and 1.0 respectively for breast cancer data set and 99.08\%, 100\%, 96.55\%, 0.978 and 0.99 respectively for Hepatitis data set. Results are best in comparison with existing methods. Further, the performance of our model is measured and analyzed as function of missing rate and train-test ratio using 2D synthetic data set and Wisconsin Diagnostics Breast Cancer Data Sets. 
Results are promising and therefore the proposed model will be very useful in prediction for medical domain especially when numbers of missing value are large in the data set.},
keywords = {/unread,Data mining,K-means clustering,Missing value imputation,Multilayer Perceptron (MLP)},
annotation = {122 citations (Crossref/DOI) [2024-05-26]},
file = {/Volumes/Mac_Ext/Zotero/storage/629JUUQ9/Purwar and Singh - 2015 - Hybrid prediction model with missing value imputation for medical data.pdf;/Volumes/Mac_Ext/Zotero/storage/JSLLNXZ5/S0957417415001578.html}
}
@inproceedings{radford2021LearningTransferableVisual,
internal-note = {NOTE(review): duplicate of entry pmlr-v139-radford21a (same CLIP paper); that entry additionally carries series/volume/editor data -- prefer it and retire this key},
title = {Learning Transferable Visual Models from Natural Language Supervision},
booktitle = {Proceedings of the 38th {{International Conference}} on {{Machine Learning}}},
author = {Radford, Alec and Kim, Jong Wook and Hallacy, Chris and Ramesh, Aditya and Goh, Gabriel and Agarwal, Sandhini and Sastry, Girish and Askell, Amanda and Mishkin, Pamela and Clark, Jack and Krueger, Gretchen and Sutskever, Ilya},
date = {2021-07-01},
pages = {8748--8763},
publisher = {PMLR},
issn = {2640-3498},
url = {https://proceedings.mlr.press/v139/radford21a.html},
urldate = {2024-05-26},
abstract = {State-of-the-art computer vision systems are trained to predict a fixed set of predetermined object categories. This restricted form of supervision limits their generality and usability since additional labeled data is needed to specify any other visual concept. Learning directly from raw text about images is a promising alternative which leverages a much broader source of supervision. We demonstrate that the simple pre-training task of predicting which caption goes with which image is an efficient and scalable way to learn SOTA image representations from scratch on a dataset of 400 million (image, text) pairs collected from the internet. After pre-training, natural language is used to reference learned visual concepts (or describe new ones) enabling zero-shot transfer of the model to downstream tasks. We study the performance of this approach by benchmarking on over 30 different existing computer vision datasets, spanning tasks such as OCR, action recognition in videos, geo-localization, and many types of fine-grained object classification. The model transfers non-trivially to most tasks and is often competitive with a fully supervised baseline without the need for any dataset specific training. For instance, we match the accuracy of the original ResNet-50 on ImageNet zero-shot without needing to use any of the 1.28 million training examples it was trained on.},
eventtitle = {International {{Conference}} on {{Machine Learning}}},
langid = {english},
keywords = {/unread},
file = {/Volumes/Mac_Ext/Zotero/storage/K2Q96LIQ/Radford et al. - 2021 - Learning transferable visual models from natural language supervision.pdf;/Volumes/Mac_Ext/Zotero/storage/WTFBURGA/Radford et al. - 2021 - Learning transferable visual models from natural language supervision.pdf}
}
@inproceedings{ramesh2021ZeroshotTexttoimageGeneration,
title = {Zero-Shot Text-to-Image Generation},
booktitle = {Proceedings of the 38th {{International Conference}} on {{Machine Learning}}},
author = {Ramesh, Aditya and Pavlov, Mikhail and Goh, Gabriel and Gray, Scott and Voss, Chelsea and Radford, Alec and Chen, Mark and Sutskever, Ilya},
date = {2021-07-01},