{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,13]],"date-time":"2026-02-13T14:07:22Z","timestamp":1770991642683,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":31,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,6,17]],"date-time":"2023-06-17T00:00:00Z","timestamp":1686960000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"Basic Science Research Program through the National Research Foundation of Korea (NRF) funded by the Ministry of Science, ICT & Future Planning in Korea","award":["No.2020R1A2B5B02001717"],"award-info":[{"award-number":["No.2020R1A2B5B02001717"]}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61911540482 and 61702324"],"award-info":[{"award-number":["61911540482 and 61702324"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,6,17]]},"DOI":"10.1145\/3606043.3606048","type":"proceedings-article","created":{"date-parts":[[2023,11,16]],"date-time":"2023-11-16T17:08:44Z","timestamp":1700154524000},"page":"27-33","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":1,"title":["A Novel Distributed K-Means Clustering Algorithm for Big Text Data"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0006-0489-3135","authenticated-orcid":false,"given":"Min","family":"Li","sequence":"first","affiliation":[{"name":"College of Information Engineering, Shanghai Maritime University, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3626-9066","authenticated-orcid":false,"given":"Meijing","family":"Li","sequence":"additional","affiliation":[{"name":"College of Information Engineering, Shanghai Maritime University, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-1515-1108","authenticated-orcid":false,"given":"Yonglong","family":"Cheng","sequence":"additional","affiliation":[{"name":"Big Data and Artificial Intelligence Laboratory, Industrial and Commercial Bank of China Software Development Center, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0394-9054","authenticated-orcid":false,"given":"Keun Ho","family":"Ryu","sequence":"additional","affiliation":[{"name":"Data Science Laboratory, Faculty of Information Technology, Ton Duc Thang University, Vietnam"}]}],"member":"320","published-online":{"date-parts":[[2023,11,16]]},"reference":[{"key":"e_1_3_2_1_1_1","first-page":"2022","article-title":"Text Mining Based on the Lexicon-Constrained Network in the Context of Big Data[J]","author":"Wan B","year":"2022","unstructured":"Wan B , Sohail M . Text Mining Based on the Lexicon-Constrained Network in the Context of Big Data[J] . Wireless Communications and Mobile Computing , 2022 , 2022 . Wan B, Sohail M. Text Mining Based on the Lexicon-Constrained Network in the Context of Big Data[J]. Wireless Communications and Mobile Computing, 2022, 2022.","journal-title":"Wireless Communications and Mobile Computing"},{"key":"e_1_3_2_1_2_1","first-page":"91","article-title":"Multi-Label Classification Using Problem Transformation Approach and Machine Learning on Text Mining for Multiple Event Detection[J]. Cyber Physical","volume":"2021","author":"Safari H","unstructured":"Safari H , Mutijarsa K . Multi-Label Classification Using Problem Transformation Approach and Machine Learning on Text Mining for Multiple Event Detection[J]. Cyber Physical , Computer and Automation System: A Study of New Technologies , 2021 : 91 - 105 . Safari H, Mutijarsa K. Multi-Label Classification Using Problem Transformation Approach and Machine Learning on Text Mining for Multiple Event Detection[J]. Cyber Physical, Computer and Automation System: A Study of New Technologies, 2021: 91-105.","journal-title":"Computer and Automation System: A Study of New Technologies"},{"key":"e_1_3_2_1_3_1","article-title":"Learning discriminative text representation for streaming social event detection[J]","author":"Tong C","year":"2021","unstructured":"Tong C , Peng H , Bai X , Learning discriminative text representation for streaming social event detection[J] . IEEE Transactions on Knowledge and Data Engineering , 2021 . Tong C, Peng H, Bai X, Learning discriminative text representation for streaming social event detection[J]. IEEE Transactions on Knowledge and Data Engineering, 2021.","journal-title":"IEEE Transactions on Knowledge and Data Engineering"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11227-022-04708-9"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.3233\/JIFS-189239"},{"key":"e_1_3_2_1_6_1","first-page":"2022","article-title":"Research on text similarity measurement hybrid algorithm with term semantic information and TF-IDF method[J]","author":"Lan F","year":"2022","unstructured":"Lan F . Research on text similarity measurement hybrid algorithm with term semantic information and TF-IDF method[J] . Advances in Multimedia , 2022 , 2022 . Lan F. Research on text similarity measurement hybrid algorithm with term semantic information and TF-IDF method[J]. Advances in Multimedia, 2022, 2022.","journal-title":"Advances in Multimedia"},{"key":"e_1_3_2_1_7_1","volume-title":"Approaches for semantic textual similarity[J]","author":"Chengcheng H A N","year":"2020","unstructured":"Chengcheng H A N , Lei L I , Tingting L I U , Approaches for semantic textual similarity[J] . Journal of East China Normal University (Natural Science) , 2020 , 2020(5): 95. Chengcheng H A N, Lei L I, Tingting L I U, Approaches for semantic textual similarity[J]. Journal of East China Normal University (Natural Science), 2020, 2020(5): 95."},{"issue":"3","key":"e_1_3_2_1_8_1","first-page":"158","article-title":"A review of text similarity approaches[J]","volume":"37","author":"Wang C L","year":"2019","unstructured":"Wang C L , Yang Y H , Deng F , A review of text similarity approaches[J] . Information Science , 2019 , 37 ( 3 ): 158 - 168 . Wang C L, Yang Y H, Deng F, A review of text similarity approaches[J]. Information Science, 2019, 37(3): 158-168.","journal-title":"Information Science"},{"key":"e_1_3_2_1_9_1","volume-title":"A solution to Plato's problem: The latent semantic analysis theory of acquisition, induction, and representation of knowledge[J]. Psychological review","author":"Landauer T K","year":"1997","unstructured":"Landauer T K , Dumais S T . A solution to Plato's problem: The latent semantic analysis theory of acquisition, induction, and representation of knowledge[J]. Psychological review , 1997 , 104(2): 211. Landauer T K, Dumais S T. A solution to Plato's problem: The latent semantic analysis theory of acquisition, induction, and representation of knowledge[J]. Psychological review, 1997, 104(2): 211."},{"issue":"6","key":"e_1_3_2_1_10_1","first-page":"289","article-title":"Probabilistic latent semantic analysis [J]","volume":"15","year":"1999","unstructured":"HOFMANN T . Probabilistic latent semantic analysis [J] . Uncertainty in Artificial Intelligence , 1999 , 15 ( 6 ): 289 - 296 . HOFMANN T. Probabilistic latent semantic analysis [J]. Uncertainty in Artificial Intelligence, 1999, 15(6): 289-296.","journal-title":"Uncertainty in Artificial Intelligence"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.ipm.2019.102188"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1017\/S1351324916000334"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"crossref","unstructured":"Pennington J Socher R Manning C D. Glove: Global vectors for word representation[C]\/\/Proceedings of the 2014 conference on empirical methods in natural language processing (EMNLP). 2014: 1532-1543.  Pennington J Socher R Manning C D. Glove: Global vectors for word representation[C]\/\/Proceedings of the 2014 conference on empirical methods in natural language processing (EMNLP). 2014: 1532-1543.","DOI":"10.3115\/v1\/D14-1162"},{"key":"e_1_3_2_1_14_1","volume-title":"Lee K","author":"Devlin J","year":"1810","unstructured":"D Devlin J , Chang M W , Lee K , Bert : Pre-training of de ep bidirectional transformers for language understanding[J]. arXiv preprint arXiv: 1810 .04805, 2018. D Devlin J, Chang M W, Lee K, Bert: Pre-training of deep bidirectional transformers for language understanding[J]. arXiv preprint arXiv:1810.04805, 2018."},{"key":"e_1_3_2_1_15_1","first-page":"345","article-title":"Improving the initial centroids of k-means clustering algorithm to generalize its applicability[J]. Journal of The Institution of Engineers (India)","volume":"95","author":"Goyal M","year":"2014","unstructured":"Goyal M , Kumar S . Improving the initial centroids of k-means clustering algorithm to generalize its applicability[J]. Journal of The Institution of Engineers (India) : Series B , 2014 , 95 : 345 - 350 . Goyal M, Kumar S. Improving the initial centroids of k-means clustering algorithm to generalize its applicability[J]. Journal of The Institution of Engineers (India): Series B, 2014, 95: 345-350.","journal-title":"Series B"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2011.05.016"},{"key":"e_1_3_2_1_17_1","volume-title":"Data clustering: a review[J]. ACM computing surveys (CSUR)","author":"Jain A K","year":"1999","unstructured":"Jain A K , Murty M N , Flynn P J . Data clustering: a review[J]. ACM computing surveys (CSUR) , 1999 , 31(3): 264-323. Jain A K, Murty M N, Flynn P J. Data clustering: a review[J]. ACM computing surveys (CSUR), 1999, 31(3): 264-323."},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1007\/s10723-019-09475-1"},{"key":"e_1_3_2_1_19_1","volume-title":"Concept decompositions for large sparse text data using clustering[J]. Machine learning","author":"Dhillon I S","year":"2001","unstructured":"Dhillon I S , Modha D S . Concept decompositions for large sparse text data using clustering[J]. Machine learning , 2001 , 42: 143-175. Dhillon I S, Modha D S. Concept decompositions for large sparse text data using clustering[J]. Machine learning, 2001, 42: 143-175."},{"key":"e_1_3_2_1_20_1","volume-title":"A survey of clustering algorithms for big data: Taxonomy and empirical analysis[J]","author":"Fahad A","year":"2014","unstructured":"Fahad A , Alshatri N , Tari Z , A survey of clustering algorithms for big data: Taxonomy and empirical analysis[J] . IEEE transactions on emerging topics in computing, 2014 , 2(3): 267-279. Fahad A, Alshatri N, Tari Z, A survey of clustering algorithms for big data: Taxonomy and empirical analysis[J]. IEEE transactions on emerging topics in computing, 2014, 2(3): 267-279."},{"key":"e_1_3_2_1_21_1","volume-title":"Data clustering: 50 years beyond K-means[J]. Pattern recognition letters","author":"Jain A K","year":"2010","unstructured":"Jain A K . Data clustering: 50 years beyond K-means[J]. Pattern recognition letters , 2010 , 31(8): 651-666. Jain A K. Data clustering: 50 years beyond K-means[J]. Pattern recognition letters, 2010, 31(8): 651-666."},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.ins.2014.10.012"},{"issue":"1","key":"e_1_3_2_1_23_1","first-page":"61","article-title":"The parallel implementation and application of an improved K-means algorithm[J]","volume":"46","author":"Li X","year":"2017","unstructured":"Li X , Yu L , Lei H , The parallel implementation and application of an improved K-means algorithm[J] . Journal of University of Electronic Science and Technology of China , 2017 , 46 ( 1 ): 61 - 68 . Li X, Yu L, Lei H, The parallel implementation and application of an improved K-means algorithm[J]. Journal of University of Electronic Science and Technology of China, 2017, 46(1): 61-68.","journal-title":"Journal of University of Electronic Science and Technology of China"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.4304\/jetwi.4.1.51-59"},{"key":"e_1_3_2_1_25_1","volume-title":"SODA 2007","author":"Arthur D","year":"2007","unstructured":"Arthur D , Vassilvitskii S . K- Means ++ : The Advantages of Careful Seeding[C]\/\/ Proceedings of the Eighteenth Annual ACM-SIAM Symposium on Discrete Algorithms , SODA 2007 , New Orleans, Louisiana, USA , January 7-9, 2007 . ACM, 2007. Arthur D , Vassilvitskii S . K-Means++: The Advantages of Careful Seeding[C]\/\/ Proceedings of the Eighteenth Annual ACM-SIAM Symposium on Discrete Algorithms, SODA 2007, New Orleans, Louisiana, USA, January 7-9, 2007. ACM, 2007."},{"key":"e_1_3_2_1_26_1","volume-title":"Based on the local density of nuclear K-means algorithm [J][J]. Computer application research","author":"Li M N","year":"2011","unstructured":"Li M N , Zhu Y Q , Chen G , Based on the local density of nuclear K-means algorithm [J][J]. Computer application research , 2011 , 28(1): 78-80+ 90. Li M N, Zhu Y Q, Chen G, Based on the local density of nuclear K-means algorithm [J][J]. Computer application research, 2011, 28(1): 78-80+ 90."},{"issue":"2","key":"e_1_3_2_1_27_1","first-page":"134","article-title":"A k-means Algorithm based on the radius [J]","volume":"33","author":"Liu J X","year":"2013","unstructured":"Liu J X , Zhu G H , Xi M . A k-means Algorithm based on the radius [J] . Journal of Guilin University of Electronic Technology , 2013 , 33 ( 2 ): 134 - 138 . Liu J X , Zhu G H, Xi M. A k-means Algorithm based on the radius [J]. Journal of Guilin University of Electronic Technology, 2013,33(2):134-138.","journal-title":"Journal of Guilin University of Electronic Technology"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.eswa.2020.113288"},{"key":"e_1_3_2_1_29_1","volume-title":"Term-weighting approaches in automatic text retrieval[J]. Information processing & management","author":"Salton G","year":"1988","unstructured":"Salton G , Buckley C. Term-weighting approaches in automatic text retrieval[J]. Information processing & management , 1988 , 24(5): 513-523. Salton G, Buckley C. Term-weighting approaches in automatic text retrieval[J]. Information processing & management, 1988, 24(5): 513-523."},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1145\/1327452.1327492"},{"key":"e_1_3_2_1_31_1","volume-title":"HDFS architecture guide[J]. Hadoop apache project","author":"Borthakur D.","year":"2008","unstructured":"Borthakur D. HDFS architecture guide[J]. Hadoop apache project , 2008 , 53(1-13): 2. Borthakur D. HDFS architecture guide[J]. Hadoop apache project, 2008, 53(1-13): 2."}],"event":{"name":"HP3C 2023: 2023 7th International Conference on High Performance Compilation, Computing and Communications","location":"Jinan China","acronym":"HP3C 2023"},"container-title":["Proceedings of the 2023 7th International Conference on High Performance Compilation, Computing and Communications"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3606043.3606048","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3606043.3606048","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T16:46:07Z","timestamp":1750178767000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3606043.3606048"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,6,17]]},"references-count":31,"alternative-id":["10.1145\/3606043.3606048","10.1145\/3606043"],"URL":"https:\/\/doi.org\/10.1145\/3606043.3606048","relation":{},"subject":[],"published":{"date-parts":[[2023,6,17]]},"assertion":[{"value":"2023-11-16","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}