{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,27]],"date-time":"2025-03-27T21:46:47Z","timestamp":1743112007192,"version":"3.40.3"},"publisher-location":"Cham","reference-count":39,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783031089732"},{"type":"electronic","value":"9783031089749"}],"license":[{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2022]]},"DOI":"10.1007\/978-3-031-08974-9_22","type":"book-chapter","created":{"date-parts":[[2022,7,3]],"date-time":"2022-07-03T23:02:52Z","timestamp":1656889372000},"page":"275-287","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Improving Text Clustering Using a\u00a0New Technique for\u00a0Selecting Trustworthy Content in\u00a0Social Networks"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-9263-1402","authenticated-orcid":false,"given":"J. Angel","family":"Diaz-Garcia","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8809-8676","authenticated-orcid":false,"given":"Carlos","family":"Fernandez-Basso","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2711-4625","authenticated-orcid":false,"given":"Karel","family":"Guti\u00e9rrez-Batista","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1077-3173","authenticated-orcid":false,"given":"M. Dolores","family":"Ruiz","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6973-477X","authenticated-orcid":false,"given":"Maria J.","family":"Martin-Bautista","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2022,7,4]]},"reference":[{"issue":"2","key":"22_CR1","doi-asserted-by":"publisher","first-page":"259","DOI":"10.1177\/0165551518790424","volume":"45","author":"B Abu-Salih","year":"2019","unstructured":"Abu-Salih, B., Wongthongtham, P., Chan, K.Y., Zhu, D.: CredSat: credibility ranking of users in big social data incorporating semantic analysis and temporal factor. J. Inf. Sci. 45(2), 259\u2013280 (2019)","journal-title":"J. Inf. Sci."},{"key":"22_CR2","doi-asserted-by":"crossref","unstructured":"Abualigah, L.M., Khader, A.T., Al-Betar, M.A.: Unsupervised feature selection technique based on genetic algorithm for improving the text clustering. In: 2016 7th International Conference on Computer Science and Information Technology (CSIT), pp. 1\u20136. IEEE (2016)","DOI":"10.1109\/CSIT.2016.7549453"},{"key":"22_CR3","doi-asserted-by":"crossref","unstructured":"Abualigah, L.M., Khader, A.T., AlBetar, M.A., Hanandeh, E.S.: Unsupervised text feature selection technique based on particle swarm optimization algorithm for improving the text clustering. In: 1st EAI International Conference on Computer Science and Engineering, p. 169. European Alliance for Innovation (EAI) (2016)","DOI":"10.4108\/eai.27-2-2017.152282"},{"issue":"4","key":"22_CR4","doi-asserted-by":"publisher","first-page":"661","DOI":"10.1109\/TDSC.2016.2602338","volume":"15","author":"M Alrubaian","year":"2018","unstructured":"Alrubaian, M., Al-Qurishi, M., Hassan, M.M., Alamri, A.: A credibility analysis system for assessing information on twitter. IEEE Trans. Depend. Secure Comput. 15(4), 661\u2013674 (2018). https:\/\/doi.org\/10.1109\/TDSC.2016.2602338","journal-title":"IEEE Trans. Depend. Secure Comput."},{"key":"22_CR5","doi-asserted-by":"publisher","unstructured":"Alrubaian, M., AL-Qurishi, M., Alrakhami, M., Hassan, M., Alamri, A.: Reputation-based credibility analysis of Twitter social network users: reputation-based credibility analysis of Twitter social network users. Concurrency Comput. Pract. Exp. 29 (2016). https:\/\/doi.org\/10.1002\/cpe.3873","DOI":"10.1002\/cpe.3873"},{"issue":"4","key":"22_CR6","first-page":"1005","volume":"15","author":"IA Alshabeeb","year":"2020","unstructured":"Alshabeeb, I.A., Ali, N.G., Naser, S.A., Shakir, W.M.: A clustering algorithm application in Parkinson disease based on k-means method. Comput. Sci. 15(4), 1005\u20131014 (2020)","journal-title":"Comput. Sci."},{"issue":"2","key":"22_CR7","doi-asserted-by":"publisher","first-page":"373","DOI":"10.1140\/epjb\/e2004-00130-1","volume":"38","author":"A Arenas","year":"2004","unstructured":"Arenas, A., Danon, L., D\u00edaz-Guilera, A., Gleiser, P.M., Guimer\u00e1, R.: Community analysis in social networks. Eur. Phys. J. B 38(2), 373\u2013380 (2004). https:\/\/doi.org\/10.1140\/epjb\/e2004-00130-1","journal-title":"Eur. Phys. J. B"},{"issue":"1","key":"22_CR8","doi-asserted-by":"publisher","first-page":"193","DOI":"10.32604\/cmc.2020.011489","volume":"65","author":"I Arpaci","year":"2020","unstructured":"Arpaci, I., et al.: Analysis of Twitter data using evolutionary clustering during the Covid-19 pandemic. Comput. Mater. Continua 65(1), 193\u2013204 (2020)","journal-title":"Comput. Mater. Continua"},{"key":"22_CR9","unstructured":"Arthur, D., Vassilvitskii, S.: k-means++: the advantages of careful seeding. Technical report, Stanford (2006)"},{"key":"22_CR10","doi-asserted-by":"publisher","unstructured":"Asyaky, M.S., Mandala, R.: Improving the performance of HDBSCAN on short text clustering by using word embedding and UMAP. In: 2021 8th International Conference on Advanced Informatics: Concepts, Theory and Applications (ICAICTA), pp. 1\u20136 (2021). https:\/\/doi.org\/10.1109\/ICAICTA53211.2021.9640285","DOI":"10.1109\/ICAICTA53211.2021.9640285"},{"issue":"9","key":"22_CR11","first-page":"548","volume":"45","author":"MW Berry","year":"2004","unstructured":"Berry, M.W., Castellanos, M.: Survey of text mining. Comput. Rev. 45(9), 548 (2004)","journal-title":"Comput. Rev."},{"key":"22_CR12","doi-asserted-by":"crossref","unstructured":"Bojanowski, P., Grave, E., Joulin, A., Mikolov, T.: Enriching word vectors with subword information. arXiv preprint arXiv:1607.04606 (2016)","DOI":"10.1162\/tacl_a_00051"},{"key":"22_CR13","unstructured":"Chaudhary, G., Kshirsagar, M.: Enhanced text clustering approach using hierarchical agglomerative clustering with principal components analysis to design document recommendation system. Adv. Res. Comput. Eng. Res. Transcripts Comput. Electr. Electron. Eng. 2, 1\u201318 (2021)"},{"issue":"11","key":"22_CR14","doi-asserted-by":"publisher","first-page":"657","DOI":"10.1016\/0167-8655(91)90002-4","volume":"12","author":"RN Dave","year":"1991","unstructured":"Dave, R.N.: Characterization and detection of noise in clustering. Pattern Recogn. Lett. 12(11), 657\u2013664 (1991)","journal-title":"Pattern Recogn. Lett."},{"key":"22_CR15","doi-asserted-by":"crossref","unstructured":"Davies, D.L., Bouldin, D.W.: A cluster separation measure. IEEE Trans. Pattern Anal. Mach. Intell. PAMI-1(2), 224\u2013227 (1979)","DOI":"10.1109\/TPAMI.1979.4766909"},{"key":"22_CR16","series-title":"Communications in Computer and Information Science","doi-asserted-by":"publisher","first-page":"648","DOI":"10.1007\/978-3-030-50143-3_51","volume-title":"Information Processing and Management of Uncertainty in Knowledge-Based Systems","author":"JA Diaz-Garcia","year":"2020","unstructured":"Diaz-Garcia, J.A., Fernandez-Basso, C., Ruiz, M.D., Martin-Bautista, M.J.: Mining text patterns over fake and real tweets. In: Lesot, M.-J., et al. (eds.) IPMU 2020. CCIS, vol. 1238, pp. 648\u2013660. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-50143-3_51"},{"key":"22_CR17","doi-asserted-by":"publisher","first-page":"78166","DOI":"10.1109\/ACCESS.2020.2990461","volume":"8","author":"JA Diaz-Garcia","year":"2020","unstructured":"Diaz-Garcia, J.A., Ruiz, M.D., Martin-Bautista, M.J.: Non-query-based pattern mining and sentiment analysis for massive microblogging online texts. IEEE Access 8, 78166\u201378182 (2020). https:\/\/doi.org\/10.1109\/ACCESS.2020.2990461","journal-title":"IEEE Access"},{"key":"22_CR18","doi-asserted-by":"crossref","unstructured":"Ghosh, S., Sharma, N., Benevenuto, F., Ganguly, N., Gummadi, K.: Cognos: crowdsourcing search for topic experts in microblogs. In: Proceedings of the 35th International ACM SIGIR Conference on Research and Development in Information Retrieval, pp. 575\u2013590 (2012)","DOI":"10.1145\/2348283.2348361"},{"issue":"8","key":"22_CR19","first-page":"10","volume":"11","author":"N Godara","year":"2020","unstructured":"Godara, N., Kumar, S.: Twitter sentiment classification using machine learning techniques. Waffen-Und Kostumkunde J. 11(8), 10\u201320 (2020)","journal-title":"Waffen-Und Kostumkunde J."},{"issue":"10223","key":"22_CR20","doi-asserted-by":"publisher","first-page":"497","DOI":"10.1016\/S0140-6736(20)30183-5","volume":"395","author":"C Huang","year":"2020","unstructured":"Huang, C., et al.: Clinical features of patients infected with 2019 novel coronavirus in Wuhan, China. Lancet 395(10223), 497\u2013506 (2020)","journal-title":"Lancet"},{"key":"22_CR21","doi-asserted-by":"crossref","unstructured":"Jalil, A.M., Hafidi, I., Alami, L., Ensa, K.: Comparative study of clustering algorithms in text mining context (2016)","DOI":"10.9781\/ijimai.2016.376"},{"key":"22_CR22","doi-asserted-by":"crossref","unstructured":"Jin, C., Zhang, S.: Micro-blog short text clustering algorithm based on bootstrapping. In: 2019 12th International Symposium on Computational Intelligence and Design (ISCID), vol. 2, pp. 264\u2013266. IEEE (2019)","DOI":"10.1109\/ISCID.2019.10143"},{"key":"22_CR23","doi-asserted-by":"crossref","unstructured":"Jin, Y., Liu, Y., Zhang, W., Zhang, S., Lou, Y.: A novel multi-stage ensemble model with multiple k-means-based selective undersampling: an application in credit scoring. J. Intell. Fuzzy Syst. 1\u201314 (2021, Preprint)","DOI":"10.3233\/JIFS-201954"},{"issue":"6","key":"22_CR24","first-page":"90","volume":"1","author":"TM Kodinariya","year":"2013","unstructured":"Kodinariya, T.M., Makwana, P.R.: Review on determining number of cluster in k-means clustering. Int. J. 1(6), 90\u201395 (2013)","journal-title":"Int. J."},{"key":"22_CR25","doi-asserted-by":"publisher","unstructured":"Lamsal, R.: Coronavirus (Covid-19) tweets dataset (2020). https:\/\/doi.org\/10.21227\/781w-ef42","DOI":"10.21227\/781w-ef42"},{"issue":"2","key":"22_CR26","doi-asserted-by":"publisher","first-page":"354","DOI":"10.1016\/j.dss.2009.09.003","volume":"48","author":"N Li","year":"2010","unstructured":"Li, N., Wu, D.D.: Using text mining and sentiment analysis for online forums hotspot detection and forecast. Decis. Support Syst. 48(2), 354\u2013368 (2010)","journal-title":"Decis. Support Syst."},{"issue":"2","key":"22_CR27","doi-asserted-by":"publisher","first-page":"451","DOI":"10.1016\/S0031-3203(02)00060-2","volume":"36","author":"A Likas","year":"2003","unstructured":"Likas, A., Vlassis, N., Verbeek, J.J.: The global k-means clustering algorithm. Pattern Recogn. 36(2), 451\u2013461 (2003)","journal-title":"Pattern Recogn."},{"key":"22_CR28","unstructured":"Maaten, L.v.d., Hinton, G.: Visualizing data using T-SNE. J. Mach. Learn. Res. 9(Nov), 2579\u20132605 (2008)"},{"key":"22_CR29","unstructured":"Mikolov, T., Chen, K., Corrado, G., Dean, J.: Efficient estimation of word representations in vector space. arXiv preprint arXiv:1301.3781 (2013)"},{"key":"22_CR30","first-page":"3111","volume":"26","author":"T Mikolov","year":"2013","unstructured":"Mikolov, T., Sutskever, I., Chen, K., Corrado, G.S., Dean, J.: Distributed representations of words and phrases and their compositionality. Adv. Neural. Inf. Process. Syst. 26, 3111\u20133119 (2013)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"issue":"1","key":"22_CR31","doi-asserted-by":"publisher","first-page":"53","DOI":"10.1016\/0377-0427(87)90125-7","volume":"20","author":"P Rousseeuw","year":"1987","unstructured":"Rousseeuw, P.: Silhouettes: a graphical aid to the interpretation and validation of cluster analysis. J. Comput. Appl. Math. 20(1), 53\u201365 (1987)","journal-title":"J. Comput. Appl. Math."},{"key":"22_CR32","doi-asserted-by":"crossref","unstructured":"Shi, K., Li, L., He, J., Zhang, N., Liu, H., Song, W.: Improved GA-based text clustering algorithm. In: 2011 4th IEEE International Conference on Broadband Network and Multimedia Technology, pp. 675\u2013679. IEEE (2011)","DOI":"10.1109\/ICBNMT.2011.6156021"},{"issue":"6","key":"22_CR33","doi-asserted-by":"publisher","first-page":"80","DOI":"10.2307\/3001968","volume":"1","author":"F Wilcoxon","year":"1945","unstructured":"Wilcoxon, F.: Individual comparisons by ranking methods. Biometrics Bull. 1(6), 80\u201383 (1945)","journal-title":"Biometrics Bull."},{"key":"22_CR34","doi-asserted-by":"publisher","unstructured":"Xingliang, M., Fangfang, L.: Clustering of short text in micro-blog based on k-means algorithm. In: 2018 IEEE International Conference of Safety Produce Informatization (IICSPI), pp. 812\u2013815 (2018). https:\/\/doi.org\/10.1109\/IICSPI.2018.8690507","DOI":"10.1109\/IICSPI.2018.8690507"},{"issue":"2","key":"22_CR35","first-page":"121","volume":"1","author":"M Yedla","year":"2010","unstructured":"Yedla, M., Pathakota, S.R., Srinivasa, T.: Enhancing k-means clustering algorithm with improved initial center. Int. J. Comput. Sci. Inf. Technol. 1(2), 121\u2013125 (2010)","journal-title":"Int. J. Comput. Sci. Inf. Technol."},{"key":"22_CR36","doi-asserted-by":"publisher","unstructured":"Yuan, S., Wenbin, G.: A text clustering algorithm based on simplified cluster hypothesis. In: 2013 2nd International Symposium on Instrumentation and Measurement, Sensor Network and Automation (IMSNA), pp. 412\u2013415 (2013). https:\/\/doi.org\/10.1109\/IMSNA.2013.6743303","DOI":"10.1109\/IMSNA.2013.6743303"},{"key":"22_CR37","doi-asserted-by":"publisher","first-page":"289","DOI":"10.1016\/j.knosys.2018.01.031","volume":"145","author":"G Zhang","year":"2018","unstructured":"Zhang, G., Zhang, C., Zhang, H.: Improved k-means algorithm based on density canopy. Knowl.-Based Syst. 145, 289\u2013297 (2018)","journal-title":"Knowl.-Based Syst."},{"issue":"1","key":"22_CR38","doi-asserted-by":"publisher","first-page":"27","DOI":"10.3390\/info11010027","volume":"11","author":"G Zhang","year":"2020","unstructured":"Zhang, G., Li, Y., Deng, X.: K-means clustering-based electrical equipment identification for smart building application. Information 11(1), 27 (2020)","journal-title":"Information"},{"issue":"7798","key":"22_CR39","doi-asserted-by":"publisher","first-page":"270","DOI":"10.1038\/s41586-020-2012-7","volume":"579","author":"P Zhou","year":"2020","unstructured":"Zhou, P., et al.: A pneumonia outbreak associated with a new coronavirus of probable bat origin. Nature 579(7798), 270\u2013273 (2020)","journal-title":"Nature"}],"container-title":["Communications in Computer and Information Science","Information Processing and Management of Uncertainty in Knowledge-Based Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-08974-9_22","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,7,3]],"date-time":"2022-07-03T23:20:34Z","timestamp":1656890434000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-08974-9_22"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022]]},"ISBN":["9783031089732","9783031089749"],"references-count":39,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-08974-9_22","relation":{},"ISSN":["1865-0929","1865-0937"],"issn-type":[{"type":"print","value":"1865-0929"},{"type":"electronic","value":"1865-0937"}],"subject":[],"published":{"date-parts":[[2022]]},"assertion":[{"value":"4 July 2022","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"IPMU","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Information Processing and Management of Uncertainty in Knowledge-Based Systems","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Milan","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2022","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"11 July 2022","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"15 July 2022","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"19","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ipmu2022","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/ipmu2022.disco.unimib.it\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Single-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"EasyChair","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"188","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"124","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"66% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}