{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,30]],"date-time":"2025-10-30T11:42:05Z","timestamp":1761824525993,"version":"3.40.3"},"publisher-location":"Cham","reference-count":23,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783031790348"},{"type":"electronic","value":"9783031790355"}],"license":[{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-79035-5_17","type":"book-chapter","created":{"date-parts":[[2025,1,29]],"date-time":"2025-01-29T22:07:50Z","timestamp":1738188470000},"page":"245-259","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Evaluating Short Text Stream Clustering on\u00a0Large E-commerce Datasets"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0009-0005-2218-6648","authenticated-orcid":false,"given":"Cesar","family":"Andrade","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6852-8077","authenticated-orcid":false,"given":"Rita P.","family":"Ribeiro","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3357-1195","authenticated-orcid":false,"given":"Jo\u00e3o","family":"Gama","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,1,30]]},"reference":[{"key":"17_CR1","doi-asserted-by":"publisher","unstructured":"Cheng, X., Yan, X., Lan, Y., Guo, J.: BTM: topic modeling over short texts. In: IEEE Transactions on Knowledge and Data Engineering, vol. 26, no. 12, pp. 2928\u20132941 (2014). https:\/\/doi.org\/10.1109\/TKDE.2014.2313872","DOI":"10.1109\/TKDE.2014.2313872"},{"issue":"11","key":"17_CR2","doi-asserted-by":"publisher","first-page":"P11006","DOI":"10.1088\/1742-5468\/2015\/11\/P11006","volume":"2015","author":"P Zhang","year":"2015","unstructured":"Zhang, P.: Evaluating accuracy of community detection using the relative normalized mutual information. J. Stat. Mech: Theory Exp. 2015(11), P11006 (2015)","journal-title":"J. Stat. Mech: Theory Exp."},{"key":"17_CR3","unstructured":"Jerdee, M., Kirkley, A., Newman, M.E.J.: Normalized mutual information is a biased measure for classification and community detection. arXiv preprint arXiv:2307.01282 (2023)"},{"key":"17_CR4","unstructured":"Devlin, J., Chang, M.-W., Lee, K., Toutanova, K.: BERT: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805 (2018)"},{"key":"17_CR5","doi-asserted-by":"crossref","unstructured":"Campello, R.J.G.B., Moulavi, D., Sander, J.: Density-based clustering based on hierarchical density estimates. In: Pacific-Asia Conference on Knowledge Discovery and Data Mining, pp. 160\u2013172. Springer (2013)","DOI":"10.1007\/978-3-642-37456-2_14"},{"key":"17_CR6","doi-asserted-by":"crossref","unstructured":"Dai, Z., Callan, J.: Deeper text understanding for IR with contextual neural language modeling. In: Proceedings of the 42nd International ACM SIGIR Conference on Research and Development in Information Retrieval (2019)","DOI":"10.1145\/3331184.3331303"},{"key":"17_CR7","doi-asserted-by":"crossref","unstructured":"Asyaky, M.S., Mandala, R.: Improving the performance of HDBSCAN on short text clustering by using word embedding and UMAP. In: 8th International Conference on Advanced Informatics: Concepts, Theory, and Applications (ICAICTA), pp. 1\u20136. IEEE (2021)","DOI":"10.1109\/ICAICTA53211.2021.9640285"},{"key":"17_CR8","doi-asserted-by":"crossref","unstructured":"Eklund, A., Forsman, M.: Topic modeling by clustering language model embeddings: human validation on an industry dataset. In: Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing: Industry Track, pp. 635\u2013643 (2022)","DOI":"10.18653\/v1\/2022.emnlp-industry.65"},{"key":"17_CR9","doi-asserted-by":"crossref","unstructured":"Schulte, J.P., et al.: ELINAC: autoencoder approach for electronic invoices data clustering. Appl. Sci. 12(6), 3008 (2022)","DOI":"10.3390\/app12063008"},{"key":"17_CR10","doi-asserted-by":"crossref","unstructured":"Andrade, C., Ribeiro, R.P., Gama, J.: Topic model with contextual outlier handling: a study on electronic invoice product descriptions. In: EPIA Conference on Artificial Intelligence, pp. 365\u2013377. Springer (2023)","DOI":"10.1007\/978-3-031-49008-8_29"},{"key":"17_CR11","unstructured":"Blei, D.M., Ng, A.Y., Jordan, M.I.: Latent dirichlet allocation. J. Mach. Learn. Res. 3(Jan), 993\u20131022 (2003)"},{"key":"17_CR12","doi-asserted-by":"crossref","unstructured":"Liang, S., Yilmaz, E., Kanoulas, E.: Dynamic clustering of streaming short documents. In: Proceedings of the 22nd ACM SIGKDD International Conference on Knowledge Discovery and Data Mining, pp. 995\u20131004 (2016)","DOI":"10.1145\/2939672.2939748"},{"key":"17_CR13","unstructured":"Yin, J., Wang, J., Xu, W., Gao, M.: Model-based clustering of short text streams. In: 27th ACM International Conference on Information and Knowledge Management, pp. 697\u2013706. ACM (2018)"},{"key":"17_CR14","doi-asserted-by":"crossref","unstructured":"Chen, J., Gong, Z., Liu, W.: A nonparametric model for online topic discovery with word embeddings. Inf. Sci. 504, 32\u201347 (2019). Elsevier","DOI":"10.1016\/j.ins.2019.07.048"},{"key":"17_CR15","doi-asserted-by":"crossref","unstructured":"Chen, J., Gong, Z., Liu, W.: A Dirichlet process Biterm-based mixture model for short text stream clustering. Appl. Intell. 50, 1609\u20131619 (2020). Springer","DOI":"10.1007\/s10489-019-01606-1"},{"key":"17_CR16","first-page":"22852","volume":"10","author":"Y Xu","year":"2022","unstructured":"Xu, Y., Wang, S., Zhang, S., Wang, F.: Dynamic clustering for short text stream based on Dirichlet process. IEEE Access 10, 22852\u201322865 (2022)","journal-title":"IEEE Access"},{"key":"17_CR17","unstructured":"Rakib, M.R.H., Asaduzzaman, M.: Fast clustering of short text streams using efficient cluster indexing and dynamic similarity thresholds. CoRR abs\/2101.08595 (2021)"},{"key":"17_CR18","doi-asserted-by":"crossref","unstructured":"Rakib, M.R.H., Zeh, N., Jankowska, M., Milios, E.: Enhancement of short text clustering by iterative classification. In: Natural Language Processing and Information Systems: 25th International Conference on Applications of Natural Language to Information Systems, NLDB 2020, Saarbr\u00fccken, Germany, June 24\u201326, 2020, Proceedings 25, pp. 105\u2013117. Springer (2020)","DOI":"10.1007\/978-3-030-51310-8_10"},{"key":"17_CR19","doi-asserted-by":"crossref","unstructured":"Kumar, J., Shao, J., Uddin, S., Ali, W.: An online semantic-enhanced Dirichlet model for short text stream clustering. In: Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics, pp. 766\u2013776. Association for Computational Linguistics (2020)","DOI":"10.18653\/v1\/2020.acl-main.70"},{"key":"17_CR20","doi-asserted-by":"crossref","unstructured":"Kumar, J., Ud Din, S., Yang, Q., Kumar, R., Shao, J.: An online semantic-enhanced graphical model for evolving short text stream clustering. IEEE Trans. Cybern. 52(12), 13809\u201313820 (2021)","DOI":"10.1109\/TCYB.2021.3108897"},{"key":"17_CR21","doi-asserted-by":"crossref","unstructured":"Kumar, J., Shao, J., Kumar, R., Ud Din, S., Mawuli, C.B., Yang, Q.: A context-enhanced Dirichlet model for online clustering in short text streams. In: Expert Systems with Applications, vol. 228, p. 120262 (2023)","DOI":"10.1016\/j.eswa.2023.120262"},{"key":"17_CR22","doi-asserted-by":"crossref","unstructured":"Rakib, M.R.H., Zeh, N., Milios, E.: Efficient clustering of short text streams using online-offline clustering. In: Proceedings of the 21st ACM Symposium on Document Engineering, pp. 1\u201310 (2021)","DOI":"10.1145\/3469096.3469866"},{"key":"17_CR23","doi-asserted-by":"crossref","unstructured":"Murshed, B.A.H., et al.: Short text topic modelling approaches in the context of big data: taxonomy, survey, and analysis. Artif. Intell. Rev. 56(6), 5133\u20135260 (2023)","DOI":"10.1007\/s10462-022-10254-w"}],"container-title":["Lecture Notes in Computer Science","Intelligent Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-79035-5_17","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,1,29]],"date-time":"2025-01-29T22:08:11Z","timestamp":1738188491000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-79035-5_17"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025]]},"ISBN":["9783031790348","9783031790355"],"references-count":23,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-79035-5_17","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2025]]},"assertion":[{"value":"30 January 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"BRACIS","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Brazilian Conference on Intelligent Systems","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Bel\u00e9m do Par\u00e1","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Brazil","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"17 November 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"21 November 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"34","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"bracis2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}