{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,28]],"date-time":"2025-03-28T02:25:13Z","timestamp":1743128713457,"version":"3.40.3"},"publisher-location":"Cham","reference-count":20,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783031490071"},{"type":"electronic","value":"9783031490088"}],"license":[{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023]]},"DOI":"10.1007\/978-3-031-49008-8_29","type":"book-chapter","created":{"date-parts":[[2023,12,14]],"date-time":"2023-12-14T13:04:15Z","timestamp":1702559055000},"page":"365-377","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["Topic Model with Contextual Outlier Handling: a Study on Electronic Invoice Product Descriptions"],"prefix":"10.1007","author":[{"given":"Cesar","family":"Andrade","sequence":"first","affiliation":[]},{"given":"Rita P.","family":"Ribeiro","sequence":"additional","affiliation":[]},{"given":"Jo\u00e3o","family":"Gama","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2023,12,15]]},"reference":[{"key":"29_CR1","doi-asserted-by":"crossref","unstructured":"Asyaky, M.S., Mandala, R.: Improving the performance of HDBSCAN on short text clustering by using word embedding and UMAP. In: 8th International Conference on Advanced Informatics: Concepts, Theory, and Applications (ICAICTA), pp. 1\u20136. IEEE (2021)","DOI":"10.1109\/ICAICTA53211.2021.9640285"},{"key":"29_CR2","first-page":"993","volume":"3","author":"DM Blei","year":"2003","unstructured":"Blei, D.M., Ng, A.Y., Jordan, M.I.: Latent dirichlet allocation. J. Mach. Learn. Res. 3, 993\u20131022 (2003)","journal-title":"J. Mach. Learn. Res."},{"key":"29_CR3","doi-asserted-by":"crossref","unstructured":"Campello, R.J.G.B., Moulavi, D., Sander, J.: Density-based clustering based on hierarchical density estimates. In: Pacific-Asia Conference on Knowledge Discovery and Data Mining, pp. 160\u2013172. Springer (2013)","DOI":"10.1007\/978-3-642-37456-2_14"},{"issue":"3","key":"29_CR4","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/1541880.1541882","volume":"41","author":"V Chandola","year":"2009","unstructured":"Chandola, V., Banerjee, A., Kumar, V.: Anomaly detection: a survey. ACM Comput. Surv. (CSUR) 41(3), 1\u201358 (2009)","journal-title":"ACM Comput. Surv. (CSUR)"},{"key":"29_CR5","unstructured":"Devlin, J., Chang, M.-W., Lee, K., Toutanova, K.: BERT: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805 (2018)"},{"key":"29_CR6","doi-asserted-by":"crossref","unstructured":"Dai, Z., Callan, J.: Deeper text understanding for IR with contextual neural language modeling. In: Proceedings of the 42nd International ACM SIGIR Conference on Research and Development in Information Retrieval (2019)","DOI":"10.1145\/3331184.3331303"},{"key":"29_CR7","doi-asserted-by":"crossref","unstructured":"Eklund, A., Forsman, M.: Topic modeling by clustering language model embeddings: human validation on an industry dataset. In: Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing: Industry Track, pp. 635\u2013643 (2022)","DOI":"10.18653\/v1\/2022.emnlp-industry.65"},{"key":"29_CR8","doi-asserted-by":"crossref","unstructured":"Kenter, T., De Rijke, M.: Short text similarity with word embeddings. In: Proceedings of the 24th ACM Int. Conf. on Information and Knowledge Management, pp. 1411\u20131420. ACM (2015)","DOI":"10.1145\/2806416.2806475"},{"key":"29_CR9","doi-asserted-by":"crossref","unstructured":"Kumar, J., Shao, J., Uddin, S., Ali, W.: An online semantic-enhanced Dirichlet model for short text stream clustering. In: Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics, pp. 766\u2013776. Association for Computational Linguistics (2020)","DOI":"10.18653\/v1\/2020.acl-main.70"},{"key":"29_CR10","doi-asserted-by":"crossref","unstructured":"Muller, B., Sagot, B., Seddah, D.: Enhancing BERT for lexical normalization. In: The 5th Workshop on Noisy User-generated Text (W-NUT), pp. 1\u201310. Association for Computational Linguistics (2019)","DOI":"10.18653\/v1\/D19-5539"},{"key":"29_CR11","doi-asserted-by":"crossref","unstructured":"Murshed, B.A.H., Mallappa, S., Abawajy, J., Saif, M.A.N., Al-Ariki, H.D.E., Abdulwahab, H.M.: Short text topic modelling approaches in the context of big data: taxonomy, survey, and analysis. Artificial Intelligence Review, pp. 1\u2013128. Springer (2022)","DOI":"10.1007\/s10462-022-10254-w"},{"key":"29_CR12","doi-asserted-by":"crossref","unstructured":"Paalman, J., Mullick, S., Zervanou, K., Zhang, Y.: Term based semantic clusters for very short text classification. In: International Conference on Recent Advances in Natural Language Processing (RANLP 2019), pp. 878\u2013887. INCOMA Ltd. (2019)","DOI":"10.26615\/978-954-452-056-4_102"},{"key":"29_CR13","doi-asserted-by":"crossref","unstructured":"Rakib, M.R.H., Asaduzzaman, M.: Fast clustering of short text streams using efficient cluster indexing and dynamic similarity thresholds. CoRR abs\/2101.08595 (2021)","DOI":"10.1145\/3469096.3469866"},{"key":"29_CR14","doi-asserted-by":"crossref","unstructured":"Rakib, M.R.H., Zeh, N., Milios, E.: Short text stream clustering via frequent word pairs and reassignment of outliers to clusters. In: Proceedings of the ACM Symposium on Document Engineering 2020, pp. 1\u20134. ACM (2020)","DOI":"10.1145\/3395027.3419589"},{"key":"29_CR15","doi-asserted-by":"crossref","unstructured":"Schulte, J.P., Giuntini, F.T., Nobre, R.A., Nascimento, K.C.D., Meneguette, R.I., Li, W., Gon\u00e7alves, V.P., Rocha Filho, G.P.: ELINAC: Autoencoder approach for electronic invoices data clustering. Appl. Sci. 12(6), 3008 (2022)","DOI":"10.3390\/app12063008"},{"key":"29_CR16","doi-asserted-by":"crossref","unstructured":"Si, X., Li, P., Hu, X., Zhang, Y.: An online Dirichlet model based on sentence embedding and DBSCAN for noisy short text stream clustering. In: 2022 International Joint Conference on Neural Networks (IJCNN), pp. 1\u20138. IEEE (2022)","DOI":"10.1109\/IJCNN55064.2022.9892414"},{"key":"29_CR17","first-page":"22852","volume":"10","author":"Y Xu","year":"2022","unstructured":"Xu, Y., Wang, S., Zhang, S., Wang, F.: Dynamic clustering for short text stream based on Dirichlet process. IEEE Access 10, 22852\u201322865 (2022)","journal-title":"IEEE Access"},{"key":"29_CR18","doi-asserted-by":"crossref","unstructured":"Yin, J., Wang, J., Xu, W., Gao, M.: Model-based clustering of short text streams. In: 27th ACM International Conference on Information and Knowledge Management, pp. 697\u2013706. ACM (2018)","DOI":"10.1145\/3219819.3220094"},{"key":"29_CR19","doi-asserted-by":"crossref","unstructured":"Yin, J., Wang, J.: A model-based approach for text clustering with outlier detection. In: 2016 IEEE 32nd International Conference on Data Engineering (ICDE), pp. 625\u2013636. IEEE (2016)","DOI":"10.1109\/ICDE.2016.7498276"},{"key":"29_CR20","unstructured":"Ye, Q., Sachan, D., Felix, M., Padmanabhan, S., Neubig, G.: When and Why are pre-trained word embeddings useful for Neural Machine Translation. In: HLT-NAACL. ACM (2018)"}],"container-title":["Lecture Notes in Computer Science","Progress in Artificial Intelligence"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-49008-8_29","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,12,14]],"date-time":"2023-12-14T13:18:08Z","timestamp":1702559888000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-49008-8_29"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023]]},"ISBN":["9783031490071","9783031490088"],"references-count":20,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-49008-8_29","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2023]]},"assertion":[{"value":"15 December 2023","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"EPIA","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"EPIA Conference on Artificial Intelligence","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Faial Island","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Portugal","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2023","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"5 September 2023","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"8 September 2023","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"22","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"epia2023","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/epia2023.inesctec.pt\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Easy Chair","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"163","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"85","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"52% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"4","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"2","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}