{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,14]],"date-time":"2026-03-14T14:34:36Z","timestamp":1773498876172,"version":"3.50.1"},"reference-count":76,"publisher":"Springer Science and Business Media LLC","issue":"5","license":[{"start":{"date-parts":[[2023,4,8]],"date-time":"2023-04-08T00:00:00Z","timestamp":1680912000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,4,8]],"date-time":"2023-04-08T00:00:00Z","timestamp":1680912000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Scientometrics"],"published-print":{"date-parts":[[2023,5]]},"DOI":"10.1007\/s11192-023-04689-3","type":"journal-article","created":{"date-parts":[[2023,4,8]],"date-time":"2023-04-08T13:02:41Z","timestamp":1680958961000},"page":"3197-3224","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":9,"title":["Research proposal content extraction using natural language processing and semi-supervised clustering: A demonstration and comparative analysis"],"prefix":"10.1007","volume":"128","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-8450-3198","authenticated-orcid":false,"given":"Benjamin M.","family":"Knisely","sequence":"first","affiliation":[]},{"given":"Holly H.","family":"Pavliscsak","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2023,4,8]]},"reference":[{"key":"4689_CR1","doi-asserted-by":"publisher","unstructured":"Afzali, M., & Kumar, S. (2019). Text Document Clustering Issues: and Challenges. International Conference on Machine Learning, Big Data, Cloud and Parallel Computing (COMITCon), 1, 263\u2013268 https:\/\/doi.org\/10.1109\/COMITCon.2019.8862247","DOI":"10.1109\/COMITCon.2019.8862247"},{"key":"4689_CR2","doi-asserted-by":"publisher","first-page":"317","DOI":"10.1007\/978-3-030-51935-3_34","volume-title":"Image and Signal Processing","author":"M Allaoui","year":"2020","unstructured":"Allaoui, M., Kherfi, M. L., & Cheriet, A. (2020). Considerably Improving Clustering Algorithms Using UMAP Dimensionality Reduction Technique: A Comparative Study. In A. El Moataz, D. Mammass, A. Mansouri, & F. Nouboud (Eds.), Image and Signal Processing (pp. 317\u2013325). Springer International Publishing."},{"key":"4689_CR3","unstructured":"Almeida, F., & Xex\u00e9o, G. (2019). Word Embeddings: A Survey. http:\/\/arxiv.org\/abs\/1901.09069"},{"issue":"2","key":"4689_CR4","doi-asserted-by":"publisher","first-page":"142","DOI":"10.1177\/1063293X20982973","volume":"29","author":"IO Arnarsson","year":"2021","unstructured":"Arnarsson, I. O., Frost, O., Gustavsson, E., Jirstrand, M., & Malmqvist, J. (2021). Natural language processing methods for knowledge management-applying document clustering for fast search and grouping of engineering documents. Concurrent Engineering, 29(2), 142\u2013152. https:\/\/doi.org\/10.1177\/1063293X20982973","journal-title":"Concurrent Engineering"},{"key":"4689_CR5","doi-asserted-by":"publisher","unstructured":"Asyaky, M. S., & Mandala, R. (2021). Improving the Performance of HDBSCAN on Short Text Clustering by Using Word Embedding and UMAP. 2021 8th International Conference on Advanced Informatics: Concepts, Theory and Applications (ICAICTA), 1\u20136. https:\/\/doi.org\/10.1109\/ICAICTA53211.2021.9640285","DOI":"10.1109\/ICAICTA53211.2021.9640285"},{"key":"4689_CR6","doi-asserted-by":"publisher","unstructured":"Babaki, B. (2017). COP-Kmeans version 1.5. https:\/\/doi.org\/10.5281\/zenodo.831850","DOI":"10.5281\/zenodo.831850"},{"issue":"5","key":"4689_CR7","doi-asserted-by":"publisher","first-page":"349","DOI":"10.1002\/wics.1270","volume":"5","author":"E Bair","year":"2013","unstructured":"Bair, E. (2013). Semi-supervised clustering methods. Wiley Interdisciplinary Reviews. Computational Statistics, 5(5), 349\u2013361. https:\/\/doi.org\/10.1002\/wics.1270","journal-title":"Wiley Interdisciplinary Reviews. Computational Statistics"},{"key":"4689_CR8","first-page":"20","volume":"41","author":"S Bajpai","year":"2015","unstructured":"Bajpai, S., Bajpai, R., & Chaturvedi, H. (2015). Evaluation of inter-rater agreement and inter-rater reliability for observational data: An overview of concepts and methods. Journal of the Indian Academy of Applied Psychology, 41, 20\u201327.","journal-title":"Journal of the Indian Academy of Applied Psychology"},{"issue":"10","key":"4689_CR9","first-page":"281","volume":"13","author":"J Bergstra","year":"2012","unstructured":"Bergstra, J., & Bengio, Y. (2012). Random search for hyper-parameter optimization. Journal of Machine Learning Research, 13(10), 281\u2013305.","journal-title":"Journal of Machine Learning Research"},{"issue":"6","key":"4689_CR10","doi-asserted-by":"publisher","first-page":"103069","DOI":"10.1016\/j.ipm.2022.103069","volume":"59","author":"P Bhattacharya","year":"2022","unstructured":"Bhattacharya, P., Ghosh, K., Pal, A., & Ghosh, S. (2022). Legal case document similarity: You need both network and text. Information Processing & Management, 59(6), 103069. https:\/\/doi.org\/10.1016\/j.ipm.2022.103069","journal-title":"Information Processing & Management"},{"key":"4689_CR11","volume-title":"Natural Language Processing with Python: Analyzing Text with the Natural Language Toolkit","author":"S Bird","year":"2009","unstructured":"Bird, S., Klein, E., & Loper, E. (2009). Natural Language Processing with Python: Analyzing Text with the Natural Language Toolkit (1st ed.). O\u2019Reilly Media.","edition":"1"},{"issue":"2","key":"4689_CR12","doi-asserted-by":"publisher","first-page":"449","DOI":"10.1007\/s11192-017-2609-2","volume":"114","author":"KW Boyack","year":"2018","unstructured":"Boyack, K. W., Smith, C., & Klavans, R. (2018). Toward predicting research proposal success. Scientometrics, 114(2), 449\u2013461. https:\/\/doi.org\/10.1007\/s11192-017-2609-2","journal-title":"Scientometrics"},{"key":"4689_CR13","doi-asserted-by":"publisher","DOI":"10.18637\/jss.v025.i04","author":"G Brock","year":"2008","unstructured":"Brock, G., Pihur, V., Datta, S., & Datta, S. (2008). clValid: An R package for cluster validation. Journal of Statistical Software. https:\/\/doi.org\/10.18637\/jss.v025.i04","journal-title":"Journal of Statistical Software"},{"issue":"6","key":"4689_CR14","doi-asserted-by":"publisher","first-page":"807","DOI":"10.1016\/j.cptl.2018.03.019","volume":"10","author":"A Castleberry","year":"2018","unstructured":"Castleberry, A., & Nolen, A. (2018). Thematic analysis of qualitative research data: Is it as easy as it sounds? Currents in Pharmacy Teaching and Learning, 10(6), 807\u2013815. https:\/\/doi.org\/10.1016\/j.cptl.2018.03.019","journal-title":"Currents in Pharmacy Teaching and Learning"},{"issue":"3","key":"4689_CR15","doi-asserted-by":"publisher","first-page":"241","DOI":"10.1080\/09537325.2013.850477","volume":"26","author":"S Choi","year":"2014","unstructured":"Choi, S., & Jun, S. (2014). Vacant technology forecasting using new Bayesian patent clustering. Technology Analysis & Strategic Management, 26(3), 241\u2013251. https:\/\/doi.org\/10.1080\/09537325.2013.850477","journal-title":"Technology Analysis & Strategic Management"},{"key":"4689_CR16","doi-asserted-by":"publisher","unstructured":"Cohan, A., Beltagy, I., King, D., Dalvi, B., & Weld, D. S. (2019). Pretrained Language Models for Sequential Sentence Classification. Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP), 3691\u20133697. https:\/\/doi.org\/10.18653\/v1\/D19-1383","DOI":"10.18653\/v1\/D19-1383"},{"issue":"2","key":"4689_CR17","doi-asserted-by":"publisher","first-page":"102034","DOI":"10.1016\/j.ipm.2019.04.002","volume":"57","author":"SA Curiskis","year":"2020","unstructured":"Curiskis, S. A., Drake, B., Osborn, T. R., & Kennedy, P. J. (2020). An evaluation of document clustering and topic modelling in two online social networks: Twitter and Reddit. Information Processing & Management, 57(2), 102034. https:\/\/doi.org\/10.1016\/j.ipm.2019.04.002","journal-title":"Information Processing & Management"},{"key":"4689_CR18","doi-asserted-by":"publisher","unstructured":"Devlin, J., Chang, M.-W., Lee, K., & Toutanova, K. (2019). BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding. Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, 1, 4171\u20134186 https:\/\/doi.org\/10.18653\/v1\/N19-1423","DOI":"10.18653\/v1\/N19-1423"},{"issue":"5","key":"4689_CR19","doi-asserted-by":"publisher","first-page":"5497","DOI":"10.3233\/JIFS-189871","volume":"41","author":"J Dhanani","year":"2021","unstructured":"Dhanani, J., Mehta, R., & Rana, D. (2021). Legal document recommendation system: A cluster based pairwise similarity computation. Journal of Intelligent & Fuzzy Systems, 41(5), 5497\u20135509. https:\/\/doi.org\/10.3233\/JIFS-189871","journal-title":"Journal of Intelligent & Fuzzy Systems"},{"issue":"2","key":"4689_CR20","doi-asserted-by":"publisher","first-page":"101018","DOI":"10.1016\/j.joi.2020.101018","volume":"14","author":"A Ebadi","year":"2020","unstructured":"Ebadi, A., Tremblay, S., Goutte, C., & Schiffauerova, A. (2020). Application of machine learning techniques to assess the trends and alignment of the funded research output. Journal of Informetrics, 14(2), 101018. https:\/\/doi.org\/10.1016\/j.joi.2020.101018","journal-title":"Journal of Informetrics"},{"issue":"4","key":"4689_CR21","doi-asserted-by":"publisher","first-page":"433","DOI":"10.1093\/scipol\/scy026","volume":"45","author":"J Edler","year":"2018","unstructured":"Edler, J., & Boon, W. P. (2018). \u2018The next generation of innovation policy: Directionality and the role of demand-oriented instruments\u2019\u2014Introduction to the special section. Science and Public Policy, 45(4), 433\u2013434. https:\/\/doi.org\/10.1093\/scipol\/scy026","journal-title":"Science and Public Policy"},{"key":"4689_CR22","doi-asserted-by":"publisher","first-page":"113679","DOI":"10.1016\/j.eswa.2020.113679","volume":"165","author":"WS El-Kassas","year":"2021","unstructured":"El-Kassas, W. S., Salama, C. R., Rafea, A. A., & Mohamed, H. K. (2021). Automatic text summarization: A comprehensive survey. Expert Systems with Applications, 165, 113679. https:\/\/doi.org\/10.1016\/j.eswa.2020.113679","journal-title":"Expert Systems with Applications"},{"issue":"4","key":"4689_CR23","doi-asserted-by":"publisher","first-page":"442","DOI":"10.1093\/reseval\/rvw016","volume":"25","author":"CA Freyman","year":"2016","unstructured":"Freyman, C. A., Byrnes, J. J., & Alexander, J. (2016). Machine-learning-based classification of research grant award records. Research Evaluation, 25(4), 442\u2013450. https:\/\/doi.org\/10.1093\/reseval\/rvw016","journal-title":"Research Evaluation"},{"key":"4689_CR24","doi-asserted-by":"publisher","first-page":"267","DOI":"10.1007\/978-3-642-30111-7_25","volume-title":"Advances in Computer Science, Engineering & Applications","author":"S Gajawada","year":"2012","unstructured":"Gajawada, S., & Toshniwal, D. (2012). Hybrid Cluster Validation Techniques. In D. C. Wyld, J. Zizka, & D. Nagamalai (Eds.), Advances in Computer Science, Engineering & Applications (pp. 267\u2013273). Springer. https:\/\/doi.org\/10.1007\/978-3-642-30111-7_25"},{"issue":"1","key":"4689_CR25","first-page":"3049","volume":"18","author":"AJ Gates","year":"2017","unstructured":"Gates, A. J., & Ahn, Y.-Y. (2017). The impact of random models on clustering similarity. The Journal of Machine Learning Research, 18(1), 3049\u20133076.","journal-title":"The Journal of Machine Learning Research"},{"key":"4689_CR26","doi-asserted-by":"publisher","first-page":"116551","DOI":"10.1016\/j.eswa.2022.116551","volume":"195","author":"Z Ghasemi","year":"2022","unstructured":"Ghasemi, Z., Khorshidi, H. A., & Aickelin, U. (2022). Multi-objective Semi-supervised clustering for finding predictive clusters. Expert Systems with Applications, 195, 116551. https:\/\/doi.org\/10.1016\/j.eswa.2022.116551","journal-title":"Expert Systems with Applications"},{"issue":"3","key":"4689_CR27","doi-asserted-by":"publisher","first-page":"330","DOI":"10.1016\/j.sapharm.2012.04.004","volume":"9","author":"N Gisev","year":"2013","unstructured":"Gisev, N., Bell, J. S., & Chen, T. F. (2013). Interrater agreement and interrater reliability: Key concepts, approaches, and applications. Research in Social and Administrative Pharmacy, 9(3), 330\u2013338. https:\/\/doi.org\/10.1016\/j.sapharm.2012.04.004","journal-title":"Research in Social and Administrative Pharmacy"},{"issue":"3","key":"4689_CR28","doi-asserted-by":"publisher","first-page":"480","DOI":"10.1111\/coin.12064","volume":"32","author":"Y Hu","year":"2016","unstructured":"Hu, Y., Milios, E. E., & Blustein, J. (2016). Document clustering with dual supervision through feature reweighting. Computational Intelligence, 32(3), 480\u2013513. https:\/\/doi.org\/10.1111\/coin.12064","journal-title":"Computational Intelligence"},{"issue":"6","key":"4689_CR29","doi-asserted-by":"publisher","first-page":"102683","DOI":"10.1016\/j.ipm.2021.102683","volume":"58","author":"P Jim\u00e9nez","year":"2021","unstructured":"Jim\u00e9nez, P., Rold\u00e1n, J. C., & Corchuelo, R. (2021). A clustering approach to extract data from HTML tables. Information Processing & Management, 58(6), 102683. https:\/\/doi.org\/10.1016\/j.ipm.2021.102683","journal-title":"Information Processing & Management"},{"key":"4689_CR30","doi-asserted-by":"publisher","DOI":"10.46743\/2160-3715\/2019.4120","author":"N Kalpokaite","year":"2019","unstructured":"Kalpokaite, N., & Radivojevic, I. (2019). Demystifying qualitative data analysis for novice qualitative researchers. The Qualitative Report. https:\/\/doi.org\/10.46743\/2160-3715\/2019.4120","journal-title":"The Qualitative Report"},{"issue":"1","key":"4689_CR31","doi-asserted-by":"publisher","first-page":"102816","DOI":"10.1016\/j.ipm.2021.102816","volume":"59","author":"K Kaya","year":"2022","unstructured":"Kaya, K., Y\u0131lmaz, Y., Yaslan, Y., \u00d6\u011f\u00fcd\u00fcc\u00fc, \u015eG., & \u00c7\u0131ng\u0131, F. (2022). Demand forecasting model using hotel clustering findings for hospitality industry. Information Processing & Management, 59(1), 102816. https:\/\/doi.org\/10.1016\/j.ipm.2021.102816","journal-title":"Information Processing & Management"},{"key":"4689_CR32","doi-asserted-by":"publisher","first-page":"100057","DOI":"10.1016\/j.yjbinx.2019.100057","volume":"100","author":"FK Khattak","year":"2019","unstructured":"Khattak, F. K., Jeblee, S., Pou-Prom, C., Abdalla, M., Meaney, C., & Rudzicz, F. (2019). A survey of word embeddings for clinical text. Journal of Biomedical Informatics, 100, 100057. https:\/\/doi.org\/10.1016\/j.yjbinx.2019.100057","journal-title":"Journal of Biomedical Informatics"},{"issue":"2","key":"4689_CR33","doi-asserted-by":"publisher","first-page":"563","DOI":"10.1007\/s11192-020-03396-7","volume":"123","author":"J Kim","year":"2020","unstructured":"Kim, J., Yoon, J., Park, E., & Choi, S. (2020). Patent document clustering with deep embeddings. Scientometrics, 123(2), 563\u2013577. https:\/\/doi.org\/10.1007\/s11192-020-03396-7","journal-title":"Scientometrics"},{"key":"4689_CR34","doi-asserted-by":"crossref","unstructured":"Levine, C. S., Knisely, B., Johnson, D., & Vaughn-Cooke, M. (2022). A structured method to achieve cognitive depth for medical device use error topic modeling. Human Factors in Healthcare, 2, 100016. https:\/\/doi.org\/10.1016\/j.hfh.2022.100016","DOI":"10.1016\/j.hfh.2022.100016"},{"key":"4689_CR35","doi-asserted-by":"publisher","first-page":"1426","DOI":"10.1109\/ITNEC48623.2020.9085059","volume":"1","author":"Y Li","year":"2020","unstructured":"Li, Y., Cai, J., & Wang, J. (2020). A Text document clustering method based on weighted BERT Model. IEEE 4th Information Technology, Networking Electronic and Automation Control Conference (ITNEC), 1, 1426\u20131430. https:\/\/doi.org\/10.1109\/ITNEC48623.2020.9085059","journal-title":"IEEE 4th Information Technology, Networking Electronic and Automation Control Conference (ITNEC)"},{"issue":"5","key":"4689_CR36","doi-asserted-by":"publisher","first-page":"1771","DOI":"10.1109\/TSE.2020.3036108","volume":"48","author":"M Li","year":"2022","unstructured":"Li, M., Chen, T., & Yao, X. (2022). How to Evaluate solutions in Pareto-based Search-based software engineering? A critical review and methodological guidance. IEEE Transactions on Software Engineering, 48(5), 1771\u20131799. https:\/\/doi.org\/10.1109\/TSE.2020.3036108","journal-title":"IEEE Transactions on Software Engineering"},{"key":"4689_CR37","unstructured":"Liu, Q., Kusner, M. J., & Blunsom, P. (2020). A Survey on Contextual Embeddings. http:\/\/arxiv.org\/abs\/2003.07278"},{"issue":"3","key":"4689_CR38","doi-asserted-by":"publisher","first-page":"784","DOI":"10.1109\/TSMCA.2011.2172205","volume":"42","author":"J Ma","year":"2012","unstructured":"Ma, J., Xu, W., Sun, Y., Turban, E., Wang, S., & Liu, O. (2012). An ontology-based text-mining method to cluster proposals for research project selection. IEEE Transactions on Systems, Man, and Cybernetics - Part a: Systems and Humans, 42(3), 784\u2013790. https:\/\/doi.org\/10.1109\/TSMCA.2011.2172205","journal-title":"IEEE Transactions on Systems, Man, and Cybernetics - Part a: Systems and Humans"},{"issue":"3","key":"4689_CR39","doi-asserted-by":"publisher","first-page":"276","DOI":"10.11613\/BM.2012.031","volume":"22","author":"ML McHugh","year":"2012","unstructured":"McHugh, M. L. (2012). Interrater reliability: The kappa statistic. Biochemia Medica, 22(3), 276\u2013282.","journal-title":"Biochemia Medica"},{"key":"4689_CR40","unstructured":"McInnes, L., Healy, J., & Melville, J. (2020). UMAP: Uniform Manifold Approximation and Projection for Dimension Reduction. http:\/\/arxiv.org\/abs\/1802.03426"},{"issue":"1","key":"4689_CR41","doi-asserted-by":"publisher","first-page":"17","DOI":"10.1016\/j.envsci.2006.10.004","volume":"10","author":"EC McNie","year":"2007","unstructured":"McNie, E. C. (2007). Reconciling the supply of scientific information with user demands: An analysis of the problem and review of the literature. Environmental Science & Policy, 10(1), 17\u201338. https:\/\/doi.org\/10.1016\/j.envsci.2006.10.004","journal-title":"Environmental Science & Policy"},{"issue":"9","key":"4689_CR42","doi-asserted-by":"publisher","first-page":"1726","DOI":"10.1109\/TFUZZ.2018.2889010","volume":"27","author":"J-P Mei","year":"2019","unstructured":"Mei, J.-P. (2019). Semisupervised fuzzy clustering with partition information of subsets. IEEE Transactions on Fuzzy Systems, 27(9), 1726\u20131737. https:\/\/doi.org\/10.1109\/TFUZZ.2018.2889010","journal-title":"IEEE Transactions on Fuzzy Systems"},{"key":"4689_CR43","volume-title":"Advances in neural information processing systems","author":"T Mikolov","year":"2013","unstructured":"Mikolov, T., Sutskever, I., Chen, K., Corrado, G. S., & Dean, J. (2013). Distributed representations of words and phrases and their compositionality. In C. J. Burges, L. Bottou, M. Welling, Z. Ghahramani, & K. Q. Weinberger (Eds.), Advances in neural information processing systems.  (Vol. 26). Curran Associates Inc."},{"issue":"2","key":"4689_CR44","doi-asserted-by":"publisher","first-page":"1520","DOI":"10.1007\/s10489-021-02376-5","volume":"52","author":"SK Mishra","year":"2022","unstructured":"Mishra, S. K., Saini, N., Saha, S., & Bhattacharyya, P. (2022). Scientific document summarization in multi-objective clustering framework. Applied Intelligence, 52(2), 1520\u20131543. https:\/\/doi.org\/10.1007\/s10489-021-02376-5","journal-title":"Applied Intelligence"},{"issue":"3","key":"4689_CR45","doi-asserted-by":"publisher","first-page":"e1300","DOI":"10.1002\/widm.1300","volume":"9","author":"M Mittal","year":"2019","unstructured":"Mittal, M., Goyal, L. M., Hemanth, D. J., & Sethi, J. K. (2019). Clustering approaches for high-dimensional databases: A review. Wires Data Mining and Knowledge Discovery, 9(3), e1300. https:\/\/doi.org\/10.1002\/widm.1300","journal-title":"Wires Data Mining and Knowledge Discovery"},{"key":"4689_CR46","doi-asserted-by":"publisher","DOI":"10.1109\/ICOASE51841.2020.9436540","author":"SM Mohammed","year":"2020","unstructured":"Mohammed, S. M., Jacksi, K., & Zeebaree, S. R. M. (2020). Glove word embedding and DBSCAN algorithms for Semantic document clustering. International Conference on Advanced Science and Engineering (ICOASE). https:\/\/doi.org\/10.1109\/ICOASE51841.2020.9436540","journal-title":"International Conference on Advanced Science and Engineering (ICOASE)"},{"key":"4689_CR47","doi-asserted-by":"publisher","unstructured":"Molchanov, V., & Linsen, L. (2018). Overcoming the Curse of Dimensionality When Clustering Multivariate Volume Data. Proceedings of the 13th International Joint Conference on Computer Vision, Imaging and Computer Graphics Theory and Applications, (pp. 29\u201339) https:\/\/doi.org\/10.5220\/0006541900290039","DOI":"10.5220\/0006541900290039"},{"key":"4689_CR48","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1109\/ICACSIS53237.2021.9631364","volume":"2021","author":"MA Mutasodirin","year":"2021","unstructured":"Mutasodirin, M. A., & Prasojo, R. E. (2021). Investigating text shortening strategy in BERT: Truncation vs summarization. International Conference on Advanced Computer Science and Information Systems (ICACSIS), 2021, 1\u20135. https:\/\/doi.org\/10.1109\/ICACSIS53237.2021.9631364","journal-title":"International Conference on Advanced Computer Science and Information Systems (ICACSIS)"},{"issue":"3","key":"4689_CR49","doi-asserted-by":"publisher","first-page":"741","DOI":"10.1007\/s11192-014-1319-2","volume":"100","author":"LG Nichols","year":"2014","unstructured":"Nichols, L. G. (2014). A topic model approach to measuring interdisciplinarity at the National Science Foundation. Scientometrics, 100(3), 741\u2013754. https:\/\/doi.org\/10.1007\/s11192-014-1319-2","journal-title":"Scientometrics"},{"key":"4689_CR50","doi-asserted-by":"publisher","first-page":"838","DOI":"10.1109\/ASRU46091.2019.9003958","volume":"2019","author":"R Pappagari","year":"2019","unstructured":"Pappagari, R., Zelasko, P., Villalba, J., Carmiel, Y., & Dehak, N. (2019). Hierarchical transformers for long document classification. IEEE Automatic Speech Recognition and Understanding Workshop (ASRU), 2019, 838\u2013844. https:\/\/doi.org\/10.1109\/ASRU46091.2019.9003958","journal-title":"IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)"},{"issue":"85","key":"4689_CR51","first-page":"2825","volume":"12","author":"F Pedregosa","year":"2011","unstructured":"Pedregosa, F., Varoquaux, G., Gramfort, A., Michel, V., Thirion, B., Grisel, O., Blondel, M., Prettenhofer, P., Weiss, R., Dubourg, V., Vanderplas, J., Passos, A., Cournapeau, D., Brucher, M., Perrot, M., & Duchesnay, \u00c9. (2011). Scikit-learn: Machine learning in Python. Journal of Machine Learning Research, 12(85), 2825\u20132830.","journal-title":"Journal of Machine Learning Research"},{"key":"4689_CR52","doi-asserted-by":"publisher","unstructured":"Pennington, J., Socher, R., & Manning, C. (2014). GloVe: Global Vectors for Word Representation. Proceedings of the 2014 Conference on Empirical Methods in Natural Language Processing (EMNLP), (pp. 1532\u20131543) https:\/\/doi.org\/10.3115\/v1\/D14-1162","DOI":"10.3115\/v1\/D14-1162"},{"key":"4689_CR53","doi-asserted-by":"publisher","first-page":"107342","DOI":"10.1016\/j.knosys.2021.107342","volume":"229","author":"A Penta","year":"2021","unstructured":"Penta, A., & Pal, A. (2021). What is this cluster about? Explaining textual clusters by extracting relevant keywords. Knowledge-Based Systems, 229, 107342. https:\/\/doi.org\/10.1016\/j.knosys.2021.107342","journal-title":"Knowledge-Based Systems"},{"key":"4689_CR54","doi-asserted-by":"publisher","DOI":"10.5441\/002\/edbt.2014.31","author":"M Pourrajabi","year":"2014","unstructured":"Pourrajabi, M., Moulavi, D., Campello, R. J. G. B., Zimek, A., Sander, J., & Goebel, R. (2014). Model selection for semi-supervised clustering. 17th International Conference on Extending Database Technology (EDBT). https:\/\/doi.org\/10.5441\/002\/edbt.2014.31","journal-title":"17th International Conference on Extending Database Technology (EDBT)"},{"key":"4689_CR55","doi-asserted-by":"publisher","DOI":"10.19026\/rjaset.8.1118","author":"DS Priya","year":"2014","unstructured":"Priya, D. S., & Karthikeyan, M. (2014). An efficient EM based ontology text-mining to cluster proposals for research project selection. Research Journal of Applied Sciences, Engineering and Technology,. https:\/\/doi.org\/10.19026\/rjaset.8.1118","journal-title":"Research Journal of Applied Sciences, Engineering and Technology,"},{"issue":"5","key":"4689_CR56","doi-asserted-by":"publisher","first-page":"599","DOI":"10.1007\/s12559-019-09664-w","volume":"11","author":"Y Qin","year":"2019","unstructured":"Qin, Y., Ding, S., Wang, L., & Wang, Y. (2019). Research progress on semi-supervised clustering. Cognitive Computation, 11(5), 599\u2013612. https:\/\/doi.org\/10.1007\/s12559-019-09664-w","journal-title":"Cognitive Computation"},{"key":"4689_CR57","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1109\/ICISC.2017.8068581","volume":"2017","author":"K Rajput","year":"2017","unstructured":"Rajput, K., & Kandoi, N. (2017). An ontology-based text-mining method to develop intelligent information system using cluster based approach. International Conference on Inventive Systems and Control (ICISC), 2017, 1\u20136. https:\/\/doi.org\/10.1109\/ICISC.2017.8068581","journal-title":"International Conference on Inventive Systems and Control (ICISC)"},{"issue":"336","key":"4689_CR58","doi-asserted-by":"publisher","first-page":"846","DOI":"10.2307\/2284239","volume":"66","author":"WM Rand","year":"1971","unstructured":"Rand, W. M. (1971). Objective criteria for the evaluation of clustering methods. Journal of the American Statistical Association, 66(336), 846\u2013850. https:\/\/doi.org\/10.2307\/2284239","journal-title":"Journal of the American Statistical Association"},{"key":"4689_CR59","doi-asserted-by":"publisher","first-page":"54776","DOI":"10.1109\/ACCESS.2020.2980942","volume":"8","author":"GT Reddy","year":"2020","unstructured":"Reddy, G. T., Reddy, M. P. K., Lakshmanna, K., Kaluri, R., Rajput, D. S., Srivastava, G., & Baker, T. (2020). Analysis of dimensionality reduction techniques on big data. IEEE Access, 8, 54776\u201354788. https:\/\/doi.org\/10.1109\/ACCESS.2020.2980942","journal-title":"IEEE Access"},{"key":"4689_CR60","doi-asserted-by":"crossref","unstructured":"Reimers, N., & Gurevych, I. (2019). Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks. Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing, (pp.671\u2013688). http:\/\/arxiv.org\/abs\/1908.10084","DOI":"10.18653\/v1\/D19-1410"},{"issue":"1","key":"4689_CR61","first-page":"27","volume":"5","author":"E Rend\u00f3n","year":"2011","unstructured":"Rend\u00f3n, E., Abundez, I., Arizmendi, A., & Quiroz, E. M. (2011). Internal versus external cluster validation indexes. International Journal of Computers and Communications, 5(1), 27\u201334.","journal-title":"International Journal of Computers and Communications"},{"issue":"12","key":"4689_CR62","doi-asserted-by":"publisher","first-page":"2648","DOI":"10.5829\/ije.2021.34.12C.10","volume":"34","author":"SM Sadjadi","year":"2021","unstructured":"Sadjadi, S. M., Mashayekhi, H., & Hassanpour, H. (2021). A two-level semi-supervised clustering technique for news articles. International Journal of Engineering, 34(12), 2648\u20132657. https:\/\/doi.org\/10.5829\/ije.2021.34.12C.10","journal-title":"International Journal of Engineering"},{"issue":"2","key":"4689_CR63","doi-asserted-by":"publisher","first-page":"3213","DOI":"10.1007\/s10586-018-2023-4","volume":"22","author":"R Sandhiya","year":"2019","unstructured":"Sandhiya, R., & Sundarambal, M. (2019). Clustering of biomedical documents using ontology-based TF-IGM enriched semantic smoothing model for telemedicine applications. Cluster Computing, 22(2), 3213\u20133230. https:\/\/doi.org\/10.1007\/s10586-018-2023-4","journal-title":"Cluster Computing"},{"issue":"3\u20134","key":"4689_CR64","doi-asserted-by":"publisher","first-page":"422","DOI":"10.1504\/IJAIP.2021.116369","volume":"19","author":"RA Saravanan","year":"2021","unstructured":"Saravanan, R. A., & Babu, M. R. (2021). Information retrieval from multi-domain specific research proposal using hierarchical-based neural network clustering algorithm. International Journal of Advanced Intelligence Paradigms, 19(3\u20134), 422\u2013437. https:\/\/doi.org\/10.1504\/IJAIP.2021.116369","journal-title":"International Journal of Advanced Intelligence Paradigms"},{"issue":"1","key":"4689_CR65","doi-asserted-by":"publisher","first-page":"5","DOI":"10.1016\/j.envsci.2006.10.001","volume":"10","author":"D Sarewitz","year":"2007","unstructured":"Sarewitz, D., & Pielke, R. A. (2007). The neglected heart of science policy: Reconciling supply of and demand for science. Environmental Science & Policy, 10(1), 5\u201316. https:\/\/doi.org\/10.1016\/j.envsci.2006.10.001","journal-title":"Environmental Science & Policy"},{"issue":"3","key":"4689_CR66","doi-asserted-by":"publisher","first-page":"257","DOI":"10.1093\/ptj\/85.3.257","volume":"85","author":"J Sim","year":"2005","unstructured":"Sim, J., & Wright, C. C. (2005). The Kappa statistic in reliability studies: Use, interpretation, and sample size requirements. Physical Therapy, 85(3), 257\u2013268. https:\/\/doi.org\/10.1093\/ptj\/85.3.257","journal-title":"Physical Therapy"},{"key":"4689_CR67","doi-asserted-by":"publisher","first-page":"49","DOI":"10.1007\/978-3-319-19369-4_5","volume-title":"Artificial intelligence and soft computing","author":"A Starczewski","year":"2015","unstructured":"Starczewski, A., & Krzy\u017cak, A. (2015). Performance evaluation of the Silhouette index. In L. Rutkowski, M. Korytkowski, R. Scherer, R. Tadeusiewicz, L. A. Zadeh, & J. M. Zurada (Eds.), Artificial intelligence and soft computing (pp. 49\u201358). Springer International Publishing."},{"issue":"1","key":"4689_CR68","doi-asserted-by":"publisher","first-page":"15","DOI":"10.1186\/s40537-022-00564-9","volume":"9","author":"A Subakti","year":"2022","unstructured":"Subakti, A., Murfi, H., & Hariadi, N. (2022). The performance of BERT as data representation of text clustering. Journal of Big Data, 9(1), 15. https:\/\/doi.org\/10.1186\/s40537-022-00564-9","journal-title":"Journal of Big Data"},{"key":"4689_CR69","doi-asserted-by":"publisher","first-page":"194","DOI":"10.1007\/978-3-030-32381-3_16","volume-title":"Chinese Computational Linguistics","author":"C Sun","year":"2019","unstructured":"Sun, C., Qiu, X., Xu, Y., & Huang, X. (2019). How to Fine-Tune BERT for Text Classification. In M. Sun, X. Huang, H. Ji, Z. Liu, & Y. Liu (Eds.), Chinese Computational Linguistics (pp. 194\u2013206). Springer International Publishing. https:\/\/doi.org\/10.1007\/978-3-030-32381-3_16"},{"issue":"6","key":"4689_CR70","doi-asserted-by":"publisher","first-page":"443","DOI":"10.1038\/nmeth.1619","volume":"8","author":"EM Talley","year":"2011","unstructured":"Talley, E. M., Newman, D., Mimno, D., Herr, B. W., Wallach, H. M., Burns, G. A. P. C., Leenders, A. G. M., & McCallum, A. (2011). Database of NIH grants using machine-learned categories and graphical clustering. Nature Methods, 8(6), 443\u2013444. https:\/\/doi.org\/10.1038\/nmeth.1619","journal-title":"Nature Methods"},{"key":"4689_CR71","unstructured":"Wagstaff, K., Cardie, C., Rogers, S., & Schr\u00f6dl, S. (2001). Constrained K-means Clustering with Background Knowledge. Proceedings of the Eighteenth International Conference on Machine Learning, 577\u2013584."},{"key":"4689_CR72","doi-asserted-by":"publisher","DOI":"10.1109\/HICSS.2015.153","author":"Y Wang","year":"2015","unstructured":"Wang, Y., Xu, W., & Jiang, H. (2015). Using text mining and clustering to group research proposals for research project selection. 48th Hawaii International Conference on System Sciences. https:\/\/doi.org\/10.1109\/HICSS.2015.153","journal-title":"48th Hawaii International Conference on System Sciences"},{"issue":"3, Part 2","key":"4689_CR73","doi-asserted-by":"publisher","first-page":"6050","DOI":"10.1016\/j.eswa.2008.06.093","volume":"36","author":"J Wu","year":"2009","unstructured":"Wu, J., Chen, J., Xiong, H., & Xie, M. (2009). External validation measures for K-means clustering: A data distribution perspective. Expert Systems with Applications, 36(3, Part 2), 6050\u20136061. https:\/\/doi.org\/10.1016\/j.eswa.2008.06.093","journal-title":"Expert Systems with Applications"},{"issue":"4","key":"4689_CR74","doi-asserted-by":"publisher","first-page":"1099","DOI":"10.1016\/j.joi.2018.09.004","volume":"12","author":"Y Zhang","year":"2018","unstructured":"Zhang, Y., Lu, J., Liu, F., Liu, Q., Porter, A., Chen, H., & Zhang, G. (2018). Does deep learning help topic extraction? A kernel k-means clustering method with word embedding. Journal of Informetrics, 12(4), 1099\u20131117. https:\/\/doi.org\/10.1016\/j.joi.2018.09.004","journal-title":"Journal of Informetrics"},{"issue":"1","key":"4689_CR75","doi-asserted-by":"publisher","first-page":"3","DOI":"10.1007\/s10994-006-6540-7","volume":"65","author":"S Zhong","year":"2006","unstructured":"Zhong, S. (2006). Semi-supervised model-based document clustering: A comparative study. Machine Learning, 65(1), 3\u201329. https:\/\/doi.org\/10.1007\/s10994-006-6540-7","journal-title":"Machine Learning"},{"issue":"1","key":"4689_CR76","doi-asserted-by":"publisher","first-page":"167","DOI":"10.1007\/s11192-019-03126-8","volume":"120","author":"Y Zhou","year":"2019","unstructured":"Zhou, Y., Lin, H., Liu, Y., & Ding, W. (2019). A novel method to identify emerging technologies using a semi-supervised topic clustering model: A case of 3D printing industry. Scientometrics, 120(1), 167\u2013185. https:\/\/doi.org\/10.1007\/s11192-019-03126-8","journal-title":"Scientometrics"}],"container-title":["Scientometrics"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11192-023-04689-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11192-023-04689-3\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11192-023-04689-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,4,9]],"date-time":"2025-04-09T15:46:17Z","timestamp":1744213577000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11192-023-04689-3"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,4,8]]},"references-count":76,"journal-issue":{"issue":"5","published-print":{"date-parts":[[2023,5]]}},"alternative-id":["4689"],"URL":"https:\/\/doi.org\/10.1007\/s11192-023-04689-3","relation":{},"ISSN":["0138-9130","1588-2861"],"issn-type":[{"value":"0138-9130","type":"print"},{"value":"1588-2861","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023,4,8]]},"assertion":[{"value":"9 December 2022","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"7 March 2023","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"8 April 2023","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors have no relevant financial or non-financial interests to disclose.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}]}}