{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,18]],"date-time":"2025-12-18T14:30:03Z","timestamp":1766068203586,"version":"3.43.0"},"reference-count":35,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","license":[{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/legalcode"}],"funder":[{"DOI":"10.13039\/501100002322","name":"Coordena\u00e7\u00e3o de Aperfei\u00e7oamento de Pessoal de N\u00edvel Superior-Brazil","doi-asserted-by":"publisher","award":["001"],"award-info":[{"award-number":["001"]}],"id":[{"id":"10.13039\/501100002322","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Access"],"published-print":{"date-parts":[[2025]]},"DOI":"10.1109\/access.2025.3595423","type":"journal-article","created":{"date-parts":[[2025,8,4]],"date-time":"2025-08-04T18:46:18Z","timestamp":1754333178000},"page":"137348-137363","source":"Crossref","is-referenced-by-count":2,"title":["Investigating the Relationship Between Text Vectorization Cosine Similarity and Classification Performance"],"prefix":"10.1109","volume":"13","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-7083-5789","authenticated-orcid":false,"given":"Fernando Rezende","family":"Zagatti","sequence":"first","affiliation":[{"name":"Department of Computing, Federal University of S&#x00E3;o Carlos, S&#x00E3;o Carlos, S&#x00E3;o Paulo, Brazil"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3711-5592","authenticated-orcid":false,"given":"Gilson","family":"Yuuji Shimizu","sequence":"additional","affiliation":[{"name":"DIMEC, Center for Information Technology Renato Archer, Campinas, S&#x00E3;o Paulo, Brazil"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1360-4036","authenticated-orcid":false,"given":"Daniel","family":"Lucr\u00e9dio","sequence":"additional","affiliation":[{"name":"Department of Computing, Federal University of S&#x00E3;o Carlos, S&#x00E3;o Carlos, S&#x00E3;o Paulo, Brazil"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3996-8599","authenticated-orcid":false,"given":"Helena","family":"de Medeiros Caseli","sequence":"additional","affiliation":[{"name":"Department of Computing, Federal University of S&#x00E3;o Carlos, S&#x00E3;o Carlos, S&#x00E3;o Paulo, Brazil"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCCNT45670.2019.8944795"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/CBI.2019.00062"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/CCNC.2019.8651696"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2019.2937518"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1007\/s00521-024-10212-3"},{"key":"ref6","first-page":"1304","article-title":"Text classification of cancer clinical trial eligibility criteria","volume-title":"AMIA Annu. Symp. Proc.","author":"Yang"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2024.3357847"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/ICCCNT54827.2022.9984608"},{"key":"ref9","doi-asserted-by":"crossref","first-page":"295","DOI":"10.1016\/j.neucom.2020.07.061","article-title":"On hyperparameter optimization of machine learning algorithms: Theory and practice","volume":"415","author":"Yang","year":"2020","journal-title":"Neurocomputing"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1017\/pan.2020.8"},{"issue":"2","key":"ref11","doi-asserted-by":"crossref","DOI":"10.1016\/j.ipm.2021.102798","article-title":"A comparative study of automated legal text classification using random forests and deep learning","volume":"59","author":"Chen","year":"2022","journal-title":"Inf. Process. Manage."},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1145\/3539637.3557054"},{"issue":"9","key":"ref13","doi-asserted-by":"crossref","first-page":"421","DOI":"10.3390\/info11090421","article-title":"Measurement of text similarity: A survey","volume":"11","author":"Wang","year":"2020","journal-title":"Information"},{"issue":"10","key":"ref14","doi-asserted-by":"crossref","first-page":"5119","DOI":"10.3390\/app12105119","article-title":"Systematic comparison of vectorization methods in classification context","volume":"12","author":"Krzeszewska","year":"2022","journal-title":"Appl. Sci."},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/ICEDCS57360.2022.00015"},{"key":"ref16","doi-asserted-by":"crossref","first-page":"269","DOI":"10.1016\/j.procs.2021.12.239","article-title":"Comparative study of Arabic text classification using feature vectorization methods","volume":"198","author":"Sabri","year":"2022","journal-title":"Proc. Comput. Sci."},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1371\/journal.pone.0254937"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2021.3132651"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.26615\/978-954-452-080-9_014"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/SCSE49731.2020.9313040"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/ASYU48272.2019.8946337"},{"volume-title":"Data Mining: Concepts and Techniques","year":"2011","author":"Han","key":"ref22"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1007\/s10660-018-09327-2"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2015.2430359"},{"key":"ref25","first-page":"1532","article-title":"Glove: Global vectors for word representation","volume-title":"Proc. Empirical Methods Natural Lang. Process. (EMNLP)","author":"Pennington"},{"key":"ref26","article-title":"Efficient estimation of word representations in vector space","author":"Mikolov","year":"2013","journal-title":"arXiv:1301.3781"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1162\/tacl_a_00051"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/N19-1100"},{"key":"ref29","article-title":"BERT: Pre-training of deep bidirectional transformers for language understanding","author":"Devlin","year":"2018","journal-title":"arXiv:1810.04805"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1007\/s10660-022-09560-w"},{"key":"ref31","article-title":"Improving language understanding by generative pre-training","volume-title":"OpenAI","author":"Radford","year":"2018"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.14201\/adcaij2020924968"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2023.109646"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1016\/j.asoc.2020.106855"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1145\/3440755"}],"container-title":["IEEE Access"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/6287639\/10820123\/11108167.pdf?arnumber=11108167","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,11]],"date-time":"2025-08-11T17:44:45Z","timestamp":1754934285000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11108167\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025]]},"references-count":35,"URL":"https:\/\/doi.org\/10.1109\/access.2025.3595423","relation":{},"ISSN":["2169-3536"],"issn-type":[{"type":"electronic","value":"2169-3536"}],"subject":[],"published":{"date-parts":[[2025]]}}}