{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,17]],"date-time":"2025-10-17T22:40:16Z","timestamp":1760740816369,"version":"build-2065373602"},"publisher-location":"Singapore","reference-count":17,"publisher":"Springer Nature Singapore","isbn-type":[{"type":"print","value":"9789819534555"},{"type":"electronic","value":"9789819534562"}],"license":[{"start":{"date-parts":[[2025,10,17]],"date-time":"2025-10-17T00:00:00Z","timestamp":1760659200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,10,17]],"date-time":"2025-10-17T00:00:00Z","timestamp":1760659200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026]]},"DOI":"10.1007\/978-981-95-3456-2_11","type":"book-chapter","created":{"date-parts":[[2025,10,16]],"date-time":"2025-10-16T07:24:18Z","timestamp":1760599458000},"page":"148-160","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Optimizing Biomedical Text Processing: A Comparative Analysis of Tokenization Methods and Context-Aware Representation Learning"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0009-0001-5262-7927","authenticated-orcid":false,"given":"Wenran","family":"Xie","sequence":"first","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,10,17]]},"reference":[{"key":"11_CR1","doi-asserted-by":"publisher","first-page":"102970","DOI":"10.1016\/j.specom.2023.102970","volume":"153","author":"I Haq","year":"2023","unstructured":"Haq, I., Qiu, W., Guo, J., Tang, P.: Correction of whitespace and word segmentation in noisy Pashto text using CRF. Speech Commun. 
153, 102970 (2023)","journal-title":"Speech Commun."},{"key":"11_CR2","doi-asserted-by":"crossref","unstructured":"Neumann, M., et al.: ScispaCy: fast and robust models for biomedical natural language processing. In: EMNLP (2019)","DOI":"10.18653\/v1\/W19-5034"},{"key":"11_CR3","doi-asserted-by":"publisher","first-page":"53","DOI":"10.1016\/j.specom.2020.09.003","volume":"125","author":"M Aso","year":"2020","unstructured":"Aso, M., Takamichi, S., Takamune, N., Saruwatari, H.: Acoustic model-based subword tokenization and prosodic-context extraction without language knowledge for text-to-speech synthesis. Speech Commun. 125, 53\u201360 (2020)","journal-title":"Speech Commun."},{"key":"11_CR4","unstructured":"Kudo, T., Richardson, J. SentencePiece: a simple and language independent subword tokenizer. In: EMNLP (2018)"},{"key":"11_CR5","first-page":"267","volume":"445","author":"K He","year":"2021","unstructured":"He, K., Yan, Y., Xu, W.: From context-aware to knowledge-aware: boosting OOV tokens recognition in slot tagging with background knowledge. Comput. Speech Lang. 445, 267\u2013275 (2021)","journal-title":"Comput. Speech Lang."},{"key":"11_CR6","doi-asserted-by":"publisher","first-page":"119230","DOI":"10.1016\/j.renene.2023.119230","volume":"218","author":"A Behzadi","year":"2023","unstructured":"Behzadi, A., Sadrizadeh, S.: Grid-tied solar and biomass hybridization for multi-family houses in Sweden: an optimal rule-based control framework through machine learning approach. Renew. Energy 218, 119230 (2023)","journal-title":"Renew. Energy"},{"key":"11_CR7","first-page":"106017","volume":"92","author":"MR Rezvan","year":"2024","unstructured":"Rezvan, M.R., Sorkhi, A.G., Pirgazi, J., Kallehbasti, M.M.P.: AdvanceSplice: integrating N-gram one-hot encoding and ensemble modeling for enhanced accuracy. ISA Trans. 
92, 106017 (2024)","journal-title":"ISA Trans."},{"key":"11_CR8","first-page":"100302","volume":"13","author":"E Holbrook","year":"2024","unstructured":"Holbrook, E., Wiskur, B., Nagykaldi, Z.: Discovering opioid slang on social media: a Word2Vec approach with reddit data. Depend. Rep. 13, 100302 (2024)","journal-title":"Depend. Rep."},{"key":"11_CR9","doi-asserted-by":"publisher","first-page":"127811","DOI":"10.1016\/j.eswa.2025.127811","volume":"283","author":"W Xing","year":"2025","unstructured":"Xing, W., Zhang, J., Li, C., Dong, G.: IAMP-EmGCN: a new design for identifying antimicrobial peptides based on BERT and graph convolutional network. Expert Syst. Appl. 283, 127811 (2025)","journal-title":"Expert Syst. Appl."},{"key":"11_CR10","doi-asserted-by":"crossref","unstructured":"Lee, J., et al.: BioBERT: a pre-trained biomedical language representation model. Bioinformatics (2020)","DOI":"10.1093\/bioinformatics\/btz682"},{"key":"11_CR11","doi-asserted-by":"publisher","first-page":"111535","DOI":"10.1016\/j.dib.2025.111535","volume":"60","author":"L Fang","year":"2025","unstructured":"Fang, L., Salami, M.O., Weber, G.M., Torvik, V.I.: UCite: the union of nine large-scale public PubMed citation datasets with reliability filtering. Data Brief 60, 111535 (2025)","journal-title":"Data Brief"},{"key":"11_CR12","first-page":"115210","volume":"1040","author":"H Guo","year":"2025","unstructured":"Guo, H., Zhang, S., Shan, S., Chen, L., Yang, W.: The extra local diagnosability and diagnosis algorithm of networks under the PMC model. Science 1040, 115210 (2025)","journal-title":"Science"},{"key":"11_CR13","first-page":"124785","volume":"322","author":"Y Saito","year":"2024","unstructured":"Saito, Y., Itakura, K., Ohtake, N., Hasegawa, H.: Classification of soybean chemical characteristics by excitation emission matrix coupled with t-SNE dimensionality reduction. 
Spectroscopy 322, 124785 (2024)","journal-title":"Spectroscopy"},{"key":"11_CR14","first-page":"108605","volume":"176","author":"HS Tan","year":"2024","unstructured":"Tan, H.S., Wang, K., Mcbeth, R.: Exploring UMAP in hybrid models of entropy-based and representativeness sampling for active learning in biomedical segmentation. Comput. Med. Imaging Graph. 176, 108605 (2024)","journal-title":"Comput. Med. Imaging Graph."},{"issue":"Suppl. 1","key":"11_CR15","doi-asserted-by":"publisher","first-page":"e226","DOI":"10.1016\/j.clinph.2018.04.582","volume":"129","author":"JA Reeves","year":"2018","unstructured":"Reeves, J.A., Hussain, S.J., Wassermann, E.M., Freedberg, M.V.: Platform session \u2013 NIBS: quantifying the stereotypy of TMS-evoked EEG potentials using a cosine similarity metric. Clin. Neurophysiol. 129(Suppl. 1), e226 (2018)","journal-title":"Clin. Neurophysiol."},{"key":"11_CR16","doi-asserted-by":"crossref","unstructured":"Wang, X., et al.: Aptner: a specific dataset for ner missions in cyber threat intelligence field. In: 2022 IEEE 25th International Conference on Computer Supported Cooperative Work in Design (CSCWD), pp. 1233\u20131238. IEEE (2022)","DOI":"10.1109\/CSCWD54268.2022.9776031"},{"key":"11_CR17","doi-asserted-by":"crossref","unstructured":"Wang, X., et al.: Dnrti: a large-scale dataset for named entity recognition in threat intelligence. In: 2020 IEEE 19th International Conference on Trust, Security and Privacy in Computing and Communications (TrustCom), pp. 1842\u20131848. 
IEEE (2020)","DOI":"10.1109\/TrustCom50675.2020.00252"}],"container-title":["Lecture Notes in Computer Science","Advanced Data Mining and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-95-3456-2_11","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,10,17]],"date-time":"2025-10-17T22:03:45Z","timestamp":1760738625000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-95-3456-2_11"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,17]]},"ISBN":["9789819534555","9789819534562"],"references-count":17,"URL":"https:\/\/doi.org\/10.1007\/978-981-95-3456-2_11","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2025,10,17]]},"assertion":[{"value":"17 October 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ADMA","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Advanced Data Mining and Applications","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Kyoto","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Japan","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"22 October 
2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"24 October 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"21","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"adma2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/adma2025.github.io\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}