{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,19]],"date-time":"2025-09-19T11:36:22Z","timestamp":1758281782920,"version":"3.44.0"},"reference-count":41,"publisher":"Springer Science and Business Media LLC","issue":"15","license":[{"start":{"date-parts":[[2025,9,19]],"date-time":"2025-09-19T00:00:00Z","timestamp":1758240000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,9,19]],"date-time":"2025-09-19T00:00:00Z","timestamp":1758240000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100003708","name":"Korea Institute of Science and Technology Information","doi-asserted-by":"publisher","award":["K24L4M2C5","K24L4M2C5","K24L4M2C5"],"award-info":[{"award-number":["K24L4M2C5","K24L4M2C5","K24L4M2C5"]}],"id":[{"id":"10.13039\/501100003708","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["J Supercomput"],"DOI":"10.1007\/s11227-025-07838-y","type":"journal-article","created":{"date-parts":[[2025,9,19]],"date-time":"2025-09-19T03:16:33Z","timestamp":1758251793000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Scilinkbert: a BERT-style language model for understanding scientific texts with citations"],"prefix":"10.1007","volume":"81","author":[{"given":"Ju-Yeon","family":"Yu","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Donghun","family":"Yang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Kyong-Ha","family":"Lee","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2025,9,19]]},"reference":[{"issue":"8016","key":"7838_CR1","doi-asserted-by":"publisher","first-page":"493","DOI":"10.1038\/s41586-024-07487-w","volume":"630","author":"J Abramson","year":"2024","unstructured":"Abramson J, Adler J, Dunger J et al (2024) Accurate structure prediction of biomolecular interactions with alphafold 3. Nature 630(8016):493\u2013500. https:\/\/doi.org\/10.1038\/s41586-024-07487-w","journal-title":"Nature"},{"key":"7838_CR2","unstructured":"Achiam J, Adler S, Agarwal S, et\u00a0al (2023) Gpt-4 technical report. arXiv preprint arXiv:2303.08774"},{"key":"7838_CR3","doi-asserted-by":"crossref","unstructured":"Beltagy I, Lo K, Cohan A (2019) Scibert: a pretrained language model for scientific text. arXiv preprint arXiv:1903.10676","DOI":"10.18653\/v1\/D19-1371"},{"key":"7838_CR4","unstructured":"Berens P, et\u00a0al (2023) Ai for science: an emerging agenda. arXiv preprint arXiv:2303.04217"},{"key":"7838_CR5","doi-asserted-by":"publisher","DOI":"10.1016\/j.drudis.2025.103XXX","author":"H Chen","year":"2025","unstructured":"Chen H et al (2025) The future of pharmaceuticals: artificial intelligence in drug discovery and development. Drug Discov Today. https:\/\/doi.org\/10.1016\/j.drudis.2025.103XXX","journal-title":"Drug Discov Today"},{"issue":"7","key":"7838_CR6","doi-asserted-by":"publisher","first-page":"5317","DOI":"10.1109\/JBHI.2025.3547444","volume":"29","author":"J Chen","year":"2025","unstructured":"Chen J, Wei Z, Shen W et al (2025) Infusing multi-hop medical knowledge into smaller language models for biomedical question answering. IEEE J Biomed Health Inform 29(7):5317\u20135328. https:\/\/doi.org\/10.1109\/JBHI.2025.3547444","journal-title":"IEEE J Biomed Health Inform"},{"key":"7838_CR7","unstructured":"Devlin J, Chang MW, Lee K, et\u00a0al (2018) Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805"},{"key":"7838_CR8","doi-asserted-by":"crossref","unstructured":"Gao T, Yao X, Chen D (2021) Simcse: simple contrastive learning of sentence embeddings. In: Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing (EMNLP), pp 6894\u20136910","DOI":"10.18653\/v1\/2021.emnlp-main.552"},{"issue":"1","key":"7838_CR9","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3458754","volume":"3","author":"Y Gu","year":"2021","unstructured":"Gu Y, Tinn R, Cheng H et al (2021) Domain-specific language model pretraining for biomedical natural language processing. ACM Trans Comput Healthcare (HEALTH) 3(1):1\u201323. https:\/\/doi.org\/10.1145\/3458754","journal-title":"ACM Trans Comput Healthcare (HEALTH)"},{"key":"7838_CR10","unstructured":"Guu K, Lee K, Tung Z, et\u00a0al (2020) Retrieval augmented language model pre-training. In: Proceedings of the 37th International Conference on Machine Learning (ICML), PMLR, pp 3929\u20133938"},{"key":"7838_CR11","doi-asserted-by":"crossref","unstructured":"He B, Zhou D, Xiao J, et\u00a0al (2019) Integrating graph contextualized knowledge into pre-trained language models. arXiv preprint arXiv:1912.00147","DOI":"10.18653\/v1\/2020.findings-emnlp.207"},{"key":"7838_CR12","unstructured":"Houlsby N, Giurgiu A, Jastrzebski S, et\u00a0al (2019) Parameter-efficient transfer learning for nlp. In: Proceedings of the International Conference on Machine Learning (ICML), PMLR, pp 2790\u20132799"},{"key":"7838_CR13","unstructured":"Hu EJ, Shen Y, Wallis P, et\u00a0al (2021) Lora: Low-rank adaptation of large language models. arXiv preprint arXiv:2106.09685"},{"key":"7838_CR14","unstructured":"Huang K, Altosaar J, Ranganath R (2019) Clinicalbert: modeling clinical notes and predicting hospital readmission. arXiv preprint arXiv:1904.05342"},{"key":"7838_CR15","first-page":"2235","volume":"29","author":"G Irving","year":"2016","unstructured":"Irving G, Szegedy C, Alemi AA et al (2016) Deepmath-deep sequence models for premise selection. Adv Neural Inf Process Syst 29:2235\u20132243","journal-title":"Adv Neural Inf Process Syst"},{"key":"7838_CR16","doi-asserted-by":"publisher","first-page":"60805","DOI":"10.1109\/ACCESS.2022.3180052","volume":"10","author":"Y Jeong","year":"2022","unstructured":"Jeong Y, Kim E (2022) Scideberta: learning deberta for science technology documents and fine-tuning information extraction tasks. IEEE Access 10:60805\u201360813. https:\/\/doi.org\/10.1109\/ACCESS.2022.3180052","journal-title":"IEEE Access"},{"issue":"7873","key":"7838_CR17","doi-asserted-by":"publisher","first-page":"583","DOI":"10.1038\/s41586-021-03819-2","volume":"596","author":"J Jumper","year":"2021","unstructured":"Jumper J, Evans R, Pritzel A et al (2021) Highly accurate protein structure prediction with alphafold. Nature 596(7873):583\u2013589. https:\/\/doi.org\/10.1038\/s41586-021-03819-2","journal-title":"Nature"},{"key":"7838_CR18","first-page":"1022","volume":"34","author":"R Karimi Mahabadi","year":"2021","unstructured":"Karimi Mahabadi R, Henderson J, Ruder S (2021) Compacter: efficient low-rank hypercomplex adapter layers. Adv Neural Inf Process Syst 34:1022\u20131035","journal-title":"Adv Neural Inf Process Syst"},{"issue":"4","key":"7838_CR19","doi-asserted-by":"publisher","first-page":"1234","DOI":"10.1093\/bioinformatics\/btz682","volume":"36","author":"J Lee","year":"2020","unstructured":"Lee J, Yoon W, Kim S et al (2020) Biobert: a pre-trained biomedical language representation model for biomedical text mining. Bioinformatics 36(4):1234\u20131240. https:\/\/doi.org\/10.1093\/bioinformatics\/btz682","journal-title":"Bioinformatics"},{"key":"7838_CR20","first-page":"9459","volume":"33","author":"P Lewis","year":"2020","unstructured":"Lewis P, Perez E, Piktus A et al (2020) Retrieval-augmented generation for knowledge-intensive nlp tasks. Adv Neural Inf Process Syst 33:9459\u20139474","journal-title":"Adv Neural Inf Process Syst"},{"key":"7838_CR21","doi-asserted-by":"crossref","unstructured":"Li XL, Liang P (2021) Prefix-tuning: Optimizing continuous prompts for generation. arXiv preprint arXiv:2101.00190","DOI":"10.18653\/v1\/2021.acl-long.353"},{"key":"7838_CR22","doi-asserted-by":"crossref","unstructured":"Liu W, Zhou P, Zhao Z, et\u00a0al (2020) K-bert: Enabling language representation with knowledge graph. In: Proceedings of the AAAI Conference on Artificial Intelligence, pp 2901\u20132908","DOI":"10.1609\/aaai.v34i03.5681"},{"key":"7838_CR23","unstructured":"Lu C, Lu C, Lange RT, et\u00a0al (2024) The ai scientist: towards fully automated open-ended scientific discovery. arXiv preprint arXiv:2408.06292"},{"issue":"3","key":"7838_CR24","doi-asserted-by":"publisher","first-page":"773","DOI":"10.1016\/j.drudis.2018.11.014","volume":"24","author":"KK Mak","year":"2019","unstructured":"Mak KK, Pichika MR (2019) Artificial intelligence in drug development: present status and future prospects. Drug Discov Today 24(3):773\u2013780. https:\/\/doi.org\/10.1016\/j.drudis.2018.11.014","journal-title":"Drug Discov Today"},{"key":"7838_CR25","unstructured":"Ostendorff M, Bourgonje P, Berrios M, et\u00a0al (2020) Learning neural textual representations for citation recommendation. In: Proceedings of the 12th Language Resources and Evaluation Conference. European Language Resources Association, pp 2536\u20132544"},{"issue":"6","key":"7838_CR26","doi-asserted-by":"publisher","first-page":"1687","DOI":"10.3390\/pharmaceutics15061687","volume":"15","author":"D Paul","year":"2023","unstructured":"Paul D et al (2023) The role of AI in drug discovery: challenges, opportunities, and strategies. Pharmaceutics 15(6):1687. https:\/\/doi.org\/10.3390\/pharmaceutics15061687","journal-title":"Pharmaceutics"},{"key":"7838_CR27","doi-asserted-by":"crossref","unstructured":"Peng Y, Yan S, Lu Z (2019) Transfer learning in biomedical natural language processing: an evaluation of bert and elmo on ten benchmarking datasets. CoRR arXiv:1906.05474","DOI":"10.18653\/v1\/W19-5006"},{"key":"7838_CR28","doi-asserted-by":"crossref","unstructured":"Rajpurkar P, Zhang J, Lopyrev K, et\u00a0al (2016) Squad: 100,000+ questions for machine comprehension of text. arXiv preprint arXiv:1606.05250","DOI":"10.18653\/v1\/D16-1264"},{"issue":"1","key":"7838_CR29","doi-asserted-by":"publisher","first-page":"vbae133","DOI":"10.1093\/bioadv\/vbae133","volume":"4","author":"H Rehana","year":"2024","unstructured":"Rehana H, \u00c7am NB, Basmaci M et al (2024) Evaluating GPT and BERT models for protein-protein interaction identification in biomedical text. Bioinf Adv 4(1):vbae133. https:\/\/doi.org\/10.1093\/bioadv\/vbae133","journal-title":"Bioinf Adv"},{"key":"7838_CR30","doi-asserted-by":"crossref","unstructured":"Reimers N, Gurevych I (2019) Sentence-bert: sentence embeddings using siamese bert-networks. arXiv preprint arXiv:1908.10084","DOI":"10.18653\/v1\/D19-1410"},{"key":"7838_CR31","unstructured":"Researchers C (2024) minicodeprops: a minimal AI benchmark for proving code properties. Carnegie Mellon University"},{"issue":"2","key":"7838_CR32","doi-asserted-by":"publisher","first-page":"381","DOI":"10.1073\/pnas.98.2.381","volume":"98","author":"RJ Roberts","year":"2001","unstructured":"Roberts RJ (2001) Pubmed central: the genbank of the published literature. Proc Natl Acad Sci 98(2):381\u2013382","journal-title":"Proc Natl Acad Sci"},{"key":"7838_CR33","doi-asserted-by":"crossref","unstructured":"Sun T, Shao Y, Qiu X, et\u00a0al (2020) Colake: contextualized language and knowledge embedding. arXiv preprint arXiv:2010.00309","DOI":"10.18653\/v1\/2020.coling-main.327"},{"issue":"7990","key":"7838_CR34","doi-asserted-by":"publisher","first-page":"86","DOI":"10.1038\/s41586-023-06734-w","volume":"624","author":"NJ Szymanski","year":"2023","unstructured":"Szymanski NJ, Rendy B, Fei Y et al (2023) An autonomous laboratory for the accelerated synthesis of novel materials. Nature 624(7990):86\u201391. https:\/\/doi.org\/10.1038\/s41586-023-06734-w","journal-title":"Nature"},{"key":"7838_CR35","unstructured":"Urban J, et\u00a0al (2023) Machine learning and automated theorem proving. Tech Rep, University of Cambridge"},{"issue":"D1","key":"7838_CR36","doi-asserted-by":"publisher","first-page":"D368","DOI":"10.1093\/nar\/gkad1011","volume":"52","author":"M Varadi","year":"2024","unstructured":"Varadi M et al (2024) Alphafold protein structure database in 2024: providing structure coverage for over 214 million protein sequences. Nucleic Acids Res 52(D1):D368\u2013D375. https:\/\/doi.org\/10.1093\/nar\/gkad1011","journal-title":"Nucleic Acids Res"},{"key":"7838_CR37","doi-asserted-by":"crossref","unstructured":"Wang A, Singh A, Michael J, et\u00a0al (2018) Glue: a multi-task benchmark and analysis platform for natural language understanding. arXiv preprint arXiv:1804.07461","DOI":"10.18653\/v1\/W18-5446"},{"issue":"7972","key":"7838_CR38","doi-asserted-by":"publisher","first-page":"47","DOI":"10.1038\/s41586-023-06221-2","volume":"620","author":"H Wang","year":"2023","unstructured":"Wang H, Fu T, Du Y et al (2023) Scientific discovery in the age of artificial intelligence. Nature 620(7972):47\u201360. https:\/\/doi.org\/10.1038\/s41586-023-06221-2","journal-title":"Nature"},{"key":"7838_CR39","doi-asserted-by":"crossref","unstructured":"Yasunaga M, Leskovec J, Liang P (2022) Linkbert: pretraining language models with document links. arXiv preprint arXiv:2203.15827","DOI":"10.18653\/v1\/2022.acl-long.551"},{"key":"7838_CR40","unstructured":"Zaken EB, Ravfogel S, Goldberg Y (2021) Bitfit: simple parameter-efficient fine-tuning for transformer-based masked language models. arXiv preprint arXiv:2106.10199"},{"key":"7838_CR41","volume-title":"Ushizima D (2024) Methods and applications of autonomous experimentation","author":"MM Noack","year":"2024","unstructured":"Noack MM (2024) Ushizima D (2024) Methods and applications of autonomous experimentation, 1st edn. CRC Press, Taylor & Francis Group, Boca Raton. ISBN: 978-1-032-31465-5","edition":"1"}],"container-title":["The Journal of Supercomputing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11227-025-07838-y.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11227-025-07838-y\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11227-025-07838-y.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,19]],"date-time":"2025-09-19T03:16:43Z","timestamp":1758251803000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11227-025-07838-y"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,9,19]]},"references-count":41,"journal-issue":{"issue":"15","published-online":{"date-parts":[[2025,10]]}},"alternative-id":["7838"],"URL":"https:\/\/doi.org\/10.1007\/s11227-025-07838-y","relation":{},"ISSN":["1573-0484"],"issn-type":[{"value":"1573-0484","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,9,19]]},"assertion":[{"value":"10 May 2025","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"4 September 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"19 September 2025","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare no Conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}],"article-number":"1356"}}