{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,26]],"date-time":"2026-02-26T08:53:43Z","timestamp":1772096023596,"version":"3.50.1"},"reference-count":36,"publisher":"American Chemical Society (ACS)","issue":"5","license":[{"start":{"date-parts":[[2025,2,11]],"date-time":"2025-02-11T00:00:00Z","timestamp":1739232000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"DOI":"10.13039\/501100000287","name":"Royal Academy of Engineering","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100000287","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100004349","name":"BASF","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100004349","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100004543","name":"China Scholarship Council","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100004543","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100003343","name":"Cambridge Trust","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100003343","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["J. Chem. Inf. Model."],"published-print":{"date-parts":[[2025,3,10]]},"DOI":"10.1021\/acs.jcim.4c02029","type":"journal-article","created":{"date-parts":[[2025,2,11]],"date-time":"2025-02-11T20:21:06Z","timestamp":1739305266000},"page":"2476-2486","source":"Crossref","is-referenced-by-count":7,"title":["Cost-Efficient Domain-Adaptive Pretraining of Language Models for Optoelectronics Applications"],"prefix":"10.1021","volume":"65","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-4974-4214","authenticated-orcid":true,"given":"Dingyun","family":"Huang","sequence":"first","affiliation":[{"name":"Cavendish Laboratory, Department of Physics, University of Cambridge, J. J. Thomson Avenue, Cambridge CB3 0HE, U.K."}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1552-8743","authenticated-orcid":true,"given":"Jacqueline M.","family":"Cole","sequence":"additional","affiliation":[{"name":"Cavendish Laboratory, Department of Physics, University of Cambridge, J. J. Thomson Avenue, Cambridge CB3 0HE, U.K."},{"name":"ISIS Neutron and Muon Source, Rutherford Appleton Laboratory, Harwell Science and Innovation Campus, Didcot, Oxfordshire OX11 0QX, U.K."}]}],"member":"316","published-online":{"date-parts":[[2025,2,11]]},"reference":[{"key":"ref1\/cit1","doi-asserted-by":"publisher","DOI":"10.1021\/acs.jcim.6b00207"},{"key":"ref2\/cit2","doi-asserted-by":"publisher","DOI":"10.1021\/acs.jcim.1c00446"},{"key":"ref3\/cit3","doi-asserted-by":"publisher","DOI":"10.1038\/s41597-020-00602-2"},{"key":"ref4\/cit4","doi-asserted-by":"publisher","DOI":"10.1038\/s41597-023-02897-3"},{"key":"ref5\/cit5","doi-asserted-by":"publisher","DOI":"10.1038\/s41597-023-02511-6"},{"key":"ref6\/cit6","doi-asserted-by":"publisher","DOI":"10.1038\/s41597-022-01295-5"},{"key":"ref7\/cit7","doi-asserted-by":"publisher","DOI":"10.1038\/s41597-023-02897-3"},{"key":"ref8\/cit8","doi-asserted-by":"publisher","DOI":"10.1038\/s41597-022-01355-w"},{"key":"ref9\/cit9","doi-asserted-by":"publisher","DOI":"10.1038\/s41597-022-01752-1"},{"key":"ref10\/cit10","doi-asserted-by":"publisher","DOI":"10.1038\/s41597-022-01301-w"},{"key":"ref11\/cit11","doi-asserted-by":"publisher","DOI":"10.1038\/s41586-019-1335-8"},{"key":"ref12\/cit12","doi-asserted-by":"publisher","DOI":"10.1021\/acsami.3c12301"},{"key":"ref13\/cit13","doi-asserted-by":"publisher","DOI":"10.1016\/j.matpr.2021.02.313"},{"key":"ref14\/cit14","unstructured":"Devlin, J.; Chang, M.W.; Lee, K.; Toutanova, K. BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding.  arXiv, 2024 http:\/\/arxiv.org\/abs\/1810.04805."},{"key":"ref15\/cit15","unstructured":"Vaswani, A.; Shazeer, N.; Parmar, N.; Uszkoreit, J.; Jones, L.; Gomez, A. N.; Kaiser, L.; Polosukhin, I. Attention Is All You Need.  arXiv, 2024 http:\/\/arxiv.org\/abs\/1706.03762."},{"key":"ref16\/cit16","doi-asserted-by":"publisher","DOI":"10.1021\/acs.jcim.1c01199"},{"key":"ref17\/cit17","doi-asserted-by":"publisher","DOI":"10.1021\/acs.jcim.2c01259"},{"key":"ref18\/cit18","unstructured":"Jiang, A. Q.  Mistral 7B.  arXiv, 2024 http:\/\/arxiv.org\/abs\/2310.06825."},{"key":"ref19\/cit19","unstructured":"Brown, T. B.  Language Models are Few-Shot Learners.  arXiv, 2024 http:\/\/arxiv.org\/abs\/2005.14165."},{"key":"ref20\/cit20","unstructured":"Touvron, H.  Llama 2: Open Foundation and Fine-Tuned Chat Models.  arXiv, 2024 http:\/\/arxiv.org\/abs\/2307.09288."},{"key":"ref21\/cit21","unstructured":"Lewis, P.; Perez, E.; Piktus, A.; Petroni, F.; Karpukhin, V.; Goyal, N.; K\u00fcttler, H.; Lewis, M.; Yih, W.t.; Rockt\u00e4schel, T.; Riedel, S.; Kiela, D. Retrieval-Augmented Generation for Knowledge-Intensive NLP Tasks.  arXiv, 2024 http:\/\/arxiv.org\/abs\/2005.11401,version:4."},{"key":"ref22\/cit22","unstructured":"Gao, Y.; Xiong, Y.; Gao, X.; Jia, K.; Pan, J.; Bi, Y.; Dai, Y.; Sun, J.; Guo, Q.; Wang, M.; Wang, H. Retrieval-Augmented Generation for Large Language Models: A Survey.  arXiv, 2024 http:\/\/arxiv.org\/abs\/2312.10997."},{"key":"ref23\/cit23","doi-asserted-by":"publisher","DOI":"10.1162\/tacl_a_00530"},{"key":"ref24\/cit24","doi-asserted-by":"publisher","DOI":"10.1021\/acs.jcim.4c00063"},{"key":"ref25\/cit25","unstructured":"Gururangan, S.; Marasovi\u0107, A.; Swayamdipta, S.; Lo, K.; Beltagy, I.; Downey, D.; Smith, N. A. Don\u2019t Stop Pretraining: Adapt Language Models to Domains and Tasks.  arXiv, 2024 http:\/\/arxiv.org\/abs\/2004.10964."},{"key":"ref26\/cit26","unstructured":"Liu, Y.; Ott, M.; Goyal, N.; Du, J.; Joshi, M.; Chen, D.; Levy, O.; Lewis, M.; Zettlemoyer, L.; Stoyanov, V. RoBERTa: A Robustly Optimized BERT Pretraining Approach.  arXiv, 2024 http:\/\/arxiv.org\/abs\/1907.11692."},{"key":"ref27\/cit27","unstructured":"Lan, Z.; Chen, M.; Goodman, S.; Gimpel, K.; Sharma, P.; Soricut, R. ALBERT: A Lite BERT for Self-supervised Learning of Language Representations.  arXiv, 2024 http:\/\/arxiv.org\/abs\/1909.11942,version:6."},{"key":"ref28\/cit28","first-page":"38","volume-title":"Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing: System Demonstrations","author":"Wolf T.","year":"2024"},{"key":"ref29\/cit29","unstructured":"Rajpurkar, P.; Zhang, J.; Lopyrev, K.; Liang, P. SQuAD: 100,000+ Questions for Machine Comprehension of Text.  arXiv, 2024 http:\/\/arxiv.org\/abs\/1606.05250."},{"key":"ref30\/cit30","unstructured":"Reimers, N.; Gurevych, I. Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks.  arXiv, 2024 http:\/\/arxiv.org\/abs\/1908.10084."},{"key":"ref31\/cit31","unstructured":"Li, Z.; Zhang, X.; Zhang, Y.; Long, D.; Xie, P.; Zhang, M. Towards General Text Embeddings with Multi-stage Contrastive Learning.  arXiv, 2024 http:\/\/arxiv.org\/abs\/2308.03281."},{"key":"ref32\/cit32","doi-asserted-by":"publisher","DOI":"10.1145\/3458754"},{"key":"ref33\/cit33","unstructured":"Oord, A. v. d.; Li, Y.; Vinyals, O. Representation Learning with Contrastive Predictive Coding.  arXiv, 2024 http:\/\/arxiv.org\/abs\/1807.03748."},{"key":"ref34\/cit34","doi-asserted-by":"crossref","unstructured":"Liu, N. F.; Lin, K.; Hewitt, J.; Paranjape, A.; Bevilacqua, M.; Petroni, F.; Liang, P. Lost in the Middle: How Language Models Use Long Contexts.  arXiv, 2024 http:\/\/arxiv.org\/abs\/2307.03172.","DOI":"10.1162\/tacl_a_00638"},{"key":"ref35\/cit35","doi-asserted-by":"crossref","unstructured":"Muennighoff, N.; Tazi, N.; Magne, L.; Reimers, N. MTEB: Massive Text Embedding Benchmark.  arXiv, 2024 http:\/\/arxiv.org\/abs\/2210.07316.","DOI":"10.18653\/v1\/2023.eacl-main.148"},{"key":"ref36\/cit36","unstructured":"MTEB Leaderboard - a Hugging Face Space\nby mteb. 2024 https:\/\/huggingface.co\/spaces\/mteb\/leaderboard."}],"container-title":["Journal of Chemical Information and Modeling"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/pubs.acs.org\/doi\/pdf\/10.1021\/acs.jcim.4c02029","content-type":"application\/pdf","content-version":"vor","intended-application":"unspecified"},{"URL":"https:\/\/pubs.acs.org\/doi\/pdf\/10.1021\/acs.jcim.4c02029","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,3,10]],"date-time":"2025-03-10T08:10:51Z","timestamp":1741594251000},"score":1,"resource":{"primary":{"URL":"https:\/\/pubs.acs.org\/doi\/10.1021\/acs.jcim.4c02029"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,2,11]]},"references-count":36,"journal-issue":{"issue":"5","published-print":{"date-parts":[[2025,3,10]]}},"alternative-id":["10.1021\/acs.jcim.4c02029"],"URL":"https:\/\/doi.org\/10.1021\/acs.jcim.4c02029","relation":{},"ISSN":["1549-9596","1549-960X"],"issn-type":[{"value":"1549-9596","type":"print"},{"value":"1549-960X","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,2,11]]}}}