{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T12:16:08Z","timestamp":1780316168309,"version":"3.54.1"},"reference-count":44,"publisher":"American Chemical Society (ACS)","issue":"7","license":[{"start":{"date-parts":[[2023,3,20]],"date-time":"2023-03-20T00:00:00Z","timestamp":1679270400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"DOI":"10.13039\/501100000287","name":"Royal Academy of Engineering","doi-asserted-by":"publisher","award":["RCSRC\\1819\\7\\10"],"award-info":[{"award-number":["RCSRC\\1819\\7\\10"]}],"id":[{"id":"10.13039\/501100000287","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100000590","name":"Christ's College, University of Cambridge","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100000590","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100004543","name":"China Scholarship Council","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100004543","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100003343","name":"Cambridge Trust","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100003343","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["J. Chem. Inf. Model."],"published-print":{"date-parts":[[2023,4,10]]},"DOI":"10.1021\/acs.jcim.2c01259","type":"journal-article","created":{"date-parts":[[2023,3,20]],"date-time":"2023-03-20T19:48:46Z","timestamp":1679341726000},"page":"1961-1981","source":"Crossref","is-referenced-by-count":43,"title":["OpticalBERT and OpticalTable-SQA: Text- and Table-Based Language Models for the Optical-Materials Domain"],"prefix":"10.1021","volume":"63","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-2561-352X","authenticated-orcid":true,"given":"Jiuyang","family":"Zhao","sequence":"first","affiliation":[{"name":"Cavendish Laboratory, University of Cambridge, J. J. Thomson Avenue, Cambridge CB3 0HE, U.K."}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1901-8361","authenticated-orcid":true,"given":"Shu","family":"Huang","sequence":"additional","affiliation":[{"name":"Cavendish Laboratory, University of Cambridge, J. J. Thomson Avenue, Cambridge CB3 0HE, U.K."}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1552-8743","authenticated-orcid":true,"given":"Jacqueline M.","family":"Cole","sequence":"additional","affiliation":[{"name":"Cavendish Laboratory, University of Cambridge, J. J. Thomson Avenue, Cambridge CB3 0HE, U.K."},{"name":"ISIS Neutron and Muon Source, Rutherford Appleton Laboratory, Harwell Science and Innovation Campus, Didcot, Oxfordshire OX11 0QX, U.K."}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"316","published-online":{"date-parts":[[2023,3,20]]},"reference":[{"key":"ref1\/cit1","doi-asserted-by":"crossref","unstructured":"Garcia de Abajo, F. J. Engineering Materials with Extreme Optical Properties. In  Proceedings of the Photonic Metamaterials: From Random to Periodic, TuA2; Optical Society of America, 2006.","DOI":"10.1364\/META.2006.TuA2"},{"key":"ref2\/cit2","doi-asserted-by":"publisher","DOI":"10.1021\/ma502569r"},{"key":"ref3\/cit3","doi-asserted-by":"publisher","DOI":"10.1117\/1.JBO.24.7.071606"},{"key":"ref4\/cit4","doi-asserted-by":"publisher","DOI":"10.1021\/acs.jcim.6b00207"},{"key":"ref5\/cit5","doi-asserted-by":"publisher","DOI":"10.1021\/acs.jcim.1c00446"},{"key":"ref6\/cit6","doi-asserted-by":"publisher","DOI":"10.1038\/s41597-020-00602-2"},{"key":"ref7\/cit7","doi-asserted-by":"publisher","DOI":"10.1038\/sdata.2018.111"},{"key":"ref8\/cit8","doi-asserted-by":"publisher","DOI":"10.1038\/s41597-022-01295-5"},{"key":"ref9\/cit9","doi-asserted-by":"publisher","DOI":"10.1038\/s41597-019-0306-0"},{"key":"ref10\/cit10","doi-asserted-by":"publisher","DOI":"10.1038\/s41597-022-01355-w"},{"key":"ref11\/cit11","doi-asserted-by":"publisher","DOI":"10.1038\/s41597-022-01294-6"},{"key":"ref12\/cit12","doi-asserted-by":"publisher","DOI":"10.1038\/s41597-022-01301-w"},{"key":"ref13\/cit13","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/N18-1202"},{"key":"ref14\/cit14","unstructured":"Brown, T. B.; Mann, B.; Ryder, N.; Subbiah, M.; Kaplan, J.; Dhariwal, P.; Neelakantan, A.; Shyam, P.; Sastry, G.; Askell, A.; Agarwal, S.; Herbert-Voss, A.; Krueger, G.; Henighan, T.; Child, R.; Ramesh, A.; Ziegler, D. M.; Wu, J.; Winter, C.; Hesse, C.; Chen, M.; Sigler, E.; Litwin, M.; Gray, S.; Chess, B.; Clark, J.; Berner, C.; McCandlish, S.; Radford, A.; Sutskever, I.; Amodei, D. Language Models are Few-Shot Learners. 2020, 2005.14165.  ArXiv2005.14165. https:\/\/arxiv.org\/abs\/2005.14165 (accessed 2023-03-06)."},{"key":"ref15\/cit15","unstructured":"Devlin, J.; Chang, M.; Lee, K.; Toutanova, K. BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding. 2018, 1810.04805.  ArXiv1810.04805. https:\/\/arxiv.org\/abs\/1810.04805 (accessed 2023-03-06)."},{"key":"ref16\/cit16","unstructured":"Lee, J.; Yoon, W.; Kim, S.; Kim, D.; Kim, S.; So, C. H.; Kang, J. BioBERT: a pre-trained biomedical language representation model for biomedical text mining. 2019, 1901.08746.  ArXiv1901.08746. https:\/\/arxiv.org\/abs\/1901.08746 (accessed 2023-03-06)."},{"key":"ref17\/cit17","unstructured":"Gupta, T.; Zaki, M.; Krishnan, N. M. A.; Mausam. MatSciBERT: A Materials Domain Language Model for Text Mining and Information Extraction. 2021, 2109.15290.  ArXiv2109.15290. https:\/\/arxiv.org\/abs\/2109.15290 (accessed 2023-03-06)."},{"key":"ref18\/cit18","doi-asserted-by":"publisher","DOI":"10.1016\/j.patter.2022.100488"},{"key":"ref19\/cit19","doi-asserted-by":"publisher","DOI":"10.1021\/acs.jcim.2c00035"},{"key":"ref20\/cit20","unstructured":"National Science\nand Technology\nCouncil.  Materials Genome Initiative for Global Competitiveness; Executive Office of the President, National Science\nand Technology Council, 2011."},{"key":"ref21\/cit21","unstructured":"Vakulenko, S.; Savenkov, V. TableQA: Question Answering on Tabular Data.  2017, 1705.06504.  ArXiv1705.06504. https:\/\/arxiv.org\/abs\/1705.06504 (accessed 2023-03-06)."},{"key":"ref22\/cit22","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.acl-main.398"},{"key":"ref23\/cit23","unstructured":"Chemmengath, S. A.; Kumar, V.; Bharadwaj, S.; Sen, J.; Canim, M.; Chakrabarti, S.; Gliozzo, A.; Sankaranarayanan, K. Topic Transferable Table Question Answering. 2021, 2109.07377.  ArXiv2109.07377. https:\/\/arxiv.org\/abs\/2109.07377 (accessed 2023-03-06)."},{"key":"ref24\/cit24","unstructured":"Vaswani, A.; Shazeer, N.; Parmar, N.; Uszkoreit, J.; Jones, L.; Gomez, A. N.; Kaiser, L.; Polosukhin, I. Attention Is All You Need. 2017, 1706.03762.  ArXiv1706.03762.  https:\/\/arxiv.org\/abs\/1706.03762 (accessed 2023-03-06)."},{"key":"ref25\/cit25","unstructured":"Beltagy, I.; Cohan, A.; Lo, K. SciBERT: Pretrained Contextualized Embeddings for Scientific Text. 2019, 1903.10676.  ArXiv1903.10676. https:\/\/arxiv.org\/abs\/1903.10676 (accessed 2023-03-06)."},{"key":"ref26\/cit26","unstructured":"Wolf, T.; Debut, L.; Sanh, V.; Chaumond, J.; Delangue, C.; Moi, A.; Cistac, P.; Rault, T.; Louf, R.; Funtowicz, M.; Brew, J. HuggingFace\u2019s Transformers: State-of-the-art Natural Language Processing. 2019, 1910.03771.  ArXiv1910.03771. https:\/\/arxiv.org\/abs\/1910.03771 (accessed 2023-03-06)."},{"key":"ref27\/cit27","unstructured":"Wu, Y.; Schuster, M.; Chen, Z.; Le, Q. V.; Norouzi, M.; Macherey, W.; Krikun, M.; Cao, Y.; Gao, Q.; Macherey, K.; Klingner, J.; Shah, A.; Johnson, M.; Liu, X.; Kaiser, L.; Gouws, S.; Kato, Y.; Kudo, T.; Kazawa, H.; Stevens, K.; Kurian, G.; Patil, N.; Wang, W.; Young, C.; Smith, J.; Riesa, J.; Rudnick, A.; Vinyals, O.; Corrado, G.; Hughes, M.; Dean, J. Google\u2019s Neural Machine Translation System: Bridging the Gap between Human and Machine Translation. 2016, 1609.08144.  ArXiv1609.08144. https:\/\/arxiv.org\/abs\/1609.08144 (accessed 2023-03-06)."},{"key":"ref28\/cit28","doi-asserted-by":"publisher","DOI":"10.1080\/14786448008626877"},{"key":"ref29\/cit29","unstructured":"\u00c1cs, J. Exploring BERT\u2019s Vocabulary. http:\/\/juditacs.github.io\/2019\/02\/19\/bert-tokenization-stats.html (accessed 2022-05-30)."},{"key":"ref30\/cit30","unstructured":"Rust, P.; Pfeiffer, J.; Vulic, I.; Ruder, S.; Gurevych, I. How Good is Your Tokenizer? On the Monolingual Performance of Multilingual Language Models. 2020, 2012.15613.  ArXiv2012.15613. https:\/\/arxiv.org\/abs\/2012.15613 (accessed 2023-03-06)."},{"key":"ref31\/cit31","unstructured":"Paszke, A.; Gross, S.; Chintala, S.; Chanan, G.; Yang, E.; DeVito, Z.; Lin, Z.; Desmaison, A.; Antiga, L.; Lerer, A. Automatic Differentiation in PyTorch. In  NIPS 2017 Workshop on Autodiff ; 2017."},{"key":"ref32\/cit32","unstructured":"Liu, P.; Yuan, W.; Fu, J.; Jiang, Z.; Hayashi, H.; Neubig, G. Pre-train, Prompt, and Predict: A Systematic Survey of Prompting Methods in Natural Language Processing. 2021, 2107.13586.  ArXiv2107.13586. https:\/\/arxiv.org\/abs\/2107.13586 (accessed 2023-03-06)."},{"key":"ref33\/cit33","unstructured":"Rajpurkar, P.; Zhang, J.; Lopyrev, K.; Liang, P. SQuAD: 100, 000+ Questions for Machine Comprehension of Text. 2016, \/1606.05250.  ArXiv1606.05250. https:\/\/arxiv.org\/abs\/1606.05250 (accessed 2023-03-06)."},{"key":"ref34\/cit34","unstructured":"Wiese, G.; Weissenborn, D.; Neves, M. L. Neural Domain Adaptation for Biomedical Question Answering. 2017, 1706.03610.  ArXiv1706.03610. https:\/\/arxiv.org\/abs\/1706.03610 (accessed 2023-03-06)."},{"key":"ref35\/cit35","doi-asserted-by":"publisher","DOI":"10.1093\/bioinformatics\/bty449"},{"key":"ref36\/cit36","unstructured":"Wang, X.; Zhang, Y.; Ren, X.; Zhang, Y.; Zitnik, M.; Shang, J.; Langlotz, C. P.; Han, J. Cross-type Biomedical Named Entity Recognition with Deep Multi-Task Learning. 2018, 1801.09851.  ArXiv1801.09851. https:\/\/arxiv.org\/abs\/1801.09851 (accessed 2023-03-06)."},{"key":"ref37\/cit37","doi-asserted-by":"publisher","DOI":"10.1021\/acs.jcim.1c01199"},{"key":"ref38\/cit38","unstructured":"Sang, E. F. T. K.; Buchholz, S. Introduction to the CoNLL-2000 Shared Task: Chunking. 2000, cs.CL\/0009008.  arXiv:cs\/0009008. https:\/\/arxiv.org\/abs\/cs\/0009008 (accessed 2023-03-06)."},{"key":"ref39\/cit39","doi-asserted-by":"publisher","DOI":"10.1186\/1758-2946-7-S1-S2"},{"key":"ref40\/cit40","doi-asserted-by":"publisher","DOI":"10.1021\/acs.jcim.9b00470"},{"key":"ref41\/cit41","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P17-1167"},{"key":"ref42\/cit42","doi-asserted-by":"publisher","DOI":"10.21105\/joss.03021"},{"key":"ref43\/cit43","unstructured":"Liu, P.; Yuan, W.; Fu, J.; Jiang, Z.; Hayashi, H.; Neubig, G. Pre-train, Prompt, and Predict: A Systematic Survey of Prompting Methods in Natural Language Processing. 2021, abs\/2107.13586.  ArXiv2107.13586. https:\/\/arxiv.org\/abs\/2107.13586 (accessed 2023-03-06)."},{"key":"ref44\/cit44","unstructured":"Jain, S.; Wallace, B. C. Attention is not Explanation. 2019, 1902.10186.  ArXiv1902.10186.  https:\/\/arxiv.org\/abs\/1902.10186 (accessed 2023-03-06)."}],"container-title":["Journal of Chemical Information and Modeling"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/pubs.acs.org\/doi\/pdf\/10.1021\/acs.jcim.2c01259","content-type":"application\/pdf","content-version":"vor","intended-application":"unspecified"},{"URL":"https:\/\/pubs.acs.org\/doi\/pdf\/10.1021\/acs.jcim.2c01259","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,4,10]],"date-time":"2023-04-10T08:11:03Z","timestamp":1681114263000},"score":1,"resource":{"primary":{"URL":"https:\/\/pubs.acs.org\/doi\/10.1021\/acs.jcim.2c01259"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,3,20]]},"references-count":44,"journal-issue":{"issue":"7","published-print":{"date-parts":[[2023,4,10]]}},"alternative-id":["10.1021\/acs.jcim.2c01259"],"URL":"https:\/\/doi.org\/10.1021\/acs.jcim.2c01259","relation":{},"ISSN":["1549-9596","1549-960X"],"issn-type":[{"value":"1549-9596","type":"print"},{"value":"1549-960X","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023,3,20]]}}}