{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,27]],"date-time":"2026-03-27T20:01:13Z","timestamp":1774641673656,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":55,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,3,4]],"date-time":"2024-03-04T00:00:00Z","timestamp":1709510400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/501100006374","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["No.62020106005,61960206002,42050105,62061146002,62106143"],"award-info":[{"award-number":["No.62020106005,61960206002,42050105,62061146002,62106143"]}],"id":[{"id":"10.13039\/501100006374","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Shanghai Pilot Program for Basic Research"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,3,4]]},"DOI":"10.1145\/3616855.3635772","type":"proceedings-article","created":{"date-parts":[[2024,3,4]],"date-time":"2024-03-04T18:18:12Z","timestamp":1709576292000},"page":"161-170","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":63,"title":["K2: A Foundation Language Model for Geoscience Knowledge Understanding and Utilization"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-3171-823X","authenticated-orcid":false,"given":"Cheng","family":"Deng","sequence":"first","affiliation":[{"name":"Shanghai Jiao Tong University, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-6234-4409","authenticated-orcid":false,"given":"Tianhang","family":"Zhang","sequence":"additional","affiliation":[{"name":"Shanghai Jiao Tong University, Shanghai, 
China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5863-5370","authenticated-orcid":false,"given":"Zhongmou","family":"He","sequence":"additional","affiliation":[{"name":"Shanghai Jiao Tong University, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-7349-4546","authenticated-orcid":false,"given":"Qiyuan","family":"Chen","sequence":"additional","affiliation":[{"name":"Shanghai Jiao Tong University, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-0791-015X","authenticated-orcid":false,"given":"Yuanyuan","family":"Shi","sequence":"additional","affiliation":[{"name":"Shanghai Jiao Tong University, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5280-6132","authenticated-orcid":false,"given":"Yi","family":"Xu","sequence":"additional","affiliation":[{"name":"Shanghai Jiao Tong University, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7796-9168","authenticated-orcid":false,"given":"Luoyi","family":"Fu","sequence":"additional","affiliation":[{"name":"Shanghai Jiao Tong University, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0127-2425","authenticated-orcid":false,"given":"Weinan","family":"Zhang","sequence":"additional","affiliation":[{"name":"Shanghai Jiao Tong University, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0357-8356","authenticated-orcid":false,"given":"Xinbing","family":"Wang","sequence":"additional","affiliation":[{"name":"Shanghai Jiao Tong University, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3331-2302","authenticated-orcid":false,"given":"Chenghu","family":"Zhou","sequence":"additional","affiliation":[{"name":"Shanghai Jiao Tong University &amp; Institute of Geographical Science an Natural Resources Research, Chinese Academy of Sciences, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-7204-0689","authenticated-orcid":false,"given":"Zhouhan","family":"Lin","sequence":"additional","affiliation":[{"name":"Shanghai Jiao Tong University, 
Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-9559-6941","authenticated-orcid":false,"given":"Junxian","family":"He","sequence":"additional","affiliation":[{"name":"Shanghai Jiao Tong University, Shanghai, China"}]}],"member":"320","published-online":{"date-parts":[[2024,3,4]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"SciBERT: A Pretrained Language Model for Scientific Text. In Conference on Empirical Methods in Natural Language Processing.","author":"Beltagy Iz","year":"2019","unstructured":"Iz Beltagy, Kyle Lo, and Arman Cohan. 2019. SciBERT: A Pretrained Language Model for Scientific Text. In Conference on Empirical Methods in Natural Language Processing."},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1130\/SPE501"},{"key":"e_1_3_2_1_3_1","volume-title":"Xing","author":"Chiang Wei-Lin","year":"2023","unstructured":"Wei-Lin Chiang, Zhuohan Li, Zi Lin, Ying Sheng, Zhanghao Wu, Hao Zhang, Lianmin Zheng, Siyuan Zhuang, Yonghao Zhuang, Joseph E. Gonzalez, Ion Stoica, and Eric P. Xing. 2023. Vicuna: An Open-Source Chatbot Impressing GPT-4 with 90%* ChatGPT Quality. https:\/\/vicuna.lmsys.org"},{"key":"e_1_3_2_1_4_1","unstructured":"Hyung Won Chung Le Hou S. Longpre Barret Zoph Yi Tay William Fedus Eric Li Xuezhi Wang Mostafa Dehghani Siddhartha Brahma Albert Webson Shixiang Shane Gu Zhuyun Dai Mirac Suzgun Xinyun Chen Aakanksha Chowdhery Dasha Valter Sharan Narang Gaurav Mishra Adams Wei Yu Vincent Zhao Yanping Huang Andrew M. Dai Hongkun Yu Slav Petrov Ed Huai hsin Chi Jeff Dean Jacob Devlin Adam Roberts Denny Zhou Quoc V. Le and Jason Wei. 2022. Scaling Instruction-Finetuned Language Models. ArXiv Vol. abs\/2210.11416 (2022)."},{"key":"e_1_3_2_1_5_1","volume-title":"Think you have Solved Question Answering? Try ARC, the AI2 Reasoning Challenge. ArXiv","author":"Clark Peter","year":"2018","unstructured":"Peter Clark, Isaac Cowhey, Oren Etzioni, Tushar Khot, Ashish Sabharwal, Carissa Schoenick, and Oyvind Tafjord. 2018. 
Think you have Solved Question Answering? Try ARC, the AI2 Reasoning Challenge. ArXiv , Vol. abs\/1803.05457 (2018)."},{"key":"e_1_3_2_1_6_1","volume-title":"Hello Dolly: Democratizing the magic of ChatGPT with open models. https:\/\/www.databricks.com\/blog\/2023\/03\/24\/hello-dolly-democratizing-magic-chatgpt-open-models.html","year":"2023","unstructured":"Databricks. 2023. Hello Dolly: Democratizing the magic of ChatGPT with open models. https:\/\/www.databricks.com\/blog\/2023\/03\/24\/hello-dolly-democratizing-magic-chatgpt-open-models.html"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1145\/3459637.3482003"},{"key":"e_1_3_2_1_8_1","volume-title":"PK-Chat: Pointer Network Guided Knowledge Driven Generative Dialogue Model. arXiv preprint arXiv:2304.00592","author":"Deng Cheng","year":"2023","unstructured":"Cheng Deng, Bo Tong, Luoyi Fu, Jiaxin Ding, Dexing Cao, Xinbing Wang, and Chenghu Zhou. 2023. PK-Chat: Pointer Network Guided Knowledge Driven Generative Dialogue Model. arXiv preprint arXiv:2304.00592 (2023)."},{"key":"e_1_3_2_1_9_1","volume-title":"November 17, 2021","author":"Denli Huseyin","year":"2021","unstructured":"Huseyin Denli, HassanJaved Chughtai, Brian Hughes, Robert Gistri, and Peng Xu. 2021. Geoscience Language Processing for Exploration. Day 3 Wed, November 17, 2021 (2021)."},{"key":"e_1_3_2_1_10_1","volume-title":"A Multi-Modal Geographic Pre-Training Method. ArXiv","author":"Ding Ruixue","year":"2023","unstructured":"Ruixue Ding, Boli Chen, Pengjun Xie, Fei Huang, Xin Li, Qiang-Wei Zhang, and Yao Xu. 2023. A Multi-Modal Geographic Pre-Training Method. ArXiv , Vol. abs\/2301.04283 (2023)."},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1007\/s10115-020-01532-6"},{"key":"e_1_3_2_1_12_1","volume-title":"GPTScore: Evaluate as You Desire. ArXiv","author":"Fu Jinlan","year":"2023","unstructured":"Jinlan Fu, See-Kiong Ng, Zhengbao Jiang, and Pengfei Liu. 2023. GPTScore: Evaluate as You Desire. ArXiv , Vol. 
abs\/2302.04166 (2023)."},{"key":"e_1_3_2_1_13_1","volume-title":"Sid Black, Laurence Golding, Travis Hoppe, Charles Foster, Jason Phang, Horace He, Anish Thite, Noa Nabeshima, Shawn Presser, and Connor Leahy.","author":"Gao Leo","year":"2020","unstructured":"Leo Gao, Stella Rose Biderman, Sid Black, Laurence Golding, Travis Hoppe, Charles Foster, Jason Phang, Horace He, Anish Thite, Noa Nabeshima, Shawn Presser, and Connor Leahy. 2020. The Pile: An 800GB Dataset of Diverse Text for Language Modeling. ArXiv , Vol. abs\/2101.00027 (2020)."},{"key":"e_1_3_2_1_14_1","volume-title":"Koala: A Dialogue Model for Academic Research. Blog post. https:\/\/bair.berkeley.edu\/blog\/2023\/04\/03\/koala\/","author":"Geng Xinyang","year":"2023","unstructured":"Xinyang Geng, Arnav Gudibande, Hao Liu, Eric Wallace, Pieter Abbeel, Sergey Levine, and Dawn Song. 2023. Koala: A Dialogue Model for Academic Research. Blog post. https:\/\/bair.berkeley.edu\/blog\/2023\/04\/03\/koala\/"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1073\/pnas.2305016120"},{"key":"e_1_3_2_1_16_1","volume-title":"MatSciBERT: A materials domain language model for text mining and information extraction. npj Computational Materials","author":"Gupta Tanishq","year":"2021","unstructured":"Tanishq Gupta, Mohd Zaki, N. Krishnan, and Mausam. 2021. MatSciBERT: A materials domain language model for text mining and information extraction. npj Computational Materials , Vol. 8 (2021), 1--11."},{"key":"e_1_3_2_1_17_1","volume-title":"Parameter-Efficient Transfer Learning for NLP. In International Conference on Machine Learning.","author":"Houlsby Neil","year":"2019","unstructured":"Neil Houlsby, Andrei Giurgiu, Stanislaw Jastrzebski, Bruna Morrone, Quentin de Laroussilhe, Andrea Gesmundo, Mona Attariyan, and Sylvain Gelly. 2019. Parameter-Efficient Transfer Learning for NLP. 
In International Conference on Machine Learning."},{"key":"e_1_3_2_1_18_1","volume-title":"LoRA: Low-Rank Adaptation of Large Language Models. ArXiv","author":"Hu Edward J.","year":"2021","unstructured":"Edward J. Hu, Yelong Shen, Phillip Wallis, Zeyuan Allen-Zhu, Yuanzhi Li, Shean Wang, and Weizhu Chen. 2021. LoRA: Low-Rank Adaptation of Large Language Models. ArXiv , Vol. abs\/2106.09685 (2021)."},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1145\/3534678.3539021"},{"key":"e_1_3_2_1_20_1","volume-title":"Scaling Laws for Neural Language Models. ArXiv","author":"Kaplan Jared","year":"2020","unstructured":"Jared Kaplan, Sam McCandlish, T. J. Henighan, Tom B. Brown, Benjamin Chess, Rewon Child, Scott Gray, Alec Radford, Jeff Wu, and Dario Amodei. 2020. Scaling Laws for Neural Language Models. ArXiv , Vol. abs\/2001.08361 (2020)."},{"key":"e_1_3_2_1_21_1","volume-title":"Dobson","author":"Kraljevic Zeljko","year":"2021","unstructured":"Zeljko Kraljevic, Anthony Shek, Daniel M Bean, Rebecca Bendayan, James T. H. Teo, and Richard J. B. Dobson. 2021. MedGPT: Medical Concept Prediction from Clinical Narratives. ArXiv , Vol. abs\/2107.03134 (2021)."},{"key":"e_1_3_2_1_22_1","volume-title":"The Power of Scale for Parameter-Efficient Prompt Tuning. ArXiv","author":"Lester Brian","year":"2021","unstructured":"Brian Lester, Rami Al-Rfou, and Noah Constant. 2021. The Power of Scale for Parameter-Efficient Prompt Tuning. ArXiv , Vol. abs\/2104.08691 (2021)."},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.acl-long.353"},{"key":"e_1_3_2_1_24_1","volume-title":"Geo-BERT Pre-training Model for Query Rewriting in POI Search. In Conference on Empirical Methods in Natural Language Processing.","author":"Liu Xiao","year":"2021","unstructured":"Xiao Liu, Juan Hu, Qi Shen, and Huan Chen. 2021. Geo-BERT Pre-training Model for Query Rewriting in POI Search. 
In Conference on Empirical Methods in Natural Language Processing."},{"key":"e_1_3_2_1_25_1","volume-title":"Yi Tay, Denny Zhou, Quoc V. Le, Barret Zoph, Jason Wei, and Adam Roberts.","author":"Longpre S.","year":"2023","unstructured":"S. Longpre, Le Hou, Tu Vu, Albert Webson, Hyung Won Chung, Yi Tay, Denny Zhou, Quoc V. Le, Barret Zoph, Jason Wei, and Adam Roberts. 2023. The Flan Collection: Designing Data and Methods for Effective Instruction Tuning. ArXiv , Vol. abs\/2301.13688 (2023)."},{"key":"e_1_3_2_1_26_1","volume-title":"DataExpo: A One-Stop Dataset Service for Open Science Research. Companion Proceedings of the ACM Web Conference 2023","author":"Lu Bin","year":"2023","unstructured":"Bin Lu, Lyuwen Wu, Lina Yang, Chenxing Sun, Wei Liu, Xiaoying Gan, Shiyu Liang, Luoyi Fu, Xinbing Wang, and Cheng Zhou. 2023. DataExpo: A One-Stop Dataset Service for Open Science Research. Companion Proceedings of the ACM Web Conference 2023 (2023)."},{"key":"e_1_3_2_1_27_1","volume-title":"BioGPT: Generative Pre-trained Transformer for Biomedical Text Generation and Mining. Briefings in bioinformatics","author":"Luo Renqian","year":"2022","unstructured":"Renqian Luo, Liai Sun, Yingce Xia, Tao Qin, Sheng Zhang, Hoifung Poon, and Tie-Yan Liu. 2022. BioGPT: Generative Pre-trained Transformer for Biomedical Text Generation and Mining. Briefings in bioinformatics (2022)."},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1007\/s12145-021-00695-2"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.cageo.2020.104620"},{"key":"e_1_3_2_1_30_1","volume-title":"On the Opportunities and Challenges of Foundation Models for Geospatial Artificial Intelligence. ArXiv","author":"Mai Gengchen","year":"2023","unstructured":"Gengchen Mai, Weiming Huang, Jin Sun, Suhang Song, Deepak Mishra, Ninghao Liu, Song Gao, Tianming Liu, G. Cong, Yingjie Hu, Chris Cundy, Ziyuan Li, Rui Zhu, and Ni Lao. 2023. 
On the Opportunities and Challenges of Foundation Models for Geospatial Artificial Intelligence. ArXiv , Vol. abs\/2304.06798 (2023)."},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1080\/13658816.2021.2004602"},{"key":"e_1_3_2_1_32_1","volume-title":"Natural Instructions: Benchmarking Generalization to New Tasks from Natural Language Instructions. arXiv preprint arXiv:2104.08773","author":"Mishra Swaroop","year":"2021","unstructured":"Swaroop Mishra, Daniel Khashabi, Chitta Baral, and Hannaneh Hajishirzi. 2021. Natural Instructions: Benchmarking Generalization to New Tasks from Natural Language Instructions. arXiv preprint arXiv:2104.08773 (2021)."},{"key":"e_1_3_2_1_33_1","unstructured":"Erik Nijkamp Bo Pang Hiroaki Hayashi Lifu Tu Haiquan Wang Yingbo Zhou Silvio Savarese and Caiming Xiong. 2022. CodeGen: An Open Large Language Model for Code with Multi-Turn Program Synthesis."},{"key":"e_1_3_2_1_34_1","volume-title":"https:\/\/openai.com\/blog\/chatgpt","author":"Introducing AI.","year":"2022","unstructured":"OpenAI. 2022. Introducing ChatGPT. (2022). https:\/\/openai.com\/blog\/chatgpt"},{"key":"e_1_3_2_1_35_1","unstructured":"OpenAI. 2023. GPT-4 Technical Report. ArXiv Vol. abs\/2303.08774 (2023)."},{"key":"e_1_3_2_1_36_1","volume-title":"Word embeddings for application in geosciences: development, evaluation, and examples of soil-related concepts. SOIL","author":"Padarian Jos\u00e9","year":"2019","unstructured":"Jos\u00e9 Padarian and Ignacio Fuentes. 2019. Word embeddings for application in geosciences: development, evaluation, and examples of soil-related concepts. SOIL (2019)."},{"key":"e_1_3_2_1_37_1","volume-title":"ToolLLM: Facilitating Large Language Models to Master 16000 Real-world APIs. ArXiv","author":"Qin Yujia","year":"2023","unstructured":"Yujia Qin, Shi Liang, Yining Ye, Kunlun Zhu, Lan Yan, Ya-Ting Lu, Yankai Lin, Xin Cong, Xiangru Tang, Bill Qian, Sihan Zhao, Runchu Tian, Ruobing Xie, Jie Zhou, Marc H. 
Gerstein, Dahai Li, Zhiyuan Liu, and Maosong Sun. 2023. ToolLLM: Facilitating Large Language Models to Master 16000 Real-world APIs. ArXiv , Vol. abs\/2307.16789 (2023). https:\/\/api.semanticscholar.org\/CorpusID:260334759"},{"key":"e_1_3_2_1_38_1","unstructured":"Alec Radford Jeff Wu Rewon Child David Luan Dario Amodei and Ilya Sutskever. 2019. Language Models are Unsupervised Multitask Learners. (2019)."},{"key":"e_1_3_2_1_39_1","volume-title":"Liu","author":"Raffel Colin","year":"2019","unstructured":"Colin Raffel, Noam M. Shazeer, Adam Roberts, Katherine Lee, Sharan Narang, Michael Matena, Yanqi Zhou, Wei Li, and Peter J. Liu. 2019. Exploring the Limits of Transfer Learning with a Unified Text-to-Text Transformer. ArXiv , Vol. abs\/1910.10683 (2019)."},{"key":"e_1_3_2_1_40_1","unstructured":"Victor Sanh Albert Webson Colin Raffel Stephen H. Bach Lintang Sutawika Zaid Alyafeai Antoine Chaffin Arnaud Stiegler Teven Le Scao Arun Raja Manan Dey M Saiful Bari Canwen Xu Urmish Thakker Shanya Sharma Eliza Szczechla Taewoon Kim Gunjan Chhablani Nihal V. Nayak Debajyoti Datta Jonathan Chang Mike Tian-Jian Jiang Han Wang Matteo Manica Sheng Shen Zheng Xin Yong Harshit Pandey Rachel Bawden Thomas Wang Trishala Neeraj Jos Rozen Abheesht Sharma Andrea Santilli Thibault F\u00e9vry Jason Alan Fries Ryan Teehan Stella Rose Biderman Leo Gao Tali Bers Thomas Wolf and Alexander M. Rush. 2021. Multitask Prompted Training Enables Zero-Shot Task Generalization. ArXiv Vol. abs\/2110.08207 (2021)."},{"key":"e_1_3_2_1_41_1","volume-title":"Bio-Megatron: Larger Biomedical Domain Language Model. ArXiv","author":"Shin Hoo-Chang","year":"2020","unstructured":"Hoo-Chang Shin, Yang Zhang, Evelina Bakhturina, Raul Puri, Mostofa Patwary, Mohammad Shoeybi, and Raghav Mani. 2020. Bio-Megatron: Larger Biomedical Domain Language Model. ArXiv , Vol. abs\/2010.06060 (2020)."},{"key":"e_1_3_2_1_42_1","unstructured":"K. 
Singhal Shekoofeh Azizi Tao Tu Said Mahdavi Jason Lee Kai Wei Hyung Won Chung Nathan Scales Ajay Kumar Tanwani Heather J. Cole-Lewis Stephen J. Pfohl P A Payne Martin G. Seneviratne Paul Gamble Chris Kelly Nathaneal Scharli Aakanksha Chowdhery P. A. Mansfield Blaise Ag\u00fcera y Arcas Dale R. Webster Greg S. Corrado Y. Matias Katherine Hui-Ling Chou Juraj Gottweis Nenad Tomavsev Yun Liu Alvin Rajkomar Jo\u00eblle K. Barral Christopher Semturs Alan Karthikesalingam and Vivek Natarajan. 2022. Large Language Models Encode Clinical Knowledge. ArXiv Vol. abs\/2212.13138 (2022)."},{"key":"e_1_3_2_1_43_1","volume-title":"Hashimoto","author":"Taori Rohan","year":"2023","unstructured":"Rohan Taori, Ishaan Gulrajani, Tianyi Zhang, Yann Dubois, Xuechen Li, Carlos Guestrin, Percy Liang, and Tatsunori B. Hashimoto. 2023. Stanford Alpaca: An Instruction-following LLaMA model. https:\/\/github.com\/tatsu-lab\/stanford_alpaca."},{"key":"e_1_3_2_1_44_1","volume-title":"Galactica: A Large Language Model for Science. ArXiv","author":"Taylor Ross","year":"2022","unstructured":"Ross Taylor, Marcin Kardas, Guillem Cucurull, Thomas Scialom, Anthony S. Hartshorn, Elvis Saravia, Andrew Poulton, Viktor Kerkez, and Robert Stojnic. 2022. Galactica: A Large Language Model for Science. ArXiv , Vol. abs\/2211.09085 (2022)."},{"key":"e_1_3_2_1_45_1","unstructured":"MosaicML NLP Team. 2023. Introducing MPT-7B: A New Standard for Open-Source, Commercially Usable LLMs. (2023). www.mosaicml.com\/blog\/mpt-7b"},{"key":"e_1_3_2_1_46_1","volume-title":"LLaMA: Open and Efficient Foundation Language Models. ArXiv","author":"Touvron Hugo","year":"2023","unstructured":"Hugo Touvron, Thibaut Lavril, Gautier Izacard, Xavier Martinet, Marie-Anne Lachaux, Timoth\u00e9e Lacroix, Baptiste Rozi\u00e8re, Naman Goyal, Eric Hambro, Faisal Azhar, Aur'elien Rodriguez, Armand Joulin, Edouard Grave, and Guillaume Lample. 2023. LLaMA: Open and Efficient Foundation Language Models. ArXiv , Vol. 
abs\/2302.13971 (2023)."},{"key":"e_1_3_2_1_47_1","volume-title":"Pre-trained Language Models in Biomedical Domain: A Systematic Survey. ArXiv","author":"Wang Benyou","year":"2021","unstructured":"Benyou Wang, Qianqian Xie, Jiahuan Pei, Prayag Tiwari, Zhao Li, and Jie Fu. 2021b. Pre-trained Language Models in Biomedical Domain: A Systematic Survey. ArXiv , Vol. abs\/2110.05006 (2021)."},{"key":"e_1_3_2_1_48_1","volume-title":"Schiffries","author":"Wang Chengshan","year":"2021","unstructured":"Chengshan Wang, Robert M. Hazen, Qiuming Cheng, Michael H. Stephenson, Chenghu Zhou, Peter A. Fox, Shu'zhong Shen, Roland Oberh\"ansli, Zeng'qian Hou, Xiaogang Ma, Zhiqiang Feng, Junxuan Fan, Chao Ma, Xiumian Hu, Bin Luo, Juanle Wang, and Craig M. Schiffries. 2021a. The Deep-Time Digital Earth program: data-driven discovery in geosciences. National Science Review , Vol. 8 (2021)."},{"key":"e_1_3_2_1_49_1","volume-title":"Self-Instruct: Aligning Language Model with Self Generated Instructions. ArXiv","author":"Wang Yizhong","year":"2022","unstructured":"Yizhong Wang, Yeganeh Kordi, Swaroop Mishra, Alisa Liu, Noah A. Smith, Daniel Khashabi, and Hannaneh Hajishirzi. 2022. Self-Instruct: Aligning Language Model with Self Generated Instructions. ArXiv , Vol. abs\/2212.10560 (2022)."},{"key":"e_1_3_2_1_50_1","volume-title":"F. Xia, Quoc Le, and Denny Zhou.","author":"Wei Jason","year":"2022","unstructured":"Jason Wei, Xuezhi Wang, Dale Schuurmans, Maarten Bosma, Ed Huai hsin Chi, F. Xia, Quoc Le, and Denny Zhou. 2022. Chain of Thought Prompting Elicits Reasoning in Large Language Models. ArXiv , Vol. abs\/2201.11903 (2022). https:\/\/api.semanticscholar.org\/CorpusID:246411621"},{"key":"e_1_3_2_1_51_1","volume-title":"2023 a. Baize: An Open-Source Chat Model with Parameter-Efficient Tuning on Self-Chat Data. ArXiv","author":"Xu Canwen","year":"2023","unstructured":"Canwen Xu, Daya Guo, Nan Duan, and Julian McAuley. 2023 a. 
Baize: An Open-Source Chat Model with Parameter-Efficient Tuning on Self-Chat Data. ArXiv , Vol. abs\/2304.01196 (2023)."},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.acl-long.727"},{"key":"e_1_3_2_1_53_1","volume-title":"reStructured Pre-training. ArXiv","author":"Yuan Weizhe","year":"2022","unstructured":"Weizhe Yuan and Pengfei Liu. 2022. reStructured Pre-training. ArXiv , Vol. abs\/2206.11147 (2022)."},{"key":"e_1_3_2_1_54_1","volume-title":"Zixuan Ma, Yufei Xue, Jidong Zhai, Wenguang Chen, P. Zhang, Yuxiao Dong, and Jie Tang.","author":"Zeng Aohan","year":"2022","unstructured":"Aohan Zeng, Xiao Liu, Zhengxiao Du, Zihan Wang, Hanyu Lai, Ming Ding, Zhuoyi Yang, Yifan Xu, Wendi Zheng, Xiao Xia, Weng Lam Tam, Zixuan Ma, Yufei Xue, Jidong Zhai, Wenguang Chen, P. Zhang, Yuxiao Dong, and Jie Tang. 2022. GLM-130B: An Open Bilingual Pre-trained Model. ArXiv , Vol. abs\/2210.02414 (2022)."},{"key":"e_1_3_2_1_55_1","volume-title":"DeepShovel: An Online Collaborative Platform for Data Extraction in Geoscience Literature with AI Assistance. ArXiv","author":"Zhang Shao","year":"2022","unstructured":"Shao Zhang, Yuting Jia, Hui Xu, Ying Wen, Dakuo Wang, and Xinbing Wang. 2022. DeepShovel: An Online Collaborative Platform for Data Extraction in Geoscience Literature with AI Assistance. ArXiv , Vol. abs\/2202.10163 (2022). 
https:\/\/api.semanticscholar.org\/CorpusID:247011979"}],"event":{"name":"WSDM '24: The 17th ACM International Conference on Web Search and Data Mining","location":"Merida Mexico","acronym":"WSDM '24","sponsor":["SIGMOD ACM Special Interest Group on Management of Data","SIGWEB ACM Special Interest Group on Hypertext, Hypermedia, and Web","SIGKDD ACM Special Interest Group on Knowledge Discovery in Data","SIGIR ACM Special Interest Group on Information Retrieval"]},"container-title":["Proceedings of the 17th ACM International Conference on Web Search and Data Mining"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3616855.3635772","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3616855.3635772","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T00:53:18Z","timestamp":1755823998000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3616855.3635772"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,3,4]]},"references-count":55,"alternative-id":["10.1145\/3616855.3635772","10.1145\/3616855"],"URL":"https:\/\/doi.org\/10.1145\/3616855.3635772","relation":{},"subject":[],"published":{"date-parts":[[2024,3,4]]},"assertion":[{"value":"2024-03-04","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}