{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,10]],"date-time":"2025-12-10T04:15:55Z","timestamp":1765340155890,"version":"3.46.0"},"publisher-location":"New York, NY, USA","reference-count":51,"publisher":"ACM","funder":[{"name":"the National Natural Science Foundation of China","award":["U22B2019"],"award-info":[{"award-number":["U22B2019"]}]},{"name":"the China Scholarship Council program","award":["202406470057"],"award-info":[{"award-number":["202406470057"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,10,27]]},"DOI":"10.1145\/3746027.3755875","type":"proceedings-article","created":{"date-parts":[[2025,10,25]],"date-time":"2025-10-25T07:38:54Z","timestamp":1761377934000},"page":"2323-2332","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["DeepMolTex: Deep Alignment of Molecular Graphs with Large Language Models via Mixture of Modality Experts"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0009-0586-4119","authenticated-orcid":false,"given":"Mingliang","family":"Yan","sequence":"first","affiliation":[{"name":"Beijing University of Posts and Telecommunications, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4137-2420","authenticated-orcid":false,"given":"Yanhua","family":"Yu","sequence":"additional","affiliation":[{"name":"Beijing University of Posts and Telecommunications, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6541-4050","authenticated-orcid":false,"given":"Ruochi","family":"Zhang","sequence":"additional","affiliation":[{"name":"Syneron Technology, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1206-9315","authenticated-orcid":false,"given":"Zhiyuan","family":"Liu","sequence":"additional","affiliation":[{"name":"National University of Singapore, Singapore, Singapore"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-6793-1012","authenticated-orcid":false,"given":"Ruicheng","family":"Zhang","sequence":"additional","affiliation":[{"name":"Sun Yat-Sen University, Shenzhen, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7228-2279","authenticated-orcid":false,"given":"Yimeng","family":"Ren","sequence":"additional","affiliation":[{"name":"Beijing University of Posts and Telecommunications, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0983-758X","authenticated-orcid":false,"given":"Kangkang","family":"Lu","sequence":"additional","affiliation":[{"name":"Beijing University of Posts and Telecommunications, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1931-7775","authenticated-orcid":false,"given":"Zhiyong","family":"Huang","sequence":"additional","affiliation":[{"name":"National University of Singapore, Singapore, Singapore"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-7573-9600","authenticated-orcid":false,"given":"Feng","family":"Luo","sequence":"additional","affiliation":[{"name":"China Mobile Group Design Institute Corporation, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-0723-0438","authenticated-orcid":false,"given":"Zhen","family":"Cai","sequence":"additional","affiliation":[{"name":"Beijing University of Posts and Telecommunications, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2025,10,27]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Diogo Almeida, Janko Altenschmidt, Sam Altman, Shyamal Anadkat, et al.","author":"Achiam Josh","year":"2023","unstructured":"Josh Achiam, Steven Adler, Sandhini Agarwal, Lama Ahmad, Ilge Akkaya, Florencia Leoni Aleman, Diogo Almeida, Janko Altenschmidt, Sam Altman, Shyamal Anadkat, et al., 2023. Gpt-4 technical report. arXiv:2303.08774 (2023)."},{"key":"e_1_3_2_1_2_1","first-page":"23716","article-title":"Flamingo: a visual language model for few-shot learning","volume":"35","author":"Alayrac Jean-Baptiste","year":"2022","unstructured":"Jean-Baptiste Alayrac, Jeff Donahue, Pauline Luc, Antoine Miech, Iain Barr, Yana Hasson, Karel Lenc, Arthur Mensch, Katherine Millican, Malcolm Reynolds, et al., 2022. Flamingo: a visual language model for few-shot learning. Advances in Neural Information Processing Systems, Vol. 35 (2022), 23716-23736.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1038\/s42256-021-00418-8"},{"key":"e_1_3_2_1_4_1","volume-title":"Qwen-VL: A Frontier Large Vision-Language Model with Versatile Abilities. arXiv preprint arXiv:2308.12966","author":"Bai Jinze","year":"2023","unstructured":"Jinze Bai, Shuai Bai, Shusheng Yang, Shijie Wang, Sinan Tan, Peng Wang, Junyang Lin, Chang Zhou, and Jingren Zhou. 2023. Qwen-VL: A Frontier Large Vision-Language Model with Versatile Abilities. arXiv preprint arXiv:2308.12966 (2023)."},{"key":"e_1_3_2_1_5_1","volume-title":"METEOR: An Automatic Metric for MT Evaluation with Improved Correlation with Human Judgments. In IEEvaluation@ACL","author":"Banerjee Satanjeev","year":"2005","unstructured":"Satanjeev Banerjee and Alon Lavie. 2005. METEOR: An Automatic Metric for MT Evaluation with Improved Correlation with Human Judgments. In IEEvaluation@ACL. Association for Computational Linguistics, 65-72."},{"key":"e_1_3_2_1_6_1","volume-title":"Proceedings of the 31st International Conference on Computational Linguistics. 354-379","author":"Cao He","year":"2025","unstructured":"He Cao, Zijing Liu, Xingyu Lu, Yuan Yao, and Yu Li. 2025. InstructMol: Multi-Modal Integration for Building a Versatile and Reliable Molecular Assistant in Drug Discovery. In Proceedings of the 31st International Conference on Computational Linguistics. 354-379."},{"key":"e_1_3_2_1_7_1","volume-title":"Hight: Hierarchical graph tokenization for graph-language alignment. arXiv preprint arXiv:2406.14021","author":"Chen Yongqiang","year":"2024","unstructured":"Yongqiang Chen, Quanming Yao, Juzheng Zhang, James Cheng, and Yatao Bian. 2024. Hight: Hierarchical graph tokenization for graph-language alignment. arXiv preprint arXiv:2406.14021 (2024)."},{"key":"e_1_3_2_1_8_1","volume-title":"International Conference on Machine Learning. PMLR, 6140-6157","author":"Christofidellis Dimitrios","year":"2023","unstructured":"Dimitrios Christofidellis, Giorgio Giannone, Jannis Born, Ole Winther, Teodoro Laino, and Matteo Manica. 2023. Unifying molecular and textual representations via multi-task language modelling. In International Conference on Machine Learning. PMLR, 6140-6157."},{"key":"e_1_3_2_1_9_1","volume-title":"ICML workshop.","author":"Cao Nicola De","year":"2018","unstructured":"Nicola De Cao and Thomas Kipf. 2018. MolGAN: An implicit generative model for small molecular graphs. In ICML workshop."},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1002\/cmdc.200800178"},{"key":"e_1_3_2_1_11_1","unstructured":"Xiaoyi Dong Pan Zhang Yuhang Zang Yuhang Cao Bin Wang Linke Ouyang Xilin Wei Songyang Zhang Haodong Duan Maosong Cao et al. 2024. Internlm-xcomposer2: Mastering free-form text-image composition and comprehension in vision-language large model. arXiv preprint arXiv:2401.16420 (2024)."},{"key":"e_1_3_2_1_12_1","volume-title":"PaLM-E: An Embodied Multimodal Language Model. In International Conference on Machine Learning, ICML 2023","volume":"8488","author":"Driess Danny","year":"2023","unstructured":"Danny Driess, Fei Xia, Mehdi S. M. Sajjadi, Corey Lynch, Aakanksha Chowdhery, Brian Ichter, Ayzaan Wahid, Jonathan Tompson, Quan Vuong, Tianhe Yu, Wenlong Huang, Yevgen Chebotar, Pierre Sermanet, Daniel Duckworth, Sergey Levine, Vincent Vanhoucke, Karol Hausman, Marc Toussaint, Klaus Greff, Andy Zeng, Igor Mordatch, and Pete Florence. 2023. PaLM-E: An Embodied Multimodal Language Model. In International Conference on Machine Learning, ICML 2023, 23-29 July 2023, Honolulu, Hawaii, USA (Proceedings of Machine Learning Research, Vol. 202), Andreas Krause, Emma Brunskill, Kyunghyun Cho, Barbara Engelhardt, Sivan Sabato, and Jonathan Scarlett (Eds.). PMLR, 8469-8488. https:\/\/proceedings.mlr.press\/v202\/driess23a.html"},{"key":"e_1_3_2_1_13_1","unstructured":"David K Duvenaud Dougal Maclaurin Jorge Iparraguirre Rafael Bombarell Timothy Hirzel Al\u00e1n Aspuru-Guzik and Ryan P Adams. 2015. Convolutional networks on graphs for learning molecular fingerprints. In NeurIPS."},{"key":"e_1_3_2_1_14_1","volume-title":"Kevin Ros, Garrett Honke, Kyunghyun Cho, and Heng Ji.","author":"Edwards Carl","year":"2022","unstructured":"Carl Edwards, Tuan Manh Lai, Kevin Ros, Garrett Honke, Kyunghyun Cho, and Heng Ji. 2022. Translation between Molecules and Natural Language. In EMNLP, . Association for Computational Linguistics, 375-413."},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"crossref","unstructured":"Carl Edwards ChengXiang Zhai and Heng Ji. 2021. Text2mol: Cross-modal molecule retrieval with natural language queries. In EMNLP.","DOI":"10.18653\/v1\/2021.emnlp-main.47"},{"key":"e_1_3_2_1_16_1","volume-title":"Mol-Instructions: A Large-Scale Biomolecular Instruction Dataset for Large Language Models. In The Twelfth International Conference on Learning Representations, ICLR 2024","author":"Fang Yin","year":"2024","unstructured":"Yin Fang, Xiaozhuan Liang, Ningyu Zhang, Kangwei Liu, Rui Huang, Zhuo Chen, Xiaohui Fan, and Huajun Chen. 2024. Mol-Instructions: A Large-Scale Biomolecular Instruction Dataset for Large Language Models. In The Twelfth International Conference on Learning Representations, ICLR 2024, Vienna, Austria, May 7-11, 2024. OpenReview.net. https:\/\/openreview.net\/forum?id=Tlsdsb6l9n"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1039\/9781849733069"},{"key":"e_1_3_2_1_18_1","volume-title":"Measuring Massive Multitask Language Understanding. In 9th International Conference on Learning Representations, ICLR 2021","author":"Hendrycks Dan","year":"2021","unstructured":"Dan Hendrycks, Collin Burns, Steven Basart, Andy Zou, Mantas Mazeika, Dawn Song, and Jacob Steinhardt. 2021. Measuring Massive Multitask Language Understanding. In 9th International Conference on Learning Representations, ICLR 2021, Virtual Event, Austria, May 3-7, 2021. OpenReview.net. https:\/\/openreview.net\/forum?id=d7KBjmI3GmQ"},{"key":"e_1_3_2_1_19_1","first-page":"3","article-title":"Lora: Low-rank adaptation of large language models","volume":"1","author":"Hu Edward J","year":"2022","unstructured":"Edward J Hu, Yelong Shen, Phillip Wallis, Zeyuan Allen-Zhu, Yuanzhi Li, Shean Wang, Lu Wang, Weizhu Chen, et al., 2022. Lora: Low-rank adaptation of large language models. ICLR, Vol. 1, 2 (2022), 3.","journal-title":"ICLR"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1109\/TKDE.2024.3393356"},{"key":"e_1_3_2_1_21_1","unstructured":"Qimai Li Zhichao Han and Xiao-Ming Wu. 2018. Deeper insights into graph convolutional networks for semi-supervised learning. In AAAI."},{"key":"e_1_3_2_1_22_1","unstructured":"Sihang Li Zhiyuan Liu Yanchen Luo Xiang Wang Xiangnan He Kenji Kawaguchi Tat-Seng Chua and Qi Tian. 2024c. 3D-MoLM: Towards 3D Molecule-Text Interpretation in Language Models. In ICLR . https:\/\/openreview.net\/forum?id=xI4yNlkaqh"},{"key":"e_1_3_2_1_23_1","volume-title":"FineMolTex: Towards Fine-grained Molecular Graph-Text Pre-training. arXiv preprint arXiv:2409.14106","author":"Li Yibo","year":"2024","unstructured":"Yibo Li, Yuan Fang, Mengmei Zhang, and Chuan Shi. 2024a. FineMolTex: Towards Fine-grained Molecular Graph-Text Pre-training. arXiv preprint arXiv:2409.14106 (2024)."},{"key":"e_1_3_2_1_24_1","volume-title":"DrugChat: Towards Enabling ChatGPT-Like Capabilities on Drug Molecule Graphs. ArXiv","author":"Liang Youwei","year":"2023","unstructured":"Youwei Liang, Ruiyi Zhang, Li Zhang, and Peng Xie. 2023. DrugChat: Towards Enabling ChatGPT-Like Capabilities on Drug Molecule Graphs. ArXiv, Vol. abs\/2309.03907 (2023). https:\/\/api.semanticscholar.org\/CorpusID:261660530"},{"key":"e_1_3_2_1_25_1","volume-title":"Rouge: A package for automatic evaluation of summaries. In Text summarization branches out. 74-81.","author":"Lin Chin-Yew","year":"2004","unstructured":"Chin-Yew Lin. 2004. Rouge: A package for automatic evaluation of summaries. In Text summarization branches out. 74-81."},{"key":"e_1_3_2_1_26_1","volume-title":"Visual Instruction Tuning. In Advances in Neural Information Processing Systems 36: Annual Conference on Neural Information Processing Systems 2023","author":"Liu Haotian","year":"2023","unstructured":"Haotian Liu, Chunyuan Li, Qingyang Wu, and Yong Jae Lee. 2023b. Visual Instruction Tuning. In Advances in Neural Information Processing Systems 36: Annual Conference on Neural Information Processing Systems 2023, NeurIPS 2023, New Orleans, LA, USA, December 10 - 16, 2023, Alice Oh, Tristan Naumann, Amir Globerson, Kate Saenko, Moritz Hardt, and Sergey Levine (Eds.). http:\/\/papers.nips.cc\/paper_files\/paper\/2023\/hash\/6dcf277ea32ce3288914faf369fe6de0-Abstract-Conference.html"},{"key":"e_1_3_2_1_27_1","volume-title":"Git-mol: A multi-modal large language model for molecular science with graph, image, and text. Computers in biology and medicine","author":"Liu Pengfei","year":"2024","unstructured":"Pengfei Liu, Yiming Ren, Jun Tao, and Zhixiang Ren. 2024. Git-mol: A multi-modal large language model for molecular science with graph, image, and text. Computers in biology and medicine, Vol. 171 (2024), 108073."},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1038\/s42256-023-00759-6"},{"key":"e_1_3_2_1_29_1","volume-title":"Pre-training Molecular Graph Representation with 3D Geometry. ArXiv","author":"Liu Shengchao","year":"2021","unstructured":"Shengchao Liu, Hanchen Wang, Weiyang Liu, Joan Lasenby, Hongyu Guo, and Jian Tang. 2021. Pre-training Molecular Graph Representation with 3D Geometry. ArXiv, Vol. abs\/2110.07728 (2021). https:\/\/api.semanticscholar.org\/CorpusID:239009574"},{"key":"e_1_3_2_1_30_1","volume-title":"Molca: Molecular graph-language modeling with cross-modal projector and uni-modal adapter. In EMNLP.","author":"Liu Zhiyuan","year":"2023","unstructured":"Zhiyuan Liu, Sihang Li, Yanchen Luo, Hao Fei, Yixin Cao, Kenji Kawaguchi, Xiang Wang, and Tat-Seng Chua. 2023a. Molca: Molecular graph-language modeling with cross-modal projector and uni-modal adapter. In EMNLP."},{"key":"e_1_3_2_1_31_1","volume-title":"The Thirteenth International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=p66a00KLWN","author":"Liu Zhiyuan","year":"2025","unstructured":"Zhiyuan Liu, Yanchen Luo, Han Huang, Enzhi Zhang, Sihang Li, Junfeng Fang, Yaorui Shi, Xiang Wang, Kenji Kawaguchi, and Tat-Seng Chua. 2025. NEXT-MOL: 3D Diffusion Meets 1D Language Modeling for 3D Molecule Generation. In The Thirteenth International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=p66a00KLWN"},{"key":"e_1_3_2_1_32_1","volume-title":"Molxpt: Wrapping molecules with text for generative pre-training. In ACL.","author":"Liu Zequn","year":"2023","unstructured":"Zequn Liu, Wei Zhang, Yingce Xia, Lijun Wu, Shufang Xie, Tao Qin, Ming Zhang, and Tie-Yan Liu. 2023d. Molxpt: Wrapping molecules with text for generative pre-training. In ACL."},{"key":"e_1_3_2_1_33_1","volume-title":"Mono-internvl: Pushing the boundaries of monolithic multimodal large language models with endogenous visual pre-training. arXiv preprint arXiv:2410.08202","author":"Luo Gen","year":"2024","unstructured":"Gen Luo, Xue Yang, Wenhan Dou, Zhaokai Wang, Jifeng Dai, Yu Qiao, and Xizhou Zhu. 2024. Mono-internvl: Pushing the boundaries of monolithic multimodal large language models with endogenous visual pre-training. arXiv preprint arXiv:2410.08202 (2024)."},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.2307.09484"},{"key":"e_1_3_2_1_35_1","volume-title":"BioMedGPT: Open Multimodal Generative Pre-trained Transformer for BioMedicine. ArXiv","author":"Luo Yi","year":"2023","unstructured":"Yi Luo, Jiahuan Zhang, Siqi Fan, Kai Yang, Yushuai Wu, Mu Qiao, and Zaiqing Nie. 2023b. BioMedGPT: Open Multimodal Generative Pre-trained Transformer for BioMedicine. ArXiv, Vol. abs\/2308.09442 (2023). https:\/\/api.semanticscholar.org\/CorpusID:261030404"},{"key":"e_1_3_2_1_36_1","first-page":"311","article-title":"Bleu: a Method for Automatic Evaluation of Machine Translation. In ACL","author":"Papineni Kishore","year":"2002","unstructured":"Kishore Papineni, Salim Roukos, Todd Ward, and Wei-Jing Zhu. 2002. Bleu: a Method for Automatic Evaluation of Machine Translation. In ACL, . ACL, 311-318.","journal-title":"ACL"},{"key":"e_1_3_2_1_37_1","volume-title":"LLaMo: Large Language Model-based Molecular Graph Assistant. In Advances in Neural Information Processing Systems 38: Annual Conference on Neural Information Processing Systems 2024","author":"Park Jinyoung","year":"2024","unstructured":"Jinyoung Park, Minseong Bae, Dohwan Ko, and Hyunwoo J. Kim. 2024. LLaMo: Large Language Model-based Molecular Graph Assistant. In Advances in Neural Information Processing Systems 38: Annual Conference on Neural Information Processing Systems 2024, NeurIPS 2024, Vancouver, BC, Canada, December 10 - 15, 2024, Amir Globersons, Lester Mackey, Danielle Belgrave, Angela Fan, Ulrich Paquet, Jakub M. Tomczak, and Cheng Zhang (Eds.). http:\/\/papers.nips.cc\/paper_files\/paper\/2024\/hash\/ee46288ab2aaf5c6e53aebebe719712c-Abstract-Conference.html"},{"key":"e_1_3_2_1_38_1","volume-title":"Learning Transferable Visual Models From Natural Language Supervision. In International Conference on Machine Learning. https:\/\/api.semanticscholar.org\/CorpusID:231591445","author":"Radford Alec","year":"2021","unstructured":"Alec Radford, Jong Wook Kim, Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, Gretchen Krueger, and Ilya Sutskever. 2021. Learning Transferable Visual Models From Natural Language Supervision. In International Conference on Machine Learning. https:\/\/api.semanticscholar.org\/CorpusID:231591445"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.5555\/3455716.3455856"},{"key":"e_1_3_2_1_40_1","article-title":"Exploring the Limits of Transfer Learning with a Unified Text-to-Text Transformer","volume":"21","author":"Raffel Colin","year":"2020","unstructured":"Colin Raffel, Noam Shazeer, Adam Roberts, Katherine Lee, Sharan Narang, Michael Matena, Yanqi Zhou, Wei Li, and Peter J. Liu. 2020b. Exploring the Limits of Transfer Learning with a Unified Text-to-Text Transformer. J. Mach. Learn. Res., Vol. 21 (2020), 140:1-140:67.","journal-title":"J. Mach. Learn. Res."},{"key":"e_1_3_2_1_41_1","volume-title":"A molecular multimodal foundation model associating molecule graphs with natural language. arXiv:2209.05481","author":"Su Bing","year":"2022","unstructured":"Bing Su, Dazhao Du, Zhao Yang, Yujie Zhou, Jiangmeng Li, Anyi Rao, Hao Sun, Zhiwu Lu, and Ji-Rong Wen. 2022. A molecular multimodal foundation model associating molecule graphs with natural language. arXiv:2209.05481 (2022)."},{"key":"e_1_3_2_1_42_1","volume-title":"Galactica: A large language model for science. arXiv:2211.09085","author":"Taylor Ross","year":"2022","unstructured":"Ross Taylor, Marcin Kardas, Guillem Cucurull, Thomas Scialom, Anthony Hartshorn, Elvis Saravia, Andrew Poulton, Viktor Kerkez, and Robert Stojnic. 2022. Galactica: A large language model for science. arXiv:2211.09085 (2022)."},{"key":"e_1_3_2_1_43_1","unstructured":"Gemini Team Rohan Anil Sebastian Borgeaud Yonghui Wu Jean-Baptiste Alayrac Jiahui Yu Radu Soricut Johan Schalkwyk Andrew M Dai Anja Hauth et al. 2023. Gemini: a family of highly capable multimodal models. arXiv:2312.11805 (2023)."},{"key":"e_1_3_2_1_44_1","unstructured":"Hugo Touvron Louis Martin Kevin Stone Peter Albert Amjad Almahairi Yasmine Babaei Nikolay Bashlykov Soumya Batra Prajjwal Bhargava Shruti Bhosale et al. 2023. Llama 2: Open foundation and fine-tuned chat models. arXiv preprint arXiv:2307.09288 (2023)."},{"key":"e_1_3_2_1_45_1","first-page":"121475","article-title":"Cogvlm: Visual expert for pretrained language models","volume":"37","author":"Wang Weihan","year":"2024","unstructured":"Weihan Wang, Qingsong Lv, Wenmeng Yu, Wenyi Hong, Ji Qi, Yan Wang, Junhui Ji, Zhuoyi Yang, Lei Zhao, Song XiXuan, et al., 2024. Cogvlm: Visual expert for pretrained language models. Advances in Neural Information Processing Systems, Vol. 37 (2024), 121475-121499.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_46_1","volume-title":"mPLUG-Owl: Modularization Empowers Large Language Models with Multimodality. ArXiv","author":"Ye Qinghao","year":"2023","unstructured":"Qinghao Ye, Haiyang Xu, Guohai Xu, Jiabo Ye, Ming Yan, Yi Zhou, Junyan Wang, Anwen Hu, Pengcheng Shi, Yaya Shi, Chenliang Li, Yuanhong Xu, Hehong Chen, Junfeng Tian, Qiang Qi, Ji Zhang, and Feiyan Huang. 2023. mPLUG-Owl: Modularization Empowers Large Language Models with Multimodality. ArXiv, Vol. abs\/2304.14178 (2023). https:\/\/api.semanticscholar.org\/CorpusID:258352455"},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1038\/s41467-022-28494-3"},{"key":"e_1_3_2_1_48_1","volume-title":"Unimot: Unified molecule-text language model with discrete token representation. arXiv preprint arXiv:2408.00863","author":"Zhang Juzheng","year":"2024","unstructured":"Juzheng Zhang, Yatao Bian, Yongqiang Chen, and Quanming Yao. 2024. Unimot: Unified molecule-text language model with discrete token representation. arXiv preprint arXiv:2408.00863 (2024)."},{"key":"e_1_3_2_1_49_1","first-page":"15870","article-title":"Motif-based graph self-supervised learning for molecular property prediction","volume":"34","author":"Zhang Zaixi","year":"2021","unstructured":"Zaixi Zhang, Qi Liu, Hao Wang, Chengqiang Lu, and Chee-Kong Lee. 2021. Motif-based graph self-supervised learning for molecular property prediction. Advances in Neural Information Processing Systems, Vol. 34 (2021), 15870-15882.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_50_1","volume-title":"Gimlet: A unified graph-text model for instruction-based molecule zero-shot learning. In NeurIPS.","author":"Zhao Haiteng","year":"2023","unstructured":"Haiteng Zhao, Shengchao Liu, Ma Chang, Hannan Xu, Jie Fu, Zhihong Deng, Lingpeng Kong, and Qi Liu. 2023. Gimlet: A unified graph-text model for instruction-based molecule zero-shot learning. In NeurIPS."},{"key":"e_1_3_2_1_51_1","volume-title":"ChemDFM: A Large Language Foundation Model for Chemistry. In Neurips 2024 Workshop Foundation Models for Science: Progress, Opportunities, and Challenges.","author":"Zhao Zihan","year":"2024","unstructured":"Zihan Zhao, Da Ma, Lu Chen, Liangtai Sun, Zihao Li, Yi Xia, Hongshen Xu, Zichen Zhu, Su Zhu, Shuai Fan, et al., 2024. ChemDFM: A Large Language Foundation Model for Chemistry. In Neurips 2024 Workshop Foundation Models for Science: Progress, Opportunities, and Challenges."}],"event":{"name":"MM '25: The 33rd ACM International Conference on Multimedia","sponsor":["SIGMM ACM Special Interest Group on Multimedia"],"location":"Dublin Ireland","acronym":"MM '25"},"container-title":["Proceedings of the 33rd ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3746027.3755875","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,10]],"date-time":"2025-12-10T04:13:40Z","timestamp":1765340020000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3746027.3755875"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,27]]},"references-count":51,"alternative-id":["10.1145\/3746027.3755875","10.1145\/3746027"],"URL":"https:\/\/doi.org\/10.1145\/3746027.3755875","relation":{},"subject":[],"published":{"date-parts":[[2025,10,27]]},"assertion":[{"value":"2025-10-27","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}