{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,10]],"date-time":"2025-12-10T04:15:38Z","timestamp":1765340138816,"version":"3.46.0"},"publisher-location":"New York, NY, USA","reference-count":49,"publisher":"ACM","funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62276245"],"award-info":[{"award-number":["62276245"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,10,27]]},"DOI":"10.1145\/3746027.3754578","type":"proceedings-article","created":{"date-parts":[[2025,10,25]],"date-time":"2025-10-25T06:47:18Z","timestamp":1761374838000},"page":"2516-2525","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["CROP: Integrating Topological and Spatial Structures via Cross-View Prefixes for Molecular LLMs"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0003-8065-0993","authenticated-orcid":false,"given":"Jianting","family":"Tang","sequence":"first","affiliation":[{"name":"University of Science and Technology of China, Hefei, Anhui, China and State Key Laboratory of Cognitive Intelligence, Hefei, Anhui, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-2205-2100","authenticated-orcid":false,"given":"Yubo","family":"Wang","sequence":"additional","affiliation":[{"name":"University of Science and Technology of China, Hefei, Anhui, China and State Key Laboratory of Cognitive Intelligence, Hefei, Anhui, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3789-9705","authenticated-orcid":false,"given":"Haoyu","family":"Cao","sequence":"additional","affiliation":[{"name":"University of Science and Technology of China, Hefei, Anhui, China and State Key Laboratory of Cognitive Intelligence, Hefei, Anhui, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0227-3793","authenticated-orcid":false,"given":"Linli","family":"Xu","sequence":"additional","affiliation":[{"name":"University of Science and Technology of China, Hefei, Anhui, China and State Key Laboratory of Cognitive Intelligence, Hefei, Anhui, China"}]}],"member":"320","published-online":{"date-parts":[[2025,10,27]]},"reference":[{"key":"e_1_3_2_2_1_1","doi-asserted-by":"publisher","DOI":"10.1186\/s13321-020-00456-1"},{"key":"e_1_3_2_2_2_1","volume-title":"Instructmol: Multimodal integration for building a versatile and reliable molecular assistant in drug discovery. arXiv preprint arXiv:2311.16208","author":"Cao He","year":"2023","unstructured":"He Cao, Zijing Liu, Xingyu Lu, Yuan Yao, and Yu Li. 2023. Instructmol: Multimodal integration for building a versatile and reliable molecular assistant in drug discovery. arXiv preprint arXiv:2311.16208 (2023)."},{"key":"e_1_3_2_2_3_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i04.5747"},{"key":"e_1_3_2_2_4_1","unstructured":"Abhimanyu Dubey Abhinav Jauhri Abhinav Pandey Abhishek Kadian Ahmad Al-Dahle Aiesha Letman Akhil Mathur Alan Schelten Amy Yang Angela Fan et al. 2024. The llama 3 herd of models. arXiv preprint arXiv:2407.21783 (2024)."},{"key":"e_1_3_2_2_5_1","volume-title":"Translation between molecules and natural language. arXiv preprint arXiv:2204.11817","author":"Edwards Carl","year":"2022","unstructured":"Carl Edwards, Tuan Lai, Kevin Ros, Garrett Honke, Kyunghyun Cho, and Heng Ji. 2022. Translation between molecules and natural language. arXiv preprint arXiv:2204.11817 (2022)."},{"key":"e_1_3_2_2_6_1","volume-title":"Translation between molecules and natural language. arXiv preprint arXiv:2204.11817","author":"Edwards Carl","year":"2022","unstructured":"Carl Edwards, Tuan Lai, Kevin Ros, Garrett Honke, Kyunghyun Cho, and Heng Ji. 2022. Translation between molecules and natural language. arXiv preprint arXiv:2204.11817 (2022)."},{"key":"e_1_3_2_2_7_1","doi-asserted-by":"crossref","unstructured":"Ray F Egerton et al. 2005. Physical principles of electron microscopy. Vol. 56. Springer.","DOI":"10.1007\/b136495"},{"key":"e_1_3_2_2_8_1","volume-title":"Moltc: Towards molecular relational modeling in language models. arXiv preprint arXiv:2402.03781","author":"Fang Junfeng","year":"2024","unstructured":"Junfeng Fang, Shuai Zhang, Chang Wu, Zhiyuan Liu, Sihang Li, Kun Wang, Wenjie Du, Xiang Wang, and Xiangnan He. 2024. Moltc: Towards molecular relational modeling in language models. arXiv preprint arXiv:2402.03781 (2024)."},{"key":"e_1_3_2_2_9_1","doi-asserted-by":"publisher","DOI":"10.1039\/9781849733069"},{"key":"e_1_3_2_2_10_1","doi-asserted-by":"publisher","DOI":"10.1038\/s41570-022-00416-3"},{"key":"e_1_3_2_2_11_1","doi-asserted-by":"publisher","DOI":"10.1021\/acs.jcim.8b00338"},{"key":"e_1_3_2_2_12_1","volume-title":"Chemception: a deep neural network with minimal chemistry knowledge matches the performance of expert-developed QSAR\/QSPR models. arXiv preprint arXiv:1706.06689","author":"Goh Garrett B","year":"2017","unstructured":"Garrett B Goh, Charles Siegel, Abhinav Vishnu, Nathan O Hodas, and Nathan Baker. 2017. Chemception: a deep neural network with minimal chemistry knowledge matches the performance of expert-developed QSAR\/QSPR models. arXiv preprint arXiv:1706.06689 (2017)."},{"key":"e_1_3_2_2_13_1","volume-title":"Lora: Low-rank adaptation of large language models. arXiv preprint arXiv:2106.09685","author":"Hu Edward J","year":"2021","unstructured":"Edward J Hu, Yelong Shen, Phillip Wallis, Zeyuan Allen-Zhu, Yuanzhi Li, Shean Wang, Lu Wang, and Weizhu Chen. 2021. Lora: Low-rank adaptation of large language models. arXiv preprint arXiv:2106.09685 (2021)."},{"key":"e_1_3_2_2_14_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i21.30570"},{"key":"e_1_3_2_2_15_1","first-page":"2268","article-title":"Not too little, not too much: a theoretical analysis of graph (over) smoothing","volume":"35","author":"Keriven Nicolas","year":"2022","unstructured":"Nicolas Keriven. 2022. Not too little, not too much: a theoretical analysis of graph (over) smoothing. Advances in Neural Information Processing Systems 35 (2022), 2268--2281.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_2_16_1","doi-asserted-by":"publisher","DOI":"10.1088\/2632-2153\/aba947"},{"key":"e_1_3_2_2_17_1","volume-title":"International conference on machine learning. PMLR","author":"Li Junnan","year":"2023","unstructured":"Junnan Li, Dongxu Li, Silvio Savarese, and Steven Hoi. 2023. Blip-2: Bootstrapping language-image pre-training with frozen image encoders and large language models. In International conference on machine learning. PMLR, 19730--19742."},{"key":"e_1_3_2_2_18_1","volume-title":"Empowering molecule discovery for molecule-caption translation with large language models: A chatgpt perspective","author":"Li Jiatong","year":"2024","unstructured":"Jiatong Li, Yunqing Liu, Wenqi Fan, Xiao-Yong Wei, Hui Liu, Jiliang Tang, and Qing Li. 2024. Empowering molecule discovery for molecule-caption translation with large language models: A chatgpt perspective. IEEE Transactions on Knowledge and Data Engineering (2024)."},{"key":"e_1_3_2_2_19_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.drudis.2022.103373"},{"key":"e_1_3_2_2_20_1","volume-title":"DrugChat: towards enabling ChatGPT-like capabilities on drug molecule graphs. arXiv preprint arXiv:2309.03907","author":"Liang Youwei","year":"2023","unstructured":"Youwei Liang, Ruiyi Zhang, Li Zhang, and Pengtao Xie. 2023. DrugChat: towards enabling ChatGPT-like capabilities on drug molecule graphs. arXiv preprint arXiv:2309.03907 (2023)."},{"key":"e_1_3_2_2_21_1","volume-title":"Visual Instruction Tuning. arXiv preprint arXiv:2304.08485","author":"Liu Haotian","year":"2023","unstructured":"Haotian Liu, Chunyuan Li, QingyangWu, and Yong Jae Lee. 2023. Visual Instruction Tuning. arXiv preprint arXiv:2304.08485 (2023)."},{"key":"e_1_3_2_2_22_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.compbiomed.2024.108073"},{"key":"e_1_3_2_2_23_1","doi-asserted-by":"publisher","DOI":"10.1038\/s42256-023-00759-6"},{"key":"e_1_3_2_2_24_1","volume-title":"Molca: Molecular graph-language modeling with cross-modal projector and uni-modal adapter. arXiv preprint arXiv:2310.12798","author":"Liu Zhiyuan","year":"2023","unstructured":"Zhiyuan Liu, Sihang Li, Yanchen Luo, Hao Fei, Yixin Cao, Kenji Kawaguchi, Xiang Wang, and Tat-Seng Chua. 2023. Molca: Molecular graph-language modeling with cross-modal projector and uni-modal adapter. arXiv preprint arXiv:2310.12798 (2023)."},{"key":"e_1_3_2_2_25_1","volume-title":"Molxpt: Wrapping molecules with text for generative pre-training. arXiv preprint arXiv:2305.10688","author":"Liu Zequn","year":"2023","unstructured":"Zequn Liu,Wei Zhang, Yingce Xia, LijunWu, Shufang Xie, Tao Qin, Ming Zhang, and Tie-Yan Liu. 2023. Molxpt: Wrapping molecules with text for generative pre-training. arXiv preprint arXiv:2305.10688 (2023)."},{"key":"e_1_3_2_2_26_1","unstructured":"OpenAI. 2024. Hello GPT-4o."},{"key":"e_1_3_2_2_27_1","volume-title":"Biot5: Enriching cross-modal integration in biology with chemical knowledge and natural language associations. arXiv preprint arXiv:2310.07276","author":"Pei Qizhi","year":"2023","unstructured":"Qizhi Pei, Wei Zhang, Jinhua Zhu, Kehan Wu, Kaiyuan Gao, Lijun Wu, Yingce Xia, and Rui Yan. 2023. Biot5: Enriching cross-modal integration in biology with chemical knowledge and natural language associations. arXiv preprint arXiv:2310.07276 (2023)."},{"key":"e_1_3_2_2_28_1","volume-title":"Self-supervised graph transformer on large-scale molecular data. Advances in neural information processing systems 33","author":"Rong Yu","year":"2020","unstructured":"Yu Rong, Yatao Bian, Tingyang Xu, Weiyang Xie, Ying Wei, Wenbing Huang, and Junzhou Huang. 2020. Self-supervised graph transformer on large-scale molecular data. Advances in neural information processing systems 33 (2020), 12559--12571."},{"key":"e_1_3_2_2_29_1","volume-title":"Markus Hagenbuchner, and Gabriele Monfardini.","author":"Scarselli Franco","year":"2008","unstructured":"Franco Scarselli, Marco Gori, Ah Chung Tsoi, Markus Hagenbuchner, and Gabriele Monfardini. 2008. The graph neural network model. IEEE transactions on neural networks 20, 1 (2008), 61--80."},{"key":"e_1_3_2_2_30_1","volume-title":"A molecular multimodal foundation model associating molecule graphs with natural language. arXiv preprint arXiv:2209.05481","author":"Su Bing","year":"2022","unstructured":"Bing Su, Dazhao Du, Zhao Yang, Yujie Zhou, Jiangmeng Li, Anyi Rao, Hao Sun, Zhiwu Lu, and Ji-Rong Wen. 2022. A molecular multimodal foundation model associating molecule graphs with natural language. arXiv preprint arXiv:2209.05481 (2022)."},{"key":"e_1_3_2_2_31_1","volume-title":"Galactica: A large language model for science. arXiv preprint arXiv:2211.09085","author":"Taylor Ross","year":"2022","unstructured":"Ross Taylor, Marcin Kardas, Guillem Cucurull, Thomas Scialom, Anthony Hartshorn, Elvis Saravia, Andrew Poulton, Viktor Kerkez, and Robert Stojnic. 2022. Galactica: A large language model for science. arXiv preprint arXiv:2211.09085 (2022)."},{"key":"e_1_3_2_2_32_1","first-page":"4","article-title":"Deep graph infomax","volume":"2","author":"Velickovic Petar","year":"2019","unstructured":"Petar Velickovic, William Fedus, William L Hamilton, Pietro Li\u00f2, Yoshua Bengio, and R Devon Hjelm. 2019. Deep graph infomax. ICLR (Poster) 2, 3 (2019), 4.","journal-title":"ICLR (Poster)"},{"key":"e_1_3_2_2_33_1","doi-asserted-by":"publisher","DOI":"10.1021\/ci00057a005"},{"key":"e_1_3_2_2_34_1","volume-title":"MoleculeNet: a benchmark for molecular machine learning. Chemical science 9, 2","author":"Wu Zhenqin","year":"2018","unstructured":"Zhenqin Wu, Bharath Ramsundar, Evan N Feinberg, Joseph Gomes, Caleb Geniesse, Aneesh S Pappu, Karl Leswing, and Vijay Pande. 2018. MoleculeNet: a benchmark for molecular machine learning. Chemical science 9, 2 (2018), 513--530."},{"key":"e_1_3_2_2_35_1","volume-title":"The Eleventh International Conference on Learning Representations.","author":"Xia Jun","year":"2022","unstructured":"Jun Xia, Chengshuai Zhao, Bozhen Hu, Zhangyang Gao, Cheng Tan, Yue Liu, Siyuan Li, and Stan Z Li. 2022. Mole-bert: Rethinking pre-training graph neural networks for molecules. In The Eleventh International Conference on Learning Representations."},{"key":"e_1_3_2_2_36_1","volume-title":"Chemical structure-aware molecular image representation learning. Briefings in Bioinformatics 24, 6","author":"Xiang Hongxin","year":"2023","unstructured":"Hongxin Xiang, Shuting Jin, Xiangrong Liu, Xiangxiang Zeng, and Li Zeng. 2023. Chemical structure-aware molecular image representation learning. Briefings in Bioinformatics 24, 6 (2023), bbad404."},{"key":"e_1_3_2_2_37_1","volume-title":"Multitask deep networks with grid featurization achieve improved scoring performance for protein--ligand binding. Chemical biology & drug design 96, 3","author":"Xie Liangxu","year":"2020","unstructured":"Liangxu Xie, Lei Xu, Shan Chang, Xiaojun Xu, and Li Meng. 2020. Multitask deep networks with grid featurization achieve improved scoring performance for protein--ligand binding. Chemical biology & drug design 96, 3 (2020), 973--983."},{"key":"e_1_3_2_2_38_1","volume-title":"How powerful are graph neural networks? arXiv preprint arXiv:1810.00826","author":"Xu Keyulu","year":"2018","unstructured":"Keyulu Xu,Weihua Hu, Jure Leskovec, and Stefanie Jegelka. 2018. How powerful are graph neural networks? arXiv preprint arXiv:1810.00826 (2018)."},{"key":"e_1_3_2_2_39_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i17.29902"},{"key":"e_1_3_2_2_40_1","doi-asserted-by":"publisher","DOI":"10.1021\/acs.jcim.9b00237"},{"key":"e_1_3_2_2_41_1","doi-asserted-by":"publisher","DOI":"10.1021\/acs.jcim.9b00237"},{"key":"e_1_3_2_2_42_1","doi-asserted-by":"publisher","DOI":"10.1093\/bioinformatics\/btac545"},{"key":"e_1_3_2_2_43_1","volume-title":"A survey on multimodal large language models. arXiv preprint arXiv:2306.13549","author":"Yin Shukang","year":"2023","unstructured":"Shukang Yin, Chaoyou Fu, Sirui Zhao, Ke Li, Xing Sun, Tong Xu, and Enhong Chen. 2023. A survey on multimodal large language models. arXiv preprint arXiv:2306.13549 (2023)."},{"key":"e_1_3_2_2_44_1","volume-title":"Do transformers really perform badly for graph representation? Advances in neural information processing systems 34","author":"Ying Chengxuan","year":"2021","unstructured":"Chengxuan Ying, Tianle Cai, Shengjie Luo, Shuxin Zheng, Guolin Ke, Di He, Yanming Shen, and Tie-Yan Liu. 2021. Do transformers really perform badly for graph representation? Advances in neural information processing systems 34 (2021), 28877--28888."},{"key":"e_1_3_2_2_45_1","doi-asserted-by":"publisher","DOI":"10.1038\/s42256-022-00557-6"},{"key":"e_1_3_2_2_46_1","volume-title":"A deep-learning system bridging molecule structure and biomedical text with comprehension comparable to human professionals. Nature communications 13, 1","author":"Zeng Zheni","year":"2022","unstructured":"Zheni Zeng, Yuan Yao, Zhiyuan Liu, and Maosong Sun. 2022. A deep-learning system bridging molecule structure and biomedical text with comprehension comparable to human professionals. Nature communications 13, 1 (2022), 862."},{"key":"e_1_3_2_2_47_1","volume-title":"Dockylin: A large multimodal model for visual document understanding with efficient visual slimming. arXiv preprint arXiv:2406.19101","author":"Zhang Jiaxin","year":"2024","unstructured":"Jiaxin Zhang, Wentao Yang, Songxuan Lai, Zecheng Xie, and Lianwen Jin. 2024. Dockylin: A large multimodal model for visual document understanding with efficient visual slimming. arXiv preprint arXiv:2406.19101 (2024)."},{"key":"e_1_3_2_2_48_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.cej.2020.127998"},{"key":"e_1_3_2_2_49_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11390-020-0142-x"}],"event":{"name":"MM '25: The 33rd ACM International Conference on Multimedia","sponsor":["SIGMM ACM Special Interest Group on Multimedia"],"location":"Dublin Ireland","acronym":"MM '25"},"container-title":["Proceedings of the 33rd ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3746027.3754578","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,10]],"date-time":"2025-12-10T04:12:52Z","timestamp":1765339972000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3746027.3754578"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,27]]},"references-count":49,"alternative-id":["10.1145\/3746027.3754578","10.1145\/3746027"],"URL":"https:\/\/doi.org\/10.1145\/3746027.3754578","relation":{},"subject":[],"published":{"date-parts":[[2025,10,27]]},"assertion":[{"value":"2025-10-27","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}