{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T05:03:39Z","timestamp":1750309419131,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":48,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,10,21]],"date-time":"2024-10-21T00:00:00Z","timestamp":1729468800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"name":"National Research Foundation of Korea (NRF) grant funded by the Korea government (MSIT)","award":["RS-2024-00335098"],"award-info":[{"award-number":["RS-2024-00335098"]}]},{"DOI":"10.13039\/https:\/\/doi.org\/10.13039\/501100014188","name":"Ministry of Science and ICT, South Korea","doi-asserted-by":"publisher","award":["NRF-2022M3J6A1063021"],"award-info":[{"award-number":["NRF-2022M3J6A1063021"]}],"id":[{"id":"10.13039\/https:\/\/doi.org\/10.13039\/501100014188","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Institute of Information & communications Technology Planning & Evaluation (IITP) grant funded by the Korea government(MSIT)","award":["2022-0-00077"],"award-info":[{"award-number":["2022-0-00077"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,10,21]]},"DOI":"10.1145\/3627673.3679607","type":"proceedings-article","created":{"date-parts":[[2024,10,20]],"date-time":"2024-10-20T19:34:11Z","timestamp":1729452851000},"page":"1153-1162","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Vision Language Model is NOT All You Need: Augmentation Strategies for Molecule Language Models"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-3995-1148","authenticated-orcid":false,"given":"Namkyeong","family":"Lee","sequence":"first","affiliation":[{"name":"KAIST, Daejeon, Republic of Korea"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1269-3778","authenticated-orcid":false,"given":"Siddhartha","family":"Laghuvarapu","sequence":"additional","affiliation":[{"name":"UIUC, Urbana, IL, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5957-5816","authenticated-orcid":false,"given":"Chanyoung","family":"Park","sequence":"additional","affiliation":[{"name":"KAIST, Daejeon, Republic of Korea"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1512-6426","authenticated-orcid":false,"given":"Jimeng","family":"Sun","sequence":"additional","affiliation":[{"name":"UIUC, Urbana, IL, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2024,10,21]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Diogo Almeida, Janko Altenschmidt, Sam Altman, Shyamal Anadkat, et al.","author":"Achiam Josh","year":"2023","unstructured":"Josh Achiam, Steven Adler, Sandhini Agarwal, Lama Ahmad, Ilge Akkaya, Florencia Leoni Aleman, Diogo Almeida, Janko Altenschmidt, Sam Altman, Shyamal Anadkat, et al. 2023. Gpt-4 technical report. arXiv preprint arXiv:2303.08774 (2023)."},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1186\/s13321-015-0069-3"},{"key":"e_1_3_2_1_3_1","volume-title":"SciBERT: A pretrained language model for scientific text. arXiv preprint arXiv:1903.10676","author":"Beltagy Iz","year":"2019","unstructured":"Iz Beltagy, Kyle Lo, and Arman Cohan. 2019. SciBERT: A pretrained language model for scientific text. arXiv preprint arXiv:1903.10676 (2019)."},{"key":"e_1_3_2_1_4_1","volume-title":"Unifying molecular and textual representations via multi-task language modelling. arXiv preprint arXiv:2301.12586","author":"Christofidellis Dimitrios","year":"2023","unstructured":"Dimitrios Christofidellis, Giorgio Giannone, Jannis Born, Ole Winther, Teodoro Laino, and Matteo Manica. 2023. Unifying molecular and textual representations via multi-task language modelling. arXiv preprint arXiv:2301.12586 (2023)."},{"key":"e_1_3_2_1_5_1","volume-title":"Comparative toxicogenomics database (CTD): update","author":"Davis Allan Peter","year":"2021","unstructured":"Allan Peter Davis, Cynthia J Grondin, Robin J Johnson, Daniela Sciaky, Jolene Wiegers, Thomas C Wiegers, and Carolyn J Mattingly. 2021. Comparative toxicogenomics database (CTD): update 2021. Nucleic acids research, Vol. 49, D1 (2021), D1138--D1143."},{"key":"e_1_3_2_1_6_1","volume-title":"Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805","author":"Devlin Jacob","year":"2018","unstructured":"Jacob Devlin, Ming-Wei Chang, Kenton Lee, and Kristina Toutanova. 2018. Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805 (2018)."},{"key":"e_1_3_2_1_7_1","volume-title":"Translation between molecules and natural language. arXiv preprint arXiv:2204.11817","author":"Edwards Carl","year":"2022","unstructured":"Carl Edwards, Tuan Lai, Kevin Ros, Garrett Honke, Kyunghyun Cho, and Heng Ji. 2022. Translation between molecules and natural language. arXiv preprint arXiv:2204.11817 (2022)."},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.emnlp-main.47"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1016\/S0011-393X(03)00059-6"},{"volume-title":"Drug-induced liver disease","author":"Hoofnagle Jay H","key":"e_1_3_2_1_10_1","unstructured":"Jay H Hoofnagle. 2013. LiverTox: a website on drug-induced liver injury. In Drug-induced liver disease. Elsevier, 725--732."},{"key":"e_1_3_2_1_11_1","volume-title":"Strategies for pre-training graph neural networks. arXiv preprint arXiv:1905.12265","author":"Hu Weihua","year":"2019","unstructured":"Weihua Hu, Bowen Liu, Joseph Gomes, Marinka Zitnik, Percy Liang, Vijay Pande, and Jure Leskovec. 2019. Strategies for pre-training graph neural networks. arXiv preprint arXiv:1905.12265 (2019)."},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1145\/3394486.3403237"},{"key":"e_1_3_2_1_13_1","volume-title":"Therapeutics data commons: Machine learning datasets and tasks for drug discovery and development. arXiv preprint arXiv:2102.09548","author":"Huang Kexin","year":"2021","unstructured":"Kexin Huang, Tianfan Fu, Wenhao Gao, Yue Zhao, Yusuf Roohani, Jure Leskovec, Connor W Coley, Cao Xiao, Jimeng Sun, and Marinka Zitnik. 2021. Therapeutics data commons: Machine learning datasets and tasks for drug discovery and development. arXiv preprint arXiv:2102.09548 (2021)."},{"key":"e_1_3_2_1_14_1","volume-title":"International conference on machine learning. PMLR, 4904--4916","author":"Jia Chao","year":"2021","unstructured":"Chao Jia, Yinfei Yang, Ye Xia, Yi-Ting Chen, Zarana Parekh, Hieu Pham, Quoc Le, Yun-Hsuan Sung, Zhen Li, and Tom Duerig. 2021. Scaling up visual and vision-language representation learning with noisy text supervision. In International conference on machine learning. PMLR, 4904--4916."},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"crossref","unstructured":"Sunghwan Kim Jie Chen Tiejun Cheng Asta Gindulyte Jia He Siqian He Qingliang Li Benjamin A Shoemaker Paul A Thiessen Bo Yu et al. 2021. PubChem in 2021: new data content and improved web interfaces. Nucleic acids research Vol. 49 D1 (2021) D1388--D1395.","DOI":"10.1093\/nar\/gkaa971"},{"key":"e_1_3_2_1_16_1","volume-title":"International Conference on Machine Learning. PMLR","author":"Lee Namkyeong","year":"2023","unstructured":"Namkyeong Lee, Dongmin Hyun, Gyoung S Na, Sungwon Kim, Junseok Lee, and Chanyoung Park. 2023. Conditional graph information bottleneck for molecular relational learning. In International Conference on Machine Learning. PMLR, 18852--18871."},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v36i7.20700"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.ins.2022.08.063"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1145\/3580305.3599437"},{"key":"e_1_3_2_1_20_1","volume-title":"Blip-2: Bootstrapping language-image pre-training with frozen image encoders and large language models. arXiv preprint arXiv:2301.12597","author":"Li Junnan","year":"2023","unstructured":"Junnan Li, Dongxu Li, Silvio Savarese, and Steven Hoi. 2023. Blip-2: Bootstrapping language-image pre-training with frozen image encoders and large language models. arXiv preprint arXiv:2301.12597 (2023)."},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1038\/s42256-023-00759-6"},{"key":"e_1_3_2_1_22_1","volume-title":"Pre-training molecular graph representation with 3d geometry. arXiv preprint arXiv:2110.07728","author":"Liu Shengchao","year":"2021","unstructured":"Shengchao Liu, Hanchen Wang, Weiyang Liu, Joan Lasenby, Hongyu Guo, and Jian Tang. 2021. Pre-training molecular graph representation with 3d geometry. arXiv preprint arXiv:2110.07728 (2021)."},{"key":"e_1_3_2_1_23_1","volume-title":"Molca: Molecular graph-language modeling with cross-modal projector and uni-modal adapter. arXiv preprint arXiv:2310.12798","author":"Liu Zhiyuan","year":"2023","unstructured":"Zhiyuan Liu, Sihang Li, Yanchen Luo, Hao Fei, Yixin Cao, Kenji Kawaguchi, Xiang Wang, and Tat-Seng Chua. 2023. Molca: Molecular graph-language modeling with cross-modal projector and uni-modal adapter. arXiv preprint arXiv:2310.12798 (2023)."},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1021\/jm020155c"},{"key":"e_1_3_2_1_25_1","volume-title":"Memes: Machine learning framework for enhanced molecular screening. Chemical science","author":"Mehta Sarvesh","year":"2021","unstructured":"Sarvesh Mehta, Siddhartha Laghuvarapu, Yashaswi Pathak, Aaftaab Sethi, Mallika Alvala, and U Deva Priyakumar. 2021. Memes: Machine learning framework for enhanced molecular screening. Chemical science, Vol. 12, 35 (2021), 11710--11721."},{"key":"e_1_3_2_1_26_1","volume-title":"Representation learning with contrastive predictive coding. arXiv preprint arXiv:1807.03748","author":"van den Oord Aaron","year":"2018","unstructured":"Aaron van den Oord, Yazhe Li, and Oriol Vinyals. 2018. Representation learning with contrastive predictive coding. arXiv preprint arXiv:1807.03748 (2018)."},{"key":"e_1_3_2_1_27_1","volume-title":"BioT5: Enriching Cross-modal Integration in Biology with Chemical Knowledge and Natural Language Associations. arXiv preprint arXiv:2310.07276","author":"Pei Qizhi","year":"2023","unstructured":"Qizhi Pei, Wei Zhang, Jinhua Zhu, Kehan Wu, Kaiyuan Gao, Lijun Wu, Yingce Xia, and Rui Yan. 2023. BioT5: Enriching Cross-modal Integration in Biology with Chemical Knowledge and Natural Language Associations. arXiv preprint arXiv:2310.07276 (2023)."},{"key":"e_1_3_2_1_28_1","volume-title":"International conference on machine learning. PMLR, 8748--8763","author":"Radford Alec","year":"2021","unstructured":"Alec Radford, Jong Wook Kim, Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, et al. 2021. Learning transferable visual models from natural language supervision. In International conference on machine learning. PMLR, 8748--8763."},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.5555\/3455716.3455856"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1021\/ci100050t"},{"key":"e_1_3_2_1_31_1","first-page":"12559","article-title":"Self-supervised graph transformer on large-scale molecular data","volume":"33","author":"Rong Yu","year":"2020","unstructured":"Yu Rong, Yatao Bian, Tingyang Xu, Weiyang Xie, Ying Wei, Wenbing Huang, and Junzhou Huang. 2020. Self-supervised graph transformer on large-scale molecular data. Advances in Neural Information Processing Systems, Vol. 33 (2020), 12559--12571.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_32_1","volume-title":"Enhancing activity prediction models in drug discovery with the ability to understand human language. arXiv preprint arXiv:2303.03363","author":"Seidl Philipp","year":"2023","unstructured":"Philipp Seidl, Andreu Vall, Sepp Hochreiter, and G\u00fcnter Klambauer. 2023. Enhancing activity prediction models in drug discovery with the ability to understand human language. arXiv preprint arXiv:2303.03363 (2023)."},{"key":"e_1_3_2_1_33_1","volume-title":"A molecular multimodal foundation model associating molecule graphs with natural language. arXiv preprint arXiv:2209.05481","author":"Su Bing","year":"2022","unstructured":"Bing Su, Dazhao Du, Zhao Yang, Yujie Zhou, Jiangmeng Li, Anyi Rao, Hao Sun, Zhiwu Lu, and Ji-Rong Wen. 2022. A molecular multimodal foundation model associating molecule graphs with natural language. arXiv preprint arXiv:2209.05481 (2022)."},{"key":"e_1_3_2_1_34_1","volume-title":"Infograph: Unsupervised and semi-supervised graph-level representation learning via mutual information maximization. arXiv preprint arXiv:1908.01000","author":"Sun Fan-Yun","year":"2019","unstructured":"Fan-Yun Sun, Jordan Hoffmann, Vikas Verma, and Jian Tang. 2019. Infograph: Unsupervised and semi-supervised graph-level representation learning via mutual information maximization. arXiv preprint arXiv:1908.01000 (2019)."},{"key":"e_1_3_2_1_35_1","volume-title":"PharmGKB: the pharmacogenomics knowledge base","author":"Thorn Caroline F","year":"2013","unstructured":"Caroline F Thorn, Teri E Klein, and Russ B Altman. 2013. PharmGKB: the pharmacogenomics knowledge base. Pharmacogenomics: Methods and Protocols (2013), 311--320."},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1021\/acs.jcim.0c00155"},{"key":"e_1_3_2_1_37_1","volume-title":"David Junhao Zhang, Stan Weixian Lei, and Mike Zheng Shou.","author":"Wang Alex Jinpeng","year":"2023","unstructured":"Alex Jinpeng Wang, Kevin Qinghong Lin, David Junhao Zhang, Stan Weixian Lei, and Mike Zheng Shou. 2023. Too Large; Data Reduction for Vision-Language Pre-Training. arXiv preprint arXiv:2305.20087 (2023)."},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00252"},{"key":"e_1_3_2_1_39_1","volume-title":"Andreea Deac, et al.","author":"Wang Hanchen","year":"2023","unstructured":"Hanchen Wang, Tianfan Fu, Yuanqi Du, Wenhao Gao, Kexin Huang, Ziming Liu, Payal Chandak, Shengchao Liu, Peter Van Katwyk, Andreea Deac, et al. 2023. Scientific discovery in the age of artificial intelligence. Nature, Vol. 620, 7972 (2023), 47--60."},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1021\/ci00057a005"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"crossref","unstructured":"David S Wishart Yannick D Feunang An C Guo Elvis J Lo Ana Marcu Jason R Grant Tanvir Sajed Daniel Johnson Carin Li Zinat Sayeeda et al. 2018. DrugBank 5.0: a major update to the DrugBank database for 2018. Nucleic acids research Vol. 46 D1 (2018) D1074--D1082.","DOI":"10.1093\/nar\/gkx1037"},{"key":"e_1_3_2_1_42_1","volume-title":"MoleculeNet: a benchmark for molecular machine learning. Chemical science","author":"Wu Zhenqin","year":"2018","unstructured":"Zhenqin Wu, Bharath Ramsundar, Evan N Feinberg, Joseph Gomes, Caleb Geniesse, Aneesh S Pappu, Karl Leswing, and Vijay Pande. 2018. MoleculeNet: a benchmark for molecular machine learning. Chemical science, Vol. 9, 2 (2018), 513--530."},{"key":"e_1_3_2_1_43_1","volume-title":"The Eleventh International Conference on Learning Representations.","author":"Xia Jun","year":"2022","unstructured":"Jun Xia, Chengshuai Zhao, Bozhen Hu, Zhangyang Gao, Cheng Tan, Yue Liu, Siyuan Li, and Stan Z Li. 2022. Mole-bert: Rethinking pre-training graph neural networks for molecules. In The Eleventh International Conference on Learning Representations."},{"key":"e_1_3_2_1_44_1","volume-title":"How powerful are graph neural networks? arXiv preprint arXiv:1810.00826","author":"Xu Keyulu","year":"2018","unstructured":"Keyulu Xu, Weihua Hu, Jure Leskovec, and Stefanie Jegelka. 2018. How powerful are graph neural networks? arXiv preprint arXiv:1810.00826 (2018)."},{"key":"e_1_3_2_1_45_1","volume-title":"Coca: Contrastive captioners are image-text foundation models. arXiv preprint arXiv:2205.01917","author":"Yu Jiahui","year":"2022","unstructured":"Jiahui Yu, Zirui Wang, Vijay Vasudevan, Legg Yeung, Mojtaba Seyedhosseini, and Yonghui Wu. 2022. Coca: Contrastive captioners are image-text foundation models. arXiv preprint arXiv:2205.01917 (2022)."},{"key":"e_1_3_2_1_46_1","volume-title":"A deep-learning system bridging molecule structure and biomedical text with comprehension comparable to human professionals. Nature communications","author":"Zeng Zheni","year":"2022","unstructured":"Zheni Zeng, Yuan Yao, Zhiyuan Liu, and Maosong Sun. 2022. A deep-learning system bridging molecule structure and biomedical text with comprehension comparable to human professionals. Nature communications, Vol. 13, 1 (2022), 862."},{"key":"e_1_3_2_1_47_1","unstructured":"Xuan Zhang Limei Wang Jacob Helwig Youzhi Luo Cong Fu Yaochen Xie Meng Liu Yuchao Lin Zhao Xu Keqiang Yan et al. 2023. Artificial Intelligence for Science in Quantum Atomistic and Continuum Systems. arXiv preprint arXiv:2307.08423 (2023)."},{"key":"e_1_3_2_1_48_1","volume-title":"GIMLET: A Unified Graph-Text Model for Instruction-Based Molecule Zero-Shot Learning. bioRxiv","author":"Zhao Haiteng","year":"2023","unstructured":"Haiteng Zhao, Shengchao Liu, Chang Ma, Hannan Xu, Jie Fu, Zhi-Hong Deng, Lingpeng Kong, and Qi Liu. 2023. GIMLET: A Unified Graph-Text Model for Instruction-Based Molecule Zero-Shot Learning. bioRxiv (2023), 2023--05."}],"event":{"name":"CIKM '24: The 33rd ACM International Conference on Information and Knowledge Management","sponsor":["SIGIR ACM Special Interest Group on Information Retrieval"],"location":"Boise ID USA","acronym":"CIKM '24"},"container-title":["Proceedings of the 33rd ACM International Conference on Information and Knowledge Management"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3627673.3679607","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3627673.3679607","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T00:58:23Z","timestamp":1750294703000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3627673.3679607"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,21]]},"references-count":48,"alternative-id":["10.1145\/3627673.3679607","10.1145\/3627673"],"URL":"https:\/\/doi.org\/10.1145\/3627673.3679607","relation":{},"subject":[],"published":{"date-parts":[[2024,10,21]]},"assertion":[{"value":"2024-10-21","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}