{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T05:05:58Z","timestamp":1750309558950,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":56,"publisher":"ACM","license":[{"start":{"date-parts":[[2025,4,22]],"date-time":"2025-04-22T00:00:00Z","timestamp":1745280000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"name":"eBay Inc."}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,4,28]]},"DOI":"10.1145\/3696410.3714714","type":"proceedings-article","created":{"date-parts":[[2025,4,22]],"date-time":"2025-04-22T22:52:18Z","timestamp":1745362338000},"page":"4853-4862","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":1,"title":["Hypergraph-based Zero-shot Multi-modal Product Attribute Value Extraction"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0007-6950-1910","authenticated-orcid":false,"given":"Jiazhen","family":"Hu","sequence":"first","affiliation":[{"name":"Virginia Tech, Blacksburg, Virginia, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8945-6909","authenticated-orcid":false,"given":"Jiaying","family":"Gong","sequence":"additional","affiliation":[{"name":"eBay Inc., New York City, New York, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-5002-272X","authenticated-orcid":false,"given":"Hongda","family":"Shen","sequence":"additional","affiliation":[{"name":"eBay Inc., New York City, New York, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9712-6667","authenticated-orcid":false,"given":"Hoda","family":"Eldardiry","sequence":"additional","affiliation":[{"name":"Virginia Tech, Blacksburg, Virginia, USA"}]}],"member":"320","published-online":{"date-parts":[[2025,4,22]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Qwen-vl: A frontier large vision-language model with versatile abilities. arXiv preprint arXiv:2308.12966","author":"Bai Jinze","year":"2023","unstructured":"Jinze Bai, Shuai Bai, Shusheng Yang, Shijie Wang, Sinan Tan, Peng Wang, Junyang Lin, Chang Zhou, and Jingren Zhou. 2023. Qwen-vl: A frontier large vision-language model with versatile abilities. arXiv preprint arXiv:2308.12966 (2023)."},{"key":"e_1_3_2_1_2_1","volume-title":"Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing: Industry Track. 575--585","author":"Blume Ansel","year":"2023","unstructured":"Ansel Blume, Nasser Zalmout, Heng Ji, and Xian Li. 2023. Generative models for product attribute extraction. In Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing: Industry Track. 575--585."},{"key":"e_1_3_2_1_3_1","volume-title":"SynerGraph: An Integrated Graph Convolution Network for Multimodal Recommendation. arXiv preprint arXiv:2405.19031","author":"Burabak Mert","year":"2024","unstructured":"Mert Burabak and Tevfik Aytekin. 2024. SynerGraph: An Integrated Graph Convolution Network for Multimodal Recommendation. arXiv preprint arXiv:2405.19031 (2024)."},{"key":"e_1_3_2_1_4_1","volume-title":"Zero-Shot Relational Learning for Multimodal Knowledge Graphs. arXiv preprint arXiv:2404.06220","author":"Cai Rui","year":"2024","unstructured":"Rui Cai, Shichao Pei, and Xiangliang Zhang. 2024. Zero-Shot Relational Learning for Multimodal Knowledge Graphs. arXiv preprint arXiv:2404.06220 (2024)."},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1002\/widm.1488"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1"},{"key":"e_1_3_2_1_7_1","volume-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. 24185--24198","author":"Chen Zhe","year":"2024","unstructured":"Zhe Chen, Jiannan Wu, Wenhai Wang, Weijie Su, Guo Chen, Sen Xing, Muyan Zhong, Qinglong Zhang, Xizhou Zhu, Lewei Lu, et al. 2024. Internvl: Scaling up vision foundation models and aligning for generic visual-linguistic tasks. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. 24185--24198."},{"key":"e_1_3_2_1_8_1","volume-title":"Vicuna: An open-source chatbot impressing gpt-4 with 90%* chatgpt quality. See https:\/\/vicuna. lmsys. org (accessed","author":"Chiang Wei-Lin","year":"2023","unstructured":"Wei-Lin Chiang, Zhuohan Li, Zi Lin, Ying Sheng, Zhanghao Wu, Hao Zhang, Lianmin Zheng, Siyuan Zhuang, Yonghao Zhuang, Joseph E Gonzalez, et al. 2023. Vicuna: An open-source chatbot impressing gpt-4 with 90%* chatgpt quality. See https:\/\/vicuna. lmsys. org (accessed 14 April 2023), Vol. 2, 3 (2023), 6."},{"key":"e_1_3_2_1_9_1","first-page":"1","article-title":"Scaling instruction-finetuned language models","volume":"25","author":"Chung Hyung Won","year":"2024","unstructured":"Hyung Won Chung, Le Hou, Shayne Longpre, Barret Zoph, Yi Tay, William Fedus, Yunxuan Li, Xuezhi Wang, Mostafa Dehghani, Siddhartha Brahma, et al. 2024. Scaling instruction-finetuned language models. Journal of Machine Learning Research, Vol. 25, 70 (2024), 1--53.","journal-title":"Journal of Machine Learning Research"},{"key":"e_1_3_2_1_10_1","volume-title":"Multi-modal attribute extraction for e-commerce. arXiv preprint arXiv:2203.03441","author":"la Comble Alo\u00efs De","year":"2022","unstructured":"Alo\u00efs De la Comble, Anuvabh Dutt, Pablo Montalvo, and Aghiles Salah. 2022. Multi-modal attribute extraction for e-commerce. arXiv preprint arXiv:2203.03441 (2022)."},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/BigData55660.2022.10020304"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/BigData55660.2022.10020304"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1038\/s42256-023-00624-6"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1145\/3626772.3661357"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1145\/3583780.3615142"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1145\/3589334.3645649"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1145\/3422622"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i8.28688"},{"key":"e_1_3_2_1_19_1","volume-title":"Inductive representation learning on large graphs. Advances in neural information processing systems","author":"Hamilton Will","year":"2017","unstructured":"Will Hamilton, Zhitao Ying, and Jure Leskovec. 2017. Inductive representation learning on large graphs. Advances in neural information processing systems, Vol. 30 (2017)."},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00240"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1109\/TMI.2023.3253760"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1038\/s41598-023-46625-8"},{"key":"e_1_3_2_1_23_1","volume-title":"Jean-Fran\u00e7ois Pessiot, Johanes Effendi, Justin Chiu, et al.","author":"Levine Aaron","year":"2024","unstructured":"Aaron Levine, Connie Huang, Chenguang Wang, Eduardo Batista, Ewa Szymanska, Hongyi Ding, Hou Wei Chou, Jean-Fran\u00e7ois Pessiot, Johanes Effendi, Justin Chiu, et al. 2024. Rakutenai-7b: Extending large language models for japanese. arXiv e-prints (2024), arXiv--2403."},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.emnlp-main.996"},{"key":"e_1_3_2_1_25_1","volume-title":"International conference on machine learning. PMLR","author":"Li Junnan","year":"2023","unstructured":"Junnan Li, Dongxu Li, Silvio Savarese, and Steven Hoi. 2023a. Blip-2: Bootstrapping language-image pre-training with frozen image encoders and large language models. In International conference on machine learning. PMLR, 19730--19742."},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1145\/3447548.3467164"},{"key":"e_1_3_2_1_27_1","volume-title":"Visual instruction tuning. Advances in neural information processing systems","author":"Liu Haotian","year":"2024","unstructured":"Haotian Liu, Chunyuan Li, Qingyang Wu, and Yong Jae Lee. 2024. Visual instruction tuning. Advances in neural information processing systems, Vol. 36 (2024)."},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2023.3251108"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2022.3163667"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW.2018.00294"},{"key":"e_1_3_2_1_31_1","volume-title":"Learning on Multimodal Graphs: A Survey. arXiv preprint arXiv:2402.05322","author":"Peng Ciyuan","year":"2024","unstructured":"Ciyuan Peng, Jiayuan He, and Feng Xia. 2024. Learning on Multimodal Graphs: A Survey. arXiv preprint arXiv:2402.05322 (2024)."},{"key":"e_1_3_2_1_32_1","volume-title":"International conference on machine learning. PMLR, 8748--8763","author":"Radford Alec","year":"2021","unstructured":"Alec Radford, Jong Wook Kim, Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, et al. 2021. Learning transferable visual models from natural language supervision. In International conference on machine learning. PMLR, 8748--8763."},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.eswa.2023.122850"},{"key":"e_1_3_2_1_34_1","volume-title":"Exploring Large Language Models for Product Attribute Value Identification. arXiv preprint arXiv:2409.12695","author":"Sabeh Kassem","year":"2024","unstructured":"Kassem Sabeh, Mouna Kacimi, Johann Gamper, Robert Litschko, and Barbara Plank. 2024. Exploring Large Language Models for Product Attribute Value Identification. arXiv preprint arXiv:2409.12695 (2024)."},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i8.28744"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.findings-acl.413"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1109\/TKDE.2024.3399746"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1145\/3394486.3403047"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.findings-emnlp.20"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1145\/3581783.3612266"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1145\/3372278.3390713"},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1145\/3580305.3599486"},{"key":"e_1_3_2_1_43_1","volume-title":"A depression detection model based on multimodal graph neural network. Multimedia Tools and Applications","author":"Xia Yujing","year":"2024","unstructured":"Yujing Xia, Lin Liu, Tao Dong, Juan Chen, Yu Cheng, and Lin Tang. 2024. A depression detection model based on multimodal graph neural network. Multimedia Tools and Applications (2024), 1--17."},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P19-1514"},{"key":"e_1_3_2_1_45_1","volume-title":"Adatag: Multi-attribute value extraction from product profiles with adaptive decoding. arXiv preprint arXiv:2106.02318","author":"Yan Jun","year":"2021","unstructured":"Jun Yan, Nasser Zalmout, Yan Liang, Christan Grant, Xiang Ren, and Xin Luna Dong. 2021. Adatag: Multi-attribute value extraction from product profiles with adaptive decoding. arXiv preprint arXiv:2106.02318 (2021)."},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.findings-acl.633"},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1145\/3488560.3498377"},{"key":"e_1_3_2_1_48_1","volume-title":"Multimodal Fusion via Hypergraph Autoencoder and Contrastive Learning for Emotion Recognition in Conversation. arXiv preprint arXiv:2408.00970","author":"Yi Zijian","year":"2024","unstructured":"Zijian Yi, Ziming Zhao, Zhishu Shen, and Tiehua Zhang. 2024. Multimodal Fusion via Hypergraph Autoencoder and Contrastive Learning for Emotion Recognition in Conversation. arXiv preprint arXiv:2408.00970 (2024)."},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v37i3.25445"},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01100"},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.findings-acl.831"},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2022.3214102"},{"key":"e_1_3_2_1_53_1","volume-title":"DreamLIP: Language-Image Pre-training with Long Captions. arXiv preprint arXiv:2403.17007","author":"Zheng Kecheng","year":"2024","unstructured":"Kecheng Zheng, Yifei Zhang, Wei Wu, Fan Lu, Shuailei Ma, Xin Jin, Wei Chen, and Yujun Shen. 2024. DreamLIP: Language-Image Pre-training with Long Captions. arXiv preprint arXiv:2403.17007 (2024)."},{"key":"e_1_3_2_1_54_1","volume-title":"Multimodal Graph Benchmark. arXiv preprint arXiv:2406.16321","author":"Zhu Jing","year":"2024","unstructured":"Jing Zhu, Yuhang Zhou, Shengyi Qian, Zhongmou He, Tong Zhao, Neil Shah, and Danai Koutra. 2024. Multimodal Graph Benchmark. arXiv preprint arXiv:2406.16321 (2024)."},{"key":"e_1_3_2_1_55_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1"},{"key":"e_1_3_2_1_56_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1"}],"event":{"name":"WWW '25: The ACM Web Conference 2025","sponsor":["SIGWEB ACM Special Interest Group on Hypertext, Hypermedia, and Web"],"location":"Sydney NSW Australia","acronym":"WWW '25"},"container-title":["Proceedings of the ACM on Web Conference 2025"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3696410.3714714","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3696410.3714714","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T01:18:57Z","timestamp":1750295937000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3696410.3714714"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,4,22]]},"references-count":56,"alternative-id":["10.1145\/3696410.3714714","10.1145\/3696410"],"URL":"https:\/\/doi.org\/10.1145\/3696410.3714714","relation":{},"subject":[],"published":{"date-parts":[[2025,4,22]]},"assertion":[{"value":"2025-04-22","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}