{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,28]],"date-time":"2026-04-28T07:44:43Z","timestamp":1777362283323,"version":"3.51.4"},"publisher-location":"New York, NY, USA","reference-count":27,"publisher":"ACM","license":[{"start":{"date-parts":[[2021,8,14]],"date-time":"2021-08-14T00:00:00Z","timestamp":1628899200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"NSF BigData program under CNS-1952192, IIS-1838200"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2021,8,14]]},"DOI":"10.1145\/3447548.3467164","type":"proceedings-article","created":{"date-parts":[[2021,8,12]],"date-time":"2021-08-12T06:13:10Z","timestamp":1628748790000},"page":"3262-3270","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":18,"title":["PAM: Understanding Product Images in Cross Product Category Attribute Extraction"],"prefix":"10.1145","author":[{"given":"Rongmei","family":"Lin","sequence":"first","affiliation":[{"name":"Emory University, Atlanta, GA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xiang","family":"He","sequence":"additional","affiliation":[{"name":"Amazon, Seattle, WA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jie","family":"Feng","sequence":"additional","affiliation":[{"name":"Amazon, Seattle, WA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Nasser","family":"Zalmout","sequence":"additional","affiliation":[{"name":"Amazon, Seattle, WA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yan","family":"Liang","sequence":"additional","affiliation":[{"name":"Amazon, Seattle, WA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Li","family":"Xiong","sequence":"additional","affiliation":[{"name":"Emory University, Atlanta, GA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xin Luna","family":"Dong","sequence":"additional","affiliation":[{"name":"Amazon, Seattle, WA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2021,8,14]]},"reference":[{"key":"e_1_3_2_2_1_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2014.2339814"},{"key":"e_1_3_2_2_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00636"},{"key":"e_1_3_2_2_3_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.279"},{"key":"e_1_3_2_2_4_1","doi-asserted-by":"publisher","DOI":"10.1162\/tacl_a_00051"},{"key":"e_1_3_2_2_5_1","volume-title":"International conference on machine learning. PMLR","author":"Chung Junyoung","year":"2015","unstructured":"Junyoung Chung , Caglar Gulcehre , Kyunghyun Cho , and Yoshua Bengio . 2015 . Gated feedback recurrent neural networks . In International conference on machine learning. PMLR , 2067--2075. Junyoung Chung, Caglar Gulcehre, Kyunghyun Cho, and Yoshua Bengio. 2015. Gated feedback recurrent neural networks. In International conference on machine learning. PMLR, 2067--2075."},{"key":"e_1_3_2_2_6_1","doi-asserted-by":"publisher","DOI":"10.1145\/3394486.3403323"},{"key":"e_1_3_2_2_7_1","volume-title":"Garnett (Eds.)","volume":"32","author":"Emelianenko Dmitrii","year":"2019","unstructured":"Dmitrii Emelianenko , Elena Voita , and Pavel Serdyukov . 2019 . Sequence Modeling with Unconstrained Generation Order. In Advances in Neural Information Processing Systems, H. Wallach, H. Larochelle, A. Beygelzimer, F. dtextquotesingle Alch\u00e9-Buc, E. Fox, and R . Garnett (Eds.) , Vol. 32 . Curran Associates, Inc., 7700--7711. https:\/\/proceedings.neurips.cc\/paper\/ 2019\/file\/1558417b096b5d8e7cbe0183ea9cbf26-Paper.pdf Dmitrii Emelianenko, Elena Voita, and Pavel Serdyukov. 2019. Sequence Modeling with Unconstrained Generation Order. In Advances in Neural Information Processing Systems, H. Wallach, H. Larochelle, A. Beygelzimer, F. dtextquotesingle Alch\u00e9-Buc, E. Fox, and R. Garnett (Eds.), Vol. 32. Curran Associates, Inc., 7700--7711. https:\/\/proceedings.neurips.cc\/paper\/2019\/file\/1558417b096b5d8e7cbe0183ea9cbf26-Paper.pdf"},{"key":"e_1_3_2_2_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"e_1_3_2_2_9_1","volume-title":"The Curious Case of Neural Text Degeneration. In International Conference on Learning Representations.","author":"Holtzman Ari","year":"2019","unstructured":"Ari Holtzman , Jan Buys , Li Du , Maxwell Forbes , and Yejin Choi . 2019 . The Curious Case of Neural Text Degeneration. In International Conference on Learning Representations. Ari Holtzman, Jan Buys, Li Du, Maxwell Forbes, and Yejin Choi. 2019. The Curious Case of Neural Text Degeneration. In International Conference on Learning Representations."},{"key":"e_1_3_2_2_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01001"},{"key":"e_1_3_2_2_11_1","volume-title":"Bidirectional LSTM-CRF models for sequence tagging. arXiv preprint arXiv:1508.01991","author":"Huang Zhiheng","year":"2015","unstructured":"Zhiheng Huang , Wei Xu , and Kai Yu. 2015. Bidirectional LSTM-CRF models for sequence tagging. arXiv preprint arXiv:1508.01991 ( 2015 ). Zhiheng Huang, Wei Xu, and Kai Yu. 2015. Bidirectional LSTM-CRF models for sequence tagging. arXiv preprint arXiv:1508.01991 (2015)."},{"key":"e_1_3_2_2_12_1","volume-title":"Txtract: Taxonomy-aware knowledge extraction for thousands of product categories. arXiv preprint arXiv:2004.13852","author":"Karamanolakis Giannis","year":"2020","unstructured":"Giannis Karamanolakis , Jun Ma , and Xin Luna Dong . 2020 . Txtract: Taxonomy-aware knowledge extraction for thousands of product categories. arXiv preprint arXiv:2004.13852 (2020). Giannis Karamanolakis, Jun Ma, and Xin Luna Dong. 2020. Txtract: Taxonomy-aware knowledge extraction for thousands of product categories. arXiv preprint arXiv:2004.13852 (2020)."},{"key":"e_1_3_2_2_13_1","volume-title":"Proceedings of the 32nd International Conference on Neural Information Processing Systems. 1571--1581","author":"Kim Jin-Hwa","year":"2018","unstructured":"Jin-Hwa Kim , Jaehyun Jun , and Byoung-Tak Zhang . 2018 . Bilinear attention networks . In Proceedings of the 32nd International Conference on Neural Information Processing Systems. 1571--1581 . Jin-Hwa Kim, Jaehyun Jun, and Byoung-Tak Zhang. 2018. Bilinear attention networks. In Proceedings of the 32nd International Conference on Neural Information Processing Systems. 1571--1581."},{"key":"e_1_3_2_2_14_1","doi-asserted-by":"crossref","unstructured":"Ranjay Krishna Yuke Zhu Oliver Groth Justin Johnson Kenji Hata Joshua Kravitz Stephanie Chen Yannis Kalantidis Li-Jia Li David A Shamma etal 2017. Visual genome: Connecting language and vision using crowdsourced dense image annotations. International journal of computer vision Vol. 123 1 (2017) 32--73.  Ranjay Krishna Yuke Zhu Oliver Groth Justin Johnson Kenji Hata Joshua Kravitz Stephanie Chen Yannis Kalantidis Li-Jia Li David A Shamma et al. 2017. Visual genome: Connecting language and vision using crowdsourced dense image annotations. International journal of computer vision Vol. 123 1 (2017) 32--73.","DOI":"10.1007\/s11263-016-0981-7"},{"key":"e_1_3_2_2_15_1","volume-title":"Tim Rockt\u00e4schel, Sebastian Riedel, and Douwe Kiela.","author":"Lewis Patrick","year":"2020","unstructured":"Patrick Lewis , Ethan Perez , Aleksandara Piktus , Fabio Petroni , Vladimir Karpukhin , Naman Goyal , Heinrich K\u00fcttler , Mike Lewis , Wen tau Yih , Tim Rockt\u00e4schel, Sebastian Riedel, and Douwe Kiela. 2020 . Retrieval-Augmented Generation for Knowledge-Intensive NLP Tasks . arxiv: 2005.11401 [cs.CL] Patrick Lewis, Ethan Perez, Aleksandara Piktus, Fabio Petroni, Vladimir Karpukhin, Naman Goyal, Heinrich K\u00fcttler, Mike Lewis, Wen tau Yih, Tim Rockt\u00e4schel, Sebastian Riedel, and Douwe Kiela. 2020. Retrieval-Augmented Generation for Knowledge-Intensive NLP Tasks. arxiv: 2005.11401 [cs.CL]"},{"key":"e_1_3_2_2_16_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58621-8_41"},{"key":"e_1_3_2_2_17_1","volume-title":"Vilbert: Pretraining task-agnostic visiolinguistic representations for vision-and-language tasks. arXiv preprint arXiv:1908.02265","author":"Lu Jiasen","year":"2019","unstructured":"Jiasen Lu , Dhruv Batra , Devi Parikh , and Stefan Lee . 2019 . Vilbert: Pretraining task-agnostic visiolinguistic representations for vision-and-language tasks. arXiv preprint arXiv:1908.02265 (2019). Jiasen Lu, Dhruv Batra, Devi Parikh, and Stefan Lee. 2019. Vilbert: Pretraining task-agnostic visiolinguistic representations for vision-and-language tasks. arXiv preprint arXiv:1908.02265 (2019)."},{"key":"e_1_3_2_2_18_1","volume-title":"Faster r-cnn: Towards real-time object detection with region proposal networks","author":"Ren Shaoqing","year":"2016","unstructured":"Shaoqing Ren , Kaiming He , Ross Girshick , and Jian Sun . 2016. Faster r-cnn: Towards real-time object detection with region proposal networks . IEEE transactions on pattern analysis and machine intelligence, Vol. 39 , 6 ( 2016 ), 1137--1149. Shaoqing Ren, Kaiming He, Ross Girshick, and Jian Sun. 2016. Faster r-cnn: Towards real-time object detection with region proposal networks. IEEE transactions on pattern analysis and machine intelligence, Vol. 39, 6 (2016), 1137--1149."},{"key":"e_1_3_2_2_19_1","doi-asserted-by":"crossref","unstructured":"Amanpreet Singh Vivek Natarajan Meet Shah Yu Jiang Xinlei Chen Dhruv Batra Devi Parikh and Marcus Rohrbach. 2019. Towards vqa models that can read. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. 8317--8326.  Amanpreet Singh Vivek Natarajan Meet Shah Yu Jiang Xinlei Chen Dhruv Batra Devi Parikh and Marcus Rohrbach. 2019. Towards vqa models that can read. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. 8317--8326.","DOI":"10.1109\/CVPR.2019.00851"},{"key":"e_1_3_2_2_20_1","volume-title":"Lxmert: Learning cross-modality encoder representations from transformers. arXiv preprint arXiv:1908.07490","author":"Tan Hao","year":"2019","unstructured":"Hao Tan and Mohit Bansal . 2019 . Lxmert: Learning cross-modality encoder representations from transformers. arXiv preprint arXiv:1908.07490 (2019). Hao Tan and Mohit Bansal. 2019. Lxmert: Learning cross-modality encoder representations from transformers. arXiv preprint arXiv:1908.07490 (2019)."},{"key":"e_1_3_2_2_21_1","volume-title":"Attention is all you need. arXiv preprint arXiv:1706.03762","author":"Vaswani Ashish","year":"2017","unstructured":"Ashish Vaswani , Noam Shazeer , Niki Parmar , Jakob Uszkoreit , Llion Jones , Aidan N Gomez , Lukasz Kaiser , and Illia Polosukhin . 2017. Attention is all you need. arXiv preprint arXiv:1706.03762 ( 2017 ). Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N Gomez, Lukasz Kaiser, and Illia Polosukhin. 2017. Attention is all you need. arXiv preprint arXiv:1706.03762 (2017)."},{"key":"e_1_3_2_2_22_1","doi-asserted-by":"publisher","DOI":"10.1145\/3394486.3403047"},{"key":"e_1_3_2_2_23_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P19-1514"},{"key":"e_1_3_2_2_24_1","volume-title":"TAP: Text-Aware Pre-training for Text-VQA and Text-Caption. arxiv","author":"Yang Zhengyuan","year":"2020","unstructured":"Zhengyuan Yang , Yijuan Lu , Jianfeng Wang , Xi Yin , Dinei Florencio , Lijuan Wang , Cha Zhang , Lei Zhang , and Jiebo Luo . 2020 . TAP: Text-Aware Pre-training for Text-VQA and Text-Caption. arxiv : 2012.04638 [cs.CV] Zhengyuan Yang, Yijuan Lu, Jianfeng Wang, Xi Yin, Dinei Florencio, Lijuan Wang, Cha Zhang, Lei Zhang, and Jiebo Luo. 2020. TAP: Text-Aware Pre-training for Text-VQA and Text-Caption. arxiv: 2012.04638 [cs.CV]"},{"key":"e_1_3_2_2_25_1","doi-asserted-by":"crossref","unstructured":"Pengchuan Zhang Xiujun Li Xiaowei Hu Jianwei Yang Lei Zhang Lijuan Wang Yejin Choi and Jianfeng Gao. 2021. VinVL: Making Visual Representations Matter in Vision-Language Models. arxiv: 2101.00529 [cs.CV]  Pengchuan Zhang Xiujun Li Xiaowei Hu Jianwei Yang Lei Zhang Lijuan Wang Yejin Choi and Jianfeng Gao. 2021. VinVL: Making Visual Representations Matter in Vision-Language Models. arxiv: 2101.00529 [cs.CV]","DOI":"10.1109\/CVPR46437.2021.00553"},{"key":"e_1_3_2_2_26_1","doi-asserted-by":"publisher","DOI":"10.1145\/3219819.3219839"},{"key":"e_1_3_2_2_27_1","volume-title":"Multimodal Joint Attribute Prediction and Value Extraction for E-commerce Product. arXiv preprint arXiv:2009.07162","author":"Zhu Tiangang","year":"2020","unstructured":"Tiangang Zhu , Yue Wang , Haoran Li , Youzheng Wu , Xiaodong He , and Bowen Zhou . 2020. Multimodal Joint Attribute Prediction and Value Extraction for E-commerce Product. arXiv preprint arXiv:2009.07162 ( 2020 ). Tiangang Zhu, Yue Wang, Haoran Li, Youzheng Wu, Xiaodong He, and Bowen Zhou. 2020. Multimodal Joint Attribute Prediction and Value Extraction for E-commerce Product. arXiv preprint arXiv:2009.07162 (2020)."}],"event":{"name":"KDD '21: The 27th ACM SIGKDD Conference on Knowledge Discovery and Data Mining","location":"Virtual Event Singapore","acronym":"KDD '21","sponsor":["SIGMOD ACM Special Interest Group on Management of Data","SIGKDD ACM Special Interest Group on Knowledge Discovery in Data"]},"container-title":["Proceedings of the 27th ACM SIGKDD Conference on Knowledge Discovery &amp; Data Mining"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3447548.3467164","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3447548.3467164","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T20:18:27Z","timestamp":1750191507000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3447548.3467164"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,8,14]]},"references-count":27,"alternative-id":["10.1145\/3447548.3467164","10.1145\/3447548"],"URL":"https:\/\/doi.org\/10.1145\/3447548.3467164","relation":{},"subject":[],"published":{"date-parts":[[2021,8,14]]},"assertion":[{"value":"2021-08-14","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}