{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,4]],"date-time":"2026-05-04T05:48:22Z","timestamp":1777873702505,"version":"3.51.4"},"publisher-location":"New York, NY, USA","reference-count":30,"publisher":"ACM","content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,8,3]]},"DOI":"10.1145\/3711896.3737374","type":"proceedings-article","created":{"date-parts":[[2025,8,3]],"date-time":"2025-08-03T21:04:26Z","timestamp":1754255066000},"page":"5311-5321","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":3,"title":["ChineseEcomQA: A Scalable E-commerce Concept Evaluation Benchmark for Large Language Models"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-1039-1119","authenticated-orcid":false,"given":"Haibin","family":"Chen","sequence":"first","affiliation":[{"name":"Taobao &amp; Tmall Group of Alibaba, Hangzhou, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-7013-1746","authenticated-orcid":false,"given":"Kangtao","family":"Lv","sequence":"additional","affiliation":[{"name":"Zhejiang University, Hangzhou, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-5126-9767","authenticated-orcid":false,"given":"Chengwei","family":"Hu","sequence":"additional","affiliation":[{"name":"Taobao &amp; Tmall Group of Alibaba, Hangzhou, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-3482-8486","authenticated-orcid":false,"given":"Yanshi","family":"Li","sequence":"additional","affiliation":[{"name":"Taobao &amp; Tmall Group of Alibaba, Beijing, 
China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-9459-6488","authenticated-orcid":false,"given":"Yujin","family":"Yuan","sequence":"additional","affiliation":[{"name":"Taobao &amp; Tmall Group of Alibaba, Hangzhou, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-8098-7178","authenticated-orcid":false,"given":"Yancheng","family":"He","sequence":"additional","affiliation":[{"name":"Taobao &amp; Tmall Group of Alibaba, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-9546-0901","authenticated-orcid":false,"given":"Xingyao","family":"Zhang","sequence":"additional","affiliation":[{"name":"Taobao &amp; Tmall Group of Alibaba, Hangzhou, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1995-3381","authenticated-orcid":false,"given":"Langming","family":"Liu","sequence":"additional","affiliation":[{"name":"Taobao &amp; Tmall Group of Alibaba, Hangzhou, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2976-6256","authenticated-orcid":false,"given":"Shilei","family":"Liu","sequence":"additional","affiliation":[{"name":"Taobao &amp; Tmall Group of Alibaba, Hangzhou, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-3800-7543","authenticated-orcid":false,"given":"Wenbo","family":"Su","sequence":"additional","affiliation":[{"name":"Taobao &amp; Tmall Group of Alibaba, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4037-6315","authenticated-orcid":false,"given":"Bo","family":"Zheng","sequence":"additional","affiliation":[{"name":"Taobao &amp; Tmall Group of Alibaba, Beijing, 
China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2025,8,3]]},"reference":[{"key":"e_1_3_2_2_1_1","volume-title":"Using LLMs for the Extraction and Normalization of Product Attribute Values. In Symposium on Advances in Databases and Information Systems. https:\/\/api.semanticscholar.org\/CorpusID:268248749","author":"Baumann Nick","year":"2024","unstructured":"Nick Baumann, Alexander Brinkmann, and Christian Bizer. 2024. Using LLMs for the Extraction and Normalization of Product Attribute Values. In Symposium on Advances in Databases and Information Systems. https:\/\/api.semanticscholar.org\/CorpusID:268248749"},{"key":"e_1_3_2_2_2_1","unstructured":"Xiao Bi Deli Chen Guanting Chen Shanhuang Chen Damai Dai Chengqi Deng Honghui Ding Kai Dong Qiushi Du Zhe Fu et al. 2024. Deepseek llm: Scaling open-source language models with longtermism. arXiv preprint arXiv:2401.02954(2024)."},{"key":"e_1_3_2_2_3_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-78090-5_4"},{"key":"e_1_3_2_2_4_1","unstructured":"Abhimanyu Dubey Abhinav Jauhri Abhinav Pandey Abhishek Kadian Ahmad Al-Dahle Aiesha Letman Akhil Mathur Alan Schelten Amy Yang Angela Fan et al. 2024. The llama 3 herd of models. arXiv preprint arXiv:2407.21783(2024)."},{"key":"e_1_3_2_2_5_1","unstructured":"Jiawei Gu Xuhui Jiang Zhichao Shi Hexiang Tan Xuehao Zhai Chengjin Xu Wei Li Yinghan Shen Shengjie Ma Honghao Liu et al. 2024. A Survey on LLM-as-a-Judge. arXiv preprint arXiv:2411.15594(2024)."},{"key":"e_1_3_2_2_6_1","unstructured":"Daya Guo Dejian Yang Haowei Zhang Junxiao Song Ruoyu Zhang Runxin Xu Qihao Zhu Shirong Ma Peiyi Wang Xiao Bi et al. 2025. Deepseek-r1: Incentivizing reasoning capability in llms via reinforcement learning. arXiv preprint arXiv:2501.12948(2025)."},{"key":"e_1_3_2_2_7_1","unstructured":"Yancheng He Shilong Li Jiaheng Liu Yingshui Tan Weixun Wang Hui Huang Xingyuan Bu Hangyu Guo Chengwei Hu Boren Zheng et al. 2024a. 
Chinese simpleqa: A chinese factuality evaluation for large language models. arXiv preprint arXiv:2411.07140(2024)."},{"key":"e_1_3_2_2_8_1","unstructured":"Yancheng He Shilong Li Jiaheng Liu Yingshui Tan Weixun Wang Hui Huang Xingyuan Bu Hangyu Guo Chengwei Hu Boren Zheng Zhuoran Lin Xuepeng Liu Dekai Sun Shirong Lin Zhicheng Zheng Xiaoyong Zhu Wenbo Su and Bo Zheng. 2024b. Chinese SimpleQA: A Chinese Factuality Evaluation for Large Language Models. arxiv:2411.07140 [cs.CL] https:\/\/arxiv.org\/abs\/2411.07140"},{"key":"e_1_3_2_2_9_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-56060-6_24"},{"key":"e_1_3_2_2_10_1","doi-asserted-by":"publisher","DOI":"10.1145\/3703155"},{"key":"e_1_3_2_2_11_1","volume-title":"Proceedings of the 37th International Conference on Neural Information Processing Systems","author":"Jin Wei","year":"2023","unstructured":"Wei Jin, Haitao Mao, Zheng Li, Haoming Jiang, Chen Luo, Hongzhi Wen, Haoyu Han, Hanqing Lu, Zhengyang Wang, Ruirui Li, Zhen Li, Monica Cheng, Rahul Goutam, Haiyang Zhang, Karthik Subbian, Suhang Wang, Yizhou Sun, Jiliang Tang, Bing Yin, and Xianfeng Tang. 2023. Amazon-M2: a multilingual multi-locale shopping session dataset for recommendation and text generation. In Proceedings of the 37th International Conference on Neural Information Processing Systems(New Orleans, LA, USA) (NIPS '23). Curran Associates Inc., Red Hook, NY, USA, Article 351, 21 pages."},{"key":"e_1_3_2_2_12_1","unstructured":"Yilun Jin Zheng Li Chenwei Zhang Tianyu Cao Yifan Gao Pratik Jayarao Mao Li Xin Liu Ritesh Sarkhel Xianfeng Tang et al. 2024a. Shopping mmlu: A massive multi-task online shopping benchmark for large language models. 
arXiv preprint arXiv:2410.20745(2024)."},{"key":"e_1_3_2_2_13_1","volume-title":"Advances in Neural Information Processing Systems","author":"Jin Yilun","year":"2024","unstructured":"Yilun Jin, Zheng Li, Chenwei Zhang, Tianyu Cao, Yifan Gao, Pratik Jayarao, Mao Li, Xin Liu, Ritesh Sarkhel, Xianfeng Tang, Haodong Wang, Zhengyang Wang, Wenju Xu, Jingfeng Yang, Qingyu Yin, Xian Li, Priyanka Nigam, Yi Xu, Kai Chen, Qiang Yang, Meng Jiang, and Bing Yin. 2024b. Shopping MMLU: A Massive Multi-Task Online Shopping Benchmark for Large Language Models. In Advances in Neural Information Processing Systems, A. Globerson, L. Mackey, D. Belgrave, A. Fan, U. Paquet, J. Tomczak, and C. Zhang(Eds.), Vol. 37. Curran Associates, Inc., 18062-18089. https:\/\/proceedings.neurips.cc\/paper_files\/paper\/2024\/file\/2049d75dd13db049897562bcf7d59da8-Paper-Datasets_and_Benchmarks_Track.pdf"},{"key":"e_1_3_2_2_14_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.emnlp-main.178"},{"key":"e_1_3_2_2_15_1","doi-asserted-by":"publisher","DOI":"10.1145\/3580305.3599519"},{"key":"e_1_3_2_2_16_1","doi-asserted-by":"publisher","unstructured":"Yangning Li Shirong Ma Xiaobin Wang Shen Huang Chengyue Jiang Hai-Tao Zheng Pengjun Xie Fei Huang and Yong Jiang. 2024. EcomGPT: instruction-tuning large language models with chain-of-task tasks for E-commerce. In Proceedings of the Thirty-Eighth AAAI Conference on Artificial Intelligence and Thirty-Sixth Conference on Innovative Applications of Artificial Intelligence and Fourteenth Symposium on Educational Advances in Artificial Intelligence(AAAI'24\/IAAI'24\/EAAI'24). AAAI Press Article 2072 9 pages. https:\/\/doi.org\/10.1609\/aaai.v38i17.29820","DOI":"10.1609\/aaai.v38i17.29820"},{"key":"e_1_3_2_2_17_1","unstructured":"Aixin Liu Bei Feng Bing Xue Bingxuan Wang Bochao Wu Chengda Lu Chenggang Zhao Chengqi Deng Chenyu Zhang Chong Ruan et al. 2024. Deepseek-v3 technical report. 
arXiv preprint arXiv:2412.19437(2024)."},{"key":"e_1_3_2_2_18_1","unstructured":"Zichen Liu Changyu Chen Wenjun Li Tianyu Pang Chao Du and Min Lin. 2025. There May Not be Aha Moment in R1-Zero-like Training - A Pilot Study. https:\/\/oatllm.notion.site\/oat-zero. Notion Blog."},{"key":"e_1_3_2_2_19_1","unstructured":"OpenAI. 2023. GPT-4 technical report. (2023)."},{"key":"e_1_3_2_2_20_1","volume-title":"Proceedings of the 41st International Conference on Machine Learning","author":"Peng Bo","year":"2024","unstructured":"Bo Peng, Xinyi Ling, Ziru Chen, Huan Sun, and Xia Ning. 2024. eCeLLM: generalizing large language models for E-commerce from large-scale, high-quality instruction data. In Proceedings of the 41st International Conference on Machine Learning(Vienna, Austria) (ICML'24). JMLR.org, Article 1632, 43 pages."},{"key":"e_1_3_2_2_21_1","doi-asserted-by":"crossref","unstructured":"Mandeep Rathee Sean MacAvaney and Avishek Anand. 2025. Guiding Retrieval using LLM-based Listwise Rankers. arxiv:2501.09186 [cs.IR] https:\/\/arxiv.org\/abs\/2501.09186","DOI":"10.1007\/978-3-031-88708-6_15"},{"key":"e_1_3_2_2_22_1","unstructured":"Chandan K. Reddy Llu\u00eds M\u00e0rquez Fran Valero Nikhil Rao Hugo Zaragoza Sambaran Bandyopadhyay Arnab Biswas Anlu Xing and Karthik Subbian. 2022. Shopping Queries Dataset: A Large-Scale ESCI Benchmark for Improving Product Search. arxiv:2206.06588 [cs.IR] https:\/\/arxiv.org\/abs\/2206.06588"},{"key":"e_1_3_2_2_23_1","doi-asserted-by":"publisher","DOI":"10.1038\/s42256-024-00976-7"},{"key":"e_1_3_2_2_24_1","volume-title":"Ryan Burnell, Libin Bai, Anmol Gulati, Garrett Tanzer, Damien Vincent, Zhufeng Pan, Shibo Wang, et al.","author":"Team Gemini","year":"2024","unstructured":"Gemini Team, Petko Georgiev, Ving Ian Lei, Ryan Burnell, Libin Bai, Anmol Gulati, Garrett Tanzer, Damien Vincent, Zhufeng Pan, Shibo Wang, et al., 2024. Gemini 1.5: Unlocking multimodal understanding across millions of tokens of context. 
arXiv preprint arXiv:2403.05530(2024)."},{"key":"e_1_3_2_2_25_1","volume-title":"Yunxin Joy Jiao, Spencer Papay, Amelia Glaese, John Schulman, and William Fedus.","author":"Wei Jason","year":"2024","unstructured":"Jason Wei, Nguyen Karina, Hyung Won Chung, Yunxin Joy Jiao, Spencer Papay, Amelia Glaese, John Schulman, and William Fedus. 2024a. Measuring short-form factuality in large language models. arXiv preprint arXiv:2411.04368(2024)."},{"key":"e_1_3_2_2_26_1","doi-asserted-by":"publisher","DOI":"10.1145\/3616855.3635853"},{"key":"e_1_3_2_2_27_1","unstructured":"Wei Xu Jue Xiao and Jianlong Chen. 2024. Leveraging Large Language Models to Enhance Personalized Recommendations in E-commerce. arxiv:2410.12829 [cs.IR] https:\/\/arxiv.org\/abs\/2410.12829"},{"key":"e_1_3_2_2_28_1","unstructured":"An Yang Baosong Yang Beichen Zhang Binyuan Hui Bo Zheng Bowen Yu Chengyuan Li Dayiheng Liu Fei Huang Haoran Wei et al. 2024. Qwen2.5 technical report. arXiv preprint arXiv:2412.15115(2024)."},{"key":"e_1_3_2_2_29_1","volume-title":"COSMO: A large-scale e-commerce common sense knowledge generation and serving system at Amazon.","author":"Yu Changlong","year":"2024","unstructured":"Changlong Yu, Xin Liu, Jefferson Maia, Tianyu Cao, Laurence (Yang) Li, Yifan Gao, Yangqiu Song, Rahul Goutam, Haiyang Zhang, Bing Yin, and Zheng Li. 2024. COSMO: A large-scale e-commerce common sense knowledge generation and serving system at Amazon. (2024). 
https:\/\/www.amazon.science\/publications\/cosmo-a-large-scale-e-commerce-common-sense-knowledge-generation-and-serving-system-at-amazon"},{"key":"e_1_3_2_2_30_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.naacl-industry.40"}],"event":{"name":"KDD '25: The 31st ACM SIGKDD Conference on Knowledge Discovery and Data Mining","location":"Toronto ON Canada","acronym":"KDD '25","sponsor":["SIGKDD ACM Special Interest Group on Knowledge Discovery in Data","SIGMOD ACM Special Interest Group on Management of Data"]},"container-title":["Proceedings of the 31st ACM SIGKDD Conference on Knowledge Discovery and Data Mining V.2"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3711896.3737374","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,4,30]],"date-time":"2026-04-30T18:15:19Z","timestamp":1777572919000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3711896.3737374"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,8,3]]},"references-count":30,"alternative-id":["10.1145\/3711896.3737374","10.1145\/3711896"],"URL":"https:\/\/doi.org\/10.1145\/3711896.3737374","relation":{},"subject":[],"published":{"date-parts":[[2025,8,3]]},"assertion":[{"value":"2025-08-03","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}