{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,3]],"date-time":"2025-12-03T17:52:25Z","timestamp":1764784345491,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":45,"publisher":"ACM","license":[{"start":{"date-parts":[[2020,1,20]],"date-time":"2020-01-20T00:00:00Z","timestamp":1579478400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"National Key R\\&D Program of China","award":["2018YFB2101100, 2018YFB2101101"],"award-info":[{"award-number":["2018YFB2101100, 2018YFB2101101"]}]},{"DOI":"10.13039\/501100012659","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61972111,61602132,U1836107"],"award-info":[{"award-number":["61972111,61602132,U1836107"]}],"id":[{"id":"10.13039\/501100012659","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2020,1,20]]},"DOI":"10.1145\/3336191.3371858","type":"proceedings-article","created":{"date-parts":[[2020,1,22]],"date-time":"2020-01-22T19:08:16Z","timestamp":1579720096000},"page":"384-392","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":31,"title":["End-to-End Deep Reinforcement Learning based Recommendation with Supervised Embedding"],"prefix":"10.1145","author":[{"given":"Feng","family":"Liu","sequence":"first","affiliation":[{"name":"Harbin Institute of Technology, Shenzhen, China"}]},{"given":"Huifeng","family":"Guo","sequence":"additional","affiliation":[{"name":"Noah's Ark Lab, Huawei, Shenzhen, China"}]},{"given":"Xutao","family":"Li","sequence":"additional","affiliation":[{"name":"Harbin Institute of Technology, Shenzhen, China"}]},{"given":"Ruiming","family":"Tang","sequence":"additional","affiliation":[{"name":"Noah's Ark Lab, Huawei, Shenzhen, China"}]},{"given":"Yunming","family":"Ye","sequence":"additional","affiliation":[{"name":"Harbin Institute of Technology, Shenzhen, China"}]},{"given":"Xiuqiang","family":"He","sequence":"additional","affiliation":[{"name":"Noah's Ark Lab, Huawei, Shenzhen, China"}]}],"member":"320","published-online":{"date-parts":[[2020,1,22]]},"reference":[{"volume-title":"SIGIR","author":"Pablo Castells Roc'io Ca","key":"e_1_3_2_1_1_1"},{"key":"e_1_3_2_1_2_1","unstructured":"Olivier Chapelle and Lihong Li. 2011. An Empirical Evaluation of Thompson Sampling. In NeurIPS Granada Spain. 2249--2257. Olivier Chapelle and Lihong Li. 2011. An Empirical Evaluation of Thompson Sampling. In NeurIPS Granada Spain. 2249--2257."},{"key":"e_1_3_2_1_3_1","unstructured":"Haokun Chen Xinyi Dai Han Cai Weinan Zhang Xuejian Wang Ruiming Tang Yuzhou Zhang and Yong Yu. 2018. Large-scale Interactive Recommendation with Tree-structured Policy Gradient. arXiv preprint arXiv:1811.05869 (2018). Haokun Chen Xinyi Dai Han Cai Weinan Zhang Xuejian Wang Ruiming Tang Yuzhou Zhang and Yong Yu. 2018. Large-scale Interactive Recommendation with Tree-structured Policy Gradient. arXiv preprint arXiv:1811.05869 (2018)."},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"crossref","unstructured":"Minmin Chen Alex Beutel Paul Covington Sagar Jain Francois Belletti and Ed H Chi. 2019. Top-K Off-Policy Correction for a REINFORCE Recommender System. In In WSDM. ACM 456--464. Minmin Chen Alex Beutel Paul Covington Sagar Jain Francois Belletti and Ed H Chi. 2019. Top-K Off-Policy Correction for a REINFORCE Recommender System. In In WSDM. ACM 456--464.","DOI":"10.1145\/3289600.3290999"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"crossref","unstructured":"Heng-Tze Cheng Levent Koc Jeremiah Harmsen Tal Shaked Tushar Chandra Hrishi Aradhye Glen Anderson Greg Corrado Wei Chai Mustafa Ispir Rohan Anil Zakaria Haque Lichan Hong Vihan Jain Xiaobing Liu and Hemal Shah. 2016. Wide & Deep Learning for Recommender Systems. CoRR Vol. abs\/1606.07792 (2016). Heng-Tze Cheng Levent Koc Jeremiah Harmsen Tal Shaked Tushar Chandra Hrishi Aradhye Glen Anderson Greg Corrado Wei Chai Mustafa Ispir Rohan Anil Zakaria Haque Lichan Hong Vihan Jain Xiaobing Liu and Hemal Shah. 2016. Wide & Deep Learning for Recommender Systems. CoRR Vol. abs\/1606.07792 (2016).","DOI":"10.1145\/2988450.2988454"},{"volume-title":"Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805","year":"2018","author":"Devlin Jacob","key":"e_1_3_2_1_6_1"},{"key":"e_1_3_2_1_7_1","unstructured":"Gabriel Dulac-Arnold Richard Evans Peter Sunehag and Ben Coppin. 2015. Reinforcement Learning in Large Discrete Action Spaces. CoRR Vol. abs\/1512.07679 (2015). Gabriel Dulac-Arnold Richard Evans Peter Sunehag and Ben Coppin. 2015. Reinforcement Learning in Large Discrete Action Spaces. CoRR Vol. abs\/1512.07679 (2015)."},{"key":"e_1_3_2_1_8_1","unstructured":"Huifeng Guo Ruiming Tang Yunming Ye Zhenguo Li and Xiuqiang He. 2017. DeepFM: A Factorization-Machine based Neural Network for CTR Prediction. In In IJCAI. 1725--1731. Huifeng Guo Ruiming Tang Yunming Ye Zhenguo Li and Xiuqiang He. 2017. DeepFM: A Factorization-Machine based Neural Network for CTR Prediction. In In IJCAI. 1725--1731."},{"key":"e_1_3_2_1_9_1","unstructured":"Kaiming He Ross Girshick and Piotr Doll\u00e1r. 2018. Rethinking imagenet pre-training. arXiv preprint arXiv:1811.08883 (2018). Kaiming He Ross Girshick and Piotr Doll\u00e1r. 2018. Rethinking imagenet pre-training. arXiv preprint arXiv:1811.08883 (2018)."},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"crossref","unstructured":"Junqi Jin Chengru Song Han Li Kun Gai Jun Wang and Weinan Zhang. 2018. Real-Time Bidding with Multi-Agent Reinforcement Learning in Display Advertising. CoRR Vol. abs\/1802.09756 (2018). Junqi Jin Chengru Song Han Li Kun Gai Jun Wang and Weinan Zhang. 2018. Real-Time Bidding with Multi-Agent Reinforcement Learning in Display Advertising. CoRR Vol. abs\/1802.09756 (2018).","DOI":"10.1145\/3269206.3272021"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"crossref","unstructured":"Yu-Chin Juan Yong Zhuang Wei-Sheng Chin and Chih-Jen Lin. 2016. Field-aware Factorization Machines for CTR Prediction. In In RecSys . 43--50. Yu-Chin Juan Yong Zhuang Wei-Sheng Chin and Chih-Jen Lin. 2016. Field-aware Factorization Machines for CTR Prediction. In In RecSys . 43--50.","DOI":"10.1145\/2959100.2959134"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2018.8460655"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"crossref","unstructured":"Yehuda Koren. 2008. Factorization meets the neighborhood: a multifaceted collaborative filtering model. In In SIGKDD. ACM 426--434. Yehuda Koren. 2008. Factorization meets the neighborhood: a multifaceted collaborative filtering model. In In SIGKDD. ACM 426--434.","DOI":"10.1145\/1401890.1401944"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/MC.2009.263"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"crossref","unstructured":"Lihong Li Wei Chu John Langford and Robert E Schapire. 2010. A contextual-bandit approach to personalized news article recommendation. In In WWW. ACM 661--670. Lihong Li Wei Chu John Langford and Robert E Schapire. 2010. A contextual-bandit approach to personalized news article recommendation. In In WWW. ACM 661--670.","DOI":"10.1145\/1772690.1772758"},{"key":"e_1_3_2_1_16_1","unstructured":"Xutao Li Gao Cong Xiaoli Li Tuan-Anh Nguyen Pham and Shonali Krishnaswamy. 2015. Rank-GeoFM: A Ranking based Geographical Factorization Method for Point of Interest Recommendation. In In SIGIR. ACM 433--442. Xutao Li Gao Cong Xiaoli Li Tuan-Anh Nguyen Pham and Shonali Krishnaswamy. 2015. Rank-GeoFM: A Ranking based Geographical Factorization Method for Point of Interest Recommendation. In In SIGIR. ACM 433--442."},{"key":"e_1_3_2_1_17_1","unstructured":"Timothy P. Lillicrap Jonathan J. Hunt Alexander Pritzel Nicolas Heess Tom Erez Yuval Tassa David Silver and Daan Wierstra. 2015. Continuous control with deep reinforcement learning. CoRR Vol. abs\/1509.02971 (2015). Timothy P. Lillicrap Jonathan J. Hunt Alexander Pritzel Nicolas Heess Tom Erez Yuval Tassa David Silver and Daan Wierstra. 2015. Continuous control with deep reinforcement learning. CoRR Vol. abs\/1509.02971 (2015)."},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1145\/3308558.3313497"},{"key":"e_1_3_2_1_19_1","unstructured":"Feng Liu Ruiming Tang Xutao Li Weinan Zhang Yunming Ye Haokun Chen Huifeng Guo and Yuzhou Zhang. 2018. Deep Reinforcement Learning based Recommendation with Explicit User-Item Interactions Modeling. arXiv preprint arXiv:1810.12027 (2018). Feng Liu Ruiming Tang Xutao Li Weinan Zhang Yunming Ye Haokun Chen Huifeng Guo and Yuzhou Zhang. 2018. Deep Reinforcement Learning based Recommendation with Explicit User-Item Interactions Modeling. arXiv preprint arXiv:1810.12027 (2018)."},{"key":"e_1_3_2_1_20_1","unstructured":"Odalric-Ambrym Maillard Daniil Ryabko and R\u00e9mi Munos. 2011. Selecting the state-representation in reinforcement learning. In NeurIPS . 2627--2635. Odalric-Ambrym Maillard Daniil Ryabko and R\u00e9mi Munos. 2011. Selecting the state-representation in reinforcement learning. In NeurIPS . 2627--2635."},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"crossref","unstructured":"H Brendan McMahan Gary Holt David Sculley Michael Young Dietmar Ebner Julian Grady Lan Nie Todd Phillips Eugene Davydov Daniel Golovin et almbox. 2013. Ad click prediction: a view from the trenches. In SIGKDD. ACM 1222--1230. H Brendan McMahan Gary Holt David Sculley Michael Young Dietmar Ebner Julian Grady Lan Nie Todd Phillips Eugene Davydov Daniel Golovin et almbox. 2013. Ad click prediction: a view from the trenches. In SIGKDD. ACM 1222--1230.","DOI":"10.1145\/2487575.2488200"},{"key":"e_1_3_2_1_22_1","unstructured":"Tomas Mikolov Ilya Sutskever Kai Chen Greg S Corrado and Jeff Dean. 2013. Distributed representations of words and phrases and their compositionality. In NeurIPS . 3111--3119. Tomas Mikolov Ilya Sutskever Kai Chen Greg S Corrado and Jeff Dean. 2013. Distributed representations of words and phrases and their compositionality. In NeurIPS . 3111--3119."},{"key":"e_1_3_2_1_23_1","unstructured":"Andriy Mnih and Ruslan R Salakhutdinov. 2008. Probabilistic matrix factorization. In NeurIPS. 1257--1264. Andriy Mnih and Ruslan R Salakhutdinov. 2008. Probabilistic matrix factorization. In NeurIPS. 1257--1264."},{"key":"e_1_3_2_1_24_1","volume-title":"Nature","volume":"518","author":"Mnih Volodymyr","year":"2015"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"crossref","unstructured":"Raymond J. Mooney and Loriene Roy. 2000. Content-based book recommending using learning for text categorization. In ACM DL. 195--204. Raymond J. Mooney and Loriene Roy. 2000. Content-based book recommending using learning for text categorization. In ACM DL. 195--204.","DOI":"10.1145\/336597.336662"},{"volume-title":"Product-Based Neural Networks for User Response Prediction. In ICDM 2016","year":"2016","author":"Qu Yanru","key":"e_1_3_2_1_26_1"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1145\/3233770"},{"volume-title":"ICDM","year":"2010","author":"Rendle Steffen","key":"e_1_3_2_1_28_1"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1093\/beheco\/ark016"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"crossref","unstructured":"David Silver Aja Huang Chris J. Maddison Arthur Guez Laurent Sifre George van den Driessche Julian Schrittwieser Ioannis Antonoglou Vedavyas Panneershelvam Marc Lanctot Sander Dieleman Dominik Grewe John Nham Nal Kalchbrenner Ilya Sutskever Timothy P. Lillicrap Madeleine Leach Koray Kavukcuoglu Thore Graepel and Demis Hassabis. 2016. Mastering the game of Go with deep neural networks and tree search. Nature Vol. 529 7587 (2016) 484--489. David Silver Aja Huang Chris J. Maddison Arthur Guez Laurent Sifre George van den Driessche Julian Schrittwieser Ioannis Antonoglou Vedavyas Panneershelvam Marc Lanctot Sander Dieleman Dominik Grewe John Nham Nal Kalchbrenner Ilya Sutskever Timothy P. Lillicrap Madeleine Leach Koray Kavukcuoglu Thore Graepel and Demis Hassabis. 2016. Mastering the game of Go with deep neural networks and tree search. Nature Vol. 529 7587 (2016) 484--489.","DOI":"10.1038\/nature16961"},{"volume-title":"Deterministic Policy Gradient Algorithms. In ICML 2014","year":"2014","author":"Silver David","key":"e_1_3_2_1_31_1"},{"key":"e_1_3_2_1_32_1","unstructured":"Richard S Sutton and Andrew G Barto. 1998. Reinforcement learning: An introduction . Vol. 1. MIT press Cambridge. Richard S Sutton and Andrew G Barto. 1998. Reinforcement learning: An introduction . Vol. 1. MIT press Cambridge."},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"crossref","unstructured":"Huazheng Wang Qingyun Wu and Hongning Wang. 2016. Learning Hidden Features for Contextual Bandits. In In CIKM. ACM 1633--1642. Huazheng Wang Qingyun Wu and Hongning Wang. 2016. Learning Hidden Features for Contextual Bandits. In In CIKM. ACM 1633--1642.","DOI":"10.1145\/2983323.2983847"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"crossref","unstructured":"Jun Wang Arjen P De Vries and Marcel JT Reinders. 2006. Unifying user-based and item-based collaborative filtering approaches by similarity fusion. In In SIGIR. ACM 501--508. Jun Wang Arjen P De Vries and Marcel JT Reinders. 2006. Unifying user-based and item-based collaborative filtering approaches by similarity fusion. In In SIGIR. ACM 501--508.","DOI":"10.1145\/1148170.1148257"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"crossref","unstructured":"Rex Ying Ruining He Kaifeng Chen Pong Eksombatchai William L Hamilton and Jure Leskovec. 2018. Graph convolutional neural networks for web-scale recommender systems. In In SIGKDD. ACM 974--983. Rex Ying Ruining He Kaifeng Chen Pong Eksombatchai William L Hamilton and Jure Leskovec. 2018. Graph convolutional neural networks for web-scale recommender systems. In In SIGKDD. ACM 974--983.","DOI":"10.1145\/3219819.3219890"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"crossref","unstructured":"Chunqiu Zeng Qing Wang Shekoofeh Mokhtari and Tao Li. 2016. Online Context-Aware Recommendation with Time Varying Multi-Armed Bandit. In In SIGKDD. 2025--2034. Chunqiu Zeng Qing Wang Shekoofeh Mokhtari and Tao Li. 2016. Online Context-Aware Recommendation with Time Varying Multi-Armed Bandit. In In SIGKDD. 2025--2034.","DOI":"10.1145\/2939672.2939878"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"crossref","unstructured":"Weinan Zhang Ulrich Paquet and Katja Hofmann. 2016. Collective Noise Contrastive Estimation for Policy Transfe Learning. In AAAI. AAAI Press 1408--1414. Weinan Zhang Ulrich Paquet and Katja Hofmann. 2016. Collective Noise Contrastive Estimation for Policy Transfe Learning. In AAAI. AAAI Press 1408--1414.","DOI":"10.1609\/aaai.v30i1.10153"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"crossref","unstructured":"Xiangyu Zhao Long Xia Liang Zhang Zhuoye Ding Dawei Yin and Jiliang Tang. 2018a. Deep reinforcement learning for page-wise recommendations. In RecSys. ACM 95--103. Xiangyu Zhao Long Xia Liang Zhang Zhuoye Ding Dawei Yin and Jiliang Tang. 2018a. Deep reinforcement learning for page-wise recommendations. In RecSys. ACM 95--103.","DOI":"10.1145\/3240323.3240374"},{"key":"e_1_3_2_1_39_1","unstructured":"Xiangyu Zhao Long Xia Yihong Zhao Dawei Yin and jiliang Tang. 2019. Model-Based Reinforcement Learning for Whole-Chain Recommendations. arXiv preprint arXiv:1902.03987 (2019). Xiangyu Zhao Long Xia Yihong Zhao Dawei Yin and jiliang Tang. 2019. Model-Based Reinforcement Learning for Whole-Chain Recommendations. arXiv preprint arXiv:1902.03987 (2019)."},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1145\/3219819.3219886"},{"key":"e_1_3_2_1_41_1","unstructured":"Xiangyu Zhao Liang Zhang Zhuoye Ding Dawei Yin Yihong Zhao and Jiliang Tang. 2017. Deep reinforcement learning for list-wise recommendations. arXiv preprint arXiv:1801.00209 (2017). Xiangyu Zhao Liang Zhang Zhuoye Ding Dawei Yin Yihong Zhao and Jiliang Tang. 2017. Deep reinforcement learning for list-wise recommendations. arXiv preprint arXiv:1801.00209 (2017)."},{"volume-title":"CIKM","year":"2013","author":"Zhao Xiaoxue","key":"e_1_3_2_1_42_1"},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1145\/3178876.3185994"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"crossref","unstructured":"Guorui Zhou Xiaoqiang Zhu Chenru Song Ying Fan Han Zhu Xiao Ma Yanghui Yan Junqi Jin Han Li and Kun Gai. 2018. Deep interest network for click-through rate prediction. In In SIGKDD. ACM 1059--1068. Guorui Zhou Xiaoqiang Zhu Chenru Song Ying Fan Han Zhu Xiao Ma Yanghui Yan Junqi Jin Han Li and Kun Gai. 2018. Deep interest network for click-through rate prediction. In In SIGKDD. ACM 1059--1068.","DOI":"10.1145\/3219819.3219823"},{"key":"e_1_3_2_1_45_1","unstructured":"Barret Zoph and Quoc V. Le. 2016. Neural Architecture Search with Reinforcement Learning. CoRR Vol. abs\/1611.01578 (2016). Barret Zoph and Quoc V. Le. 2016. Neural Architecture Search with Reinforcement Learning. CoRR Vol. abs\/1611.01578 (2016)."}],"event":{"name":"WSDM '20: The Thirteenth ACM International Conference on Web Search and Data Mining","sponsor":["SIGMOD ACM Special Interest Group on Management of Data","SIGWEB ACM Special Interest Group on Hypertext, Hypermedia, and Web","SIGKDD ACM Special Interest Group on Knowledge Discovery in Data","SIGIR ACM Special Interest Group on Information Retrieval"],"location":"Houston TX USA","acronym":"WSDM '20"},"container-title":["Proceedings of the 13th International Conference on Web Search and Data Mining"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3336191.3371858","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3336191.3371858","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T23:23:14Z","timestamp":1750202594000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3336191.3371858"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,1,20]]},"references-count":45,"alternative-id":["10.1145\/3336191.3371858","10.1145\/3336191"],"URL":"https:\/\/doi.org\/10.1145\/3336191.3371858","relation":{},"subject":[],"published":{"date-parts":[[2020,1,20]]},"assertion":[{"value":"2020-01-22","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}