{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,7,11]],"date-time":"2026-07-11T02:26:27Z","timestamp":1783736787435,"version":"3.55.0"},"publisher-location":"New York, New York, USA","reference-count":52,"publisher":"ACM Press","license":[{"start":{"date-parts":[[2018,1,1]],"date-time":"2018-01-01T00:00:00Z","timestamp":1514764800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"name":"NSF","award":["#1639150"],"award-info":[{"award-number":["#1639150"]}]},{"name":"NSF","award":["#1652525"],"award-info":[{"award-number":["#1652525"]}]},{"name":"NSF","award":["#1544455"],"award-info":[{"award-number":["#1544455"]}]},{"name":"NSF","award":["#1618448"],"award-info":[{"award-number":["#1618448"]}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2018]]},"DOI":"10.1145\/3178876.3185994","type":"proceedings-article","created":{"date-parts":[[2018,4,13]],"date-time":"2018-04-13T15:53:48Z","timestamp":1523634828000},"page":"167-176","source":"Crossref","is-referenced-by-count":498,"title":["DRN"],"prefix":"10.1145","author":[{"given":"Guanjie","family":"Zheng","sequence":"first","affiliation":[{"name":"Pennsylvania State University, University Park, PA, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Fuzheng","family":"Zhang","sequence":"additional","affiliation":[{"name":"Microsoft Research Asia, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Zihan","family":"Zheng","sequence":"additional","affiliation":[{"name":"Microsoft Research Asia, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Yang","family":"Xiang","sequence":"additional","affiliation":[{"name":"Microsoft Research Asia, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Nicholas Jing","family":"Yuan","sequence":"additional","affiliation":[{"name":"Microsoft Research Asia, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Xing","family":"Xie","sequence":"additional","affiliation":[{"name":"Microsoft Research Asia, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Zhenhui","family":"Li","sequence":"additional","affiliation":[{"name":"Pennsylvania State University, University Park, PA, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","reference":[{"key":"key-10.1145\/3178876.3185994-1","unstructured":"Gediminas Adomavicius and YoungOk Kwon. 2012. Improving aggregate recommendation diversity using ranking-based techniques. IEEE Transactions on Knowledge and Data Engineering, Vol. 24, 5 (2012), 896--911."},{"key":"key-10.1145\/3178876.3185994-2","doi-asserted-by":"crossref","unstructured":"Gediminas Adomavicius and Alexander Tuzhilin. 2005. Toward the next generation of recommender systems: A survey of the state-of-the-art and possible extensions. IEEE transactions on knowledge and data engineering, Vol. 17, 6 (2005), 734--749.","DOI":"10.1109\/TKDE.2005.99"},{"key":"key-10.1145\/3178876.3185994-3","doi-asserted-by":"crossref","unstructured":"Jes&#250;s Bobadilla, Fernando Ortega, Antonio Hernando, and Abraham Guti&#233;rrez. 2013. Recommender systems survey. Knowledge-based systems Vol. 46 (2013), 109--132.","DOI":"10.1016\/j.knosys.2013.03.012"},{"key":"key-10.1145\/3178876.3185994-4","unstructured":"Djallel Bouneffouf, Amel Bouzeghoub, and Alda Ganccarski. 2012. A contextual-bandit algorithm for mobile context-aware recommender system Neural Information Processing. Springer, 324--331."},{"key":"key-10.1145\/3178876.3185994-5","unstructured":"Nicolo Cesa-Bianchi, Claudio Gentile, and Giovanni Zappella. 2013. A gang of bandits Advances in Neural Information Processing Systems. 737--745."},{"key":"key-10.1145\/3178876.3185994-6","unstructured":"Olivier Chapelle and Lihong Li. 2011. An empirical evaluation of thompson sampling. In Advances in neural information processing systems. 2249--2257."},{"key":"key-10.1145\/3178876.3185994-7","doi-asserted-by":"crossref","unstructured":"Heng-Tze Cheng, Levent Koc, Jeremiah Harmsen, Tal Shaked, Tushar Chandra, Hrishi Aradhye, Glen Anderson, Greg Corrado, Wei Chai, Mustafa Ispir, et al. 2016. Wide &#38; deep learning for recommender systems. In Proceedings of the 1st Workshop on Deep Learning for Recommender Systems. ACM, 7--10.","DOI":"10.1145\/2988450.2988454"},{"key":"key-10.1145\/3178876.3185994-8","unstructured":"Franccois Chollet et al. 2015. Keras. https:\/\/github.com\/fchollet\/keras. (2015)."},{"key":"key-10.1145\/3178876.3185994-9","unstructured":"D Manning Christopher, Raghavan Prabhakar, and SCH&#220;TZE Hinrich. 2008. Introduction to information retrieval. An Introduction To Information Retrieval Vol. 151 (2008), 177."},{"key":"key-10.1145\/3178876.3185994-10","doi-asserted-by":"crossref","unstructured":"Abhinandan S Das, Mayur Datar, Ashutosh Garg, and Shyam Rajaram. 2007. Google news personalization: scalable online collaborative filtering Proceedings of the 16th international conference on World Wide Web. ACM, 271--280.","DOI":"10.1145\/1242572.1242610"},{"key":"key-10.1145\/3178876.3185994-11","doi-asserted-by":"crossref","unstructured":"Gianmarco De Francisci Morales, Aristides Gionis, and Claudio Lucchese. 2012. From chatter to headlines: harnessing the real-time web for personalized news recommendation Proceedings of the fifth ACM international conference on Web search and data mining. ACM, 153--162.","DOI":"10.1145\/2124295.2124315"},{"key":"key-10.1145\/3178876.3185994-12","unstructured":"Nan Du, Yichen Wang, Niao He, Jimeng Sun, and Le Song. 2015. Time-sensitive recommendation from recurrent user activities Advances in Neural Information Processing Systems. 3492--3500."},{"key":"key-10.1145\/3178876.3185994-13","unstructured":"Claudio Gentile, Shuai Li, and Giovanni Zappella. 2014. Online Clustering of Bandits.. In ICML. 757--765."},{"key":"key-10.1145\/3178876.3185994-14","unstructured":"Google. 2017. Google News. https:\/\/news.google.com\/. (2017)."},{"key":"key-10.1145\/3178876.3185994-15","doi-asserted-by":"crossref","unstructured":"Artem Grotov and Maarten de Rijke. 2016. Online learning to rank for information retrieval: SIGIR 2016 tutorial Proceedings of the 39th International ACM SIGIR conference on Research and Development in Information Retrieval. ACM, 1215--1218.","DOI":"10.1145\/2911451.2914798"},{"key":"key-10.1145\/3178876.3185994-16","unstructured":"Katja Hofmann, Anne Schuth, Shimon Whiteson, and Maarten de Rijke. 2013. Reusing historical interaction data for faster online learning to rank for IR Proceedings of the sixth ACM international conference on Web search and data mining. ACM, 183--192."},{"key":"key-10.1145\/3178876.3185994-17","doi-asserted-by":"crossref","unstructured":"Joseph G Ibrahim, Ming-Hui Chen, and Debajyoti Sinha. 2005. Bayesian survival analysis. Wiley Online Library.","DOI":"10.1002\/0470011815.b2a11006"},{"key":"key-10.1145\/3178876.3185994-18","doi-asserted-by":"crossref","unstructured":"Wouter IJntema, Frank Goossen, Flavius Frasincar, and Frederik Hogenboom. 2010. Ontology-based news recommendation. In Proceedings of the 2010 EDBT\/ICDT Workshops. ACM, 16.","DOI":"10.1145\/1754239.1754257"},{"key":"key-10.1145\/3178876.3185994-19","doi-asserted-by":"crossref","unstructured":"How Jing and Alexander J Smola. 2017. Neural survival recommender. In Proceedings of the Tenth ACM International Conference on Web Search and Data Mining. ACM, 515--524.","DOI":"10.1145\/3018661.3018719"},{"key":"key-10.1145\/3178876.3185994-20","unstructured":"Jaya Kawale, Hung H Bui, Branislav Kveton, Long Tran-Thanh, and Sanjay Chawla. 2015. Efficient Thompson Sampling for Online Matrix-Factorization Recommendation Advances in Neural Information Processing Systems. 1297--1305."},{"key":"key-10.1145\/3178876.3185994-21","doi-asserted-by":"crossref","unstructured":"Michal Kompan and M&#225;ria Bielikov&#225;. 2010. Content-Based News Recommendation.. In EC-Web, Vol. Vol. 61. Springer, 61--72.","DOI":"10.1007\/978-3-642-15208-5_6"},{"key":"key-10.1145\/3178876.3185994-22","unstructured":"Lihong Li, Wei Chu, John Langford, and Robert E Schapire. 2010. A contextual-bandit approach to personalized news article recommendation Proceedings of the 19th international conference on World wide web. ACM, 661--670."},{"key":"key-10.1145\/3178876.3185994-23","doi-asserted-by":"crossref","unstructured":"Lei Li, Dingding Wang, Tao Li, Daniel Knox, and Balaji Padmanabhan. 2011. SCENE: a scalable two-stage personalized news recommendation system Proceedings of the 34th international ACM SIGIR conference on Research and development in Information Retrieval. ACM, 125--134.","DOI":"10.1145\/2009916.2009937"},{"key":"key-10.1145\/3178876.3185994-24","unstructured":"Jiahui Liu, Peter Dolan, and Elin R&#248;nby Pedersen. 2010. Personalized news recommendation based on click behavior Proceedings of the 15th international conference on Intelligent user interfaces. ACM, 31--40."},{"key":"key-10.1145\/3178876.3185994-25","unstructured":"Zhongqi Lu and Qiang Yang. 2016. Partially Observable Markov Decision Process for Recommender Systems. arXiv preprint arXiv:1608.07793 (2016)."},{"key":"key-10.1145\/3178876.3185994-26","unstructured":"Tariq Mahmood and Francesco Ricci. 2007. Learning and adaptivity in interactive recommender systems Proceedings of the ninth international conference on Electronic commerce. ACM, 75--84."},{"key":"key-10.1145\/3178876.3185994-27","unstructured":"Benjamin Marlin and Richard S Zemel. 2004. The multiple multiplicative factor model for collaborative filtering Proceedings of the twenty-first international conference on Machine learning. ACM, 73."},{"key":"key-10.1145\/3178876.3185994-28","unstructured":"Alexander Novikov Mikhail Trofimov. 2016. tffm: TensorFlow implementation of an arbitrary order Factorization Machine. https:\/\/github.com\/geffy\/tffm. (2016)."},{"key":"key-10.1145\/3178876.3185994-29","unstructured":"Rupert G Miller Jr. 2011. Survival analysis. Vol. Vol. 66. John Wiley &#38; Sons."},{"key":"key-10.1145\/3178876.3185994-30","unstructured":"Volodymyr Mnih, Koray Kavukcuoglu, David Silver, Andrei A Rusu, Joel Veness, Marc G Bellemare, Alex Graves, Martin Riedmiller, Andreas K Fidjeland, Georg Ostrovski, et al. 2015. Human-level control through deep reinforcement learning. Nature, Vol. 518, 7540 (2015), 529--533."},{"key":"key-10.1145\/3178876.3185994-31","unstructured":"Atsuyoshi Nakamura. 2015. A ucb-like strategy of collaborative filtering. In Asian Conference on Machine Learning. 315--329."},{"key":"key-10.1145\/3178876.3185994-32","unstructured":"Owen Phelan, Kevin McCarthy, Mike Bennett, and Barry Smyth. 2011. Terms of a feather: Content-based news recommendation and discovery using twitter. Advances in Information Retrieval (2011), 448--459."},{"key":"key-10.1145\/3178876.3185994-33","doi-asserted-by":"crossref","unstructured":"Steffen Rendle. 2010. Factorization machines. In Data Mining (ICDM), 2010 IEEE 10th International Conference on. IEEE, 995--1000.","DOI":"10.1109\/ICDM.2010.127"},{"key":"key-10.1145\/3178876.3185994-34","unstructured":"Pornthep Rojanavasu, Phaitoon Srinil, and Ouen Pinngern. 2005. New recommendation system using reinforcement learning. Special Issue of the Intl. J. Computer, the Internet and Management, Vol. 13, SP 3 (2005)."},{"key":"key-10.1145\/3178876.3185994-35","unstructured":"Guy Shani, David Heckerman, and Ronen I Brafman. 2005. An MDP-based recommender system. Journal of Machine Learning Research Vol. 6, Sep (2005), 1265--1295."},{"key":"key-10.1145\/3178876.3185994-36","unstructured":"Richard S Sutton and Andrew G Barto. 1998. Reinforcement learning: An introduction. Vol. Vol. 1. MIT press Cambridge."},{"key":"key-10.1145\/3178876.3185994-37","doi-asserted-by":"crossref","unstructured":"Nima Taghipour, Ahmad Kardan, and Saeed Shiry Ghidary. 2007. Usage-based web recommendations: a reinforcement learning approach Proceedings of the 2007 ACM conference on Recommender systems. ACM, 113--120.","DOI":"10.1145\/1297231.1297250"},{"key":"key-10.1145\/3178876.3185994-38","unstructured":"Liang Tang, Yexi Jiang, Lei Li, and Tao Li. 2014. Ensemble contextual bandits for personalized recommendation Proceedings of the 8th ACM Conference on Recommender Systems. ACM, 73--80."},{"key":"key-10.1145\/3178876.3185994-39","unstructured":"Liang Tang, Yexi Jiang, Lei Li, Chunqiu Zeng, and Tao Li. 2015. Personalized recommendation via parameter-free contextual bandits Proceedings of the 38th International ACM SIGIR Conference on Research and Development in Information Retrieval. ACM, 323--332."},{"key":"key-10.1145\/3178876.3185994-40","doi-asserted-by":"crossref","unstructured":"Hado Van Hasselt, Arthur Guez, and David Silver. 2016. Deep Reinforcement Learning with Double Q-Learning. AAAI. 2094--2100.","DOI":"10.1609\/aaai.v30i1.10295"},{"key":"key-10.1145\/3178876.3185994-41","doi-asserted-by":"crossref","unstructured":"Huazheng Wang, Qingyun Wu, and Hongning Wang. 2016. Learning Hidden Features for Contextual Bandits. Proceedings of the 25th ACM International on Conference on Information and Knowledge Management. ACM, 1633--1642.","DOI":"10.1145\/2983323.2983847"},{"key":"key-10.1145\/3178876.3185994-42","doi-asserted-by":"crossref","unstructured":"Huazheng Wang, Qingyun Wu, and Hongning Wang. 2017 a. Factorization Bandits for Interactive Recommendation. AAAI. 2695--2702.","DOI":"10.1609\/aaai.v31i1.10936"},{"key":"key-10.1145\/3178876.3185994-43","doi-asserted-by":"crossref","unstructured":"Xinxi Wang, Yi Wang, David Hsu, and Ye Wang. 2014. Exploration in interactive personalized music recommendation: a reinforcement learning approach. ACM Transactions on Multimedia Computing, Communications, and Applications (TOMM), Vol. 11, 1 (2014), 7.","DOI":"10.1145\/2623372"},{"key":"key-10.1145\/3178876.3185994-44","doi-asserted-by":"crossref","unstructured":"Xuejian Wang, Lantao Yu, Kan Ren, Guanyu Tao, Weinan Zhang, Yong Yu, and Jun Wang. 2017 b. Dynamic Attention Deep Model for Article Recommendation by Learning Human Editors' Demonstration. In Proceedings of the 23rd ACM SIGKDD International Conference on Knowledge Discovery and Data Mining. ACM, 2051--2059.","DOI":"10.1145\/3097983.3098096"},{"key":"key-10.1145\/3178876.3185994-45","unstructured":"Yining Wang, Liwei Wang, Yuanzhi Li, Di He, and Tie-Yan Liu. 2013. A theoretical analysis of ndcg type ranking measures Conference on Learning Theory. 25--54."},{"key":"key-10.1145\/3178876.3185994-46","unstructured":"Ziyu Wang, Tom Schaul, Matteo Hessel, Hado Van Hasselt, Marc Lanctot, and Nando De Freitas. 2015. Dueling network architectures for deep reinforcement learning. arXiv preprint arXiv:1511.06581 (2015)."},{"key":"key-10.1145\/3178876.3185994-47","unstructured":"Qingyun Wu, Hongning Wang, Liangjie Hong, and Yue Shi. 2017. Returning is Believing: Optimizing Long-term User Engagement in Recommender Systems. (2017)."},{"key":"key-10.1145\/3178876.3185994-48","unstructured":"Yisong Yue and Thorsten Joachims. 2009. Interactively optimizing information retrieval systems as a dueling bandits problem Proceedings of the 26th Annual International Conference on Machine Learning. ACM, 1201--1208."},{"key":"key-10.1145\/3178876.3185994-49","unstructured":"Chunqiu Zeng, Qing Wang, Shekoofeh Mokhtari, and Tao Li. 2016. Online Context-Aware Recommendation with Time Varying Multi-Armed Bandit Proceedings of the 22nd ACM SIGKDD International Conference on Knowledge Discovery and Data Mining. ACM, 2025--2034."},{"key":"key-10.1145\/3178876.3185994-50","doi-asserted-by":"crossref","unstructured":"Xiaoxue Zhao, Weinan Zhang, and Jun Wang. 2013. Interactive collaborative filtering. In Proceedings of the 22nd ACM international conference on Conference on information &#38; knowledge management. ACM, 1411--1420.","DOI":"10.1145\/2505515.2505690"},{"key":"key-10.1145\/3178876.3185994-51","unstructured":"Lei Zheng, Vahid Noroozi, and Philip S Yu. 2017. Joint deep modeling of users and items using reviews for recommendation Proceedings of the Tenth ACM International Conference on Web Search and Data Mining. ACM, 425--434."},{"key":"key-10.1145\/3178876.3185994-52","unstructured":"Cai-Nicolas Ziegler, Sean M McNee, Joseph A Konstan, and Georg Lausen. 2005. Improving recommendation lists through topic diversification Proceedings of the 14th international conference on World Wide Web. ACM, 22--32."}],"event":{"name":"the 2018 World Wide Web Conference","location":"Lyon, France","acronym":"WWW '18","number":"2018","sponsor":["SIGWEB, ACM Special Interest Group on Hypertext, Hypermedia, and Web","IW3C2, International World Wide Web Conference Committee"],"start":{"date-parts":[[2018,4,23]]},"end":{"date-parts":[[2018,4,27]]}},"container-title":["Proceedings of the 2018 World Wide Web Conference on World Wide Web - WWW '18"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3178876.3185994","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/dl.acm.org\/ft_gateway.cfm?id=3185994&ftid=1957497&dwn=1","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T02:26:59Z","timestamp":1750213619000},"score":1,"resource":{"primary":{"URL":"http:\/\/dl.acm.org\/citation.cfm?doid=3178876.3185994"}},"subtitle":["A Deep Reinforcement Learning Framework for News Recommendation"],"proceedings-subject":"World Wide Web","short-title":[],"issued":{"date-parts":[[2018]]},"references-count":52,"URL":"https:\/\/doi.org\/10.1145\/3178876.3185994","relation":{},"subject":[],"published":{"date-parts":[[2018]]}}}