{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,2]],"date-time":"2025-10-02T10:47:32Z","timestamp":1759402052192,"version":"build-2065373602"},"publisher-location":"Cham","reference-count":36,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783032061058","type":"print"},{"value":"9783032061065","type":"electronic"}],"license":[{"start":{"date-parts":[[2025,10,3]],"date-time":"2025-10-03T00:00:00Z","timestamp":1759449600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,10,3]],"date-time":"2025-10-03T00:00:00Z","timestamp":1759449600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026]]},"DOI":"10.1007\/978-3-032-06106-5_4","type":"book-chapter","created":{"date-parts":[[2025,10,2]],"date-time":"2025-10-02T10:08:59Z","timestamp":1759399739000},"page":"59-76","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Reward Shaping for\u00a0User Satisfaction"],"prefix":"10.1007","author":[{"given":"Konstantina","family":"Christakopoulou","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Can","family":"Xu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Sai","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Sriraj","family":"Badam","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Trevor","family":"Potter","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Daniel","family":"Li","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Hao","family":"Wan","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xinyang","family":"Yi","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ya","family":"Le","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Chris","family":"Berg","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Eric Bencomo","family":"Dixon","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ed H.","family":"Chi","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Minmin","family":"Chen","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2025,10,3]]},"reference":[{"key":"4_CR1","doi-asserted-by":"crossref","unstructured":"Beutel, A., et al.: Latent cross: making use of context in recurrent recommender systems. In: Proceedings of the Eleventh ACM International Conference on Web Search and Data Mining, pp. 46\u201354 (2018)","DOI":"10.1145\/3159652.3159727"},{"issue":"4","key":"4_CR2","first-page":"1","volume":"5","author":"O Chapelle","year":"2014","unstructured":"Chapelle, O., Manavoglu, E., Rosales, R.: Simple and scalable response prediction for display advertising. ACM Trans. Intell. Syst. Technol. (TIST) 5(4), 1\u201334 (2014)","journal-title":"ACM Trans. Intell. Syst. Technol. (TIST)"},{"key":"4_CR3","doi-asserted-by":"crossref","unstructured":"Chen, M., Beutel, A., Covington, P., Jain, S., Belletti, F., Chi, E.H.: Top-k off-policy correction for a reinforce recommender system. In: Proceedings of the Twelfth ACM International Conference on Web Search and Data Mining, pp. 456\u2013464 (2019)","DOI":"10.1145\/3289600.3290999"},{"key":"4_CR4","doi-asserted-by":"crossref","unstructured":"Christakopoulou, K., et al.: Deconfounding user satisfaction estimation from response rate bias. In: Proceedings of the 14th ACM Conference on recommender systems, pp. 450\u2013455 (2020)","DOI":"10.1145\/3383313.3412208"},{"key":"4_CR5","doi-asserted-by":"crossref","unstructured":"Covington, P., Adams, J., Sargin, E.: Deep neural networks for YouTube recommendations. In: Proceedings of the 10th ACM Conference on Recommender Systems, pp. 191\u2013198 (2016)","DOI":"10.1145\/2959100.2959190"},{"issue":"2","key":"4_CR6","doi-asserted-by":"publisher","first-page":"321","DOI":"10.1016\/0004-3702(94)90047-7","volume":"71","author":"M Dorigo","year":"1994","unstructured":"Dorigo, M., Colombetti, M.: Robot shaping: developing autonomous agents through learning. Artif. Intell. 71(2), 321\u2013370 (1994)","journal-title":"Artif. Intell."},{"key":"4_CR7","unstructured":"Dulac-Arnold, G., et al.: Deep reinforcement learning in large discrete action spaces. arXiv preprint arXiv:1512.07679 (2015)"},{"key":"4_CR8","unstructured":"Elkan, C.: The foundations of cost-sensitive learning. In: International joint Conference on Artificial Intelligence, vol.\u00a017, pp. 973\u2013978. Lawrence Erlbaum Associates Ltd (2001)"},{"key":"4_CR9","doi-asserted-by":"crossref","unstructured":"Garcia-Gathright, J., St.\u00a0Thomas, B., Hosey, C., Nazari, Z., Diaz, F.: Understanding and evaluating user satisfaction with music discovery. In: The 41st International ACM SIGIR Conference on Research & Development in Information Retrieval, pp. 55\u201364 (2018)","DOI":"10.1145\/3209978.3210049"},{"key":"4_CR10","doi-asserted-by":"crossref","unstructured":"Gu, S., Holly, E., Lillicrap, T., Levine, S.: Deep reinforcement learning for robotic manipulation with asynchronous off-policy updates. In: 2017 IEEE International Conference on Robotics and Automation (ICRA), pp. 3389\u20133396. IEEE (2017)","DOI":"10.1109\/ICRA.2017.7989385"},{"key":"4_CR11","doi-asserted-by":"crossref","unstructured":"Guo, Q., Agichtein, E.: Beyond dwell time: estimating document relevance from cursor movements and other post-click searcher behavior. In: Proceedings of the 21st International Conference on World Wide Web, pp. 569\u2013578 (2012)","DOI":"10.1145\/2187836.2187914"},{"key":"4_CR12","unstructured":"Hadfield-Menell, D., Milli, S., Abbeel, P., Russell, S.J., Dragan, A.: Inverse reward design. In: Advances in neural information processing systems. pp. 6765\u20136774 (2017)"},{"key":"4_CR13","doi-asserted-by":"crossref","unstructured":"Ie, E., et al.: Slateq: a tractable decomposition for reinforcement learning with recommendation sets. In: Proceedings of the Twenty-eighth International Joint Conference on Artificial Intelligence (IJCAI-19), pp. 2592\u20132599. Macau, China (2019), see arXiv:1905.12767 for a related and expanded paper (with additional material and authors)","DOI":"10.24963\/ijcai.2019\/360"},{"key":"4_CR14","unstructured":"Lalmas, M.: Metrics, engagement & personalization. In: REVEAL workshop, The ACM Conference Series on Recommender Systems (2019)"},{"key":"4_CR15","unstructured":"Liu, F., et al.: Deep reinforcement learning based recommendation with explicit user-item interactions modeling. arXiv preprint arXiv:1810.12027 (2018)"},{"key":"4_CR16","doi-asserted-by":"crossref","unstructured":"Mataric, M.J.: Reward functions for accelerated learning. In: Machine learning proceedings 1994, pp. 181\u2013189. Elsevier (1994)","DOI":"10.1016\/B978-1-55860-335-6.50030-1"},{"key":"4_CR17","doi-asserted-by":"crossref","unstructured":"Mehrotra, R., Lalmas, M., Kenney, D., Lim-Meng, T., Hashemian, G.: Jointly leveraging intent and interaction signals to predict user satisfaction with slate recommendations. In: Proceedings of The Web Conference 2019, pp. 1256\u20131267 (2019)","DOI":"10.1145\/3308558.3313613"},{"key":"4_CR18","doi-asserted-by":"crossref","unstructured":"Mehrotra, R., McInerney, J., Bouchard, H., Lalmas, M., Diaz, F.: Towards a fair marketplace: Counterfactual evaluation of the trade-off between relevance, fairness & satisfaction in recommendation systems. In: Proceedings of the 27th ACM International Conference on Information and Knowledge Management, pp. 2243\u20132251 (2018)","DOI":"10.1145\/3269206.3272027"},{"key":"4_CR19","unstructured":"Mnih, V., et al.: Asynchronous methods for deep reinforcement learning. In: International Conference on Machine Learning, pp. 1928\u20131937 (2016)"},{"key":"4_CR20","unstructured":"Mnih, V., et al.: Playing atari with deep reinforcement learning. arXiv preprint arXiv:1312.5602 (2013)"},{"key":"4_CR21","doi-asserted-by":"crossref","unstructured":"Mnih, V., et\u00a0al.: Human-level control through deep reinforcement learning. Nature 518(7540), 529\u2013533 (2015)","DOI":"10.1038\/nature14236"},{"key":"4_CR22","unstructured":"Munos, R., Stepleton, T., Harutyunyan, A., Bellemare, M.: Safe and efficient off-policy reinforcement learning. In: Advances in Neural Information Processing Systems, pp. 1054\u20131062 (2016)"},{"key":"4_CR23","unstructured":"Ng, A.Y., Harada, D., Russell, S.: Policy invariance under reward transformations: Theory and application to reward shaping. In: ICML, vol.\u00a099, pp. 278\u2013287 (1999)"},{"issue":"10","key":"4_CR24","doi-asserted-by":"publisher","first-page":"1345","DOI":"10.1109\/TKDE.2009.191","volume":"22","author":"SJ Pan","year":"2009","unstructured":"Pan, S.J., Yang, Q.: A survey on transfer learning. IEEE Trans. Knowl. Data Eng. 22(10), 1345\u20131359 (2009)","journal-title":"IEEE Trans. Knowl. Data Eng."},{"key":"4_CR25","doi-asserted-by":"crossref","unstructured":"Paulhus, D.L.: Measurement and control of response bias (1991)","DOI":"10.1016\/B978-0-12-590241-0.50006-X"},{"issue":"6","key":"4_CR26","doi-asserted-by":"publisher","first-page":"233","DOI":"10.1016\/S1364-6613(99)01327-3","volume":"3","author":"S Schaal","year":"1999","unstructured":"Schaal, S.: Is imitation learning the route to humanoid robots? Trends Cogn. Sci. 3(6), 233\u2013242 (1999)","journal-title":"Trends Cogn. Sci."},{"key":"4_CR27","unstructured":"Schulman, J., Moritz, P., Levine, S., Jordan, M., Abbeel, P.: High-dimensional continuous control using generalized advantage estimation. arXiv preprint arXiv:1506.02438 (2015)"},{"issue":"6419","key":"4_CR28","doi-asserted-by":"publisher","first-page":"1140","DOI":"10.1126\/science.aar6404","volume":"362","author":"D Silver","year":"2018","unstructured":"Silver, D., et al.: A general reinforcement learning algorithm that masters chess, shogi, and go through self-play. Science 362(6419), 1140\u20131144 (2018)","journal-title":"Science"},{"key":"4_CR29","unstructured":"Sutton, R.S., Barto, A.G.: Reinforcement learning: An introduction. MIT press (2018)"},{"key":"4_CR30","unstructured":"Sutton, R.S., McAllester, D.A., Singh, S.P., Mansour, Y.: Policy gradient methods for reinforcement learning with function approximation. In: Advances in neural information processing systems, pp. 1057\u20131063 (2000)"},{"key":"4_CR31","doi-asserted-by":"crossref","unstructured":"Wen, H., Yang, L., Estrin, D.: Leveraging post-click feedback for content recommendations. In: Proceedings of the 13th ACM Conference on Recommender Systems, pp. 278\u2013286 (2019)","DOI":"10.1145\/3298689.3347037"},{"issue":"3\u20134","key":"4_CR32","doi-asserted-by":"publisher","first-page":"229","DOI":"10.1023\/A:1022672621406","volume":"8","author":"RJ Williams","year":"1992","unstructured":"Williams, R.J.: Simple statistical gradient-following algorithms for connectionist reinforcement learning. Mach. Learn. 8(3\u20134), 229\u2013256 (1992)","journal-title":"Mach. Learn."},{"key":"4_CR33","doi-asserted-by":"crossref","unstructured":"Yi, X., Hong, L., Zhong, E., Liu, N.N., Rajan, S.: Beyond clicks: dwell time for personalization. In: Proceedings of the 8th ACM Conference on Recommender systems, pp. 113\u2013120 (2014)","DOI":"10.1145\/2645710.2645724"},{"key":"4_CR34","doi-asserted-by":"crossref","unstructured":"Zhang, J., Hao, B., Chen, B., Li, C., Chen, H., Sun, J.: Hierarchical reinforcement learning for course recommendation in MOOCs. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol.\u00a033, pp. 435\u2013442 (2019)","DOI":"10.1609\/aaai.v33i01.3301435"},{"key":"4_CR35","doi-asserted-by":"crossref","unstructured":"Zhao, X., Xia, L., Zhang, L., Ding, Z., Yin, D., Tang, J.: Deep reinforcement learning for page-wise recommendations. In: Proceedings of the 12th ACM Conference on Recommender Systems, pp. 95\u2013103 (2018)","DOI":"10.1145\/3240323.3240374"},{"key":"4_CR36","doi-asserted-by":"crossref","unstructured":"Zheng, G., et al.: DRN: A deep reinforcement learning framework for news recommendation. In: Proceedings of the 2018 World Wide Web Conference, pp. 167\u2013176 (2018)","DOI":"10.1145\/3178876.3185994"}],"container-title":["Lecture Notes in Computer Science","Machine Learning and Knowledge Discovery in Databases. Research Track"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-032-06106-5_4","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,10,2]],"date-time":"2025-10-02T10:09:15Z","timestamp":1759399755000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-032-06106-5_4"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,3]]},"ISBN":["9783032061058","9783032061065"],"references-count":36,"URL":"https:\/\/doi.org\/10.1007\/978-3-032-06106-5_4","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,10,3]]},"assertion":[{"value":"3 October 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECML PKDD","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Joint European Conference on Machine Learning and Knowledge Discovery in Databases","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Porto","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Portugal","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"15 September 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"19 September 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ecml2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/ecmlpkdd.org\/2025\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}