{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,7,17]],"date-time":"2026-07-17T23:13:51Z","timestamp":1784330031437,"version":"3.55.0"},"publisher-location":"New York, NY, USA","reference-count":47,"publisher":"ACM","license":[{"start":{"date-parts":[[2020,9,22]],"date-time":"2020-09-22T00:00:00Z","timestamp":1600732800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2020,9,22]]},"DOI":"10.1145\/3383313.3412489","type":"proceedings-article","created":{"date-parts":[[2020,9,19]],"date-time":"2020-09-19T02:28:21Z","timestamp":1600482501000},"page":"23-32","source":"Crossref","is-referenced-by-count":120,"title":["Are We Evaluating Rigorously? Benchmarking Recommendation for Reproducible Evaluation and Fair Comparison"],"prefix":"10.1145","author":[{"given":"Zhu","family":"Sun","sequence":"first","affiliation":[{"name":"Macquarie University, Australia"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Di","family":"Yu","sequence":"additional","affiliation":[{"name":"Shanghai University of Finance and Economics, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Hui","family":"Fang","sequence":"additional","affiliation":[{"name":"Shanghai University of Finance and Economics, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Jie","family":"Yang","sequence":"additional","affiliation":[{"name":"Delft University of Technology, Netherlands"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Xinghua","family":"Qu","sequence":"additional","affiliation":[{"name":"Nanyang Technological University, Singapore"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Jie","family":"Zhang","sequence":"additional","affiliation":[{"name":"Nanyang 
Technological University, Singapore"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Cong","family":"Geng","sequence":"additional","affiliation":[{"name":"Shanghai University of Finance and Economics, China"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2020,9,22]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Reproducibility crisis?Nature 533, 26","author":"Baker Monya","year":"2016"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/MLSP.2016.7738886"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11257-016-9174-x"},{"key":"e_1_3_2_1_4_1","first-page":"281","article-title":"Random search for hyper-parameter optimization","volume":"13","author":"Bergstra James","year":"2012","journal-title":"JMLR"},{"key":"e_1_3_2_1_5_1","volume-title":"Overview of the 2019 Open-Source IR Replicability Challenge (OSIRRC","author":"Ryan Clancy","year":"2019"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"crossref","unstructured":"Paolo Cremonesi 2010. Performance of recommender algorithms on top-n recommendation tasks. In RecSys. Paolo Cremonesi 2010. Performance of recommender algorithms on top-n recommendation tasks. In RecSys.","DOI":"10.1145\/1864708.1864721"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"crossref","unstructured":"Maurizio\u00a0Ferrari Dacrema 2019. Are we really making much progress? A worrying analysis of recent neural recommendation approaches. In RecSys. Maurizio\u00a0Ferrari Dacrema 2019. Are we really making much progress? A worrying analysis of recent neural recommendation approaches. In RecSys.","DOI":"10.1145\/3298689.3347058"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"crossref","unstructured":"Jia Deng 2009. Imagenet: A large-scale hierarchical image database. In CVPR. Jia Deng 2009. Imagenet: A large-scale hierarchical image database. 
In CVPR.","DOI":"10.1109\/CVPRW.2009.5206848"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"crossref","unstructured":"Nicola Ferro and Diane Kelly. 2018. SIGIR initiative to implement ACM artifact review and badging. In ACM SIGIR Forum Vol.\u00a052. 4\u201310. Nicola Ferro and Diane Kelly. 2018. SIGIR initiative to implement ACM artifact review and badging. In ACM SIGIR Forum Vol.\u00a052. 4\u201310.","DOI":"10.1145\/3274784.3274786"},{"key":"e_1_3_2_1_10_1","first-page":"108","article-title":"Reproducibility of Data-Oriented Experiments in e-Science (Dagstuhl Seminar 16041)","volume":"6","author":"Freire Juliana","year":"2016","journal-title":"Dagstuhl Reports"},{"key":"e_1_3_2_1_11_1","volume-title":"UMAP Workshops, Vol.\u00a04.","author":"Guo Guibing","year":"2015"},{"key":"e_1_3_2_1_12_1","volume-title":"Proc. 37th European Conference on IR Research. Vol.\u00a09022","author":"Hanbury Allan","year":"2015"},{"key":"e_1_3_2_1_13_1","unstructured":"Xiangnan He 2016. Fast matrix factorization for online recommendation with implicit feedback. In SIGIR. Xiangnan He 2016. Fast matrix factorization for online recommendation with implicit feedback. In SIGIR."},{"key":"e_1_3_2_1_14_1","unstructured":"Xiangnan He 2017. Neural collaborative filtering. In WWW. Xiangnan He 2017. Neural collaborative filtering. In WWW."},{"key":"e_1_3_2_1_15_1","unstructured":"Xiangnan He and Tat-Seng Chua. 2017. Neural factorization machines for sparse predictive analytics. In SIGIR. Xiangnan He and Tat-Seng Chua. 2017. Neural factorization machines for sparse predictive analytics. In SIGIR."},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"crossref","unstructured":"Bal\u00e1zs Hidasi 2018. Recurrent neural networks with top-k gains for session-based recommendations. In CIKM. Bal\u00e1zs Hidasi 2018. Recurrent neural networks with top-k gains for session-based recommendations. In CIKM.","DOI":"10.1145\/3269206.3271761"},{"key":"e_1_3_2_1_17_1","unstructured":"Yifan Hu 2008. 
Collaborative filtering for implicit feedback datasets. In ICDM. Yifan Hu 2008. Collaborative filtering for implicit feedback datasets. In ICDM."},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"crossref","unstructured":"Dietmar Jannach and Malte Ludewig. 2017. When recurrent neural networks meet the neighborhood for session-based recommendation. In RecSys. Dietmar Jannach and Malte Ludewig. 2017. When recurrent neural networks meet the neighborhood for session-based recommendation. In RecSys.","DOI":"10.1145\/3109859.3109872"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"crossref","unstructured":"Dawen Liang 2018. Variational autoencoders for collaborative filtering. In WWW. Dawen Liang 2018. Variational autoencoders for collaborative filtering. In WWW.","DOI":"10.1145\/3178876.3186150"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"crossref","unstructured":"Sean\u00a0M McNee John Riedl and Joseph\u00a0A Konstan. 2006. Being accurate is not enough: how accuracy metrics have hurt recommender systems. In CHI\u201906 Extended Abstracts on Human Factors in Computing Systems. 1097\u20131101. Sean\u00a0M McNee John Riedl and Joseph\u00a0A Konstan. 2006. Being accurate is not enough: how accuracy metrics have hurt recommender systems. In CHI\u201906 Extended Abstracts on Human Factors in Computing Systems. 1097\u20131101.","DOI":"10.1145\/1125451.1125659"},{"key":"e_1_3_2_1_21_1","volume-title":"A manifesto for reproducible science. Nature human behaviour 1, 1","author":"R Munaf\u00f2","year":"2017"},{"key":"e_1_3_2_1_22_1","volume-title":"Slim: Sparse linear methods for top-n recommender systems. In ICDM.","author":"Ning Xia","year":"2011"},{"key":"e_1_3_2_1_23_1","unstructured":"Edward Raff. 2019. A Step Toward Quantifying Independently Reproducible Machine Learning Research. In NIPS. Edward Raff. 2019. A Step Toward Quantifying Independently Reproducible Machine Learning Research. In NIPS."},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"crossref","unstructured":"Steffen Rendle. 
2010. Factorization machines. In ICDM. Steffen Rendle. 2010. Factorization machines. In ICDM.","DOI":"10.1109\/ICDM.2010.127"},{"key":"e_1_3_2_1_25_1","unstructured":"Steffen Rendle 2009. BPR: Bayesian personalized ranking from implicit feedback. In IUI. Steffen Rendle 2009. BPR: Bayesian personalized ranking from implicit feedback. In IUI."},{"key":"e_1_3_2_1_26_1","unstructured":"Steffen Rendle 2019. On the difficulty of evaluating baselines: A study on recommender Systems. arXiv preprint arXiv:1905.01395(2019). Steffen Rendle 2019. On the difficulty of evaluating baselines: A study on recommender Systems. arXiv preprint arXiv:1905.01395(2019)."},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"crossref","unstructured":"Alan Said 2014. Comparative recommender system evaluation: benchmarking recommendation frameworks. In RecSys. Alan Said 2014. Comparative recommender system evaluation: benchmarking recommendation frameworks. In RecSys.","DOI":"10.1145\/2645710.2645746"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"crossref","unstructured":"Alan Said 2014. Rival: a toolkit to foster reproducibility in recommender system evaluation. In RecSys. Alan Said 2014. Rival: a toolkit to foster reproducibility in recommender system evaluation. In RecSys.","DOI":"10.1145\/2645710.2645712"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"crossref","unstructured":"Badrul Sarwar 2001. Item-based collaborative filtering recommendation algorithms. In WWW. Badrul Sarwar 2001. Item-based collaborative filtering recommendation algorithms. In WWW.","DOI":"10.1145\/371920.372071"},{"key":"e_1_3_2_1_30_1","unstructured":"Jasper Snoek 2012. Practical bayesian optimization of machine learning algorithms. In NIPS. Jasper Snoek 2012. Practical bayesian optimization of machine learning algorithms. In NIPS."},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"crossref","unstructured":"Zhu Sun 2018. Recurrent knowledge graph embedding for effective recommendation. In RecSys. Zhu Sun 2018. 
Recurrent knowledge graph embedding for effective recommendation. In RecSys.","DOI":"10.1145\/3240323.3240361"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.elerap.2019.100879"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"crossref","unstructured":"Jiliang Tang Huiji Gao Huan Liu and Atish Das\u00a0Sarma. 2012. eTrust: Understanding trust evolution in an online world. In KDD. Jiliang Tang Huiji Gao Huan Liu and Atish Das\u00a0Sarma. 2012. eTrust: Understanding trust evolution in an online world. In KDD.","DOI":"10.1145\/2339530.2339574"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"crossref","unstructured":"Xiaoli Tang 2019. AKUPM: Attention-enhanced knowledge-aware user preference model for recommendation. In KDD. Xiaoli Tang 2019. AKUPM: Attention-enhanced knowledge-aware user preference model for recommendation. In KDD.","DOI":"10.1145\/3292500.3330705"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"crossref","unstructured":"Daniel Valcarce 2018. On the robustness and discriminative power of information retrieval metrics for top-n recommendation. In RecSys. Daniel Valcarce 2018. On the robustness and discriminative power of information retrieval metrics for top-n recommendation. In RecSys.","DOI":"10.1145\/3240323.3240347"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"crossref","unstructured":"Hongwei Wang 2019. Multi-task feature learning for knowledge graph enhanced recommendation. In WWW. Hongwei Wang 2019. Multi-task feature learning for knowledge graph enhanced recommendation. In WWW.","DOI":"10.1145\/3308558.3313411"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"crossref","unstructured":"Xiang Wang 2019. Kgat: Knowledge graph attention network for recommendation. In KDD. Xiang Wang 2019. Kgat: Knowledge graph attention network for recommendation. In KDD.","DOI":"10.1145\/3292500.3330989"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"crossref","unstructured":"Xiang Wang 2019. Neural graph collaborative filtering. In SIGIR. Xiang Wang 2019. 
Neural graph collaborative filtering. In SIGIR.","DOI":"10.1145\/3331184.3331267"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"crossref","unstructured":"Ga Wu 2019. Noise contrastive estimation for one-class collaborative filtering. In SIGIR. Ga Wu 2019. Noise contrastive estimation for one-class collaborative filtering. In SIGIR.","DOI":"10.1145\/3331184.3331201"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"crossref","unstructured":"Yao Wu 2016. Collaborative denoising auto-encoders for top-n recommender systems. In WSDM. Yao Wu 2016. Collaborative denoising auto-encoders for top-n recommender systems. In WSDM.","DOI":"10.1145\/2835776.2835837"},{"key":"e_1_3_2_1_41_1","unstructured":"Fengli Xu 2019. Relation-aware graph convolutional networks for agent-initiated social e-commerce recommendation. In CIKM. Fengli Xu 2019. Relation-aware graph convolutional networks for agent-initiated social e-commerce recommendation. In CIKM."},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"crossref","unstructured":"Longqi Yang 2018. Openrec: A modular framework for extensible and adaptable recommendation algorithms. In WSDM. Longqi Yang 2018. Openrec: A modular framework for extensible and adaptable recommendation algorithms. In WSDM.","DOI":"10.1145\/3159652.3159681"},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"crossref","unstructured":"Fuzheng Zhang 2016. Collaborative knowledge base embedding for recommender systems. In KDD. Fuzheng Zhang 2016. Collaborative knowledge base embedding for recommender systems. In KDD.","DOI":"10.1145\/2939672.2939673"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1145\/3285029","article-title":"Deep learning based recommender system: A survey and new perspectives","volume":"52","author":"Zhang Shuai","year":"2019","journal-title":"CSUR"},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"crossref","unstructured":"Shuai Zhang 2019. DeepRec: An Open-source Toolkit for Deep Learning based Recommendation. In IJCAI. 
Shuai Zhang 2019. DeepRec: An Open-source Toolkit for Deep Learning based Recommendation. In IJCAI.","DOI":"10.24963\/ijcai.2019\/963"},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"crossref","unstructured":"Qian Zhao 2018. Interpreting user inaction in recommender systems. In RecSys. Qian Zhao 2018. Interpreting user inaction in recommender systems. In RecSys.","DOI":"10.1145\/3240323.3240366"},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"crossref","unstructured":"Cai-Nicolas Ziegler 2005. Improving recommendation lists through topic diversification. In WWW. Cai-Nicolas Ziegler 2005. Improving recommendation lists through topic diversification. In WWW.","DOI":"10.1145\/1060745.1060754"}],"event":{"name":"RecSys '20: Fourteenth ACM Conference on Recommender Systems","location":"Virtual Event Brazil","acronym":"RecSys '20","sponsor":["SIGWEB ACM Special Interest Group on Hypertext, Hypermedia, and Web","SIGAI ACM Special Interest Group on Artificial Intelligence","SIGKDD ACM Special Interest Group on Knowledge Discovery in Data","SIGIR ACM Special Interest Group on Information Retrieval","SIGCHI ACM Special Interest Group on Computer-Human Interaction","SIGecom Special Interest Group on Economics and Computation"]},"container-title":["Fourteenth ACM Conference on Recommender 
Systems"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3383313.3412489","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3383313.3412489","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T22:33:21Z","timestamp":1750199601000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3383313.3412489"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,9,22]]},"references-count":47,"alternative-id":["10.1145\/3383313.3412489","10.1145\/3383313"],"URL":"https:\/\/doi.org\/10.1145\/3383313.3412489","relation":{},"subject":[],"published":{"date-parts":[[2020,9,22]]}}}