{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,7,11]],"date-time":"2026-07-11T02:40:32Z","timestamp":1783737632156,"version":"3.55.0"},"publisher-location":"New York, NY, USA","reference-count":34,"publisher":"ACM","license":[{"start":{"date-parts":[[2022,10,17]],"date-time":"2022-10-17T00:00:00Z","timestamp":1665964800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2022,10,17]]},"DOI":"10.1145\/3511808.3557479","type":"proceedings-article","created":{"date-parts":[[2022,10,16]],"date-time":"2022-10-16T01:29:57Z","timestamp":1665883797000},"page":"2671-2680","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":34,"title":["Towards Understanding the Overfitting Phenomenon of Deep Click-Through Rate Models"],"prefix":"10.1145","author":[{"given":"Zhao-Yu","family":"Zhang","sequence":"first","affiliation":[{"name":"Nanjing University, Nanjing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Xiang-Rong","family":"Sheng","sequence":"additional","affiliation":[{"name":"Alibaba Group, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Yujing","family":"Zhang","sequence":"additional","affiliation":[{"name":"Alibaba Group, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Biye","family":"Jiang","sequence":"additional","affiliation":[{"name":"Alibaba Group, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Shuguang","family":"Han","sequence":"additional","affiliation":[{"name":"Alibaba Group, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Hongbo","family":"Deng","sequence":"additional","affiliation":[{"name":"Alibaba Group, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Bo","family":"Zheng","sequence":"additional","affiliation":[{"name":"Alibaba Group, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2022,10,17]]},"reference":[{"key":"e_1_3_2_2_1_1","unstructured":"Peter L. Bartlett Dylan J. Foster and Matus Telgarsky. 2017. Spectrally-normalized margin bounds for neural networks. In Advances in Neural Information Processing Systems 30. 6240--6249.  Peter L. Bartlett Dylan J. Foster and Matus Telgarsky. 2017. Spectrally-normalized margin bounds for neural networks. In Advances in Neural Information Processing Systems 30. 6240--6249."},{"key":"e_1_3_2_2_2_1","doi-asserted-by":"publisher","DOI":"10.1073\/pnas.1903070116"},{"key":"e_1_3_2_2_3_1","doi-asserted-by":"crossref","unstructured":"Shai Ben-David John Blitzer Koby Crammer Fernando Pereira etal 2007. Analysis of representations for domain adaptation. Advances in neural information processing systems 19 (2007) 137.  Shai Ben-David John Blitzer Koby Crammer Fernando Pereira et al. 2007. Analysis of representations for domain adaptation. Advances in neural information processing systems 19 (2007) 137.","DOI":"10.7551\/mitpress\/7503.003.0022"},{"key":"e_1_3_2_2_4_1","doi-asserted-by":"publisher","DOI":"10.1145\/3488560.3498435"},{"key":"e_1_3_2_2_5_1","doi-asserted-by":"publisher","DOI":"10.1145\/2623330.2623634"},{"key":"e_1_3_2_2_6_1","doi-asserted-by":"publisher","DOI":"10.1145\/2988450.2988454"},{"key":"e_1_3_2_2_7_1","doi-asserted-by":"publisher","DOI":"10.1145\/2959100.2959190"},{"key":"e_1_3_2_2_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2013.6639346"},{"key":"e_1_3_2_2_9_1","volume-title":"Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics","author":"Devlin Jacob","year":"2019","unstructured":"Jacob Devlin , Ming-Wei Chang , Kenton Lee , and Kristina Toutanova . 2019 . BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding . In Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics . Minneapolis, MN, USA, 4171--4186. Jacob Devlin, Ming-Wei Chang, Kenton Lee, and Kristina Toutanova. 2019. BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding. In Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics. Minneapolis, MN, USA, 4171--4186."},{"key":"e_1_3_2_2_10_1","volume-title":"Greedy function approximation: a gradient boosting machine. Annals of statistics","author":"Friedman Jerome H","year":"2001","unstructured":"Jerome H Friedman . 2001. Greedy function approximation: a gradient boosting machine. Annals of statistics ( 2001 ), 1189--1232. Jerome H Friedman. 2001. Greedy function approximation: a gradient boosting machine. Annals of statistics (2001), 1189--1232."},{"key":"e_1_3_2_2_11_1","doi-asserted-by":"publisher","DOI":"10.1145\/3447548.3467086"},{"key":"e_1_3_2_2_12_1","doi-asserted-by":"publisher","DOI":"10.5555\/3172077.3172127"},{"key":"e_1_3_2_2_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.123"},{"key":"e_1_3_2_2_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"e_1_3_2_2_15_1","first-page":"2","article-title":"Neural networks for machine learning lecture 6a overview of mini-batch gradient descent","volume":"14","author":"Hinton Geoffrey","year":"2012","unstructured":"Geoffrey Hinton , Nitish Srivastava , and Kevin Swersky . 2012 . Neural networks for machine learning lecture 6a overview of mini-batch gradient descent . Cited on 14 , 8 (2012), 2 . Geoffrey Hinton, Nitish Srivastava, and Kevin Swersky. 2012. Neural networks for machine learning lecture 6a overview of mini-batch gradient descent. Cited on 14, 8 (2012), 2.","journal-title":"Cited on"},{"key":"e_1_3_2_2_16_1","doi-asserted-by":"publisher","DOI":"10.1145\/3326937.3341255"},{"key":"e_1_3_2_2_17_1","volume-title":"Proceedings of the 3rd International Conference on Learning Representations.","author":"Diederik","unstructured":"Diederik P. Kingma and Jimmy Ba. 2015. Adam: A Method for Stochastic Optimization . In Proceedings of the 3rd International Conference on Learning Representations. Diederik P. Kingma and Jimmy Ba. 2015. Adam: A Method for Stochastic Optimization. In Proceedings of the 3rd International Conference on Learning Representations."},{"key":"e_1_3_2_2_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/MC.2009.263"},{"key":"e_1_3_2_2_19_1","volume-title":"Decoupled weight decay regularization. arXiv preprint arXiv:1711.05101","author":"Loshchilov Ilya","year":"2017","unstructured":"Ilya Loshchilov and Frank Hutter . 2017. Decoupled weight decay regularization. arXiv preprint arXiv:1711.05101 ( 2017 ). Ilya Loshchilov and Frank Hutter. 2017. Decoupled weight decay regularization. arXiv preprint arXiv:1711.05101 (2017)."},{"key":"e_1_3_2_2_20_1","doi-asserted-by":"publisher","DOI":"10.1145\/3209978.3210104"},{"key":"e_1_3_2_2_21_1","doi-asserted-by":"publisher","DOI":"10.1145\/3233770"},{"key":"e_1_3_2_2_22_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICDM.2010.127"},{"key":"e_1_3_2_2_23_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-015-0816-y"},{"key":"e_1_3_2_2_24_1","volume-title":"Overfitting mechanism and avoidance in deep neural networks. arXiv preprint arXiv:1901.06566","author":"Salman Shaeke","year":"2019","unstructured":"Shaeke Salman and Xiuwen Liu . 2019. Overfitting mechanism and avoidance in deep neural networks. arXiv preprint arXiv:1901.06566 ( 2019 ). Shaeke Salman and Xiuwen Liu. 2019. Overfitting mechanism and avoidance in deep neural networks. arXiv preprint arXiv:1901.06566 (2019)."},{"key":"e_1_3_2_2_25_1","doi-asserted-by":"publisher","DOI":"10.1145\/3459637.3481941"},{"key":"e_1_3_2_2_26_1","volume-title":"Dropout: a simple way to prevent neural networks from overfitting. The journal of machine learning research 15, 1","author":"Srivastava Nitish","year":"2014","unstructured":"Nitish Srivastava , Geoffrey Hinton , Alex Krizhevsky , Ilya Sutskever , and Ruslan Salakhutdinov . 2014. Dropout: a simple way to prevent neural networks from overfitting. The journal of machine learning research 15, 1 ( 2014 ), 1929--1958. Nitish Srivastava, Geoffrey Hinton, Alex Krizhevsky, Ilya Sutskever, and Ruslan Salakhutdinov. 2014. Dropout: a simple way to prevent neural networks from overfitting. The journal of machine learning research 15, 1 (2014), 1929--1958."},{"key":"e_1_3_2_2_27_1","unstructured":"Ashish Vaswani Noam Shazeer Niki Parmar Jakob Uszkoreit Llion Jones Aidan N. Gomez Lukasz Kaiser and Illia Polosukhin. 2017. Attention is All you Need. In Advances in Neural Information Processing Systems 30. 5998--6008.  Ashish Vaswani Noam Shazeer Niki Parmar Jakob Uszkoreit Llion Jones Aidan N. Gomez Lukasz Kaiser and Illia Polosukhin. 2017. Attention is All you Need. In Advances in Neural Information Processing Systems 30. 5998--6008."},{"key":"e_1_3_2_2_28_1","doi-asserted-by":"publisher","DOI":"10.1109\/SC41405.2020.00025"},{"key":"e_1_3_2_2_29_1","doi-asserted-by":"publisher","DOI":"10.1145\/3446776"},{"key":"e_1_3_2_2_30_1","volume-title":"PICASSO: Unleashing the Potential of GPU-centric Training for Wide-and-deep Recommender Systems. In 2022 IEEE 38th International Conference on Data Engineering (ICDE). IEEE.","author":"Zhang Yuanxing","year":"2022","unstructured":"Yuanxing Zhang , Langshi Chen , Siran Yang , Man Yuan , Huimin Yi , 2022 . PICASSO: Unleashing the Potential of GPU-centric Training for Wide-and-deep Recommender Systems. In 2022 IEEE 38th International Conference on Data Engineering (ICDE). IEEE. Yuanxing Zhang, Langshi Chen, Siran Yang, Man Yuan, Huimin Yi, et al. 2022. PICASSO: Unleashing the Potential of GPU-centric Training for Wide-and-deep Recommender Systems. In 2022 IEEE 38th International Conference on Data Engineering (ICDE). IEEE."},{"key":"e_1_3_2_2_31_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.33015941"},{"key":"e_1_3_2_2_32_1","doi-asserted-by":"publisher","DOI":"10.1145\/3219819.3219823"},{"key":"e_1_3_2_2_33_1","volume-title":"Why over-parameterization of deep neural networks does not overfit? Science China Information Sciences 64, 1","author":"Zhou Zhi-Hua","year":"2021","unstructured":"Zhi-Hua Zhou . 2021. Why over-parameterization of deep neural networks does not overfit? Science China Information Sciences 64, 1 ( 2021 ). Zhi-Hua Zhou. 2021. Why over-parameterization of deep neural networks does not overfit? Science China Information Sciences 64, 1 (2021)."},{"key":"e_1_3_2_2_34_1","doi-asserted-by":"publisher","DOI":"10.1145\/3459637.3482486"}],"event":{"name":"CIKM '22: The 31st ACM International Conference on Information and Knowledge Management","location":"Atlanta GA USA","acronym":"CIKM '22","sponsor":["SIGWEB ACM Special Interest Group on Hypertext, Hypermedia, and Web","SIGIR ACM Special Interest Group on Information Retrieval"]},"container-title":["Proceedings of the 31st ACM International Conference on Information &amp; Knowledge Management"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3511808.3557479","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3511808.3557479","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T17:48:56Z","timestamp":1750182536000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3511808.3557479"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,10,17]]},"references-count":34,"alternative-id":["10.1145\/3511808.3557479","10.1145\/3511808"],"URL":"https:\/\/doi.org\/10.1145\/3511808.3557479","relation":{},"subject":[],"published":{"date-parts":[[2022,10,17]]},"assertion":[{"value":"2022-10-17","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}