{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,10]],"date-time":"2025-12-10T09:05:05Z","timestamp":1765357505974,"version":"3.46.0"},"reference-count":40,"publisher":"Springer Science and Business Media LLC","issue":"8","license":[{"start":{"date-parts":[[2024,1,5]],"date-time":"2024-01-05T00:00:00Z","timestamp":1704412800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,1,5]],"date-time":"2024-01-05T00:00:00Z","timestamp":1704412800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100010903","name":"Key Programme","doi-asserted-by":"publisher","award":["62141220"],"award-info":[{"award-number":["62141220"]}],"id":[{"id":"10.13039\/501100010903","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Mach Learn"],"published-print":{"date-parts":[[2024,8]]},"DOI":"10.1007\/s10994-023-06469-9","type":"journal-article","created":{"date-parts":[[2024,1,5]],"date-time":"2024-01-05T12:01:58Z","timestamp":1704456118000},"page":"5331-5349","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":5,"title":["GS2P: a generative pre-trained learning to rank model with over-parameterization for web-scale search"],"prefix":"10.1007","volume":"113","author":[{"given":"Yuchen","family":"Li","sequence":"first","affiliation":[]},{"given":"Haoyi","family":"Xiong","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9266-3044","authenticated-orcid":false,"given":"Linghe","family":"Kong","sequence":"additional","affiliation":[]},{"given":"Jiang","family":"Bian","sequence":"additional","affiliation":[]},{"given":"Shuaiqiang","family":"Wang","sequence":"additional","affiliation":[]},{"given":"Guihai","family":"Chen","sequence":"additional","affiliation":[]},{"given":"Dawei","family":"Yin","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,1,5]]},"reference":[{"key":"6469_CR1","doi-asserted-by":"crossref","unstructured":"Ai, Q., Wang, X., Bruch, S., Golbandi, N., Bendersky, M., & Najork, M. (2019). Learning groupwise multivariate scoring functions using deep neural networks. In Proceedings of the 2019 ACM SIGIR international conference on theory of information retrieval, pp. 85\u201392.","DOI":"10.1145\/3341981.3344218"},{"key":"6469_CR2","doi-asserted-by":"publisher","first-page":"203","DOI":"10.1017\/S0962492921000039","volume":"30","author":"M Belkin","year":"2021","unstructured":"Belkin, M. (2021). Fit without fear: Remarkable mathematical phenomena of deep learning through the prism of interpolation. Acta Numerica, 30, 203\u2013248.","journal-title":"Acta Numerica"},{"issue":"32","key":"6469_CR3","doi-asserted-by":"publisher","first-page":"15849","DOI":"10.1073\/pnas.1903070116","volume":"116","author":"M Belkin","year":"2019","unstructured":"Belkin, M., Hsu, D., Ma, S., & Mandal, S. (2019). Reconciling modern machine-learning practice and the classical bias-variance trade-off. Proceedings of the National Academy of Sciences, 116(32), 15849\u201315854.","journal-title":"Proceedings of the National Academy of Sciences"},{"key":"6469_CR4","doi-asserted-by":"crossref","unstructured":"Bruch, S., Zoghi, M., Bendersky, M., & Najork, M. (2019). Revisiting approximate metric optimization in the age of deep neural networks. In Proceedings of the 42nd international ACM SIGIR conference on research and development in information retrieval, pp. 1241\u20131244.","DOI":"10.1145\/3331184.3331347"},{"key":"6469_CR5","doi-asserted-by":"crossref","unstructured":"Burges, C. J. C., Ragno, R., & Le, Q. V. (2006). Learning to rank with nonsmooth cost functions. In Advances in neural information processing systems 19, proceedings of the twentieth annual conference on neural information processing systems, pp. 193\u2013200.","DOI":"10.7551\/mitpress\/7503.003.0029"},{"key":"6469_CR6","doi-asserted-by":"crossref","unstructured":"Burges, C. J. C., Shaked, T., Renshaw, E., Lazier, A., Deeds, M., Hamilton, N., & Hullender, G. N. (2005). Learning to rank using gradient descent. In Machine learning, proceedings of the twenty-second international conference, ICML, pp. 89\u201396.","DOI":"10.1145\/1102351.1102363"},{"key":"6469_CR7","doi-asserted-by":"crossref","unstructured":"Cao, Z., Qin, T., Liu, T., Tsai, M., & Li, H. (2007). Learning to rank: From pairwise approach to listwise approach. In Machine learning, proceedings of the twenty-fourth international conference, pp. 129\u2013136.","DOI":"10.1145\/1273496.1273513"},{"key":"6469_CR8","doi-asserted-by":"crossref","unstructured":"Chen, T., & Guestrin, C. (2016). Xgboost: A scalable tree boosting system. In Proceedings of the 22nd ACM Sigkdd international conference on knowledge discovery and data mining, pp. 785\u2013794.","DOI":"10.1145\/2939672.2939785"},{"issue":"2","key":"6469_CR9","doi-asserted-by":"publisher","first-page":"243","DOI":"10.1145\/3130348.3130374","volume":"51","author":"K J\u00e4rvelin","year":"2017","unstructured":"J\u00e4rvelin, K., & Kek\u00e4l\u00e4inen, J. (2017). IR evaluation methods for retrieving highly relevant documents. SIGIR Forum, 51(2), 243\u2013250.","journal-title":"SIGIR Forum"},{"key":"6469_CR10","doi-asserted-by":"crossref","unstructured":"Joachims, T. (2006). Training linear svms in linear time. In Proceedings of the 12th ACM SIGKDD international conference on knowledge discovery and data mining, pp. 217\u2013226.","DOI":"10.1145\/1150402.1150429"},{"key":"6469_CR11","unstructured":"Ke, G., Meng, Q., Finley, T., Wang, T., Chen, W., Ma, W., Ye, Q., & Liu, T. (2017). Lightgbm: A highly efficient gradient boosting decision tree. In Advances in neural information processing systems 30: Annual conference on neural information processing systems, pp. 3146\u20133154."},{"key":"6469_CR12","unstructured":"Kingma, D. P., & Welling, M. (2014). Auto-encoding variational Bayes. In 2nd International conference on learning representations."},{"key":"6469_CR13","doi-asserted-by":"crossref","unstructured":"Li, M., Liu, X., van\u00a0de Weijer, J., & Raducanu, B. C. (2020). Learning to rank for active learning: A listwise approach. In 25th International conference on pattern recognition, pp. 5587\u20135594.","DOI":"10.1109\/ICPR48806.2021.9412680"},{"key":"6469_CR14","doi-asserted-by":"crossref","unstructured":"Li, Y., Xiong, H., Kong, L., Zhang, R., Dou, D., & Chen, G. (2022). Meta hierarchical reinforced learning to rank for recommendation: A comprehensive study in moocs. In Joint European conference on machine learning and knowledge discovery in databases, pp. 302\u2013317.","DOI":"10.1007\/978-3-031-26422-1_19"},{"key":"6469_CR15","doi-asserted-by":"crossref","unstructured":"Li, Y., Xiong, H., Wang, Q., Kong, L., Liu, H., Li, H., Bian, J., Wang, S., Chen, G., Dou, D., et al. (2023). Coltr: Semi-supervised learning to rank with co-training and over-parameterization for web search. IEEE Transactions on Knowledge and Data Engineering.","DOI":"10.1109\/TKDE.2023.3270750"},{"key":"6469_CR16","doi-asserted-by":"crossref","unstructured":"Liu, Y., Lu, W., Cheng, S., Shi, D., Wang, S., Cheng, Z., & Yin, D. (2021). Pre-trained language model for web-scale retrieval in baidu search. In KDD \u201921: The 27th ACM SIGKDD conference on knowledge discovery and data mining, pp. 3365\u20133375.","DOI":"10.1145\/3447548.3467149"},{"key":"6469_CR17","unstructured":"Pobrotyn, P., & Bia\u0142obrzeski, R. (2021). Neuralndcg: Direct optimisation of a ranking metric via differentiable relaxation of sorting. arXiv preprint arXiv:2102.07831."},{"key":"6469_CR18","unstructured":"Pobrotyn, P., Bartczak, T., Synowiec, M., Bia\u0142obrzeski, R., & Bojar, J. (2020). Context-aware learning to rank with self-attention. arXiv preprint arXiv:2005.10084."},{"key":"6469_CR19","unstructured":"Qin, T., & Liu, T.-Y. (2013). Introducing letor 4.0 datasets. arXiv preprint arXiv:1306.2597."},{"key":"6469_CR20","unstructured":"Qin, Z., Yan, L., Zhuang, H., Tay, Y., Pasumarthi, R.K., Wang, X., Bendersky, M., & Najork, M. (2021). Are neural rankers still outperformed by gradient boosted decision trees? In International conference on learning representations."},{"issue":"4","key":"6469_CR21","doi-asserted-by":"publisher","first-page":"375","DOI":"10.1007\/s10791-009-9124-x","volume":"13","author":"T Qin","year":"2010","unstructured":"Qin, T., Liu, T., & Li, H. (2010). A general approximation framework for direct optimization of information retrieval measures. Information Retrieval, 13(4), 375\u2013397.","journal-title":"Information Retrieval"},{"key":"6469_CR22","unstructured":"Rahimi, A., & Recht, B. (2007). Random features for large-scale kernel machines. In Advances in neural information processing systems 20, proceedings of the twenty-first annual conference on neural information processing systems, pp. 1177\u20131184."},{"key":"6469_CR23","unstructured":"Samarin, M., Roth, V., & Belius, D. (2022). Feature learning and random features in standard finite-width convolutional neural networks: An empirical study. In Uncertainty in artificial intelligence, pp. 1718\u20131727."},{"key":"6469_CR24","unstructured":"Sun, Y., Wang, S., Li, Y., Feng, S., Chen, X., Zhang, H., Tian, X., Zhu, D., Tian, H., & Wu, H. (2019). Ernie: Enhanced representation through knowledge integration. arXiv preprint arXiv:1904.09223"},{"key":"6469_CR25","doi-asserted-by":"crossref","unstructured":"Szummer, M., & Yilmaz, E. (2011). Semi-supervised learning to rank with preference regularization. In Proceedings of the 20th ACM conference on information and knowledge management, pp. 269\u2013278.","DOI":"10.1145\/2063576.2063620"},{"key":"6469_CR26","doi-asserted-by":"crossref","unstructured":"Tran, L., Liu, X., Zhou, J., & Jin, R. (2017). Missing modalities imputation via cascaded residual autoencoder. In 2017 IEEE conference on computer vision and pattern recognition, pp. 4971\u20134980.","DOI":"10.1109\/CVPR.2017.528"},{"key":"6469_CR27","unstructured":"Vaswani, A., Shazeer, N., Parmar, N., Uszkoreit, J., Jones, L., Gomez, A.N., Kaiser, L., & Polosukhin, I. (2017). Attention is all you need. In Advances in neural information processing systems 30: Annual conference on neural information processing systems, pp. 5998\u20136008."},{"key":"6469_CR28","first-page":"3371","volume":"11","author":"P Vincent","year":"2010","unstructured":"Vincent, P., Larochelle, H., Lajoie, I., Bengio, Y., & Manzagol, P. (2010). Stacked denoising autoencoders: Learning useful representations in a deep network with a local denoising criterion. Journal of Machine Learning Research, 11, 3371\u20133408.","journal-title":"Journal of Machine Learning Research"},{"key":"6469_CR29","doi-asserted-by":"crossref","unstructured":"Wang, R., Shivanna, R., Cheng, D., Jain, S., Lin, D., Hong, L., & Chi, E. (2021). Dcn v2: Improved deep and cross network and practical lessons for web-scale learning to rank systems. In Proceedings of the web conference 2021, pp. 1785\u20131797.","DOI":"10.1145\/3442381.3450078"},{"key":"6469_CR30","doi-asserted-by":"crossref","unstructured":"Wang, R., Shivanna, R., Cheng, D.Z., Jain, S., Lin, D., Hong, L., & Chi, E.H. (2021). DCN V2: improved deep and cross network and practical lessons for web-scale learning to rank systems. In WWW \u201921: The web conference, pp. 1785\u20131797.","DOI":"10.1145\/3442381.3450078"},{"issue":"2","key":"6469_CR31","doi-asserted-by":"publisher","first-page":"415","DOI":"10.1007\/s10994-021-06122-3","volume":"111","author":"T Werner","year":"2022","unstructured":"Werner, T. (2022). A review on instance ranking problems in statistical learning. Machine Learning, 111(2), 415\u2013463.","journal-title":"Machine Learning"},{"key":"6469_CR32","doi-asserted-by":"crossref","unstructured":"Wu, X., Chen, H., Zhao, J., He, L., Yin, D., & Chang, Y. (2021). Unbiased learning to rank in feeds recommendation. In The fourteenth ACM international conference on web search and data mining, pp. 490\u2013498.","DOI":"10.1145\/3437963.3441751"},{"key":"6469_CR33","doi-asserted-by":"crossref","unstructured":"Xia, F., Liu, T., Wang, J., Zhang, W., & Li, H. (2008). Listwise approach to learning to rank: Theory and algorithm. In Machine learning, proceedings of the twenty-fifth international conference, pp. 1192\u20131199.","DOI":"10.1145\/1390156.1390306"},{"key":"6469_CR34","doi-asserted-by":"crossref","unstructured":"Yan, L., Qin, Z., Zhuang, H., Wang, X., Bendersky, M., & Najork, M. (2022). Revisiting two tower models for unbiased learning to rank. In Proceedings of the 45th international ACM SIGIR conference on research and development in information retrieval, pp. 2410\u20132414.","DOI":"10.1145\/3477495.3531837"},{"issue":"8","key":"6469_CR35","doi-asserted-by":"publisher","first-page":"172","DOI":"10.1145\/3554729","volume":"55","author":"T Yang","year":"2023","unstructured":"Yang, T., & Ying, Y. (2023). AUC maximization in the era of big data and AI: A survey. ACM Computing Surveys, 55(8), 172\u2013117237.","journal-title":"ACM Computing Surveys"},{"issue":"5","key":"6469_CR36","doi-asserted-by":"publisher","first-page":"833","DOI":"10.1007\/s11280-015-0363-z","volume":"19","author":"X Zhang","year":"2016","unstructured":"Zhang, X., He, B., & Luo, T. (2016). Training query filtering for semi-supervised learning to rank with pseudo labels. World Wide Web, 19(5), 833\u2013864.","journal-title":"World Wide Web"},{"key":"6469_CR37","unstructured":"Zhao, S., Wang, H., & Liu, T. (2010). Paraphrasing with search engine query logs. In COLING 2010, 23rd international conference on computational linguistics, proceedings of the conference, pp. 1317\u20131325."},{"key":"6469_CR38","unstructured":"Zhao, S., Wang, H., Li, C., Liu, T., & Guan, Y. (2011). Automatically generating questions from queries for community-based question answering. In Proceedings of 5th international joint conference on natural language processing, pp. 929\u2013937."},{"key":"6469_CR39","doi-asserted-by":"crossref","unstructured":"Zou, L., Zhang, S., Cai, H., Ma, D., Cheng, S., Wang, S., Shi, D., Cheng, Z., & Yin, D. (2021). Pre-trained language model based ranking in Baidu search. In KDD \u201921: The 27th ACM SIGKDD conference on knowledge discovery and data mining, virtual event, pp. 4014\u20134022.","DOI":"10.1145\/3447548.3467147"},{"key":"6469_CR40","doi-asserted-by":"crossref","unstructured":"Zou, L., Zhang, S., Cai, H., Ma, D., Cheng, S., Wang, S., Shi, D., Cheng, Z., & Yin, D. (2021). Pre-trained language model based ranking in baidu search. In Proceedings of the 27th ACM SIGKDD conference on knowledge discovery and data mining, pp. 4014\u20134022.","DOI":"10.1145\/3447548.3467147"}],"container-title":["Machine Learning"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10994-023-06469-9.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10994-023-06469-9\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10994-023-06469-9.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,11,27]],"date-time":"2025-11-27T18:05:10Z","timestamp":1764266710000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10994-023-06469-9"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,1,5]]},"references-count":40,"journal-issue":{"issue":"8","published-print":{"date-parts":[[2024,8]]}},"alternative-id":["6469"],"URL":"https:\/\/doi.org\/10.1007\/s10994-023-06469-9","relation":{},"ISSN":["0885-6125","1573-0565"],"issn-type":[{"type":"print","value":"0885-6125"},{"type":"electronic","value":"1573-0565"}],"subject":[],"published":{"date-parts":[[2024,1,5]]},"assertion":[{"value":"15 April 2023","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"8 August 2023","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"17 October 2023","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"5 January 2024","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"Not applicable.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Competing interests"}},{"value":"No data have been fabricated or manipulated to support your conclusions. No data, text, or theories by others are presented as if they were our own. Data we used, the data processing and inference phases do not contain any user personal information. This work does not have the potential to be used for policing or the military.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethics approval"}},{"value":"Not applicable","order":4,"name":"Ethics","group":{"name":"EthicsHeading","label":"Consent to participate"}},{"value":"Not applicable","order":5,"name":"Ethics","group":{"name":"EthicsHeading","label":"Consent for publication"}},{"value":"This content has been made available to all.","name":"free","label":"Free to read"}]}}