{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,27]],"date-time":"2026-01-27T13:39:35Z","timestamp":1769521175317,"version":"3.49.0"},"reference-count":73,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2025,1,27]],"date-time":"2025-01-27T00:00:00Z","timestamp":1737936000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,1,27]],"date-time":"2025-01-27T00:00:00Z","timestamp":1737936000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","award":["2110545"],"award-info":[{"award-number":["2110545"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62122037"],"award-info":[{"award-number":["62122037"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61921006"],"award-info":[{"award-number":["61921006"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Mach Learn"],"published-print":{"date-parts":[[2025,2]]},"DOI":"10.1007\/s10994-024-06631-x","type":"journal-article","created":{"date-parts":[[2025,1,27]],"date-time":"2025-01-27T16:29:10Z","timestamp":1737995350000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Optimal large-scale stochastic optimization of NDCG surrogates for deep learning"],"prefix":"10.1007","volume":"114","author":[{"ORCID":"https:\/\/orcid.org\/0009-0000-0966-4506","authenticated-orcid":false,"given":"Zi-Hao","family":"Qiu","sequence":"first","affiliation":[]},{"given":"Quanqi","family":"Hu","sequence":"additional","affiliation":[]},{"given":"Yongjian","family":"Zhong","sequence":"additional","affiliation":[]},{"given":"Wei-Wei","family":"Tu","sequence":"additional","affiliation":[]},{"given":"Lijun","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Tianbao","family":"Yang","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,1,27]]},"reference":[{"key":"6631_CR1","doi-asserted-by":"crossref","unstructured":"Ai, Q., Wang, X., Bruch, S., Golbandi, N., Bendersky, M., & Najork, M. (2019). Learning groupwise multivariate scoring functions using deep neural networks. In: Proceedings of the 2019 ACM SIGIR international conference on theory of information retrieval, pp 85\u201392.","DOI":"10.1145\/3341981.3344218"},{"issue":"1","key":"6631_CR2","first-page":"165","volume":"199","author":"Y Arjevani","year":"2022","unstructured":"Arjevani, Y., Carmon, Y., Duchi, J. C., Foster, D. J., Srebro, N., & Woodworth, B. (2022). Lower bounds for non-convex stochastic optimization. Mathematical Programming, 199(1), 165\u2013214.","journal-title":"Mathematical Programming"},{"issue":"2","key":"6631_CR3","doi-asserted-by":"publisher","first-page":"519","DOI":"10.1137\/21M1406222","volume":"32","author":"K Balasubramanian","year":"2022","unstructured":"Balasubramanian, K., Ghadimi, S., & Nguyen, A. (2022). Stochastic multi-level composition optimization algorithms with level-independent convergence rates. SIAM Journal on Optimization, 32(2), 519\u2013544.","journal-title":"SIAM Journal on Optimization"},{"key":"6631_CR4","doi-asserted-by":"crossref","unstructured":"Bennett, J., Lanning, S., et al. (2007). The Netflix prize. In: Proceedings of KDD Cup and Workshop, vol 2007, p 35.","DOI":"10.1145\/1345448.1345459"},{"key":"6631_CR5","first-page":"730","volume":"29","author":"K Bhatia","year":"2015","unstructured":"Bhatia, K., Jain, H., Kar, P., Varma, M., & Jain, P. (2015). Sparse local embeddings for extreme multi-label classification. Advances in Neural Information Processing Systems, 29, 730\u2013738.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"6631_CR6","doi-asserted-by":"crossref","unstructured":"Burges, C., Shaked, T., Renshaw, E., Lazier, A., Deeds, M., Hamilton, N., & Hullender, G. (2005a). Learning to rank using gradient descent. In: Proceedings of the 22nd international conference on machine learning, pp 89\u201396.","DOI":"10.1145\/1102351.1102363"},{"key":"6631_CR7","doi-asserted-by":"crossref","unstructured":"Burges, C., Shaked, T., Renshaw, E., Lazier, A., Deeds, M., Hamilton, N., & Hullender, G. (2005b). Learning to rank using gradient descent. In: Proceedings of the 22nd international conference on machine learning, pp 89\u201396.","DOI":"10.1145\/1102351.1102363"},{"issue":"23\u2013581","key":"6631_CR8","first-page":"81","volume":"11","author":"CJ Burges","year":"2010","unstructured":"Burges, C. J. (2010). From ranknet to lambdarank to lambdamart: An overview. Learning, 11(23\u2013581), 81.","journal-title":"Learning"},{"key":"6631_CR9","doi-asserted-by":"crossref","unstructured":"Cao, Z., Qin, T., Liu, T., Tsai, M., & Li, H. (2007). Learning to rank: from pairwise approach to listwise approach. In: Proceedings of the 24th international conference on machine learning, pp 129\u2013136.","DOI":"10.1145\/1273496.1273513"},{"key":"6631_CR10","doi-asserted-by":"crossref","unstructured":"Chakrabarti, S., Khanna, R., Sawant, U., & Bhattacharyya, C. (2008). Structured learning for non-smooth ranking losses. In: Proceeding of the 14th ACM SIGKDD conference on knowledge discovery and data mining, pp 88\u201396.","DOI":"10.1145\/1401890.1401906"},{"key":"6631_CR11","unstructured":"Chapelle, O., & Chang, Y. (2011). Yahoo! learning to rank challenge overview. In: Proceedings of the learning to rank challenge, PMLR, pp 1\u201324."},{"key":"6631_CR12","doi-asserted-by":"publisher","first-page":"4937","DOI":"10.1109\/TSP.2021.3092377","volume":"69","author":"T Chen","year":"2021","unstructured":"Chen, T., Sun, Y., & Yin, W. (2021). Solving stochastic compositional optimization is nearly as easy as solving stochastic optimization. IEEE Transactions on Signal Processing, 69, 4937\u20134948.","journal-title":"IEEE Transactions on Signal Processing"},{"key":"6631_CR13","unstructured":"Chen, T., Sun, Y., & Yin, W. (2022). A single-timescale stochastic bilevel optimization method. In: Proceedings of the 25th international conference on artificial intelligence and statistics, vol 151, pp 2466\u20132488."},{"issue":"1","key":"6631_CR14","doi-asserted-by":"publisher","first-page":"235","DOI":"10.1007\/s10479-007-0176-2","volume":"153","author":"B Colson","year":"2007","unstructured":"Colson, B., Marcotte, P., & Savard, G. (2007). An overview of bilevel optimization. Annals of Operations Research, 153(1), 235\u2013256.","journal-title":"Annals of Operations Research"},{"key":"6631_CR15","doi-asserted-by":"crossref","unstructured":"Cremonesi, P., Koren, Y., & Turrin, R. (2010). Performance of recommender algorithms on top-n recommendation tasks. In: Proceedings of the 4th ACM conference on recommender systems, pp 39\u201346.","DOI":"10.1145\/1864708.1864721"},{"key":"6631_CR16","unstructured":"Cutkosky, A., & Orabona, F. (2019). Momentum-based variance reduction in non-convex sgd. In: Advances in neural information processing systems, vol 32."},{"key":"6631_CR17","first-page":"26698","volume":"35","author":"M Dagr\u00e9ou","year":"2022","unstructured":"Dagr\u00e9ou, M., Ablin, P., Vaiter, S., & Moreau, T. (2022). A framework for bilevel optimization that enables stochastic and global variance reduction algorithms. Advances in Neural Information Processing Systems, 35, 26698\u201326710.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"6631_CR18","unstructured":"Fang, C., Li, C. J., Lin, Z., & Zhang, T. (2018). Spider: Near-optimal non-convex optimization via stochastic path-integrated differential estimator. In: Advances in neural information processing systems, vol 31."},{"key":"6631_CR19","doi-asserted-by":"crossref","unstructured":"Gao, H., Wang, Z., & Ji, S. (2018). Large-scale learnable graph convolutional networks. In: Proceedings of the 24th ACM SIGKDD conference on knowledge discovery and data mining, pp 1416\u20131424.","DOI":"10.1145\/3219819.3219947"},{"issue":"4","key":"6631_CR20","doi-asserted-by":"publisher","first-page":"2341","DOI":"10.1137\/120880811","volume":"23","author":"S Ghadimi","year":"2013","unstructured":"Ghadimi, S., & Lan, G. (2013). Stochastic first-and zeroth-order methods for nonconvex stochastic programming. SIAM Journal on Optimization, 23(4), 2341\u20132368.","journal-title":"SIAM Journal on Optimization"},{"key":"6631_CR21","unstructured":"Ghadimi, S., & Wang, M. (2018). Approximation methods for bilevel programming. arXiv preprint arXiv:1802.02246."},{"key":"6631_CR22","unstructured":"Grover, A., Wang, E., Zweig, A., & Ermon, S. (2019). Stochastic optimization of sorting networks via continuous relaxations. In: the 7th international conference on learning representations."},{"key":"6631_CR23","unstructured":"Guo, Z., Hu, Q., Zhang, L., & Yang, T. (2021a). Randomized stochastic variance-reduced methods for multi-task stochastic bilevel optimization. arXiv preprint arXiv:2105.02266."},{"key":"6631_CR24","unstructured":"Guo, Z., Xu, Y., Yin, W., Jin, R., & Yang, T. (2021b). On stochastic moving-average estimators for non-convex optimization. arXiv preprint arXiv:2104.14840."},{"issue":"4","key":"6631_CR25","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/2827872","volume":"5","author":"FM Harper","year":"2015","unstructured":"Harper, F. M., & Konstan, J. A. (2015). The movielens datasets: History and context. ACM Transactions on Interactive Intelligent Systems, 5(4), 1\u201319.","journal-title":"ACM Transactions on Interactive Intelligent Systems"},{"key":"6631_CR26","doi-asserted-by":"crossref","unstructured":"He, X., Liao, L., Zhang, H., Nie, L., Hu, X., & Chua, T. S. (2017). Neural collaborative filtering. In: Proceedings of the 26th international conference on world wide web, pp 173\u2013182.","DOI":"10.1145\/3038912.3052569"},{"issue":"12","key":"6631_CR27","doi-asserted-by":"publisher","first-page":"2354","DOI":"10.1109\/TKDE.2018.2831682","volume":"30","author":"X He","year":"2018","unstructured":"He, X., He, Z., Song, J., Liu, Z., Jiang, Y. G., & Chua, T. S. (2018). Nais: Neural attentive item similarity model for recommendation. IEEE Transactions on Knowledge and Data Engineering, 30(12), 2354\u20132366.","journal-title":"IEEE Transactions on Knowledge and Data Engineering"},{"key":"6631_CR28","doi-asserted-by":"crossref","unstructured":"He, X., Deng, K., Wang, X., Li, Y., Zhang, Y., & Wang, M. (2020). Lightgcn: Simplifying and powering graph convolution network for recommendation. In: Proceedings of the 43rd international ACM SIGIR conference on research and development in information retrieval, pp 639\u2013648.","DOI":"10.1145\/3397271.3401063"},{"issue":"1","key":"6631_CR29","doi-asserted-by":"publisher","first-page":"147","DOI":"10.1137\/20M1387341","volume":"33","author":"M Hong","year":"2023","unstructured":"Hong, M., Wai, H. T., Wang, Z., & Yang, Z. (2023). A two-timescale framework for bilevel optimization: Complexity analysis and application to actor-critic. SIAM Journal on Optimization, 33(1), 147\u2013180.","journal-title":"SIAM Journal on Optimization"},{"key":"6631_CR30","unstructured":"Hu, W., Fey, M., Zitnik, M., Dong, Y., Ren, H., Liu, B., Catasta, M., & Leskovec, J. (2020a). Open graph benchmark: Datasets for machine learning on graphs. arXiv preprint arXiv:2005.00687."},{"key":"6631_CR31","unstructured":"Hu, Y., Zhang, S., Chen, X., & He, N. (2020b). Biased stochastic first-order methods for conditional stochastic optimization and applications in meta learning. Advances in Neural Information Processing Systems, 33, 2759\u20132770."},{"issue":"4","key":"6631_CR32","doi-asserted-by":"publisher","first-page":"422","DOI":"10.1145\/582415.582418","volume":"20","author":"K J\u00e4rvelin","year":"2002","unstructured":"J\u00e4rvelin, K., & Kek\u00e4l\u00e4inen, J. (2002). Cumulated gain-based evaluation of ir techniques. ACM Transactions on Information Systems, 20(4), 422\u2013446.","journal-title":"ACM Transactions on Information Systems"},{"key":"6631_CR33","first-page":"32499","volume":"35","author":"W Jiang","year":"2022","unstructured":"Jiang, W., Li, G., Wang, Y., Zhang, L., & Yang, T. (2022). Multi-block-single-probe variance reduced estimator for coupled compositional optimization. Advances in Neural Information Processing Systems, 35, 32499\u201332511.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"6631_CR34","unstructured":"Kishida, K. (2005). Property of average precision and its generalization: An examination of evaluation indicator for information retrieval experiments. National Institute of Informatics Tokyo, Japan."},{"issue":"2","key":"6631_CR35","doi-asserted-by":"publisher","first-page":"938","DOI":"10.1137\/120882706","volume":"6","author":"K Kunisch","year":"2013","unstructured":"Kunisch, K., & Pock, T. (2013). A bilevel optimization approach for parameter learning in variational models. SIAM Journal on Imaging Sciences, 6(2), 938\u2013983.","journal-title":"SIAM Journal on Imaging Sciences"},{"key":"6631_CR36","doi-asserted-by":"publisher","first-page":"7426","DOI":"10.1609\/aaai.v36i7.20706","volume":"36","author":"J Li","year":"2022","unstructured":"Li, J., Gu, B., & Huang, H. (2022). A fully single loop algorithm for bilevel optimization without hessian inverse. Proceedings of the AAAI Conference on Artificial Intelligence, 36, 7426\u20137434.","journal-title":"Proceedings of the AAAI Conference on Artificial Intelligence"},{"key":"6631_CR37","unstructured":"Lin, T., Jin, C., & Jordan, M. I. (2019). On gradient descent ascent for nonconvex-concave minimax problems. arXiv preprint arXiv:1906.00331."},{"key":"6631_CR38","doi-asserted-by":"crossref","unstructured":"Lin, T. Y., Goyal, P., Girshick, R., He, K., & Doll\u00e1r, P. (2017). Focal loss for dense object detection. In: Proceedings of the IEEE international conference on computer vision, pp 2980\u20132988.","DOI":"10.1109\/ICCV.2017.324"},{"key":"6631_CR39","doi-asserted-by":"crossref","unstructured":"Liu, N. N., & Yang, Q. (2008). Eigenrank: a ranking-oriented approach to collaborative filtering. In: Proceedings of the 31st international ACM SIGIR conference on research and development in information retrieval, pp 83\u201390.","DOI":"10.1145\/1390334.1390351"},{"key":"6631_CR40","unstructured":"Liu, R., Mu, P., Yuan, X., Zeng, S., & Zhang, J. (2020). A generic first-order algorithmic framework for bi-level programming beyond lower-level singleton. In: Proceedings of the 37th international conference on machine learning, pp 6305\u20136315."},{"key":"6631_CR41","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-14267-3","volume-title":"Learning to rank for information retrieval","author":"TY Liu","year":"2011","unstructured":"Liu, T. Y. (2011). Learning to rank for information retrieval. Springer."},{"issue":"1","key":"6631_CR42","doi-asserted-by":"publisher","first-page":"127","DOI":"10.1007\/s10107-004-0552-5","volume":"103","author":"Y Nesterov","year":"2005","unstructured":"Nesterov, Y. (2005). Smooth minimization of non-smooth functions. Math Program, 103(1), 127\u2013152.","journal-title":"Math Program"},{"key":"6631_CR43","doi-asserted-by":"crossref","unstructured":"Pasumarthi, R. K., Bruch, S., Wang, X., Li, C., Bendersky, M., Najork, M., Pfeifer, J., Golbandi, N., Anil, R., & Wolf, S. (2019). Tf-ranking: Scalable tensorflow library for learning-to-rank. In: Proceedings of the 25th ACM SIGKDD conference on knowledge discovery and data mining, pp 2970\u20132978.","DOI":"10.1145\/3292500.3330677"},{"key":"6631_CR44","unstructured":"Pobrotyn, P., & Bialobrzeski, R. (2021). Neuralndcg: Direct optimisation of a ranking metric via differentiable relaxation of sorting. arXiv preprint arXiv:2102.07831."},{"key":"6631_CR45","unstructured":"Pobrotyn, P., Bartczak, T., Synowiec, M., Bia\u0142obrzeski, R., & Bojar, J. (2020). Context-aware learning to rank with self-attention. arXiv preprint arXiv:2005.10084."},{"key":"6631_CR46","first-page":"1752","volume":"34","author":"Q Qi","year":"2021","unstructured":"Qi, Q., Luo, Y., Xu, Z., Ji, S., & Yang, T. (2021). Stochastic optimization of area under precision-recall curve for deep learning with provable convergence. Advances in Neural Information Processing Systems, 34, 1752\u20131765.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"6631_CR47","unstructured":"Qin, T., & Liu, T. Y. (2013). Introducing letor 4.0 datasets. arXiv preprint arXiv:1306.2597."},{"issue":"2","key":"6631_CR48","doi-asserted-by":"publisher","first-page":"838","DOI":"10.1016\/j.ipm.2007.07.016","volume":"44","author":"T Qin","year":"2008","unstructured":"Qin, T., Zhang, X. D., Tsai, M. F., Wang, D. S., Liu, T. Y., & Li, H. (2008). Query-level loss functions for information retrieval. Information Processing & Management, 44(2), 838\u2013855.","journal-title":"Information Processing & Management"},{"issue":"4","key":"6631_CR49","doi-asserted-by":"publisher","first-page":"375","DOI":"10.1007\/s10791-009-9124-x","volume":"13","author":"T Qin","year":"2010","unstructured":"Qin, T., Liu, T. Y., & Li, H. (2010). A general approximation framework for direct optimization of information retrieval measures. Information Retrieval, 13(4), 375\u2013397.","journal-title":"Information Retrieval"},{"key":"6631_CR50","unstructured":"Qiu, Z. H., Hu, Q., Zhong, Y., Zhang, L., & Yang, T. (2022). Large-scale stochastic optimization of ndcg surrogates for deep learning with provable convergence. In: Proceedings of the 39th international conference on machine learning, pp 18122\u201318152."},{"key":"6631_CR51","unstructured":"Ren, S., He, K., Girshick, R., & Sun, J. (2015). Faster r-cnn: Towards real-time object detection with region proposal networks. vol 28."},{"key":"6631_CR52","first-page":"12559","volume":"33","author":"Y Rong","year":"2020","unstructured":"Rong, Y., Bian, Y., Xu, T., Xie, W., Wei, Y., Huang, W., & Huang, J. (2020). Self-supervised graph transformer on large-scale molecular data. Advances in Neural Information Processing Systems, 33, 12559\u201312571.","journal-title":"Advances in Neural Information Processing Systems"},{"issue":"1","key":"6631_CR53","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1007\/s10115-018-1254-2","volume":"62","author":"M Singh","year":"2020","unstructured":"Singh, M. (2020). Scalability and sparsity issues in recommender datasets: A survey. Knowledge and Information Systems, 62(1), 1\u201343.","journal-title":"Knowledge and Information Systems"},{"key":"6631_CR54","unstructured":"Swezey, R., Grover, A., Charron, B., & Ermon, S. (2021). Pirank: Scalable learning to rank via differentiable sorting. Advances in Neural Information Processing Systems 34."},{"key":"6631_CR55","doi-asserted-by":"crossref","unstructured":"Taylor, M., Guiver, J., Robertson, S., & Minka, T. (2008). Softrank: Optimizing non-smooth rank metrics. In: Proceedings of the 2008 international conference on web search and web data mining, pp 77\u201386.","DOI":"10.1145\/1341531.1341544"},{"key":"6631_CR56","doi-asserted-by":"crossref","unstructured":"Thonet, T., Cinar, Y. G., Gaussier, E., Li, M., & Renders, J. M. (2022). Listwise learning to rank based on approximate rank indicators. In: Proceedings of the 36th AAAI conference on artificial intelligence, pp 8494\u20138502.","DOI":"10.1609\/aaai.v36i8.20826"},{"key":"6631_CR57","first-page":"1883","volume":"22","author":"H Valizadegan","year":"2009","unstructured":"Valizadegan, H., Jin, R., Zhang, R., & Mao, J. (2009). Learning to rank by optimizing NDCG measure. Advances in Neural Information Processing Systems, 22, 1883\u20131891.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"6631_CR58","doi-asserted-by":"crossref","unstructured":"Voorhees, E. M. (1999). Natural language processing and information retrieval. In: International summer school on information extraction, Springer, pp 32\u201348.","DOI":"10.1007\/3-540-48089-7_3"},{"key":"6631_CR59","doi-asserted-by":"crossref","unstructured":"Wang, C., Zhang, M., Ma, W., Liu, Y., & Ma, S. (2020). Make it a chorus: knowledge-and time-aware item modeling for sequential recommendation. In: Proceedings of the 43rd international ACM SIGIR conference on research and development in information retrieval, pp 109\u2013118.","DOI":"10.1145\/3397271.3401131"},{"issue":"1\u20132","key":"6631_CR60","doi-asserted-by":"publisher","first-page":"419","DOI":"10.1007\/s10107-016-1017-3","volume":"161","author":"M Wang","year":"2017","unstructured":"Wang, M., Fang, E. X., & Liu, H. (2017). Stochastic compositional gradient descent: algorithms for minimizing compositions of expected-value functions. Mathematical Programming, 161(1\u20132), 419\u2013449.","journal-title":"Mathematical Programming"},{"key":"6631_CR61","doi-asserted-by":"crossref","unstructured":"Wang, X., Li, C., Golbandi, N., Bendersky, M., & Najork, M. (2018). The lambdaloss framework for ranking metric optimization. In: Proceedings of The 27th ACM international conference on information and knowledge management, pp 1313\u20131322.","DOI":"10.1145\/3269206.3271784"},{"key":"6631_CR62","doi-asserted-by":"crossref","unstructured":"Wang, X., He, X., Wang, M., Feng, F., & Chua, T. S. (2019). Neural graph collaborative filtering. In: Proceedings of the 42nd international ACM SIGIR conference on research and development in information retrieval, pp 165\u2013174.","DOI":"10.1145\/3331184.3331267"},{"key":"6631_CR63","doi-asserted-by":"crossref","unstructured":"Wu, M., Chang, Y., Zheng, Z., & Zha, H. (2009). Smoothing dcg for learning to rank: A novel approach using smoothed hinge functions. In: Proceedings of the 18th ACM conference on information and knowledge management, p 1923-1926.","DOI":"10.1145\/1645953.1646266"},{"issue":"2","key":"6631_CR64","doi-asserted-by":"publisher","first-page":"513","DOI":"10.1039\/C7SC02664A","volume":"9","author":"Z Wu","year":"2018","unstructured":"Wu, Z., Ramsundar, B., Feinberg, E. N., Gomes, J., Geniesse, C., Pappu, A. S., Leswing, K., & Pande, V. (2018). Moleculenet: A benchmark for molecular machine learning. Chemical Science, 9(2), 513\u2013530.","journal-title":"Chemical Science"},{"key":"6631_CR65","doi-asserted-by":"crossref","unstructured":"Xia, F., Liu, T. Y., Wang, J., Zhang, W., & Li, H. (2008). Listwise approach to learning to rank: theory and algorithm. In: Proceedings of the 25th international conference on machine learning, pp 1192\u20131199.","DOI":"10.1145\/1390156.1390306"},{"key":"6631_CR66","doi-asserted-by":"crossref","unstructured":"Xu, J., & Li, H. (2007). Adarank: a boosting algorithm for information retrieval. In: Proceedings of the 30th international ACM SIGIR conference on research and development in information retrieval, pp 391\u2013398.","DOI":"10.1145\/1277741.1277809"},{"key":"6631_CR67","unstructured":"Xu, K., Hu, W., Leskovec, J., & Jegelka, S. (2018). How powerful are graph neural networks? arXiv preprint arXiv:1810.00826."},{"issue":"6","key":"6631_CR68","first-page":"1","volume":"19","author":"T Yang","year":"2018","unstructured":"Yang, T., & Lin, Q. (2018). Rsg: Beating subgradient method without smoothness and strong convexity. Journal of Machine Learning Research, 19(6), 1\u201333.","journal-title":"Journal of Machine Learning Research"},{"key":"6631_CR69","unstructured":"Yeh, J. Y., Lin, J. Y., Ke, H. R., Yang, W. P. (2007). Learning to rank for information retrieval using genetic programming. In: Proceedings of SIGIR 2007 workshop on learning to rank for information retrieval."},{"key":"6631_CR70","doi-asserted-by":"crossref","unstructured":"Yuan, T., Cheng, J., Zhang, X., Qiu, S., & Lu, H. (2014). Recommendation by mining multiple user behaviors with group sparsity. In: Twenty-Eighth AAAI conference on artificial intelligence.","DOI":"10.1609\/aaai.v28i1.8713"},{"key":"6631_CR71","doi-asserted-by":"crossref","unstructured":"Yuan, Z., Yan, Y., Sonka, M., & Yang, T. (2020). Robust deep auc maximization: A new surrogate loss and empirical studies on medical image classification. arXiv preprint arXiv:2012.03173.","DOI":"10.1109\/ICCV48922.2021.00303"},{"key":"6631_CR72","doi-asserted-by":"crossref","unstructured":"Yuan, Z., Zhu, D., Qiu, Z. H., Li, G., Wang, X., & Yang, T. (2023). Libauc: A deep learning library for x-risk optimization. In: Proceedings of the 29th ACM SIGKDD conference on knowledge discovery and data mining, pp 5487\u20135499.","DOI":"10.1145\/3580305.3599861"},{"issue":"1","key":"6631_CR73","first-page":"4130","volume":"21","author":"D Zhou","year":"2020","unstructured":"Zhou, D., Xu, P., & Gu, Q. (2020). Stochastic nested variance reduction for nonconvex optimization. The Journal of Machine Learning Research, 21(1), 4130\u20134192.","journal-title":"The Journal of Machine Learning Research"}],"container-title":["Machine Learning"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10994-024-06631-x.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10994-024-06631-x","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10994-024-06631-x.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,1,27]],"date-time":"2026-01-27T01:03:40Z","timestamp":1769475820000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10994-024-06631-x"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,1,27]]},"references-count":73,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2025,2]]}},"alternative-id":["6631"],"URL":"https:\/\/doi.org\/10.1007\/s10994-024-06631-x","relation":{},"ISSN":["0885-6125","1573-0565"],"issn-type":[{"value":"0885-6125","type":"print"},{"value":"1573-0565","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,1,27]]},"assertion":[{"value":"15 May 2023","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"18 August 2024","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"9 December 2024","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"27 January 2025","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors have no conflicts of interest to declare that are relevant to the content of this article.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}},{"value":"This content has been made available to all.","name":"free","label":"Free to read"}],"article-number":"42"}}