{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,6]],"date-time":"2026-06-06T13:14:15Z","timestamp":1780751655452,"version":"3.54.1"},"reference-count":92,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"12","license":[{"start":{"date-parts":[[2023,12,1]],"date-time":"2023-12-01T00:00:00Z","timestamp":1701388800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0\/"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Pattern Anal. Mach. Intell."],"published-print":{"date-parts":[[2023,12]]},"DOI":"10.1109\/tpami.2023.3314661","type":"journal-article","created":{"date-parts":[[2023,9,12]],"date-time":"2023-09-12T17:32:33Z","timestamp":1694539953000},"page":"15426-15444","source":"Crossref","is-referenced-by-count":6,"title":["Regularized Optimal Transport Layers for Generalized Global Pooling Operations"],"prefix":"10.1109","volume":"45","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-4192-5360","authenticated-orcid":false,"given":"Hongteng","family":"Xu","sequence":"first","affiliation":[{"name":"Gaoling School of Artificial Intelligence, Beijing Key Laboratory of Big Data Management and Analysis Methods, Renmin University of China, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1344-5749","authenticated-orcid":false,"given":"Minjie","family":"Cheng","sequence":"additional","affiliation":[{"name":"Gaoling School of Artificial Intelligence, Renmin University of China, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"263","reference":[{"key":"ref1","first-page":"662","article-title":"Deep multi-instance learning with dynamic pooling","volume-title":"Proc. Asian Conf. Mach. Learn.","author":"Yan"},{"key":"ref2","first-page":"2127","article-title":"Attention-based deep multiple instance learning","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Ilse"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.5555\/3327345.3327389"},{"key":"ref4","article-title":"How powerful are graph neural networks?","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Xu"},{"key":"ref5","first-page":"1097","article-title":"ImageNet classification with deep convolutional neural networks","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Krizhevsky"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01270-0_35"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00043"},{"key":"ref9","first-page":"111","article-title":"A theoretical analysis of feature pooling in visual recognition","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Boureau"},{"key":"ref10","first-page":"464","article-title":"Generalizing pooling functions in convolutional neural networks: Mixed, gated, and tree","volume-title":"Proc. Int. Conf. Artif. Intell. Statist.","author":"Lee"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2016.2637921"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1016\/j.knosys.2021.107456"},{"key":"ref13","article-title":"Network in network","author":"Lin","year":"2013"},{"key":"ref14","article-title":"Order matters: Sequence to sequence for sets","author":"Vinyals","year":"2015"},{"key":"ref15","first-page":"3394","article-title":"Deep sets","volume-title":"Proc. 31st Int. Conf. Neural Inf. Process. Syst.","author":"Zaheer"},{"key":"ref16","first-page":"3744","article-title":"Set transformer: A framework for attention-based permutation-invariant neural networks","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Lee"},{"key":"ref17","first-page":"2664","article-title":"Gromov-Wasserstein averaging of kernel and distance matrices","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Peyr\u00e9"},{"key":"ref18","article-title":"Differentiable convex optimization layers","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Agrawal"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1137\/20M1358517"},{"key":"ref20","first-page":"9639","article-title":"$(\\text {Implicit})^2$( Implicit )2: Implicit layers for implicit representations","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"34","author":"Huang"},{"key":"ref21","article-title":"Deep equilibrium models","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Bai"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2021.3059462"},{"key":"ref23","first-page":"2292","article-title":"Sinkhorn distances: Lightspeed computation of optimal transport","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Cuturi"},{"key":"ref24","first-page":"7673","article-title":"On unbalanced optimal transport: An analysis of Sinkhorn algorithm","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Pham"},{"key":"ref25","first-page":"8766","article-title":"The unbalanced Gromov Wasserstein distance: Conic formulation and relaxation","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"S\u00e9journ\u00e9"},{"key":"ref26","first-page":"2816","article-title":"Bregman alternating direction method of multipliers","volume-title":"Proc. 27th Int. Conf. Neural Inf. Process. Syst.","author":"Wang"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i04.6120"},{"key":"ref28","article-title":"Learning to pool in graph neural networks for extrapolation","author":"Ko","year":"2021"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-662-44848-9_34"},{"key":"ref30","article-title":"Learning prototype-oriented set representations for meta-learning","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Tian"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.1706.03762"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i04.5997"},{"key":"ref33","first-page":"3734","article-title":"Self-attention graph pooling","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Lee"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1561\/2200000073"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-540-71050-9"},{"key":"ref36","first-page":"2053","article-title":"Learning with a Wasserstein loss","volume-title":"Proc. 28th Int. Conf. Neural Inf. Process. Syst.","author":"Frogner"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2016.2615921"},{"key":"ref38","first-page":"3186","article-title":"Unbalanced minibatch optimal transport; applications to domain adaptation","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Fatras"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1137\/100805741"},{"key":"ref40","first-page":"685","article-title":"Fast computation of Wasserstein barycenters","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Cuturi"},{"key":"ref41","first-page":"214","article-title":"Wasserstein generative adversarial networks","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Arjovsky"},{"key":"ref42","article-title":"Wasserstein auto-encoders","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Tolstikhin"},{"key":"ref43","first-page":"957","article-title":"From word embeddings to document distances","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Kusner"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.2140\/pjm.1967.21.343"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1137\/141000439"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1090\/mcom\/3303"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1137\/16M1106018"},{"key":"ref48","first-page":"433","article-title":"A fast proximal point method for computing exact Wasserstein distance","volume-title":"Proc. Conf. Uncertainty Artif. Intell.","author":"Xie"},{"key":"ref49","first-page":"1961","article-title":"Near-linear time approximation algorithms for optimal transport via Sinkhorn iteration","volume-title":"Proc. 31st Int. Conf. Neural Inf. Process. Syst.","author":"Altschuler"},{"key":"ref50","article-title":"Partial optimal transport with applications on positive-unlabeled learning","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Chapel"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1109\/TSP.2017.2659647"},{"key":"ref52","first-page":"880","article-title":"Smooth and sparse optimal transport","volume-title":"Proc. Int. Conf. Artif. Intell. Statist.","author":"Blondel"},{"key":"ref53","first-page":"6275","article-title":"Optimal transport for structured data with application on graphs","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Titouan"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.3390\/a13090212"},{"key":"ref55","article-title":"Learning latent permutations with Gumbel-Sinkhorn networks","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Mena"},{"key":"ref56","first-page":"9438","article-title":"Sparse sinkhorn attention","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Tay"},{"key":"ref57","first-page":"733","article-title":"Sinkhorn autoencoders","volume-title":"Proc. Conf. Uncertainty Artif. Intell.","author":"Patrini"},{"key":"ref58","article-title":"Sinkformers: Transformers with doubly stochastic attention","author":"Sander","year":"2021"},{"key":"ref59","article-title":"A trainable optimal transport embedding for feature aggregation","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Mialon"},{"key":"ref60","article-title":"Differentiable expectation-maximization for set representation learning","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Kim"},{"key":"ref61","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00361"},{"key":"ref62","first-page":"3389","article-title":"Pooling by sliced-Wasserstein embedding","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Naderializadeh"},{"key":"ref63","article-title":"Wasserstein embedding for graph learning","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Kolouri"},{"key":"ref64","article-title":"Differentiable implicit layers","author":"Look","year":"2020"},{"key":"ref65","first-page":"146","article-title":"Input convex neural networks","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Amos"},{"key":"ref66","first-page":"136","article-title":"OptNet: Differentiable optimization as a layer in neural networks","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Amos"},{"key":"ref67","first-page":"10","article-title":"Deep ADMM-net for compressive sensing MRI","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Sun"},{"key":"ref68","first-page":"6828","article-title":"Learning a compressed sensing measurement matrix via gradient unrolling","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Wu"},{"key":"ref69","first-page":"1","article-title":"Automatic differentiation in machine learning: A survey","volume":"18","author":"Baydin","year":"2018","journal-title":"J. Marchine Learn. Res."},{"key":"ref70","first-page":"6672","article-title":"Optimal transport mapping via input convex neural networks","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Makkuva"},{"key":"ref71","article-title":"A hypergradient approach to robust regression without correspondence","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Xie"},{"key":"ref72","article-title":"Exploiting problem structure in deep declarative networks: Two case studies","author":"Gould","year":"2022"},{"key":"ref73","article-title":"DeeperGCN: All you need to train deeper GCNs","author":"Li","year":"2020"},{"key":"ref74","article-title":"StructPool: Structured graph pooling via conditional random fields","volume-title":"Proc. 8th Int. Conf. Learn. Representations","author":"Yuan"},{"key":"ref75","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2020.2999032"},{"key":"ref76","doi-asserted-by":"publisher","DOI":"10.1007\/s10208-011-9093-5"},{"key":"ref77","first-page":"10576","article-title":"Learning autoencoders with relational regularization","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Xu"},{"key":"ref78","first-page":"3046","article-title":"Scalable Gromov-Wasserstein learning for graph partitioning and matching","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Xu"},{"key":"ref79","first-page":"6932","article-title":"Gromov-Wasserstein learning for graph matching and node embedding","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Xu"},{"key":"ref80","doi-asserted-by":"publisher","DOI":"10.1137\/120891009"},{"key":"ref81","article-title":"A convergent single-loop algorithm for relaxation of Gromov-Wasserstein in graph data","volume-title":"Proc. 11th Int. Conf. Learn. Representations","author":"Li"},{"key":"ref82","doi-asserted-by":"publisher","DOI":"10.5566\/ias.1155"},{"key":"ref83","doi-asserted-by":"publisher","DOI":"10.1186\/1471-2105-6-S1-S16"},{"key":"ref84","article-title":"Adam: A method for stochastic optimization","author":"Kingma","year":"2014"},{"key":"ref85","article-title":"TUDataset: A collection of benchmark datasets for learning with graphs","volume-title":"Proc. Workshop Graph Representation Learn. Beyond","author":"Morris"},{"key":"ref86","article-title":"Adversarial graph augmentation to improve graph contrastive learning","author":"Suresh","year":"2021"},{"key":"ref87","doi-asserted-by":"publisher","DOI":"10.1109\/BIBM.2016.7822555"},{"key":"ref88","doi-asserted-by":"publisher","DOI":"10.1016\/j.mbs.2018.09.010"},{"key":"ref89","doi-asserted-by":"publisher","DOI":"10.1016\/j.jbi.2018.06.015"},{"key":"ref90","doi-asserted-by":"publisher","DOI":"10.1145\/3307339.3342136"},{"key":"ref91","article-title":"BioSNAP datasets: Stanford biomedical network dataset collection","author":"Zitnik","year":"2018"},{"key":"ref92","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2009.5206848"}],"container-title":["IEEE Transactions on Pattern Analysis and Machine Intelligence"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/34\/10308548\/10247589.pdf?arnumber=10247589","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,3,2]],"date-time":"2024-03-02T01:10:26Z","timestamp":1709341826000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10247589\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,12]]},"references-count":92,"journal-issue":{"issue":"12"},"URL":"https:\/\/doi.org\/10.1109\/tpami.2023.3314661","relation":{},"ISSN":["0162-8828","2160-9292","1939-3539"],"issn-type":[{"value":"0162-8828","type":"print"},{"value":"2160-9292","type":"electronic"},{"value":"1939-3539","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023,12]]}}}