{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,3]],"date-time":"2026-06-03T15:50:28Z","timestamp":1780501828876,"version":"3.54.1"},"reference-count":103,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"11","license":[{"start":{"date-parts":[[2023,11,1]],"date-time":"2023-11-01T00:00:00Z","timestamp":1698796800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2023,11,1]],"date-time":"2023-11-01T00:00:00Z","timestamp":1698796800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2023,11,1]],"date-time":"2023-11-01T00:00:00Z","timestamp":1698796800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"name":"ARC","award":["FL170100117"],"award-info":[{"award-number":["FL170100117"]}]},{"name":"ERC Starting","award":["LEGO-3D (850533)"],"award-info":[{"award-number":["LEGO-3D (850533)"]}]},{"name":"DFG EXC","award":["390727645"],"award-info":[{"award-number":["390727645"]}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Pattern Anal. Mach. Intell."],"published-print":{"date-parts":[[2023,11,1]]},"DOI":"10.1109\/tpami.2023.3298645","type":"journal-article","created":{"date-parts":[[2023,7,25]],"date-time":"2023-07-25T17:29:42Z","timestamp":1690306182000},"page":"13941-13958","source":"Crossref","is-referenced-by-count":222,"title":["Unifying Flow, Stereo and Depth Estimation"],"prefix":"10.1109","volume":"45","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-1313-3358","authenticated-orcid":false,"given":"Haofei","family":"Xu","sequence":"first","affiliation":[{"name":"ETH Zurich, Z&#x00FC;rich, Switzerland"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6595-7661","authenticated-orcid":false,"given":"Jing","family":"Zhang","sequence":"additional","affiliation":[{"name":"Sydney AI Center, The University of Sydney, Sydney, NSW, Australia"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9444-3763","authenticated-orcid":false,"given":"Jianfei","family":"Cai","sequence":"additional","affiliation":[{"name":"Monash University, Clayton, VIC, Australia"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8659-8773","authenticated-orcid":false,"given":"Hamid","family":"Rezatofighi","sequence":"additional","affiliation":[{"name":"Monash University, Clayton, VIC, Australia"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8829-7344","authenticated-orcid":false,"given":"Fisher","family":"Yu","sequence":"additional","affiliation":[{"name":"ETH Zurich, Z&#x00FC;rich, Switzerland"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7225-5449","authenticated-orcid":false,"given":"Dacheng","family":"Tao","sequence":"additional","affiliation":[{"name":"Sydney AI Center, The University of Sydney, Sydney, NSW, Australia"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8151-3726","authenticated-orcid":false,"given":"Andreas","family":"Geiger","sequence":"additional","affiliation":[{"name":"University of T&#x00FC;bingen, T&#x00FC;bingen, Germany"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"263","reference":[{"key":"ref57","first-page":"65:1","article-title":"Stereo matching by training a convolutional neural network to compare image patches","volume":"17","author":"zbontar","year":"2016","journal-title":"J Mach Learn Res"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2006.70"},{"key":"ref59","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.17"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.438"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v36i2.20083"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00872"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2012.156"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298720"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19790-1_40"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01708"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00996"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01033"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00870"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00070"},{"key":"ref42","doi-asserted-by":"crossref","first-page":"1744","DOI":"10.1109\/TPAMI.2011.236","article-title":"Motion detail preserving optical flow estimation","volume":"34","author":"xu","year":"2012","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-540-24673-2_3"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00590"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.291"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01063"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2007.1166"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1016\/0004-3702(81)90024-2"},{"key":"ref9","first-page":"298","article-title":"Bundle adjustment&#x2014;A modern synthesis","author":"triggs","year":"1999","journal-title":"Proc Int Workshop Vis Algorithms"},{"key":"ref4","first-page":"568","article-title":"Two-stream convolutional networks for action recognition in videos","author":"simonyan","year":"2014","journal-title":"Proc Int Conf Neural Inf Process"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1145\/2001269.2001293"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2012.6248074"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/ISMAR.2007.4538852"},{"key":"ref100","first-page":"22 158","article-title":"Hierarchical neural architecture search for deep stereo matching","author":"cheng","year":"2020","journal-title":"Proc Int Conf Neural Inf Process"},{"key":"ref101","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00566"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2010.143"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2013.458"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.261"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.596"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2012.6385773"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00963"},{"key":"ref30","first-page":"169","article-title":"LiteFlowNet3: Resolving correspondence ambiguity for more accurate optical flow estimation","author":"hui","year":"2020","journal-title":"Proc Eur Conf Comput Vis"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00895"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.272"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1023\/B:VISI.0000045324.43199.43"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00795"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00567"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1023\/A:1014573219977"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/3DV53792.2021.00032"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00027"},{"key":"ref20","article-title":"BA-NET: Dense bundle adjustment networks","author":"tang","year":"2019","journal-title":"Proc Int Conf Learn Representations"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01264"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58536-5_24"},{"key":"ref28","article-title":"Perceiver IO: A general architecture for structured inputs & outputs","author":"jaegle","year":"2022","journal-title":"Proc Int Conf Learn Representations"},{"key":"ref27","first-page":"5998","article-title":"Attention is all you need","author":"vaswani","year":"2017","journal-title":"Proc Int Conf Neural Inf Process"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00608"},{"key":"ref13","first-page":"611","article-title":"A naturalistic open source movie for optical flow evaluation","author":"butler","year":"2012","journal-title":"Proc Eur Conf Comput Vis"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01237-3_47"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-11752-2_3"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298925"},{"key":"ref97","first-page":"15220","article-title":"Displacement-invariant matching cost learning for accurate optical flow estimation","author":"wang","year":"2020","journal-title":"Proc Int Conf Neural Inf Process"},{"key":"ref96","first-page":"794","article-title":"Volumetric correspondence networks for optical flow","author":"yang","year":"2019","journal-title":"Proc Int Conf Neural Inf Process"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.316"},{"key":"ref99","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA48506.2021.9560873"},{"key":"ref10","article-title":"ImageNet classification with deep convolutional neural networks","author":"krizhevsky","year":"2012","journal-title":"Proc Int Conf Neural Inf Process"},{"key":"ref98","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01707"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00931"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.179"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00203"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00620"},{"key":"ref93","article-title":"ViTAEv2: Vision transformer advanced by exploring inductive bias for image recognition and beyond","author":"zhang","year":"2022"},{"key":"ref92","article-title":"ViTAE: Vision transformer advanced by exploring intrinsic inductive bias","author":"xu","year":"2021","journal-title":"Proc Int Conf Neural Inf Process"},{"key":"ref95","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2019.2894353"},{"key":"ref94","article-title":"Virtual VKITTI 2","author":"cabon","year":"2020"},{"key":"ref91","article-title":"ConViT: Improving vision transformers with soft convolutional inductive biases","author":"d\u2019ascoli","year":"2021","journal-title":"Proc Int Conf Mach Learn"},{"key":"ref90","article-title":"An image is worth 16x16 words: Transformers for image recognition at scale","author":"dosovitskiy","year":"2021"},{"key":"ref89","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.12276"},{"key":"ref86","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"ref85","first-page":"213","article-title":"End-to-end object detection with transformers","author":"carion","year":"2020","journal-title":"Proc Eur Conf Comput Vis"},{"key":"ref88","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.106"},{"key":"ref87","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00615"},{"key":"ref82","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.445"},{"key":"ref81","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58452-8_44"},{"key":"ref84","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00499"},{"key":"ref83","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.1996.517097"},{"key":"ref80","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00839"},{"key":"ref79","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00257"},{"key":"ref78","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00122"},{"key":"ref75","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19827-4_1"},{"key":"ref74","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00026"},{"key":"ref77","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19824-3_15"},{"key":"ref102","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00298"},{"key":"ref76","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19821-2_42"},{"key":"ref103","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01553"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2010.5539939"},{"key":"ref1","author":"hartley","year":"2003","journal-title":"Multiple View Geomatry in Computer Vision"},{"key":"ref71","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"ref70","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2020.3019967"},{"key":"ref73","article-title":"Deepv2d: Video to depth with differentiable structure from motion","author":"teed","year":"2020","journal-title":"Proc Int Conf Learn Representations"},{"key":"ref72","article-title":"DPSNet: End-to-end deep plane sweep stereo","author":"im","year":"2019","journal-title":"Proc Int Conf Learn Representations"},{"key":"ref68","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2019\/788"},{"key":"ref67","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.700"},{"key":"ref69","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00393"},{"key":"ref64","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19824-3_16"},{"key":"ref63","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00614"},{"key":"ref66","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.699"},{"key":"ref65","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46484-8_45"},{"key":"ref60","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01369"},{"key":"ref62","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01578"},{"key":"ref61","first-page":"5314","article-title":"On the synergies between machine learning and binocular stereo for depth estimation from images: A survey","volume":"44","author":"poggi","year":"2022","journal-title":"IEEE Trans Pattern Anal Mach Intell"}],"container-title":["IEEE Transactions on Pattern Analysis and Machine Intelligence"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/34\/10269680\/10193833.pdf?arnumber=10193833","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,10,4]],"date-time":"2023-10-04T17:40:43Z","timestamp":1696441243000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10193833\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,11,1]]},"references-count":103,"journal-issue":{"issue":"11"},"URL":"https:\/\/doi.org\/10.1109\/tpami.2023.3298645","relation":{},"ISSN":["0162-8828","2160-9292","1939-3539"],"issn-type":[{"value":"0162-8828","type":"print"},{"value":"2160-9292","type":"electronic"},{"value":"1939-3539","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023,11,1]]}}}