{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,7]],"date-time":"2026-03-07T17:42:45Z","timestamp":1772905365034,"version":"3.50.1"},"reference-count":114,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"10","license":[{"start":{"date-parts":[[2024,10,1]],"date-time":"2024-10-01T00:00:00Z","timestamp":1727740800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2024,10,1]],"date-time":"2024-10-01T00:00:00Z","timestamp":1727740800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,10,1]],"date-time":"2024-10-01T00:00:00Z","timestamp":1727740800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Pattern Anal. Mach. Intell."],"published-print":{"date-parts":[[2024,10]]},"DOI":"10.1109\/tpami.2024.3387326","type":"journal-article","created":{"date-parts":[[2024,4,10]],"date-time":"2024-04-10T18:14:27Z","timestamp":1712772867000},"page":"6919-6934","source":"Crossref","is-referenced-by-count":17,"title":["Learning Local and Global Temporal Contexts for Video Semantic Segmentation"],"prefix":"10.1109","volume":"46","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-8667-9656","authenticated-orcid":false,"given":"Guolei","family":"Sun","sequence":"first","affiliation":[{"name":"Computer Vision Lab, ETH Zurich, Zurich, Switzerland"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6143-0264","authenticated-orcid":false,"given":"Yun","family":"Liu","sequence":"additional","affiliation":[{"name":"Institute for Infocomm Research (I2R), Agency for Science, Technology and Research (A*STAR), Singapore"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4868-6526","authenticated-orcid":false,"given":"Henghui","family":"Ding","sequence":"additional","affiliation":[{"name":"MMLab at NTU, Nanyang Technological University, Singapore"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0977-3600","authenticated-orcid":false,"given":"Min","family":"Wu","sequence":"additional","affiliation":[{"name":"Institute for Infocomm Research (I2R), Agency for Science, Technology and Research (A*STAR), Singapore"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3445-5711","authenticated-orcid":false,"given":"Luc","family":"Van Gool","sequence":"additional","affiliation":[{"name":"Computer Vision Lab, ETH Zurich, Zurich, Switzerland"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00313"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2016.2572683"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2016.2644615"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00324"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58574-7_1"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-014-0733-5"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.350"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-018-1140-0"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00132"},{"key":"ref10","first-page":"28","article-title":"iSAID: A large-scale dataset for instance segmentation in aerial images","volume-title":"Proc. IEEE Conf. Comput. Vis. Pattern Recognit. Workshop","author":"Waqas Zamir"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.477"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.595"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.114"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00713"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-33715-4_54"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-540-88682-2_5"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00412"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00747"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00714"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00710"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.1909.11065"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58595-2_46"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00417"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00770"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00254"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00897"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2017.2699184"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.1802.02611"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.660"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00388"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00069"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00068"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01368"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00686"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58607-2_21"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00884"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-49409-8_69"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00907"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01225-0_40"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.441"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00628"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1109\/ICME51207.2021.9428381"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.316"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.1706.03762"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.2010.11929"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00813"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00983"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01172"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00675"},{"key":"ref51","first-page":"9355","article-title":"Twins: Revisiting the design of spatial attention in vision transformers","volume-title":"Proc. Annu. Conf. Neural Inform. Process. Syst.","author":"Chu"},{"key":"ref52","first-page":"12077","article-title":"SegFormer: Simple and efficient design for semantic segmentation with transformers","volume-title":"Proc. Annu. Conf. Neur. Inform. Process. Syst.","author":"Xie"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1109\/WACV51458.2022.00269"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00687"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00705"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01235"},{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00960"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00383"},{"key":"ref59","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00433"},{"key":"ref60","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00405"},{"key":"ref61","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58520-4_1"},{"key":"ref62","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58520-4_4"},{"key":"ref63","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00064"},{"key":"ref64","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00728"},{"key":"ref65","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00199"},{"key":"ref66","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00533"},{"key":"ref67","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-021-01539-8"},{"key":"ref68","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58520-4_26"},{"key":"ref69","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v36i2.20139"},{"key":"ref70","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.396"},{"key":"ref71","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00926"},{"key":"ref72","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00690"},{"key":"ref73","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00326"},{"key":"ref74","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58595-2_19"},{"key":"ref75","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01308"},{"key":"ref76","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00909"},{"key":"ref77","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00366"},{"key":"ref78","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.345"},{"key":"ref79","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.224"},{"key":"ref80","doi-asserted-by":"publisher","DOI":"10.5555\/2969033.2969125"},{"key":"ref81","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00906"},{"key":"ref82","doi-asserted-by":"publisher","DOI":"10.1145\/3474085.3475409"},{"key":"ref83","doi-asserted-by":"publisher","DOI":"10.1109\/IROS51168.2021.9636192"},{"key":"ref84","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00254"},{"key":"ref85","article-title":"Focal self-attention for local-global interactions in vision transformers","author":"Yang","year":"2021"},{"key":"ref86","doi-asserted-by":"publisher","DOI":"10.1109\/ICCVW54120.2021.00210"},{"key":"ref87","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00060"},{"key":"ref88","doi-asserted-by":"publisher","DOI":"10.1007\/s11633-024-1393-8"},{"key":"ref89","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00681"},{"key":"ref90","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01850"},{"key":"ref91","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01227"},{"key":"ref92","doi-asserted-by":"publisher","DOI":"10.1007\/s11633-023-1475-z"},{"key":"ref93","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00803"},{"key":"ref94","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01028"},{"key":"ref95","doi-asserted-by":"publisher","DOI":"10.1007\/s11432-021-3445-y"},{"key":"ref96","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01621"},{"key":"ref97","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00542"},{"key":"ref98","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2022.3217852"},{"key":"ref99","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00134"},{"key":"ref100","first-page":"17864","article-title":"Per-pixel classification is not all you need for semantic segmentation","volume-title":"Proc. Annu. Conf. Neur. Inform. Process. Syst.","author":"Cheng"},{"key":"ref101","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00135"},{"key":"ref102","article-title":"Vision transformer adapter for dense predictions","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Chen"},{"key":"ref103","article-title":"1st place solution for PVUW challenge 2023: Video panoptic segmentation","author":"Zhang","year":"2023"},{"key":"ref104","article-title":"3rd place solution for PVUW challenge 2023: Video panoptic segmentation","author":"Su","year":"2023"},{"key":"ref105","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19830-4_30"},{"key":"ref106","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00692"},{"key":"ref107","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00061"},{"key":"ref108","first-page":"8026","article-title":"PyTorch: An imperative style, high-performance deep learning library","volume-title":"Proc. Annu. Conf. Neural Inform. Process. Syst.","author":"Paszke"},{"key":"ref109","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00860"},{"key":"ref110","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00823"},{"key":"ref111","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01228-1_26"},{"key":"ref112","article-title":"MMSegmentation: Openmmlab semantic segmentation toolbox and benchmark","author":"Contributors","year":"2020"},{"key":"ref113","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-015-0816-y"},{"key":"ref114","first-page":"109","article-title":"Efficient inference in fully connected CRFs with Gaussian edge potentials","volume-title":"Proc. Annu. Conf. Neural Inf. Process. Syst.","author":"Kr\u00e4henb\u00fchl"}],"container-title":["IEEE Transactions on Pattern Analysis and Machine Intelligence"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/34\/10666888\/10496250.pdf?arnumber=10496250","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,9,7]],"date-time":"2024-09-07T04:44:54Z","timestamp":1725684294000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10496250\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10]]},"references-count":114,"journal-issue":{"issue":"10"},"URL":"https:\/\/doi.org\/10.1109\/tpami.2024.3387326","relation":{},"ISSN":["0162-8828","2160-9292","1939-3539"],"issn-type":[{"value":"0162-8828","type":"print"},{"value":"2160-9292","type":"electronic"},{"value":"1939-3539","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,10]]}}}