{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,15]],"date-time":"2026-02-15T14:59:18Z","timestamp":1771167558331,"version":"3.50.1"},"reference-count":133,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","license":[{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/legalcode"}],"funder":[{"DOI":"10.13039\/501100000780","name":"European Union\u2019s Horizon 2020 Research and Innovation Programme","doi-asserted-by":"publisher","award":["871295 (SeaClear)"],"award-info":[{"award-number":["871295 (SeaClear)"]}],"id":[{"id":"10.13039\/501100000780","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100000780","name":"European Research Council (ERC) under the European Union\u2019s Horizon 2020 Research and Innovation Programme","doi-asserted-by":"publisher","award":["101018826-CLariNet"],"award-info":[{"award-number":["101018826-CLariNet"]}],"id":[{"id":"10.13039\/501100000780","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Access"],"published-print":{"date-parts":[[2022]]},"DOI":"10.1109\/access.2022.3162827","type":"journal-article","created":{"date-parts":[[2022,3,28]],"date-time":"2022-03-28T20:51:04Z","timestamp":1648500664000},"page":"34562-34576","source":"Crossref","is-referenced-by-count":11,"title":["Deep Learning for Object Detection and Segmentation in Videos: Toward an Integration With Domain Knowledge"],"prefix":"10.1109","volume":"10","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-3995-9262","authenticated-orcid":false,"given":"Athina","family":"Ilioudi","sequence":"first","affiliation":[{"name":"Delft Center for Systems and Control, Delft University of Technology, Delft, The Netherlands"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4051-4896","authenticated-orcid":false,"given":"Azita","family":"Dabiri","sequence":"additional","affiliation":[{"name":"Delft Center for Systems and Control, Delft University of Technology, Delft, The Netherlands"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8024-5352","authenticated-orcid":false,"given":"Ben J.","family":"Wolf","sequence":"additional","affiliation":[{"name":"Delft Center for Systems and Control, Delft University of Technology, Delft, The Netherlands"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9867-6196","authenticated-orcid":false,"given":"Bart","family":"De Schutter","sequence":"additional","affiliation":[{"name":"Delft Center for Systems and Control, Delft University of Technology, Delft, The Netherlands"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58586-0_33"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1007\/11744023_32"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00976"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-48881-3_56"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.3390\/electronics10202470"},{"key":"ref6","volume-title":"Pattern Recognition and Machine Learning (Information Science and Statistics)","author":"Bishop","year":"2006"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2020.3014297"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1007\/BF00332918"},{"key":"ref9","first-page":"1","article-title":"Recurrent multi-frame single shot detector for video object detection","volume-title":"Proc. BMVC","author":"Broad"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1145\/3530911"},{"key":"ref11","article-title":"Fast video object segmentation with spatio-temporal GANs","author":"Caelles","year":"2019","journal-title":"arXiv:1903.12161"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-15561-1_56"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/d14-1179"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2000.854761"},{"key":"ref15","article-title":"R-FCN: Object detection via region-based fully convolutional networks","author":"Dai","year":"2016","journal-title":"arXiv:1605.06409"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00712"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2020.2990070"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.316"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.330"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2008.4587597"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1186\/s41044-016-0014-0"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1177\/0278364913491297"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2012.6248074"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1007\/s13735-019-00183-w"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.4324\/9781315740218"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00033"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.169"},{"key":"ref28","volume-title":"Deep Learning","author":"Goodfellow","year":"2016"},{"key":"ref29","first-page":"2672","article-title":"Generative adversarial nets","volume-title":"Proc. 27th Int. Conf. Neural Inf. Process. Syst.","volume":"2","author":"Goodfellow"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1049\/ip-f-2.1993.0015"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2005.239"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.3389\/frobt.2016.00079"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2016.2582924"},{"key":"ref34","volume-title":"Eye and Brain: The Psychology of Seeing","author":"Gregory","year":"1978"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i07.6718"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00401"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/WACV45572.2020.9093599"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1007\/3-540-59497-3_175"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-10584-0_20"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.5244\/C.2.23"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2018.2844175"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2015.2389824"},{"key":"ref43","article-title":"Impression network for video object detection","author":"Hetang","year":"2017","journal-title":"arXiv:1712.05896"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1162\/neco.2006.18.7.1527"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1126\/science.1127647"},{"key":"ref46","first-page":"282","volume-title":"Learning and Relearning in Boltzmann Machines","author":"Hinton","year":"1986"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1997.9.8.1735"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1016\/0004-3702(81)90024-2"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2020.2967051"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1145\/3463475"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1137\/1.9781611975673.63"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1145\/3447814"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1016\/j.jcp.2020.109458"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1115\/1.3662552"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.1109\/TKDE.2017.2720168"},{"key":"ref56","article-title":"Physics-guided neural networks (PGNN): An application in lake temperature modeling","author":"Daw","year":"2017","journal-title":"arXiv:1710.11431"},{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.1145\/3505244"},{"key":"ref58","article-title":"Structured attention networks","author":"Kim","year":"2017","journal-title":"arXiv:1702.00887"},{"key":"ref59","article-title":"Auto-encoding variational Bayes","volume-title":"Proc. 2nd Int. Conf. Learn. Represent. (ICLR)","author":"Kingma"},{"key":"ref60","doi-asserted-by":"publisher","DOI":"10.1109\/9780470544037.ch14"},{"key":"ref61","article-title":"Structured object-aware physics prediction for video modeling and planning","volume-title":"Proc. Int. Conf. Learn. Represent.","author":"Kossen"},{"key":"ref62","doi-asserted-by":"publisher","DOI":"10.1016\/j.procs.2020.04.283"},{"key":"ref63","first-page":"2879","article-title":"Generalization without systematicity: On the compositional skills of sequence-to-sequence recurrent networks","volume-title":"Proc. ICML","author":"Lake"},{"key":"ref64","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2006.68"},{"key":"ref65","doi-asserted-by":"publisher","DOI":"10.1038\/nature14539"},{"key":"ref66","article-title":"Object detection with convolutional neural network","author":"Li","year":"2019","journal-title":"arXiv:1912.01844"},{"key":"ref67","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01316"},{"key":"ref68","article-title":"A critical review of recurrent neural networks for sequence learning","author":"Lipton","year":"2015","journal-title":"arXiv:1506.00019"},{"key":"ref69","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2020.05.027"},{"key":"ref70","article-title":"Looking fast and slow: Memory-guided mobile video object detection","author":"Liu","year":"2019","journal-title":"arXiv:1903.10172"},{"key":"ref71","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46448-0_2"},{"key":"ref72","doi-asserted-by":"publisher","DOI":"10.1007\/s10462-018-9641-3"},{"key":"ref73","volume-title":"System Identification: Theory for the User","author":"Ljung","year":"1999"},{"key":"ref74","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.1999.790410"},{"key":"ref75","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.257"},{"key":"ref76","article-title":"Deep learning: A critical appraisal","author":"Marcus","year":"2018","journal-title":"arXiv:1801.00631"},{"key":"ref77","article-title":"The next decade in AI: Four steps towards robust artificial intelligence","author":"Marcus","year":"2020","journal-title":"arXiv:2002.06177"},{"key":"ref78","article-title":"Deep learning for visual tracking: A comprehensive survey","author":"Mojtaba Marvasti-Zadeh","year":"2019","journal-title":"arXiv:1912.00535"},{"key":"ref79","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01246-5_19"},{"key":"ref80","doi-asserted-by":"publisher","DOI":"10.1088\/1742-6596\/1591\/1\/012028"},{"key":"ref81","doi-asserted-by":"publisher","DOI":"10.1115\/1.4044507"},{"key":"ref82","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00770"},{"key":"ref83","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-17795-9_10"},{"key":"ref84","doi-asserted-by":"publisher","DOI":"10.1109\/ISACV.2018.8354080"},{"key":"ref85","doi-asserted-by":"publisher","DOI":"10.1109\/TKDE.2009.191"},{"key":"ref86","doi-asserted-by":"publisher","DOI":"10.1785\/0120170293"},{"key":"ref87","doi-asserted-by":"publisher","DOI":"10.3390\/s22051780"},{"key":"ref88","doi-asserted-by":"publisher","DOI":"10.1155\/2014\/879070"},{"key":"ref89","volume-title":"The Book Why: The New Sci. Cause Effect","author":"Pearl","year":"2018"},{"key":"ref90","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2020.2987324"},{"key":"ref91","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.91"},{"key":"ref92","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2016.2577031"},{"key":"ref93","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-015-0816-y"},{"key":"ref94","doi-asserted-by":"publisher","DOI":"10.1109\/JSEN.2019.2898634"},{"key":"ref95","first-page":"448","article-title":"Deep Boltzmann machines","volume-title":"Proc. 12th Int. Conf. Artif. Intell. Statist.","volume":"5","author":"Salakhutdinov"},{"key":"ref96","first-page":"693","article-title":"Efficient learning of deep Boltzmann machines","volume-title":"Proc. AISTATS","author":"Salakhutdinov"},{"key":"ref97","article-title":"Towards out-of-distribution generalization: A survey","author":"Shen","year":"2021","journal-title":"arXiv:2108.13624"},{"key":"ref98","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00985"},{"key":"ref99","article-title":"Very deep convolutional networks for large-scale image recognition","author":"Simonyan","year":"2014","journal-title":"arXiv:1409.1556"},{"key":"ref100","doi-asserted-by":"publisher","DOI":"10.1007\/978-981-13-8406-6_63"},{"key":"ref101","doi-asserted-by":"publisher","DOI":"10.1109\/ICMLA.2019.00015"},{"key":"ref102","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00937"},{"key":"ref103","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v31i1.10934"},{"key":"ref104","first-page":"3","volume-title":"An Introduction to Neural Networks and Deep Learning","author":"Suk","year":"2017"},{"key":"ref105","doi-asserted-by":"publisher","DOI":"10.1007\/978-981-15-4288-6_1"},{"key":"ref106","article-title":"Unsupervised RGBD video object segmentation using GANs","author":"Sultana","year":"2018","journal-title":"arXiv:1811.01526"},{"key":"ref107","doi-asserted-by":"publisher","DOI":"10.1016\/j.cma.2019.112732"},{"key":"ref108","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2018.03.030"},{"key":"ref109","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.158"},{"key":"ref110","doi-asserted-by":"publisher","DOI":"10.5244\/C.30.44"},{"key":"ref111","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-013-0620-5"},{"key":"ref112","doi-asserted-by":"publisher","DOI":"10.1016\/j.asoc.2021.107102"},{"key":"ref113","first-page":"6000","article-title":"Attention is all you need","volume-title":"Proc. 31st Int. Conf. Neural Inf. Process. Syst.","author":"Vaswani"},{"key":"ref114","doi-asserted-by":"publisher","DOI":"10.3389\/frobt.2015.00029"},{"key":"ref115","first-page":"918","article-title":"A combinatorial perspective on transfer learning","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Wang"},{"key":"ref116","article-title":"End-to-end video instance segmentation with transformers","author":"Wang","year":"2020","journal-title":"arXiv:2011.14503"},{"key":"ref117","doi-asserted-by":"publisher","DOI":"10.1145\/3514228"},{"key":"ref118","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00931"},{"key":"ref119","first-page":"1","article-title":"Galileo: Perceiving physical object properties by integrating a physics engine with deep learning","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"28","author":"Wu"},{"key":"ref120","article-title":"Unseen object instance segmentation for robotic environments","author":"Xie","year":"2020","journal-title":"arXiv:2007.08073"},{"key":"ref121","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00529"},{"key":"ref122","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00794"},{"key":"ref123","doi-asserted-by":"publisher","DOI":"10.1109\/IWECAI50956.2020.00027"},{"key":"ref124","doi-asserted-by":"publisher","DOI":"10.1109\/ICCVW.2017.235"},{"key":"ref125","doi-asserted-by":"publisher","DOI":"10.1162\/neco_a_01199"},{"key":"ref126","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2021.3074313"},{"key":"ref127","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2018.2876865"},{"key":"ref128","article-title":"Domain generalization in vision: A survey","author":"Zhou","year":"2021","journal-title":"arXiv:2103.02503"},{"key":"ref129","doi-asserted-by":"publisher","DOI":"10.3390\/app10217834"},{"key":"ref130","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00596"},{"key":"ref131","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.52"},{"key":"ref132","article-title":"Deep feature flow for video recognition","author":"Zhu","year":"2016","journal-title":"arXiv:1611.07715"},{"key":"ref133","doi-asserted-by":"publisher","DOI":"10.1109\/LGRS.2016.2542358"}],"container-title":["IEEE Access"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/6287639\/9668973\/09743897.pdf?arnumber=9743897","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,1,18]],"date-time":"2024-01-18T00:09:22Z","timestamp":1705536562000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9743897\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022]]},"references-count":133,"URL":"https:\/\/doi.org\/10.1109\/access.2022.3162827","relation":{},"ISSN":["2169-3536"],"issn-type":[{"value":"2169-3536","type":"electronic"}],"subject":[],"published":{"date-parts":[[2022]]}}}