{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,29]],"date-time":"2026-01-29T22:18:18Z","timestamp":1769725098975,"version":"3.49.0"},"reference-count":73,"publisher":"IEEE","license":[{"start":{"date-parts":[[2023,10,1]],"date-time":"2023-10-01T00:00:00Z","timestamp":1696118400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2023,10,1]],"date-time":"2023-10-01T00:00:00Z","timestamp":1696118400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023,10,1]]},"DOI":"10.1109\/iccv51070.2023.01833","type":"proceedings-article","created":{"date-parts":[[2024,1,15]],"date-time":"2024-01-15T20:55:59Z","timestamp":1705352159000},"page":"19972-19983","source":"Crossref","is-referenced-by-count":7,"title":["Beyond Object Recognition: A New Benchmark towards Object Concept Learning"],"prefix":"10.1109","author":[{"given":"Yong-Lu","family":"Li","sequence":"first","affiliation":[{"name":"Shanghai Jiao Tong University"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yue","family":"Xu","sequence":"additional","affiliation":[{"name":"Shanghai Jiao Tong University"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xinyu","family":"Xu","sequence":"additional","affiliation":[{"name":"Shanghai Jiao Tong University"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xiaohan","family":"Mao","sequence":"additional","affiliation":[{"name":"Shanghai Jiao Tong University"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yuan","family":"Yao","sequence":"additional","affiliation":[{"name":"Shanghai Jiao Tong University"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Siqi","family":"Liu","sequence":"additional","affiliation":[{"name":"Shanghai Jiao Tong University"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Cewu","family":"Lu","sequence":"additional","affiliation":[{"name":"Shanghai Jiao Tong University"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref1","article-title":"A causal view of compositional zero-shot recognition","author":"Atzmon","year":"2020"},{"key":"ref2","article-title":"From system 1 deep learning to system 2 deep learning","author":"Bengio","year":"2019","journal-title":"Posner lecture at NeurIPS\u20192019"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.3233\/faia210348"},{"key":"ref4","article-title":"Visual causal feature learning","author":"Chalupka","year":"2014"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/WACV.2018.00048"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7299054"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2014.33"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00108"},{"key":"ref9","article-title":"Causal reasoning from meta-reinforcement learning","author":"Dasgupta","year":"2019"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"ref11","article-title":"Bert: Pre-training of deep bidirectional transformers for language understanding","author":"Devlin","year":"2018"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2018.8460902"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2009.5206772"},{"key":"ref14","doi-asserted-by":"crossref","DOI":"10.1002\/9781405198431.wbeal1285","article-title":"Wordnet","author":"Fellbaum","year":"2012","journal-title":"The encyclopedia of applied linguistics"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.4324\/9781315657752-10"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.2307\/1574154"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00633"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2007.383331"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00550"},{"key":"ref20","article-title":"Visual semantic role labeling","author":"Gupta","year":"2015"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1201\/9781584888796.ch4"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1016\/0167-2789(90)90087-6"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"ref24","article-title":"Affordance prediction via learned object attributes","volume-title":"ICRA Workshop","author":"Hermans"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00686"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2011.5995543"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298744"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01264-9_15"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-016-0981-7"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1145\/3065386"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01261-8_30"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2009.5206594"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.351"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01133"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2021.3119406"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-10602-1_48"},{"key":"ref37","article-title":"Syntactic annotations for the google books ngram corpus","author":"Lin","year":"2012","journal-title":"ACL"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.663"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2011.6126373"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1146\/annurev.psych.57.102904.190143"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.129"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01246-5_11"},{"key":"ref44","article-title":"Causal induction from visual observations for goal directed tasks","author":"Nair","year":"2019"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2017.8206484"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1207\/s15516709cog1502_3"},{"key":"ref47","article-title":"Training language models to follow instructions with human feedback","author":"Ouyang","year":"2022"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2011.6126281"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46466-4_6"},{"key":"ref50","volume-title":"Causal inference in statistics: A primer.","author":"Pearl","year":"2016"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2014.262"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46475-6_1"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2016.7487517"},{"key":"ref54","first-page":"8748","article-title":"Learning transferable visual models from natural language supervision","volume-title":"International Conference on machine learning","author":"Radford"},{"key":"ref55","article-title":"Faster r-cnn: Towards real-time object detection with region proposal networks","author":"Ren","year":"2015","journal-title":"NIPS"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.1007\/s10994-006-5833-1"},{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"ref58","first-page":"229","article-title":"Category learning: Learning to access and use relevant knowledge","author":"Ross","year":"2008","journal-title":"Memory and mind: A Festschrift for Gordon H. Bower"},{"key":"ref59","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46493-0_12"},{"key":"ref60","doi-asserted-by":"publisher","DOI":"10.1198\/016214504000001880"},{"key":"ref61","doi-asserted-by":"publisher","DOI":"10.1109\/JPROC.2021.3058954"},{"key":"ref62","doi-asserted-by":"publisher","DOI":"10.7551\/mitpress\/1754.001.0001"},{"key":"ref63","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00377"},{"key":"ref64","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.13"},{"key":"ref65","first-page":"5998","article-title":"Attention is all you need","author":"Vaswani","year":"2017","journal-title":"Advances in neural information processing systems"},{"key":"ref66","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01077"},{"key":"ref67","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.359"},{"key":"ref68","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2010.5539970"},{"key":"ref69","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-20044-1_3"},{"key":"ref70","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2013.312"},{"key":"ref71","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2013.401"},{"key":"ref72","doi-asserted-by":"publisher","DOI":"10.1137\/1.9781611972825.35"},{"key":"ref73","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-10605-2_27"}],"event":{"name":"2023 IEEE\/CVF International Conference on Computer Vision (ICCV)","location":"Paris, France","start":{"date-parts":[[2023,10,1]]},"end":{"date-parts":[[2023,10,6]]}},"container-title":["2023 IEEE\/CVF International Conference on Computer Vision (ICCV)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/10376473\/10376477\/10377301.pdf?arnumber=10377301","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,1,18]],"date-time":"2024-01-18T01:11:20Z","timestamp":1705540280000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10377301\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,10,1]]},"references-count":73,"URL":"https:\/\/doi.org\/10.1109\/iccv51070.2023.01833","relation":{},"subject":[],"published":{"date-parts":[[2023,10,1]]}}}