{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,10,30]],"date-time":"2024-10-30T00:11:19Z","timestamp":1730247079481,"version":"3.28.0"},"reference-count":30,"publisher":"IEEE","license":[{"start":{"date-parts":[[2024,10,27]],"date-time":"2024-10-27T00:00:00Z","timestamp":1729987200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,10,27]],"date-time":"2024-10-27T00:00:00Z","timestamp":1729987200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024,10,27]]},"DOI":"10.1109\/icip51287.2024.10647831","type":"proceedings-article","created":{"date-parts":[[2024,9,27]],"date-time":"2024-09-27T18:34:45Z","timestamp":1727462085000},"page":"263-269","source":"Crossref","is-referenced-by-count":0,"title":["Masked Momentum Contrastive Learning for Semantic Understanding by Observation"],"prefix":"10.1109","author":[{"given":"Jiantao","family":"Wu","sequence":"first","affiliation":[{"name":"University of Surrey,People-Centred AI,Surrey,United Kingdom,GU2 7XH"}]},{"given":"Shentong","family":"Mo","sequence":"additional","affiliation":[{"name":"Carnegie Mellon University,Pittsburgh,PA,USA,15213"}]},{"given":"Sara","family":"Atito","sequence":"additional","affiliation":[{"name":"University of Surrey,People-Centred AI,Surrey,United Kingdom,GU2 7XH"}]},{"given":"Zhenhua","family":"Feng","sequence":"additional","affiliation":[{"name":"CVSSP, University of Surrey,Surrey,United Kingdom,GU2 7XH"}]},{"given":"Josef","family":"Kittler","sequence":"additional","affiliation":[{"name":"CVSSP, University of Surrey,Surrey,United Kingdom,GU2 7XH"}]},{"given":"Syed Sameed","family":"Husain","sequence":"additional","affiliation":[{"name":"CVSSP, University of Surrey,Surrey,United Kingdom,GU2 7XH"}]},{"given":"Muhammad","family":"Awais","sequence":"additional","affiliation":[{"name":"University of Surrey,People-Centred AI,Surrey,United Kingdom,GU2 7XH"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00951"},{"key":"ref2","article-title":"AV-SAM: Segment anything model meets audio-visual localization and segmentation","author":"Mo","year":"2023","journal-title":"arXiv preprint arXiv:2305.01836"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01075"},{"key":"ref4","first-page":"1877","article-title":"Language models are few-shot learners","volume":"33","author":"Brown","year":"2020","journal-title":"Advances in neural information processing systems"},{"key":"ref5","article-title":"SiT: Self-supervised vIsion transformer","volume":"abs\/2104.03602","author":"Atito","year":"2021","journal-title":"ArXiv preprint"},{"article-title":"ibot: Image bert pre-training with online tokenizer","volume-title":"International Conference on Learning Representations (ICLR)","author":"Zhou","key":"ref6"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/iccv48922.2021.00950"},{"key":"ref8","first-page":"1597","article-title":"A simple framework for contrastive learning of visual representations","volume-title":"Proceedings of the 37th International Conference on Machine Learning, ICML 2020, 13-18 July 2020, Virtual Event. 2020, vol. 119 of Proceedings of Machine Learning Research","author":"Chen"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.1706.03762"},{"key":"ref10","article-title":"Bert: Pre-training of deep bidirectional transformers for language understanding","author":"Devlin","year":"2018","journal-title":"arXiv preprint arXiv:1810.04805"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19821-2_26"},{"key":"ref12","article-title":"MC-SSL0.0: Towards multi-concept self-supervised learning","volume":"abs\/2111.15340","author":"Atito","year":"2021","journal-title":"ArXiv preprint"},{"key":"ref13","article-title":"Masked contrastive representation learning","author":"Yao","year":"2022","journal-title":"arXiv preprint arXiv:2211.06012"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/tpami.2023.3336525"},{"key":"ref15","article-title":"Zero-shot semantic segmentation","volume":"32","author":"Bucher","year":"2019","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref16","article-title":"Uniboost: Unsupervised unimodal pretraining for boosting zero-shot vision-language tasks","volume":"abs\/2306.04715","author":"Sun","year":"2023","journal-title":"CoRR"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/ICIP46576.2022.9897365"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00371"},{"key":"ref19","article-title":"GMML is all you need","volume":"abs\/2205.14986","author":"Atito","year":"2022","journal-title":"ArXiv preprint"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01553"},{"article-title":"Decoupled weight decay regularization","volume-title":"7th International Conference on Learning Representations, ICLR 2019, New Orleans, LA, USA, May 6-9, 2019","author":"Loshchilov","key":"ref21"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.5555\/3495724.3497510"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-015-0816-y"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.1405.0312"},{"key":"ref25","first-page":"19545","article-title":"Spacetime correspondence as a contrastive random walk","volume":"33","author":"Jabri","year":"2020","journal-title":"Advances in neural information processing systems"},{"key":"ref26","article-title":"The 2017 davis challenge on video object segmentation","author":"Pont-Tuset","year":"2017","journal-title":"arXiv preprint arXiv:1704.00675"},{"article-title":"Vicregl: Self-supervised learning of local visual features","year":"2022","author":"Bardes","key":"ref27"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-018-1140-0"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.350"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-009-0275-4"}],"event":{"name":"2024 IEEE International Conference on Image Processing (ICIP)","start":{"date-parts":[[2024,10,27]]},"location":"Abu Dhabi, United Arab Emirates","end":{"date-parts":[[2024,10,30]]}},"container-title":["2024 IEEE International Conference on Image Processing (ICIP)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/10647221\/10647122\/10647831.pdf?arnumber=10647831","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,9,28]],"date-time":"2024-09-28T05:36:42Z","timestamp":1727501802000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10647831\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,27]]},"references-count":30,"URL":"https:\/\/doi.org\/10.1109\/icip51287.2024.10647831","relation":{},"subject":[],"published":{"date-parts":[[2024,10,27]]}}}