{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,10,30]],"date-time":"2024-10-30T00:10:59Z","timestamp":1730247059950,"version":"3.28.0"},"reference-count":24,"publisher":"IEEE","license":[{"start":{"date-parts":[[2024,10,27]],"date-time":"2024-10-27T00:00:00Z","timestamp":1729987200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,10,27]],"date-time":"2024-10-27T00:00:00Z","timestamp":1729987200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024,10,27]]},"DOI":"10.1109\/icip51287.2024.10647731","type":"proceedings-article","created":{"date-parts":[[2024,9,27]],"date-time":"2024-09-27T18:34:45Z","timestamp":1727462085000},"page":"2201-2207","source":"Crossref","is-referenced-by-count":0,"title":["Caseg: Clip-Based Action Segmentation With Learnable Text Prompt"],"prefix":"10.1109","author":[{"given":"Suyuan","family":"Huang","sequence":"first","affiliation":[{"name":"Beihang University,Intelligent Computing and Machine Learning Lab, School of ASEE"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Haoxin","family":"Zhang","sequence":"additional","affiliation":[{"name":"Xiaohongshu Inc."}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yanyu","family":"Xu","sequence":"additional","affiliation":[{"name":"Institute of High Performance Computing, A*Star"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yan","family":"Gao","sequence":"additional","affiliation":[{"name":"Xiaohongshu Inc."}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yao","family":"Hu","sequence":"additional","affiliation":[{"name":"Xiaohongshu Inc."}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zengchang","family":"Qin","sequence":"additional","affiliation":[{"name":"Beihang University,Intelligent Computing and Machine Learning Lab, School of ASEE"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref1","first-page":"8748","article-title":"Learning transferable visual models from natural language supervision","author":"Radford","year":"2021","journal-title":"ICML. PMLR"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00369"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2020.3021756"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/WACV48630.2021.00237"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58595-2_3"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01599"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i9.16924"},{"key":"ref8","article-title":"Asformer: Transformer for action segmentation","author":"Yi","year":"2021","journal-title":"BMVC"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19833-5_4"},{"article-title":"An image is worth 16 times 16 words: Transformers for image recognition at scale","year":"2021","author":"Dosovitskiy","key":"ref10"},{"article-title":"Bert: Pre-training of deep bidirectional transformers for language understanding","year":"2018","author":"Devlin","key":"ref11"},{"issue":"8","key":"ref12","first-page":"9","article-title":"Language models are unsupervised multitask learners","volume":"1","author":"Radford","year":"2019","journal-title":"OpenAI blog"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D19-1250"},{"key":"ref14","first-page":"4904","article-title":"Scaling up visual and vision-language representation learning with noisy text supervision","author":"Jia","year":"2021","journal-title":"ICML. PMLR"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-022-01653-1"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01631"},{"article-title":"Actionclip: A new paradigm for video action recognition","year":"2021","author":"Wang","key":"ref17"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01926"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.1706.03762"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46487-9_3"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.113"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01653"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00947"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.502"}],"event":{"name":"2024 IEEE International Conference on Image Processing (ICIP)","start":{"date-parts":[[2024,10,27]]},"location":"Abu Dhabi, United Arab Emirates","end":{"date-parts":[[2024,10,30]]}},"container-title":["2024 IEEE International Conference on Image Processing (ICIP)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/10647221\/10647122\/10647731.pdf?arnumber=10647731","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,9,28]],"date-time":"2024-09-28T05:15:34Z","timestamp":1727500534000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10647731\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,27]]},"references-count":24,"URL":"https:\/\/doi.org\/10.1109\/icip51287.2024.10647731","relation":{},"subject":[],"published":{"date-parts":[[2024,10,27]]}}}