{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,11]],"date-time":"2025-09-11T21:58:06Z","timestamp":1757627886001,"version":"3.44.0"},"reference-count":8,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,8,11]],"date-time":"2025-08-11T00:00:00Z","timestamp":1754870400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,8,11]],"date-time":"2025-08-11T00:00:00Z","timestamp":1754870400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,8,11]]},"DOI":"10.1109\/avss65446.2025.11149705","type":"proceedings-article","created":{"date-parts":[[2025,9,9]],"date-time":"2025-09-09T17:30:18Z","timestamp":1757439018000},"page":"1-6","source":"Crossref","is-referenced-by-count":0,"title":["Fine-Grained Video Indexing and Retrieval with Vision-Language Models"],"prefix":"10.1109","author":[{"given":"Dong Gun","family":"Park","sequence":"first","affiliation":[{"name":"Handong Global University,Pohang,South Korea"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Soyeon","family":"Park","sequence":"additional","affiliation":[{"name":"Handong Global University,Pohang,South Korea"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Chang Ha","family":"Lee","sequence":"additional","affiliation":[{"name":"GMDSOFT,Seongnam,South Korea"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Hyunkyoo","family":"Choi","sequence":"additional","affiliation":[{"name":"Korea Institute of Science and Technology Information,Seoul,South Korea"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Charmgil","family":"Hong","sequence":"additional","affiliation":[{"name":"Handong Global University,Pohang,South Korea"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1145\/1571941.1572114"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/TSMCC.2011.2109710"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2024\/1009"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1145\/1126004.1126005"},{"key":"ref5","author":"Liu","year":"2023","journal-title":"Visual instruction tuning"},{"key":"ref6","first-page":"28492","article-title":"Robust speech recognition via large-scale weak supervision","volume-title":"International conference on machine learning","author":"Radford"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1561\/1500000019"},{"key":"ref8","author":"Zheng","year":"2024","journal-title":"Semantic search evaluation"}],"event":{"name":"2025 IEEE International Conference on Advanced Visual and Signal-Based Systems (AVSS)","location":"Tainan, Taiwan","start":{"date-parts":[[2025,8,11]]},"end":{"date-parts":[[2025,8,13]]}},"container-title":["2025 IEEE International Conference on Advanced Visual and Signal-Based Systems (AVSS)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11149666\/11149641\/11149705.pdf?arnumber=11149705","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,10]],"date-time":"2025-09-10T05:14:57Z","timestamp":1757481297000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11149705\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,8,11]]},"references-count":8,"URL":"https:\/\/doi.org\/10.1109\/avss65446.2025.11149705","relation":{},"subject":[],"published":{"date-parts":[[2025,8,11]]}}}