{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,10]],"date-time":"2026-03-10T05:15:12Z","timestamp":1773119712729,"version":"3.50.1"},"publisher-location":"ISCA","reference-count":0,"publisher":"ISCA","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"DOI":"10.21437\/interspeech.2023-135","type":"proceedings-article","created":{"date-parts":[[2023,8,14]],"date-time":"2023-08-14T08:22:20Z","timestamp":1692001340000},"page":"431-435","source":"Crossref","is-referenced-by-count":7,"title":["Segmental SpeechCLIP: Utilizing Pretrained Image-text Models for Audio-Visual Learning"],"prefix":"10.21437","author":[{"given":"Saurabhchand","family":"Bhati","sequence":"first","affiliation":[]},{"given":"Jes\u00fas","family":"Villalba","sequence":"additional","affiliation":[]},{"given":"Laureano","family":"Moro-Velazquez","sequence":"additional","affiliation":[]},{"given":"Thomas","family":"Thebaud","sequence":"additional","affiliation":[]},{"given":"Najim","family":"Dehak","sequence":"additional","affiliation":[]}],"member":"8866","published-online":{"date-parts":[[2023,8,20]]},"event":{"name":"INTERSPEECH 2023","acronym":"interspeech_2023"},"container-title":["INTERSPEECH 2023"],"original-title":[],"deposited":{"date-parts":[[2024,1,16]],"date-time":"2024-01-16T10:13:18Z","timestamp":1705399998000},"score":1,"resource":{"primary":{"URL":"https:\/\/www.isca-archive.org\/interspeech_2023\/bhati23_interspeech.html"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,8,20]]},"references-count":0,"URL":"https:\/\/doi.org\/10.21437\/interspeech.2023-135","relation":{},"subject":[],"published":{"date-parts":[[2023,8,20]]}}}