{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,21]],"date-time":"2026-01-21T22:26:05Z","timestamp":1769034365547,"version":"3.49.0"},"reference-count":54,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,10,22]],"date-time":"2025-10-22T00:00:00Z","timestamp":1761091200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,10,22]],"date-time":"2025-10-22T00:00:00Z","timestamp":1761091200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,10,22]]},"DOI":"10.1109\/cbmi66578.2025.11339307","type":"proceedings-article","created":{"date-parts":[[2026,1,20]],"date-time":"2026-01-20T20:38:56Z","timestamp":1768941536000},"page":"1-8","source":"Crossref","is-referenced-by-count":0,"title":["TSalV360: A Method and Dataset for Text-driven Saliency Detection in 360-Degrees Videos*"],"prefix":"10.1109","author":[{"given":"Ioannis","family":"Kontostathis","sequence":"first","affiliation":[{"name":"ITI, CERTH,Thessaloniki,Greece"}]},{"given":"Evlampios","family":"Apostolidis","sequence":"additional","affiliation":[{"name":"ITI, CERTH,Thessaloniki,Greece"}]},{"given":"Vasileios","family":"Mezaris","sequence":"additional","affiliation":[{"name":"ITI, CERTH,Thessaloniki,Greece"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-54190-7_10"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.150"},{"key":"ref3","first-page":"1396","article-title":"Deep 360 Pilot: Learning a Deep Agent for Piloting through 3600 Sports Videos","volume-title":"2017 IEEE Conference on Computer Vision and Pattern Recognition (CVPR).","author":"Hu"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2020.2987682"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1145\/3306346.3323046"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/ISMAR50242.2020.00040"},{"key":"ref7","doi-asserted-by":"crossref","DOI":"10.1609\/aaai.v32i1.12335","article-title":"A deep ranking model for spatio-temporal highlight detection from a 360\u00b0 video","volume-title":"Proceedings of the Thirty-Second AAAI Conference on Artificial Intelligence and Thirtieth Innovative Applications of Artificial Intelligence Conference and Eighth AAAI Symposium on Educational Advances in Artificial Intelligence, ser. AAAI\u2019 18\/IAAI\u2019 18\/EAAI\u2019 18","author":"Yu"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00153"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-53302-0_15"},{"key":"ref10","first-page":"1190","article-title":"Your attention is unique: Detecting 360-degree video saliency in head-mounted display for head movement prediction","volume-title":"Proceedings of the 26th ACM International Conference on Multimedia, ser. MM \u201818","author":"Nguyen"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00154"},{"key":"ref12","first-page":"1","article-title":"V-BMS360: A Video Extention to the BMS360 Image Saliency Model","volume-title":"2018 IEEE International Conference on Multimedia & Expo Workshops (ICMEW)","author":"Lebreton"},{"key":"ref13","doi-asserted-by":"crossref","first-page":"504","DOI":"10.1007\/978-3-030-01234-2_30","article-title":"Saliency detection in 360\u00b0 videos","volume-title":"15th European Conference on Computer Vision (ECCV) 2018","volume":"Part VII","author":"Zhang","year":"2018"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2018.2858783"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-68796-0_22"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1016\/j.cag.2022.06.002"},{"key":"ref17","article-title":"Spherical vision transformer for 360\u00b0 video saliency prediction","volume-title":"34th British Machine Vision Conference 2023, BMVC 2023, Aberdeen, UK, November 20\u201324","author":"Cokelek","year":"2023"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/JETCAS.2024.3377096"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19833-5_25"},{"key":"ref20","doi-asserted-by":"crossref","first-page":"9979","DOI":"10.1109\/TCSVT.2024.3407685","article-title":"360spred: Saliency prediction for 360-degree videos based on 3d separable graph convolutional networks","volume":"34","author":"Yang","year":"2024","journal-title":"IEEE Trans. Cir. and Sys. for Video Technol."},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/ISMAR62088.2024.00141"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2023.3306596"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-78186-5_2"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2024.3461956"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2025.3591725"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2023.3282444"},{"key":"ref27","first-page":"12632","article-title":"Towards Accurate Text-based Image Captioning with Content Diversity Exploration","volume-title":"2021 IEEEICVF Conference on Computer Vision and Pattern Recognition (CVPR)","author":"Xu"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00250"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/WACV56688.2023.00109"},{"key":"ref30","first-page":"35549","article-title":"Towards video text visual question answering: Benchmark and baseline","volume-title":"Advances in Neural Information Processing Systems","volume":"35","author":"Zhao","year":"2022"},{"key":"ref31","first-page":"4648","article-title":"Under-standing Video Scenes through Text: Insights from Text-based Video Question Answering","volume-title":"2023 IEEEICVF International Conference on Computer Vision Workshops (ICCVW)","author":"Jahagirdar"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/ICIP49359.2023.10222138"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1145\/3746027.3755821"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/WACV51458.2022.00061"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00737"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2023.3243246"},{"key":"ref37","doi-asserted-by":"crossref","first-page":"107615","DOI":"10.1016\/j.patcog.2020.107615","article-title":"Video saliency prediction using enhanced spatiotemporal alignment network","volume":"109","author":"Chen","year":"2021","journal-title":"Pattern Recognition"},{"key":"ref38","first-page":"3520","article-title":"ViNet: Pushing the limits of visual modality for audio-visual saliency prediction","volume-title":"2021 IEEEIRSJ International Conference on Intelligent Robots and Systems (IROS)","author":"Jain"},{"key":"ref39","doi-asserted-by":"crossref","first-page":"113820","DOI":"10.1016\/j.knosys.2025.113820","article-title":"Predvsd: Video saliency prediction based on conditional diffusion model","volume":"324","author":"Li","year":"2025","journal-title":"Knowledge-Based Systems"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.02575"},{"key":"ref41","volume-title":"Text-audio-visual-conditioned diffusion model for video saliency prediction","author":"Yu","year":"2025"},{"key":"ref42","doi-asserted-by":"crossref","DOI":"10.1145\/3696409.3700196","article-title":"Multimodal energy prompting for video salient object detection","volume-title":"Proceedings of the 6th ACM International Conference on Multimedia in Asia","author":"Jiang"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1109\/tip.2020.2966082"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01842"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-10602-1_48"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.303"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00559"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01244"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1109\/ICMEW46912.2020.9105956"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1109\/MFI49285.2020.9235263"},{"key":"ref51","article-title":"Landmark based shortest path detection by using a* algorithm and haversine formula","author":"Nichat","year":"2013","journal-title":"04"},{"key":"ref52","volume-title":"Llava-next: Stronger llms supercharge multimodal capabilities in the wild","author":"Li","year":"2024"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2018.2815601"},{"key":"ref54","first-page":"8748","article-title":"Learning transferable visual models from natural language supervision","volume-title":"Proceedings of the 38th International Conference on Machine Learning","volume":"139","author":"Radford"}],"event":{"name":"2025 International Conference on Content-Based Multimedia Indexing (CBMI)","location":"Dublin, Ireland","start":{"date-parts":[[2025,10,22]]},"end":{"date-parts":[[2025,10,24]]}},"container-title":["2025 International Conference on Content-Based Multimedia Indexing (CBMI)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11339229\/11339242\/11339307.pdf?arnumber=11339307","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,1,21]],"date-time":"2026-01-21T07:11:35Z","timestamp":1768979495000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11339307\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,22]]},"references-count":54,"URL":"https:\/\/doi.org\/10.1109\/cbmi66578.2025.11339307","relation":{},"subject":[],"published":{"date-parts":[[2025,10,22]]}}}