{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,2]],"date-time":"2026-06-02T06:05:35Z","timestamp":1780380335119,"version":"3.54.1"},"publisher-location":"Singapore","reference-count":22,"publisher":"Springer Nature Singapore","isbn-type":[{"value":"9789819698486","type":"print"},{"value":"9789819698493","type":"electronic"}],"license":[{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-981-96-9849-3_20","type":"book-chapter","created":{"date-parts":[[2025,7,18]],"date-time":"2025-07-18T08:59:33Z","timestamp":1752829173000},"page":"235-246","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Gaze-Driven Active Speaker Detection in Meetings"],"prefix":"10.1007","author":[{"given":"Weiwei","family":"Jiang","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Long","family":"Rao","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Gaole","family":"Dai","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Yifan","family":"Wu","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Wei","family":"Xu","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2025,7,19]]},"reference":[{"key":"20_CR1","doi-asserted-by":"crossref","unstructured":"Zheng, X., Zhang, C., Woodland, P.: Tandem multitask training of speaker diarisation and speech recognition for meeting transcription. arXiv arXiv:2207.03852 (2022)","DOI":"10.21437\/Interspeech.2022-11368"},{"key":"20_CR2","doi-asserted-by":"crossref","unstructured":"Li, S., Fujii, N.: Estimating gaze points from facial landmarks by a remote spherical camera. In: International Conference on Pattern Recognition, pp. 7633\u20137639 IEEE (2021)","DOI":"10.1109\/ICPR48806.2021.9412211"},{"key":"20_CR3","doi-asserted-by":"publisher","unstructured":"Afouras, T., Owens, A., Chung, J.S., Zisserman, A.: Self-supervised learning of audio-visual objects from video. In: Computer Vision\u2013ECCV 2020. 16th European Conference, LNCS, pp. 208\u2013224 Springer, Glasgow (2020). https:\/\/doi.org\/10.1007\/978-3-030-58523-5_13","DOI":"10.1007\/978-3-030-58523-5_13"},{"key":"20_CR4","doi-asserted-by":"crossref","unstructured":"Pouthier, B., Pilati, L., Gudupudi, L.K., Bouveyron, C., Precioso, F.: Active speaker detection as a multi-objective optimization with uncertainty-based multi-modal fusion. In International Speech Communication Association, pp. 3831\u20133835 ISCA, Brno (2021)","DOI":"10.21437\/Interspeech.2021-80"},{"key":"20_CR5","doi-asserted-by":"crossref","unstructured":"Hou, Y., Zhang, Z., Horanyi, N., Moon, J., Cheng, Y., Chang, H.: Multi-modal gaze following in conversational scenarios. In: 2024 IEEE\/CVF Winter Conference on Applications of Computer Vision, pp. 1175\u20131184 IEEE, Waikoloa (2024)","DOI":"10.1109\/WACV57701.2024.00122"},{"key":"20_CR6","doi-asserted-by":"crossref","unstructured":"Dai, J., Qi, H., Xiong, Y., et al.: Deformable convolutional networks. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 764\u2013773 IEEE, Venice (2017)","DOI":"10.1109\/ICCV.2017.89"},{"key":"20_CR7","doi-asserted-by":"crossref","unstructured":"Desplanques, B., Thienpondt, J., Demuynck, K.: ECAPA-TDNN: emphasized channel attention, propagation and aggregation in TDNN based speaker verification. In: International Speech Communication Association, pp. 3830\u20133834 ISCA, Shanghai (2020)","DOI":"10.21437\/Interspeech.2020-2650"},{"key":"20_CR8","doi-asserted-by":"crossref","unstructured":"Huang, X., Jiang, W., Rao, L., Rao, W., Cheng, W.: Active speaker detection in fisheye meeting scenes with scene spatial spectrums. In: International Speech Communication Association, pp. 4283\u20134287 ISCA, Kos Island (2024)","DOI":"10.21437\/Interspeech.2024-1402"},{"key":"20_CR9","doi-asserted-by":"crossref","unstructured":"Roth, J., Chaudhuri, S., Klejch, O., et al.: AVA active speaker: an audio-visual dataset for active speaker detection. In: IEEE International Conference on Acoustics, Speech and Signal Processing, pp. 4492\u20134496. IEEE, Piscataway (2020)","DOI":"10.1109\/ICASSP40776.2020.9053900"},{"key":"20_CR10","doi-asserted-by":"crossref","unstructured":"Tao, R., Pan, Z., Das, R.K., et al.: Is someone speaking? Exploring long-term temporal features for audio-visual active speaker detection. In: Proceedings of the 29th ACM International Conference on Multimedia, pp. 3927\u20133935 ACM, China (2021)","DOI":"10.1145\/3474085.3475587"},{"key":"20_CR11","doi-asserted-by":"crossref","unstructured":"K\u00f6p\u00fckl\u00fc, O., Taseska, M., Rigoll, G.: How to design a three-stage architecture for audio-visual active speaker detection in the wild. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 1193\u20131203 IEEE, Montreal (2021)","DOI":"10.1109\/ICCV48922.2021.00123"},{"key":"20_CR12","doi-asserted-by":"publisher","unstructured":"Min, K., Roy, S., et al.: Learning long-term spatial-temporal graphs for active speaker detection. In: Avidan, S., Brostow, G., Ciss\u00e9, M., Farinella, G.M., Hassner, T. (eds.) European Conference on Computer Vision, pp. 371\u2013387 Springer, Tel Aviv (2022). https:\/\/doi.org\/10.1007\/978-3-031-19833-5_22","DOI":"10.1007\/978-3-031-19833-5_22"},{"key":"20_CR13","doi-asserted-by":"crossref","unstructured":"Liao, J., Duan, H., Feng, K., Zhao, W., Yang, Y., Chen, L.: A light weight model for active speaker detection. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 22932\u201322941 IEEE, Vancouver (2023)","DOI":"10.1109\/CVPR52729.2023.02196"},{"key":"20_CR14","doi-asserted-by":"crossref","unstructured":"Wang, X., Cheng, F., Bertasius, G.: LoCoNet: long-short context network for active speaker detection. In: Proceedings of the IEEE Computer Society Conference on Computer Vision and Pattern Recognition, pp. 18462\u201318472 IEEE, Seattle (2024)","DOI":"10.1109\/CVPR52733.2024.01747"},{"key":"20_CR15","doi-asserted-by":"crossref","unstructured":"Madrigal, F., Lerasle, F., Pibre, L., Ferrane, I.: Audio-video detection of the active speaker in meetings. In: 2020 25th International Conference on Pattern Recognition, pp. 2536\u20132543 IEEE, Piscataway (2020)","DOI":"10.1109\/ICPR48806.2021.9412681"},{"key":"20_CR16","doi-asserted-by":"crossref","unstructured":"Jiang, Y., Tao, R. et al.: Target active speaker detection with audio-visual cues. In: International Speech Communication Association, pp. 3152\u20133156 ISCA, Dublin (2023)","DOI":"10.21437\/Interspeech.2023-574"},{"key":"20_CR17","doi-asserted-by":"crossref","unstructured":"Yu, X., Zhang, L., Li, X.-Y.: E-Talk: accelerating active speaker detection with audio-visual fusion and edge-cloud computing.In: 2023 20th Annual IEEE International Conference on Sensing, Communication, and Networking, pp. 528\u2013536 IEEE, Madrid (2023)","DOI":"10.1109\/SECON58729.2023.10287518"},{"key":"20_CR18","unstructured":"Recasens, A., Khosla, A., Vondrick, C., Torralba, A.: Where are they looking? In: Advances in Neural Information Processing Systems, pp. 199\u2013207 MIT, Cambridge (2015)"},{"key":"20_CR19","doi-asserted-by":"crossref","unstructured":"Chong, E., Wang, Y., Ruiz, N., Rehg, J.: Detecting attended visual targets in video. In: 2020 IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 5395\u20135405 IEEE, Seattle (2020)","DOI":"10.1109\/CVPR42600.2020.00544"},{"key":"20_CR20","doi-asserted-by":"crossref","unstructured":"Li, Y., Shen, W., Gao, Z., and et al.: Looking here or there? Gaze Following in 360\u00b0 Images: In: 2021 IEEE\/CVF International Conference on Computer Vision, pp. 3722\u20133731 IEEE, Montreal (2021)","DOI":"10.1109\/ICCV48922.2021.00372"},{"key":"20_CR21","doi-asserted-by":"publisher","unstructured":"Song, Y., Wang, X., Yao, J., et al.: VitGaze: gaze Following with Interaction Features in Vision Transformers. arXiv https:\/\/doi.org\/10.48550\/arXiv.2403.12778 (2024)","DOI":"10.48550\/arXiv.2403.12778"},{"key":"20_CR22","doi-asserted-by":"publisher","unstructured":"Rao, L., Huang, X., Cai, S., Tian, B., Xu, W., Cheng, W.: A dual-path approach for gaze following in fisheye meeting scenes. In: Liu, Q., et al. (eds.) Pattern Recognition and Computer Vision: 6th Chinese Conference, pp. 199\u2013210 Springer, Xiamen (2024). https:\/\/doi.org\/10.1007\/978-981-99-8469-5_16","DOI":"10.1007\/978-981-99-8469-5_16"}],"container-title":["Lecture Notes in Computer Science","Advanced Intelligent Computing Technology and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-96-9849-3_20","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,6,2]],"date-time":"2026-06-02T05:45:18Z","timestamp":1780379118000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-96-9849-3_20"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025]]},"ISBN":["9789819698486","9789819698493"],"references-count":22,"URL":"https:\/\/doi.org\/10.1007\/978-981-96-9849-3_20","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025]]},"assertion":[{"value":"19 July 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ICIC","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Intelligent Computing","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Ningbo","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"26 July 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 July 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"21","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"icic2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/www.ic-icc.cn\/icg\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}