{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,27]],"date-time":"2026-03-27T17:01:13Z","timestamp":1774630873661,"version":"3.50.1"},"publisher-location":"Cham","reference-count":32,"publisher":"Springer International Publishing","isbn-type":[{"value":"9783319464534","type":"print"},{"value":"9783319464541","type":"electronic"}],"license":[{"start":{"date-parts":[[2016,1,1]],"date-time":"2016-01-01T00:00:00Z","timestamp":1451606400000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2016,1,1]],"date-time":"2016-01-01T00:00:00Z","timestamp":1451606400000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2016]]},"DOI":"10.1007\/978-3-319-46454-1_18","type":"book-chapter","created":{"date-parts":[[2016,9,15]],"date-time":"2016-09-15T09:15:09Z","timestamp":1473930909000},"page":"285-301","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":43,"title":["Cross-Modal Supervision for Learning Active Speaker Detection in Video"],"prefix":"10.1007","author":[{"given":"Punarjay","family":"Chakravarty","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Tinne","family":"Tuytelaars","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2016,9,16]]},"reference":[{"issue":"3","key":"18_CR1","doi-asserted-by":"publisher","first-page":"747","DOI":"10.1007\/s11042-012-1080-6","volume":"68","author":"E Khoury","year":"2014","unstructured":"Khoury, E., S\u00e9nac, C., Joly, P.: Audiovisual diarization of people in video content. Multimedia Tools Appl. 68(3), 747\u2013775 (2014)","journal-title":"Multimedia Tools Appl."},{"key":"18_CR2","doi-asserted-by":"crossref","unstructured":"Everingham, M., Sivic, J., Zisserman, A.: Hello! my name is... buffy\u201d-automatic naming of characters in tv video. In: BMVC, vol. 2, pp. 6 (2006)","DOI":"10.5244\/C.20.92"},{"issue":"5","key":"18_CR3","doi-asserted-by":"publisher","first-page":"545","DOI":"10.1016\/j.imavis.2008.04.018","volume":"27","author":"M Everingham","year":"2009","unstructured":"Everingham, M., Sivic, J., Zisserman, A.: Taking the bite out of automatic naming of characters in TV video. Image Vis. Comput. 27(5), 545\u2013559 (2009)","journal-title":"Image Vis. Comput."},{"key":"18_CR4","unstructured":"Haider, F., Al Moubayed, S.: Towards speaker detection using lips movements for humanmachine multiparty dialogue. In: 2012 FONETIK (2012)"},{"key":"18_CR5","doi-asserted-by":"crossref","unstructured":"Chakravarty, P., Mirzaei, S., Tuytelaars, T., Vanhamme, H.: Who\u2019s speaking? audio-supervised classification of active speakers in video. In: ACM International Conference on Multimodal Interaction (ICMI) (2015)","DOI":"10.1145\/2818346.2820780"},{"key":"18_CR6","doi-asserted-by":"crossref","unstructured":"Germain, F., Sun, D.L., Mysore, G.J.: Speaker and noise independent voice activity detection. In: INTERSPEECH, pp. 732\u2013736 (2013)","DOI":"10.21437\/Interspeech.2013-204"},{"issue":"3","key":"18_CR7","doi-asserted-by":"publisher","first-page":"237","DOI":"10.1007\/s11263-013-0646-8","volume":"106","author":"H Bilen","year":"2014","unstructured":"Bilen, H., Namboodiri, V.P., Gool, L.J.: Object and action classification with latent window parameters. Int. J. Comput. Vis. 106(3), 237\u2013251 (2014)","journal-title":"Int. J. Comput. Vis."},{"key":"18_CR8","doi-asserted-by":"crossref","unstructured":"Bilen, H., Pedersoli, M., Tuytelaars, T.: Weakly supervised object detection with posterior regularization. In: British Machine Vision Conference (2014)","DOI":"10.5244\/C.28.52"},{"key":"18_CR9","doi-asserted-by":"crossref","unstructured":"Bilen, H., Pedersoli, M., Tuytelaars, T.: Weakly supervised object detection with convex clustering. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 1081\u20131089 (2015)","DOI":"10.1109\/CVPR.2015.7298711"},{"issue":"3","key":"18_CR10","doi-asserted-by":"publisher","first-page":"275","DOI":"10.1007\/s11263-012-0538-3","volume":"100","author":"T Deselaers","year":"2012","unstructured":"Deselaers, T., Alexe, B., Ferrari, V.: Weakly supervised localization and learning with generic knowledge. Int. J. Comput. Vis. 100(3), 275\u2013293 (2012)","journal-title":"Int. J. Comput. Vis."},{"key":"18_CR11","unstructured":"Song, H.O., Girshick, R., Jegelka, S., Mairal, J., Harchaoui, Z., Darrell, T.: On learning to localize objects with minimal supervision. arXiv preprint arXiv:1403.1024 (2014)"},{"key":"18_CR12","doi-asserted-by":"crossref","unstructured":"Nguyen, M.H., Torresani, L., de la Torre, F., Rother, C.: Weakly supervised discriminative localization and classification: a joint learning process. In: 2009 IEEE 12th International Conference on Computer Vision, pp. 1925\u20131932. IEEE (2009)","DOI":"10.1109\/ICCV.2009.5459426"},{"key":"18_CR13","doi-asserted-by":"crossref","unstructured":"Bojanowski, P., Bach, F., Laptev, I., Ponce, J., Schmid, C., Sivic, J.: Finding actors and actions in movies. In: 2013 IEEE International Conference on Computer Vision (ICCV), pp. 2280\u20132287. IEEE (2013)","DOI":"10.1109\/ICCV.2013.283"},{"key":"18_CR14","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"158","DOI":"10.1007\/978-3-642-33718-5_12","volume-title":"Computer Vision \u2013 ECCV 2012","author":"A Khosla","year":"2012","unstructured":"Khosla, A., Zhou, T., Malisiewicz, T., Efros, A.A., Torralba, A.: Undoing the damage of dataset bias. In: Fitzgibbon, A., Lazebnik, S., Perona, P., Sato, Y., Schmid, C. (eds.) ECCV 2012. LNCS, vol. 7572, pp. 158\u2013171. Springer, Heidelberg (2012). doi: 10.1007\/978-3-642-33718-5_12"},{"key":"18_CR15","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1007\/978-3-642-37213-1","volume-title":"Computer Vision \u2013 ACCV 2012","author":"T Tommasi","year":"2013","unstructured":"Tommasi, T., Quadrianto, N., Caputo, B., Lampert, C.H.: Beyond dataset bias: multi-task unaligned shared knowledge transfer. In: Lee, K.M., Matsushita, Y., Rehg, J.M., Hu, Z. (eds.) ACCV 2012, Part I. LNCS, vol. 7724, pp. 1\u201315. Springer, Heidelberg (2013)"},{"key":"18_CR16","doi-asserted-by":"crossref","unstructured":"Aljundi, R., Emonet, R., Muselet, D., Sebban, M.: Landmarks-based kernelized subspace alignment for unsupervised domain adaptation. In: Computer Vision and Pattern Recognition (CVPR 2015) (2015)","DOI":"10.1109\/CVPR.2015.7298600"},{"key":"18_CR17","doi-asserted-by":"crossref","unstructured":"Fernando, B., Habrard, A., Sebban, M., Tuytelaars, T.: Unsupervised visual domain adaptation using subspace alignment. In: 2013 IEEE International Conference on Computer Vision (ICCV), pp. 2960\u20132967. IEEE (2013)","DOI":"10.1109\/ICCV.2013.368"},{"key":"18_CR18","doi-asserted-by":"crossref","unstructured":"Aytar, Y., Zisserman, A.: Tabula rasa: model transfer for object category detection. In: 2011 IEEE International Conference on Computer Vision (ICCV), pp. 2252\u20132259. IEEE (2011)","DOI":"10.1109\/ICCV.2011.6126504"},{"key":"18_CR19","doi-asserted-by":"crossref","unstructured":"Tommasi, T., Caputo, B.: The more you know, the less you learn: from knowledge transfer to one-shot learning of object categories. In: BMVC, Number LIDIAP-CONF-2009-049 (2009)","DOI":"10.5244\/C.23.80"},{"key":"18_CR20","doi-asserted-by":"crossref","unstructured":"Tommasi, T., Orabona, F., Caputo, B.: Safety in numbers: learning categories from few examples with multi model knowledge transfer. In: 2010 IEEE Conference on Computer Vision and Pattern Recognition (CVPR), pp. 3081\u20133088. IEEE (2010)","DOI":"10.1109\/CVPR.2010.5540064"},{"key":"18_CR21","doi-asserted-by":"crossref","unstructured":"Chen, J., Liu, X., Tu, P., Aragones, A.: Person-specific expression recognition with transfer learning. In: 2012 19th IEEE International Conference on Image Processing (ICIP), pp. 2621\u20132624. IEEE (2012)","DOI":"10.1109\/ICIP.2012.6467436"},{"key":"18_CR22","doi-asserted-by":"crossref","unstructured":"Chu, W.S., De la Torre, F., Cohn, J.F.: Selective transfer machine for personalized facial action unit detection. In: 2013 IEEE Conference on Computer Vision and Pattern Recognition (CVPR), pp. 3515\u20133522. IEEE (2013)","DOI":"10.1109\/CVPR.2013.451"},{"key":"18_CR23","doi-asserted-by":"crossref","unstructured":"Zen, G., Sangineto, E., Ricci, E., Sebe, N.: Unsupervised domain adaptation for personalized facial emotion recognition. In: Proceedings of the 16th International Conference on Multimodal Interaction, pp. 128\u2013135. ACM (2014)","DOI":"10.1145\/2663204.2663247"},{"key":"18_CR24","doi-asserted-by":"crossref","unstructured":"Gavves, E., Mensink, T., Tommasi, T., Snoek, C.G., Tuytelaars, T.: Active transfer learning with zero-shot priors: reusing past datasets for future tasks. arXiv preprint arXiv:1510.01544 (2015)","DOI":"10.1109\/ICCV.2015.313"},{"key":"18_CR25","unstructured":"Mirzaei, S., Van hamme, H., Norouzi, Y.: Blind audio source separation of stereo mixtures using bayesian non-negative matrix factorization. In: Signal Processing Conference (EUSIPCO), pp. 621\u2013625, September 2014"},{"key":"18_CR26","series-title":"Lecture Notes in Computer Science (Lecture Notes in Artificial Intelligence)","doi-asserted-by":"publisher","first-page":"83","DOI":"10.1007\/978-3-642-15939-8_6","volume-title":"Machine Learning and Knowledge Discovery in Databases","author":"P Pletscher","year":"2010","unstructured":"Pletscher, P., Ong, C.S., Buhmann, J.M.: Entropy and margin maximization for structured output learning. In: Balc\u00e1zar, J.L., Bonchi, F., Gionis, A., Sebag, M. (eds.) ECML PKDD 2010. LNCS (LNAI), vol. 6323, pp. 83\u201398. Springer, Heidelberg (2010). doi: 10.1007\/978-3-642-15939-8_6"},{"issue":"2","key":"18_CR27","doi-asserted-by":"publisher","first-page":"154","DOI":"10.1007\/s11263-013-0620-5","volume":"104","author":"JR Uijlings","year":"2013","unstructured":"Uijlings, J.R., Sande, K.E., Gevers, T., Smeulders, A.W.: Selective search for object recognition. Int. J. Comput. Vis. 104(2), 154\u2013171 (2013)","journal-title":"Int. J. Comput. Vis."},{"key":"18_CR28","doi-asserted-by":"crossref","unstructured":"Wang, H., Schmid, C.: Action recognition with improved trajectories. In: ICCV, Sydney, Australia, pp. 3551\u20133558, December 2013","DOI":"10.1109\/ICCV.2013.441"},{"key":"18_CR29","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"143","DOI":"10.1007\/978-3-642-15561-1_11","volume-title":"Computer Vision \u2013 ECCV 2010","author":"F Perronnin","year":"2010","unstructured":"Perronnin, F., S\u00e1nchez, J., Mensink, T.: Improving the fisher kernel for large-scale image classification. In: Daniilidis, K., Maragos, P., Paragios, N. (eds.) ECCV 2010, Part IV. LNCS, vol. 6314, pp. 143\u2013156. Springer, Heidelberg (2010)"},{"issue":"3","key":"18_CR30","doi-asserted-by":"publisher","first-page":"480","DOI":"10.1109\/TPAMI.2011.153","volume":"34","author":"A Vedaldi","year":"2012","unstructured":"Vedaldi, A., Zisserman, A.: Efficient additive kernels via explicit feature maps. IEEE Trans. Pattern Anal. Mach. Intell. 34(3), 480\u2013492 (2012)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"18_CR31","unstructured":"Peng, X., Wang, L., Wang, X., Qiao, Y.: Bag of visual words and fusion methods for action recognition: comprehensive study and good practice. CoRR abs\/1405.4506 (2014)"},{"key":"18_CR32","unstructured":"Girshick, R.B., Felzenszwalb, P.F., McAllester, D.: Discriminatively trained deformable part models, release 5. http:\/\/people.cs.uchicago.edu\/rbg\/latent-release5\/"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2016"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-319-46454-1_18","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,7,8]],"date-time":"2022-07-08T14:54:10Z","timestamp":1657292050000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-319-46454-1_18"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2016]]},"ISBN":["9783319464534","9783319464541"],"references-count":32,"URL":"https:\/\/doi.org\/10.1007\/978-3-319-46454-1_18","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2016]]},"assertion":[{"value":"16 September 2016","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Amsterdam","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"The Netherlands","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2016","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"8 October 2016","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"16 October 2016","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"14","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2016","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/www.eccv2016.org\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"This content has been made available to all.","name":"free","label":"Free to read"}]}}