{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,3]],"date-time":"2026-04-03T19:51:44Z","timestamp":1775245904671,"version":"3.50.1"},"publisher-location":"Cham","reference-count":71,"publisher":"Springer International Publishing","isbn-type":[{"value":"9783030012243","type":"print"},{"value":"9783030012250","type":"electronic"}],"license":[{"start":{"date-parts":[[2018,1,1]],"date-time":"2018-01-01T00:00:00Z","timestamp":1514764800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2018,1,1]],"date-time":"2018-01-01T00:00:00Z","timestamp":1514764800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2018]]},"DOI":"10.1007\/978-3-030-01225-0_38","type":"book-chapter","created":{"date-parts":[[2018,10,8]],"date-time":"2018-10-08T04:39:54Z","timestamp":1538973594000},"page":"641-659","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":17,"title":["Multiple-Gaze Geometry: Inferring Novel 3D Locations from Gazes Observed in Monocular Video"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-0380-8630","authenticated-orcid":false,"given":"Ernesto","family":"Brau","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9721-6267","authenticated-orcid":false,"given":"Jinyan","family":"Guan","sequence":"additional","affiliation":[]},{"given":"Tanya","family":"Jeffries","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8568-9518","authenticated-orcid":false,"given":"Kobus","family":"Barnard","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2018,10,6]]},"reference":[{"issue":"8","key":"38_CR1","doi-asserted-by":"publisher","first-page":"1707","DOI":"10.1109\/TPAMI.2015.2496269","volume":"38","author":"X Alameda-Pineda","year":"2016","unstructured":"Alameda-Pineda, X., et al.: Salsa: a novel dataset for multimodal group behavior analysis. IEEE Trans. Pattern Anal. Mach. Intell. 38(8), 1707\u20131720 (2016)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"38_CR2","doi-asserted-by":"crossref","unstructured":"Andriluka, M., Roth, S., Schiele, B.: People-tracking-by-detection and people-detection-by-tracking. In: IEEE Conference on Computer Vision and Pattern Recognition, CVPR 2008, pp. 1\u20138. IEEE (2008)","DOI":"10.1109\/CVPR.2008.4587583"},{"key":"38_CR3","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"466","DOI":"10.1007\/978-3-642-15549-9_34","volume-title":"Computer Vision \u2013 ECCV 2010","author":"A Andriyenko","year":"2010","unstructured":"Andriyenko, A., Schindler, K.: Globally optimal multi-target tracking on a hexagonal lattice. In: Daniilidis, K., Maragos, P., Paragios, N. (eds.) ECCV 2010. LNCS, vol. 6311, pp. 466\u2013479. Springer, Heidelberg (2010). https:\/\/doi.org\/10.1007\/978-3-642-15549-9_34"},{"key":"38_CR4","doi-asserted-by":"crossref","unstructured":"Andriyenko, A., Schindler, K., Roth, S.: Discrete-continuous optimization for multi-target tracking. In: CVPR, pp. 1926\u20131933 (2012)","DOI":"10.1109\/CVPR.2012.6247893"},{"key":"38_CR5","doi-asserted-by":"crossref","unstructured":"Ba, S.O., Hung, H., Odobez, J.M.: Visual activity context for focus of attention estimation in dynamic meetings. In: IEEE International Conference on Multimedia and Expo, ICME 2009, pp. 1424\u20131427. IEEE (2009)","DOI":"10.1109\/ICME.2009.5202769"},{"key":"38_CR6","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"276","DOI":"10.1007\/978-3-540-68585-2_26","volume-title":"Multimodal Technologies for Perception of Humans","author":"SO Ba","year":"2008","unstructured":"Ba, S.O., Odobez, J.-M.: Probabilistic head pose tracking evaluation in single and multiple camera setups. In: Stiefelhagen, R., Bowers, R., Fiscus, J. (eds.) CLEAR\/RT -2007. LNCS, vol. 4625, pp. 276\u2013286. Springer, Heidelberg (2008). https:\/\/doi.org\/10.1007\/978-3-540-68585-2_26"},{"issue":"1","key":"38_CR7","doi-asserted-by":"publisher","first-page":"16","DOI":"10.1109\/TSMCB.2008.927274","volume":"39","author":"SO Ba","year":"2009","unstructured":"Ba, S.O., Odobez, J.M.: Recognizing visual focus of attention from head pose in natural meetings. IEEE Trans. Syst. Man Cybern. Part B Cybern. 39(1), 16\u201333 (2009)","journal-title":"IEEE Trans. Syst. Man Cybern. Part B Cybern."},{"issue":"1","key":"38_CR8","doi-asserted-by":"publisher","first-page":"101","DOI":"10.1109\/TPAMI.2010.69","volume":"33","author":"SO Ba","year":"2011","unstructured":"Ba, S.O., Odobez, J.M.: Multiperson visual focus of attention from head pose and meeting contextual cues. IEEE Trans. Pattern Anal. Mach. Intell. 33(1), 101\u2013116 (2011)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"38_CR9","doi-asserted-by":"crossref","unstructured":"Benfold, B., Reid, I.: Stable multi-target tracking in real-time surveillance video. In: CVPR, pp. 3457\u20133464 (2011)","DOI":"10.1109\/CVPR.2011.5995667"},{"key":"38_CR10","doi-asserted-by":"crossref","unstructured":"Benfold, B., Reid, I.: Guiding visual surveillance by tracking human attention. In: BMVC, pp. 1\u201311 (2009)","DOI":"10.5244\/C.23.14"},{"key":"38_CR11","doi-asserted-by":"crossref","unstructured":"Beymer, D.J.: Face recognition under varying pose. In: Proceedings of the IEEE Computer Society Conference on Computer Vision and Pattern Recognition, CVPR 1994, pp. 756\u2013761. IEEE (1994)","DOI":"10.1109\/CVPR.1994.323893"},{"issue":"9","key":"38_CR12","doi-asserted-by":"publisher","first-page":"1063","DOI":"10.1109\/TPAMI.2003.1227983","volume":"25","author":"V Blanz","year":"2003","unstructured":"Blanz, V., Vetter, T.: Face recognition based on fitting a 3D morphable model. IEEE Trans. Pattern Anal. Mach. Intell. 25(9), 1063\u20131074 (2003)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"38_CR13","doi-asserted-by":"crossref","unstructured":"Brau, E., Guan, J., Simek, K., Del Pero, L., Dawson, C.R., Barnard, K.: Bayesian 3D tracking from monocular video. In: 2013 IEEE International Conference on Computer Vision (ICCV), pp. 3368\u20133375. IEEE (2013)","DOI":"10.1109\/ICCV.2013.418"},{"key":"38_CR14","doi-asserted-by":"crossref","unstructured":"Chen, C., Heili, A., Odobez, J.M.: A joint estimation of head and body orientation cues in surveillance video. In: 2011 IEEE International Conference on Computer Vision Workshops (ICCV Workshops), pp. 860\u2013867. IEEE (2011)","DOI":"10.1109\/ICCVW.2011.6130342"},{"key":"38_CR15","doi-asserted-by":"crossref","unstructured":"Chen, C., Odobez, J.M.: We are not contortionists: coupled adaptive learning for head and body orientation estimation in surveillance video. In: 2012 IEEE Conference on Computer Vision and Pattern Recognition (CVPR), pp. 1544\u20131551. IEEE (2012)","DOI":"10.1109\/CVPR.2012.6247845"},{"key":"38_CR16","doi-asserted-by":"crossref","unstructured":"Cristani, M., et al.: Social interaction discovery by statistical analysis of F-formations. In: BMVC (2011)","DOI":"10.5244\/C.25.23"},{"key":"38_CR17","doi-asserted-by":"crossref","unstructured":"Dehghan, A., Assari, S.M., Shah, M.: GMMCP tracker: globally optimal generalized maximum multi clique problem for multiple object tracking. In: CVPR, vol. 1, p. 2 (2015)","DOI":"10.1109\/CVPR.2015.7299036"},{"key":"38_CR18","doi-asserted-by":"crossref","unstructured":"Del Pero, L., Guan, J., Brau, E., Schlecht, J., Barnard, K.: Sampling bedrooms. In: CVPR, pp. 2009\u20132016 (2011)","DOI":"10.1109\/CVPR.2011.5995737"},{"issue":"12","key":"38_CR19","doi-asserted-by":"publisher","first-page":"2264","DOI":"10.1109\/TCSVT.2015.2501920","volume":"26","author":"S Duffner","year":"2016","unstructured":"Duffner, S., Garcia, C.: Visual focus of attention estimation with unsupervised incremental learning. IEEE Trans. Circuits Syst. Video Technol. 26(12), 2264\u20132272 (2016)","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"key":"38_CR20","doi-asserted-by":"crossref","unstructured":"Felzenszwalb, P., Girshick, R., McAllester, D., Ramanan, D.: Object detection with discriminatively trained part-based models. In: IEEE PAMI (2009)","DOI":"10.1109\/TPAMI.2009.167"},{"issue":"10","key":"38_CR21","doi-asserted-by":"publisher","first-page":"639","DOI":"10.1016\/0262-8856(94)90039-6","volume":"12","author":"A Gee","year":"1994","unstructured":"Gee, A., Cipolla, R.: Determining the gaze of faces in images. Image Vis. Comput. 12(10), 639\u2013647 (1994)","journal-title":"Image Vis. Comput."},{"key":"38_CR22","doi-asserted-by":"crossref","unstructured":"Gu, L., Kanade, T.: 3D alignment of face in a single image. In: 2006 IEEE Computer Society Conference on Computer Vision and Pattern Recognition, vol. 1, pp. 1305\u20131312. IEEE (2006)","DOI":"10.1109\/CVPR.2006.11"},{"key":"38_CR23","volume-title":"Multiple View Geometry in Computer Vision","author":"R Hartley","year":"2000","unstructured":"Hartley, R., Zisserman, A.: Multiple View Geometry in Computer Vision. Cambridge University Press, New York (2000)"},{"key":"38_CR24","series-title":"Springer Series in Statistics","doi-asserted-by":"crossref","DOI":"10.1007\/978-0-387-21606-5","volume-title":"The Elements of Statistical Learning; Data Mining, Inference, and Prediction","author":"T Hastie","year":"2001","unstructured":"Hastie, T., Tibshirani, R., Friedman, J.: The Elements of Statistical Learning; Data Mining, Inference, and Prediction. Springer Series in Statistics. Springer, New York (2001)"},{"key":"38_CR25","doi-asserted-by":"crossref","unstructured":"Horprasert, T., Yacoob, Y., Davis, L.S.: Computing 3d head orientation from a monocular image sequence. In: 25th Annual AIPR Workshop on Emerging Applications of Computer Vision, pp. 244\u2013252. International Society for Optics and Photonics (1997)","DOI":"10.1117\/12.267830"},{"key":"38_CR26","doi-asserted-by":"crossref","unstructured":"Huang, J., Shao, X., Wechsler, H.: Face pose discrimination using support vector machines (SVM). In: Proceedings of the Fourteenth International Conference on Pattern Recognition, vol. 1, pp. 154\u2013156. IEEE (1998)","DOI":"10.1007\/978-3-642-72201-1_32"},{"key":"38_CR27","doi-asserted-by":"crossref","unstructured":"Huang, Y., Duan, D., Cui, J., Davoine, F., Wang, L., Zha, H.: Joint estimation of head pose and visual focus of attention. In: 2014 IEEE International Conference on Image Processing (ICIP), pp. 3332\u20133336. IEEE (2014)","DOI":"10.1109\/ICIP.2014.7025674"},{"key":"38_CR28","doi-asserted-by":"crossref","unstructured":"Isard, M., MacCormick, J.: BraMBLe: a Bayesian multiple-blob tracker. In: ICCV, pp. 34\u201341 (2001)","DOI":"10.1109\/ICCV.2001.937594"},{"key":"38_CR29","doi-asserted-by":"crossref","unstructured":"Jayagopi, D.B., et al.: The vernissage corpus: a multimodal human-robot-interaction dataset. Technical report (2012)","DOI":"10.1109\/HRI.2013.6483545"},{"key":"38_CR30","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"201","DOI":"10.1007\/978-3-642-33765-9_15","volume-title":"Computer Vision \u2013 ECCV 2012","author":"KM Kitani","year":"2012","unstructured":"Kitani, K.M., Ziebart, B.D., Bagnell, J.A., Hebert, M.: Activity forecasting. In: Fitzgibbon, A., Lazebnik, S., Perona, P., Sato, Y., Schmid, C. (eds.) ECCV 2012. LNCS, vol. 7575, pp. 201\u2013214. Springer, Heidelberg (2012). https:\/\/doi.org\/10.1007\/978-3-642-33765-9_15"},{"key":"38_CR31","doi-asserted-by":"crossref","unstructured":"Kuo, C., Huang, C., Nevatia, R.: Multi-target tracking by on-line learned discriminative appearance models. In: CVPR, pp. 685\u2013692 (2010)","DOI":"10.1109\/CVPR.2010.5540148"},{"issue":"4","key":"38_CR32","doi-asserted-by":"publisher","first-page":"322","DOI":"10.1109\/34.845375","volume":"22","author":"M La Cascia","year":"2000","unstructured":"La Cascia, M., Sclaroff, S., Athitsos, V.: Fast, reliable head tracking under varying illumination: an approach based on registration of texture-mapped 3d models. IEEE Trans. Pattern Anal. Mach. Intell. 22(4), 322\u2013336 (2000)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"38_CR33","unstructured":"Li, Y., Gong, S., Liddell, H.: Support vector regression and classification based multi-view face detection and recognition. In: Proceedings of the Fourth IEEE International Conference on Automatic Face and Gesture Recognition, pp. 300\u2013305. IEEE (2000)"},{"issue":"5","key":"38_CR34","doi-asserted-by":"publisher","first-page":"413","DOI":"10.1016\/j.imavis.2003.12.005","volume":"22","author":"Y Li","year":"2004","unstructured":"Li, Y., Gong, S., Sherrah, J., Liddell, H.: Support vector machine based multi-view face detection and recognition. Image Vis. Comput. 22(5), 413\u2013427 (2004)","journal-title":"Image Vis. Comput."},{"key":"38_CR35","unstructured":"Liu, C.: Exploring new representations and applications for motion analysis. Ph.D. thesis, M.I.T (2009)"},{"key":"38_CR36","doi-asserted-by":"crossref","unstructured":"Mass\u00e9, B., Ba, S., Horaud, R.: Simultaneous estimation of gaze direction and visual focus of attention for multi-person-to-robot interaction. In: 2016 IEEE International Conference on Multimedia and Expo (ICME), pp. 1\u20136. IEEE (2016)","DOI":"10.1109\/ICME.2016.7552986"},{"key":"38_CR37","doi-asserted-by":"crossref","unstructured":"Milan, A., Leal-Taix\u00e9, L., Schindler, K., Reid, I.: Joint tracking and segmentation of multiple targets. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 5397\u20135406 (2015)","DOI":"10.1109\/CVPR.2015.7299178"},{"issue":"4","key":"38_CR38","doi-asserted-by":"publisher","first-page":"607","DOI":"10.1109\/TPAMI.2008.106","volume":"31","author":"E Murphy-Chutorian","year":"2009","unstructured":"Murphy-Chutorian, E., Trivedi, M.M.: Head pose estimation in computer vision: a survey. IEEE Trans. Pattern Anal. Mach. Intell. 31(4), 607\u2013626 (2009)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"38_CR39","unstructured":"Niyogi, S., Freeman, W.T.: Example-based head tracking. In: Proceedings of the Second International Conference on Automatic Face and Gesture Recognition, pp. 374\u2013378. IEEE (1996)"},{"key":"38_CR40","unstructured":"Oh, S.: Bayesian formulation of data association and Markov chain Monte Carlo data association. In: Robotics: Science and Systems Conference (RSS) Workshop Inside Data association (2008)"},{"key":"38_CR41","unstructured":"Oh, S., Russell, S., Sastry, S.: Markov chain Monte Carlo data association for general multiple target tracking problems (2004)"},{"key":"38_CR42","doi-asserted-by":"crossref","unstructured":"Otsuka, K., Takemae, Y., Yamato, J.: A probabilistic inference of multiparty-conversation structure based on Markov-switching models of gaze patterns, head directions, and utterances. In: Proceedings of the 7th International Conference on Multimodal Interfaces, pp. 191\u2013198. ACM (2005)","DOI":"10.1145\/1088463.1088497"},{"key":"38_CR43","doi-asserted-by":"crossref","unstructured":"Otsuka, K., Yamato, J., Takemae, Y., Murase, H.: Conversation scene analysis with dynamic Bayesian network basedon visual head tracking. In: 2006 IEEE International Conference on Multimedia and Expo, pp. 949\u2013952. IEEE (2006)","DOI":"10.1109\/ICME.2006.262677"},{"key":"38_CR44","doi-asserted-by":"crossref","unstructured":"Pirsiavash, H., Ramanan, D., Fowlkes, C.: Globally-optimal greedy algorithms for tracking a variable number of objects. In: CVPR, pp. 1201\u20131208 (2011)","DOI":"10.1109\/CVPR.2011.5995604"},{"key":"38_CR45","doi-asserted-by":"crossref","unstructured":"Sankaranarayanan, K., Chang, M.C., Krahnstoever, N.: Tracking gaze direction from far-field surveillance cameras. In: 2011 IEEE Workshop on Applications of Computer Vision (WACV), pp. 519\u2013526. IEEE (2011)","DOI":"10.1109\/WACV.2011.5711548"},{"key":"38_CR46","doi-asserted-by":"crossref","unstructured":"Segal, A.V., Reid, I.: Latent data association: Bayesian model selection for multi-target tracking. In: 2013 IEEE International Conference on Computer Vision (ICCV), pp. 2904\u20132911. IEEE (2013)","DOI":"10.1109\/ICCV.2013.361"},{"key":"38_CR47","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"99","DOI":"10.1007\/978-3-642-34014-7_9","volume-title":"Human Behavior Understanding","author":"S Sheikhi","year":"2012","unstructured":"Sheikhi, S., Odobez, J.-M.: Recognizing the visual focus of attention for human robot interaction. In: Salah, A.A., Ruiz-del-Solar, J., Meri\u00e7li, \u00c7., Oudeyer, P.-Y. (eds.) HBU 2012. LNCS, vol. 7559, pp. 99\u2013112. Springer, Heidelberg (2012). https:\/\/doi.org\/10.1007\/978-3-642-34014-7_9"},{"key":"38_CR48","doi-asserted-by":"crossref","unstructured":"Smith, K., Ba, S.O., Gatica-Perez, D., Odobez, J.M.: Tracking the multi person wandering visual focus of attention. In: Proceedings of the 8th International Conference on Multimodal Interfaces, pp. 265\u2013272. ACM (2006)","DOI":"10.1145\/1180995.1181048"},{"issue":"7","key":"38_CR49","doi-asserted-by":"publisher","first-page":"1212","DOI":"10.1109\/TPAMI.2007.70773","volume":"30","author":"K Smith","year":"2008","unstructured":"Smith, K., Ba, S.O., Odobez, J.M., Gatica-Perez, D.: Tracking the visual focus of attention for a varying number of wandering people. IEEE Trans. Pattern Anal. Mach. Intell. 30(7), 1212\u20131229 (2008)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"38_CR50","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1007\/978-3-540-69568-4_1","volume-title":"Multimodal Technologies for Perception of Humans","author":"R Stiefelhagen","year":"2007","unstructured":"Stiefelhagen, R., Bernardin, K., Bowers, R., Garofolo, J., Mostefa, D., Soundararajan, P.: The CLEAR 2006 evaluation. In: Stiefelhagen, R., Garofolo, J. (eds.) CLEAR 2006. LNCS, vol. 4122, pp. 1\u201344. Springer, Heidelberg (2007). https:\/\/doi.org\/10.1007\/978-3-540-69568-4_1"},{"key":"38_CR51","doi-asserted-by":"crossref","unstructured":"Stiefelhagen, R., Yang, J., Waibel, A.: Modeling focus of attention for meeting indexing. In: Proceedings of the seventh ACM International Conference on Multimedia (Part 1), pp. 3\u201310. ACM (1999)","DOI":"10.1145\/319463.319464"},{"issue":"4","key":"38_CR52","doi-asserted-by":"publisher","first-page":"928","DOI":"10.1109\/TNN.2002.1021893","volume":"13","author":"R Stiefelhagen","year":"2002","unstructured":"Stiefelhagen, R., Yang, J., Waibel, A.: Modeling focus of attention for meeting indexing based on multiple cues. IEEE Trans. Neural Netw. 13(4), 928\u2013938 (2002)","journal-title":"IEEE Trans. Neural Netw."},{"key":"38_CR53","doi-asserted-by":"crossref","unstructured":"Stiefelhagen, R., Zhu, J.: Head orientation and gaze direction in meetings. In: Extended Abstracts on Human Factors in Computing Systems, CHI 2002, pp. 858\u2013859. ACM (2002)","DOI":"10.1145\/506443.506634"},{"key":"38_CR54","doi-asserted-by":"crossref","unstructured":"Tang, S., Andres, B., Andriluka, M., Schiele, B.: Subgraph decomposition for multi-target tracking. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 5033\u20135041 (2015)","DOI":"10.1109\/CVPR.2015.7299138"},{"key":"38_CR55","unstructured":"Titsias, M.K., Lawrence, N.D., Rattray, M.: Efficient sampling for Gaussian Process inference using control variables. In: Advances in Neural Information Processing Systems, vol. 21, pp. 1681\u20131688. Curran Associates Inc., Vancouver, British Columbia, Canada (2008)"},{"issue":"2","key":"38_CR56","doi-asserted-by":"publisher","first-page":"802","DOI":"10.1109\/TIP.2011.2162740","volume":"21","author":"R Valenti","year":"2012","unstructured":"Valenti, R., Sebe, N., Gevers, T.: Combining head pose and eye location information for gaze estimation. IEEE Trans. Image Process. 21(2), 802\u2013815 (2012)","journal-title":"IEEE Trans. Image Process."},{"key":"38_CR57","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"307","DOI":"10.1007\/978-3-540-68585-2_29","volume-title":"Multimodal Technologies for Perception of Humans","author":"M Voit","year":"2008","unstructured":"Voit, M., Nickel, K., Stiefelhagen, R.: Head pose estimation in single- and multi-view environments - results on the CLEAR\u201907 benchmarks. In: Stiefelhagen, R., Bowers, R., Fiscus, J. (eds.) CLEAR\/RT -2007. LNCS, vol. 4625, pp. 307\u2013316. Springer, Heidelberg (2008). https:\/\/doi.org\/10.1007\/978-3-540-68585-2_29"},{"key":"38_CR58","doi-asserted-by":"crossref","unstructured":"Voit, M., Stiefelhagen, R.: Deducing the visual focus of attention from head pose estimation in dynamic multi-view meeting scenarios. In: Proceedings of the 10th International Conference on Multimodal Interfaces, pp. 173\u2013180. ACM (2008)","DOI":"10.1145\/1452392.1452425"},{"key":"38_CR59","doi-asserted-by":"crossref","unstructured":"Voit, M., Stiefelhagen, R.: 3D user-perspective, voxel-based estimation of visual focus of attention in dynamic meeting scenarios. In: International Conference on Multimodal Interfaces and the Workshop on Machine Learning for Multimodal Interaction, p. 51. ACM (2010)","DOI":"10.1145\/1891903.1891966"},{"key":"38_CR60","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1007\/s11263-012-0564-1","volume":"101","author":"C Vondrick","year":"2013","unstructured":"Vondrick, C., Patterson, D., Ramanan, D.: Efficiently scaling up crowdsourced video annotation. Int. J. Comput. Vis. 101, 1\u201321 (2013). https:\/\/doi.org\/10.1007\/s11263-012-0564-1","journal-title":"Int. J. Comput. Vis."},{"key":"38_CR61","doi-asserted-by":"publisher","first-page":"1165","DOI":"10.1109\/TPAMI.2016.2574712","volume":"39","author":"P Wei","year":"2016","unstructured":"Wei, P., Zhao, Y., Zheng, N., Zhu, S.C.: Modeling 4d human-object interactions for joint event segmentation, recognition, and object localization. IEEE Trans Pattern Anal. Mach. Intell. 39, 1165\u20131179 (2016)","journal-title":"IEEE Trans Pattern Anal. Mach. Intell."},{"key":"38_CR62","unstructured":"Wu, Y., Toyama, K.: Wide-range, person-and illumination-insensitive head orientation estimation. In: Proceedings of the Fourth IEEE International Conference on Automatic Face and Gesture Recognition, pp. 183\u2013188. IEEE (2000)"},{"issue":"1","key":"38_CR63","doi-asserted-by":"publisher","first-page":"85","DOI":"10.1002\/ima.10048","volume":"13","author":"J Xiao","year":"2003","unstructured":"Xiao, J., Moriyama, T., Kanade, T., Cohn, J.F.: Robust full-motion recovery of head by dynamic templates and re-registration techniques. Int. J. Imaging Syst. Technol. 13(1), 85\u201394 (2003)","journal-title":"Int. J. Imaging Syst. Technol."},{"key":"38_CR64","unstructured":"Xie, D., Todorovicy, S., Zhu, S.C.: Inferring \u201cdark matter\u201d and \u201cdark energy\u201d from videos. In: ICCV (2013)"},{"key":"38_CR65","unstructured":"Yang, R., Zhang, Z.: Model-based head pose tracking with stereovision. In: Proceedings of the Fifth IEEE International Conference on Automatic Face and Gesture Recognition, pp. 255\u2013260. IEEE (2002)"},{"issue":"3","key":"38_CR66","doi-asserted-by":"publisher","first-page":"288","DOI":"10.1109\/LSP.2014.2300497","volume":"21","author":"Y Yi","year":"2014","unstructured":"Yi, Y., Xu, H.: Hierarchical data association framework with occlusion handling for multiple targets tracking. IEEE Signal Process. Lett. 21(3), 288\u2013291 (2014)","journal-title":"IEEE Signal Process. Lett."},{"issue":"3","key":"38_CR67","doi-asserted-by":"publisher","first-page":"829","DOI":"10.1109\/TSMCB.2012.2216979","volume":"43","author":"Z Y\u00fccel","year":"2013","unstructured":"Y\u00fccel, Z., Salah, A.A., Mericli, C., Meri\u00e7li, T., Valenti, R., Gevers, T.: Joint attention by gaze interpolation and saliency. IEEE Trans. Cybern. 43(3), 829\u2013842 (2013)","journal-title":"IEEE Trans. Cybern."},{"key":"38_CR68","doi-asserted-by":"crossref","unstructured":"Zen, G., Lepri, B., Ricci, E., Lanz, O.: Space speaks: towards socially and personality aware visual surveillance. In: 1st ACM International Workshop on Multimodal Pervasive Video Analysis, pp. 37\u201342. ACM, Firenze, Italy (2010)","DOI":"10.1145\/1878039.1878048"},{"key":"38_CR69","doi-asserted-by":"crossref","unstructured":"Zhang, L., Li, Y., Nevatia, R.: Global data association for multi-object tracking using network flows. In: IEEE Conference on Computer Vision and Pattern Recognition, CVPR 2008, pp. 1\u20138. IEEE (2008)","DOI":"10.1109\/CVPR.2008.4587584"},{"key":"38_CR70","doi-asserted-by":"crossref","unstructured":"Zhao, G., Chen, L., Song, J., Chen, G.: Large head movement tracking using sift-based registration. In: Proceedings of the 15th International Conference on Multimedia, pp. 807\u2013810. ACM (2007)","DOI":"10.1145\/1291233.1291416"},{"key":"38_CR71","doi-asserted-by":"crossref","unstructured":"Zhu, X., Ramanan, D.: Face detection, pose estimation, and landmark localization in the wild. In: 2012 IEEE Conference on Computer Vision and Pattern Recognition (CVPR), pp. 2879\u20132886. IEEE (2012)","DOI":"10.1109\/CVPR.2012.6248014"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2018"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-01225-0_38","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,4,3]],"date-time":"2026-04-03T18:54:10Z","timestamp":1775242450000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-030-01225-0_38"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018]]},"ISBN":["9783030012243","9783030012250"],"references-count":71,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-01225-0_38","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2018]]},"assertion":[{"value":"6 October 2018","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Munich","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Germany","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2018","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"8 September 2018","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"14 September 2018","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"15","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2018","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2018.org\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"This content has been made available to all.","name":"free","label":"Free to read"}]}}