{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,21]],"date-time":"2026-04-21T17:44:21Z","timestamp":1776793461350,"version":"3.51.2"},"publisher-location":"New York, NY, USA","reference-count":70,"publisher":"ACM","license":[{"start":{"date-parts":[[2017,10,19]],"date-time":"2017-10-19T00:00:00Z","timestamp":1508371200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2017,10,19]]},"DOI":"10.1145\/3123266.3123298","type":"proceedings-article","created":{"date-parts":[[2017,10,20]],"date-time":"2017-10-20T13:04:26Z","timestamp":1508504666000},"page":"591-599","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":10,"title":["Predicting Human Intentions from Motion Cues Only"],"prefix":"10.1145","author":[{"given":"Andrea","family":"Zunino","sequence":"first","affiliation":[{"name":"Istituto Italiano di Tecnologia (IIT) &amp; Universit\u00e0 degli Studi di Genova, Genova, Italy"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jacopo","family":"Cavazza","sequence":"additional","affiliation":[{"name":"Istituto Italiano di Tecnologia (IIT) &amp; Universit\u00e0 degli Studi di Genova, Genova, Italy"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Atesh","family":"Koul","sequence":"additional","affiliation":[{"name":"Istituto Italiano di Tecnologia (IIT), Genova, Italy"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Andrea","family":"Cavallo","sequence":"additional","affiliation":[{"name":"Istituto Italiano di Tecnologia (IIT) &amp; Universit\u00e0 di Torino, Genova, Italy"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Cristina","family":"Becchio","sequence":"additional","affiliation":[{"name":"Istituto Italiano di Tecnologia (IIT) &amp; Universit\u00e0 di Torino, Genova, Italy"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Vittorio","family":"Murino","sequence":"additional","affiliation":[{"name":"Istituto Italiano di Tecnologia &amp; Universit\u00e0 di Verona, Genova, Italy"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2017,10,19]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1145\/1922649.1922653"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1177\/1073858414533827"},{"key":"e_1_3_2_1_3_1","volume-title":"PloS One","volume":"2","author":"Ansuini C.","year":"2015"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1007\/s00530-010-0182-0"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"crossref","unstructured":"Oren Boiman Eli Shechtman and Michal Irani. 2008. In defense of Nearest-Neighbor based image classification CVPR.  Oren Boiman Eli Shechtman and Michal Irani. 2008. In defense of Nearest-Neighbor based image classification CVPR.","DOI":"10.1109\/CVPR.2008.4587598"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.5555\/2503308.2188387"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1177\/0956797612472909"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2013.343"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1186\/1743-0003-8-19"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"crossref","unstructured":"Andrea Cavallo Atesh Koul Caterina Ansuini Francesca Capozzi and Cristina Becchio. 2016. Decoding intentions from movement kinematics. In Scientific Reports.  Andrea Cavallo Atesh Koul Caterina Ansuini Francesca Capozzi and Cristina Becchio. 2016. Decoding intentions from movement kinematics. In Scientific Reports.","DOI":"10.1038\/srep37036"},{"key":"e_1_3_2_1_11_1","volume-title":"Marco San Biagio, and Vittorio Murino","author":"Cavazza Jacopo","year":"2016"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"crossref","unstructured":"Anirban Chakraborty and K. Roy-Chowdhury. 2014. Context-Aware Activity Forecasting. In ACCV.  Anirban Chakraborty and K. Roy-Chowdhury. 2014. Context-Aware Activity Forecasting. In ACCV.","DOI":"10.1007\/978-3-319-16814-2_2"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW.2013.153"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"crossref","unstructured":"R. Chaudhry A. Ravichandran G. Hager and R. Vidal. 2009. Histograms of oriented optical flow and Binet-Cauchy kernels on nonlinear dynamical systems for the recognition of human actions CVPR.  R. Chaudhry A. Ravichandran G. Hager and R. Vidal. 2009. Histograms of oriented optical flow and Binet-Cauchy kernels on nonlinear dynamical systems for the recognition of human actions CVPR.","DOI":"10.1109\/CVPR.2009.5206821"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"crossref","unstructured":"Guilhem Ch\u00e9ron Ivan Laptev and Cordelia Schmid. 2015. P-CNN: Pose-based CNN Features for Action Recognition ICCV.  Guilhem Ch\u00e9ron Ivan Laptev and Cordelia Schmid. 2015. P-CNN: Pose-based CNN Features for Action Recognition ICCV.","DOI":"10.1109\/ICCV.2015.368"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2005.177"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1007\/11744047_33"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.imavis.2006.01.012"},{"key":"e_1_3_2_1_19_1","volume-title":"Prediction of Manipulation Actions. arXiv preprint arXiv:1610.00759","author":"Ferm\u00fcller Cornelia","year":"2016"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2014.260"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"crossref","unstructured":"Steinn Gudmundsson Thomas P. Runarsson and Sven Sigurdsson. 2008. Support Vector Machines and Dynamic Time Warping for Time Series IJCNN.  Steinn Gudmundsson Thomas P. Runarsson and Sven Sigurdsson. 2008. Support Vector Machines and Dynamic Time Warping for Time Series IJCNN.","DOI":"10.1109\/IJCNN.2008.4634188"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"crossref","unstructured":"Minh Hoai and Fernando De la Torre. 2012. Max-Margin Early Event Detectors. In CVPR.   Minh Hoai and Fernando De la Torre. 2012. Max-Margin Early Event Detectors. In CVPR.","DOI":"10.1109\/CVPR.2012.6248012"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"crossref","unstructured":"De-An Huang and Kris M Kitani. 2014. Action-Reaction: Forecasting the Dynamics of Human Interaction ECCV.  De-An Huang and Kris M Kitani. 2014. Action-Reaction: Forecasting the Dynamics of Human Interaction ECCV.","DOI":"10.1007\/978-3-319-10584-0_32"},{"key":"e_1_3_2_1_24_1","unstructured":"M. Hussein M. Torki M. Gowayyed and M. El-Saban.. 2013. Human Action Recognition Using a Temporal Hierarchy of Covariance Descriptors on 3D Joint Locations IJCAI.   M. Hussein M. Torki M. Gowayyed and M. El-Saban.. 2013. Human Action Recognition Using a Temporal Hierarchy of Covariance Descriptors on 3D Joint Locations IJCAI."},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"crossref","unstructured":"Ashesh Jain Amir. R. Zamir Silvio Savarese and Ashutosh Saxena. 2016. Structural-RNN: Deep Learning on Spatio-Temporal Graphs CVPR.  Ashesh Jain Amir. R. Zamir Silvio Savarese and Ashutosh Saxena. 2016. Structural-RNN: Deep Learning on Spatio-Temporal Graphs CVPR.","DOI":"10.1109\/CVPR.2016.573"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"crossref","unstructured":"Herv\u00e9 J\u00e9gou Matthijs Douze Cordelia Schmid and Patrick P\u00e9rez. 2010. Aggregating local descriptors into a compact image representation CVPR.  Herv\u00e9 J\u00e9gou Matthijs Douze Cordelia Schmid and Patrick P\u00e9rez. 2010. Aggregating local descriptors into a compact image representation CVPR.","DOI":"10.1109\/CVPR.2010.5540039"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.tics.2011.06.005"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-33765-9_15"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2015.2491928"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"crossref","unstructured":"Hema Koppula and Ashutosh Saxena. 2013. Anticipating Human Activities using Object Affordances for Reactive Robotic Response Robot Sci Syst.  Hema Koppula and Ashutosh Saxena. 2013. Anticipating Human Activities using Object Affordances for Reactive Robotic Response Robot Sci Syst.","DOI":"10.15607\/RSS.2013.IX.006"},{"key":"e_1_3_2_1_31_1","volume-title":"Hinton","author":"Krizhevsky Alex","year":"2012"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2011.6126543"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"crossref","unstructured":"Tian Lan Tsung-Chuan Chen and Silvio Savarese. 2014. A Hierarchical Representation for Future Action Prediction ECCV.  Tian Lan Tsung-Chuan Chen and Silvio Savarese. 2014. A Hierarchical Representation for Future Action Prediction ECCV.","DOI":"10.1007\/978-3-319-10578-9_45"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2013.2297321"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-33718-5_21"},{"key":"e_1_3_2_1_36_1","volume-title":"CVPR workshop.","author":"Li W."},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1007\/11744085_28"},{"key":"e_1_3_2_1_38_1","unstructured":"Shugao Ma Leonid Sigal and Stan Sclaroff. 2016. Learning Activity Progression in LS\u2122s for Activity Detection and Early Detection CVPR.  Shugao Ma Leonid Sigal and Stan Sclaroff. 2016. Learning Activity Progression in LS\u2122s for Activity Detection and Early Detection CVPR."},{"key":"e_1_3_2_1_39_1","unstructured":"Mingtao Yunde Jia and Song-Chun Zhu. 2011. Parsing video events with goal inference and intent prediction ICCV.  Mingtao Yunde Jia and Song-Chun Zhu. 2011. Parsing video events with goal inference and intent prediction ICCV."},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1109\/WACV.2015.27"},{"key":"e_1_3_2_1_41_1","volume-title":"Dynamic Time Warping. Information Retrieval for Music and Motion, bibfieldeditor","author":"M\u00fcller Meinard"},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"crossref","unstructured":"Joe Yue-Hei Ng Matthew Hausknecht Sudheendra Vijayanarasimhan Oriol Vinyals Rajat Monga and George Toderici. 2015. Beyond Short Snippets: Deep Networks for Video Classification CVPR.  Joe Yue-Hei Ng Matthew Hausknecht Sudheendra Vijayanarasimhan Oriol Vinyals Rajat Monga and George Toderici. 2015. Beyond Short Snippets: Deep Networks for Video Classification CVPR.","DOI":"10.1109\/CVPR.2015.7299101"},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"crossref","unstructured":"Florent Perronnin and Christopher Dance. 2007. Fishers on visual vocabularies for image categorization CVPR.  Florent Perronnin and Christopher Dance. 2007. Fishers on visual vocabularies for image categorization CVPR.","DOI":"10.1109\/CVPR.2007.383266"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"crossref","unstructured":"M. Rohrbach S. Amin M. Andriluka and B. Schiele. 2012. A Database for Fine Grained Activity Detection of Cooking Activities CVPR.  M. Rohrbach S. Amin M. Andriluka and B. Schiele. 2012. A Database for Fine Grained Activity Detection of Cooking Activities CVPR.","DOI":"10.1109\/CVPR.2012.6247801"},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2011.6126349"},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.1145\/2696454.2696462"},{"key":"e_1_3_2_1_47_1","volume-title":"Van Gool","author":"Schindler Konrad","year":"2008"},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2005.90"},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"crossref","unstructured":"K. Soomro H. Idrees and S. Mubarak. 2016. Predicting the Where and What of actors and actions through Online Action Localization CVPR.  K. Soomro H. Idrees and S. Mubarak. 2016. Predicting the Where and What of actors and actions through Online Action Localization CVPR.","DOI":"10.1109\/CVPR.2016.290"},{"key":"e_1_3_2_1_50_1","volume-title":"Amir Roshan Zamir, and Mubarak Shah","author":"Soomro Khurram","year":"2012"},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.530"},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"crossref","unstructured":"J. C. Stapel S. Hunnius and H. Bekkering. 2012. Online prediction of others' actions: the contribution of target object action context and movement kinematics. In Psychol. Res.  J. C. Stapel S. Hunnius and H. Bekkering. 2012. Online prediction of others' actions: the contribution of target object action context and movement kinematics. In Psychol. Res.","DOI":"10.1007\/s00426-012-0423-2"},{"key":"e_1_3_2_1_53_1","doi-asserted-by":"publisher","DOI":"10.1145\/2493432.2493482"},{"key":"e_1_3_2_1_54_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.510"},{"key":"e_1_3_2_1_55_1","volume-title":"The left IPL represents stored hand-postures for object use and action prediction. Frontiers in Psychology","author":"van Elk Michiel","year":"2014"},{"key":"e_1_3_2_1_56_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2014.82"},{"key":"e_1_3_2_1_57_1","doi-asserted-by":"crossref","unstructured":"Carl Vondrick Hamed Pirsiavash and Antonio Torralba. 2016. Anticipating Visual Representations with Unlabeled Video CVPR.  Carl Vondrick Hamed Pirsiavash and Antonio Torralba. 2016. Anticipating Visual Representations with Unlabeled Video CVPR.","DOI":"10.1109\/CVPR.2016.18"},{"key":"e_1_3_2_1_58_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2014.416"},{"key":"e_1_3_2_1_59_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-012-0594-8"},{"key":"e_1_3_2_1_60_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2013.441"},{"key":"e_1_3_2_1_61_1","doi-asserted-by":"crossref","unstructured":"J. Wang Z. Liu J. Chorowski Z. Chen and Y. Wu. 2012 a. Robust 3d action recognition with random occupancy patterns ECCV.  J. Wang Z. Liu J. Chorowski Z. Chen and Y. Wu. 2012 a. Robust 3d action recognition with random occupancy patterns ECCV.","DOI":"10.1007\/978-3-642-33709-3_62"},{"key":"e_1_3_2_1_62_1","unstructured":"J. Wang Z. Liu Y. Wu and J. Yuan. 2012 b. Mining actionlet ensemble for action recognition with depth cameras CVPR.  J. Wang Z. Liu Y. Wu and J. Yuan. 2012 b. Mining actionlet ensemble for action recognition with depth cameras CVPR."},{"key":"e_1_3_2_1_63_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2013.277"},{"key":"e_1_3_2_1_64_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.365"},{"key":"e_1_3_2_1_65_1","unstructured":"Zhongwen Xu Yi Yang and Alexander G. Hauptmann. 2015 b. A Discriminative CNN Video Representation for Event Detection CVPR.  Zhongwen Xu Yi Yang and Alexander G. Hauptmann. 2015 b. A Discriminative CNN Video Representation for Event Detection CVPR."},{"key":"e_1_3_2_1_66_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2011.5995468"},{"key":"e_1_3_2_1_67_1","doi-asserted-by":"crossref","unstructured":"X. Yang and Y. Tian. 2012. Eigenjoints-based action recognition using naivebayes-nearest-neighbor CVPR workshop.  X. Yang and Y. Tian. 2012. Eigenjoints-based action recognition using naivebayes-nearest-neighbor CVPR workshop.","DOI":"10.1109\/CVPRW.2012.6239232"},{"key":"e_1_3_2_1_68_1","doi-asserted-by":"publisher","DOI":"10.1145\/2393347.2396380"},{"key":"e_1_3_2_1_69_1","unstructured":"Shengxin Zha Florian Luisier Walter Andrews Nitish Srivastava and Ruslan Salakhutdinov. 2015. Exploiting Image-trained CNN Architectures for Unconstrained Video Classification BMVC.  Shengxin Zha Florian Luisier Walter Andrews Nitish Srivastava and Ruslan Salakhutdinov. 2015. Exploiting Image-trained CNN Architectures for Unconstrained Video Classification BMVC."},{"key":"e_1_3_2_1_70_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2015.03.006"}],"event":{"name":"MM '17: ACM Multimedia Conference","location":"Mountain View California USA","acronym":"MM '17","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 25th ACM international conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3123266.3123298","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3123266.3123298","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T03:39:28Z","timestamp":1750217968000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3123266.3123298"}},"subtitle":["A 2D+3D Fusion Approach"],"short-title":[],"issued":{"date-parts":[[2017,10,19]]},"references-count":70,"alternative-id":["10.1145\/3123266.3123298","10.1145\/3123266"],"URL":"https:\/\/doi.org\/10.1145\/3123266.3123298","relation":{},"subject":[],"published":{"date-parts":[[2017,10,19]]},"assertion":[{"value":"2017-10-19","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}