{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,1]],"date-time":"2026-05-01T16:42:02Z","timestamp":1777653722033,"version":"3.51.4"},"publisher-location":"Cham","reference-count":44,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031915802","type":"print"},{"value":"9783031915819","type":"electronic"}],"license":[{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-91581-9_10","type":"book-chapter","created":{"date-parts":[[2025,5,27]],"date-time":"2025-05-27T11:22:56Z","timestamp":1748344976000},"page":"130-147","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":3,"title":["Massively Multi-person 3D Human Motion Forecasting with\u00a0Scene Context"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0009-0002-5848-5250","authenticated-orcid":false,"given":"Felix B.","family":"Mueller","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Julian","family":"Tanke","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9447-3399","authenticated-orcid":false,"given":"Juergen","family":"Gall","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2025,5,12]]},"reference":[{"key":"10_CR1","doi-asserted-by":"publisher","unstructured":"Adeli, V., et al.: Tripod: human trajectory and pose dynamics forecasting in the wild. In: 2021 IEEE\/CVF International Conference on Computer Vision, ICCV 2021, Montreal, QC, Canada, 10\u201317 October 2021, pp. 13370\u201313380. IEEE (2021). https:\/\/doi.org\/10.1109\/ICCV48922.2021.01314","DOI":"10.1109\/ICCV48922.2021.01314"},{"key":"10_CR2","doi-asserted-by":"publisher","unstructured":"Aksan, E., Kaufmann, M., Cao, P., Hilliges, O.: A spatio-temporal transformer for 3d human motion prediction. In: International Conference on 3D Vision, 3DV 2021, London, United Kingdom, 1\u20133 December 2021, pp. 565\u2013574. IEEE (2021). https:\/\/doi.org\/10.1109\/3DV53792.2021.00066","DOI":"10.1109\/3DV53792.2021.00066"},{"key":"10_CR3","unstructured":"Bai, S., Kolter, J.Z., Koltun, V.: An empirical evaluation of generic convolutional and recurrent networks for sequence modeling. CoRR arxiv:1803.01271 (2018)"},{"key":"10_CR4","doi-asserted-by":"crossref","unstructured":"Barquero, G., Escalera, S., Palmero, C.: Belfusion: latent diffusion for behavior-driven human motion prediction. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 2317\u20132327 (2023)","DOI":"10.1109\/ICCV51070.2023.00220"},{"key":"10_CR5","doi-asserted-by":"crossref","unstructured":"Barquero, G., Escalera, S., Palmero, C.: Seamless human motion composition with blended positional encodings. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 457\u2013469 (2024)","DOI":"10.1109\/CVPR52733.2024.00051"},{"key":"10_CR6","doi-asserted-by":"crossref","unstructured":"Butepage, J., Black, M.J., Kragic, D., Kjellstrom, H.: Deep representation learning for human motion prediction and classification. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 6158\u20136166 (2017)","DOI":"10.1109\/CVPR.2017.173"},{"key":"10_CR7","doi-asserted-by":"crossref","unstructured":"Dabral, R., Mughal, M.H., Golyanik, V., Theobalt, C.: Mofusion: A framework for denoising-diffusion-based motion synthesis. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition. pp. 9760\u20139770 (2023)","DOI":"10.1109\/CVPR52729.2023.00941"},{"key":"10_CR8","doi-asserted-by":"crossref","unstructured":"Diller, C., Funkhouser, T., Dai, A.: Futurehuman3d: forecasting complex long-term 3d human behavior from video observations. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 19902\u201319914 (2024)","DOI":"10.1109\/CVPR52733.2024.01881"},{"key":"10_CR9","doi-asserted-by":"crossref","unstructured":"Fan, K., et al.: Freemotion: a unified framework for number-free text-to-motion synthesis. arXiv preprint arXiv:2405.15763 (2024)","DOI":"10.1007\/978-3-031-73242-3_6"},{"key":"10_CR10","doi-asserted-by":"crossref","unstructured":"Fragkiadaki, K., Levine, S., Felsen, P., Malik, J.: Recurrent network models for human dynamics. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 4346\u20134354 (2015)","DOI":"10.1109\/ICCV.2015.494"},{"key":"10_CR11","doi-asserted-by":"crossref","unstructured":"Guo, C., et al.: Generating diverse and natural 3d human motions from text. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 5152\u20135161 (2022)","DOI":"10.1109\/CVPR52688.2022.00509"},{"key":"10_CR12","doi-asserted-by":"crossref","unstructured":"Guo, W., Du, Y., Shen, X., Lepetit, V., Alameda-Pineda, X., Moreno-Noguer, F.: Back to mlp: a simple baseline for human motion prediction. In: Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision, pp. 4809\u20134819 (2023)","DOI":"10.1109\/WACV56688.2023.00479"},{"key":"10_CR13","unstructured":"Ho, J., Jain, A., Abbeel, P.: Denoising diffusion probabilistic models. In: Larochelle, H., Ranzato, M., Hadsell, R., Balcan, M., Lin, H. (eds.) Advances in Neural Information Processing Systems 33: Annual Conference on Neural Information Processing Systems 2020, NeurIPS 2020, 6\u201312 December 2020, virtual (2020). https:\/\/proceedings.neurips.cc\/paper\/2020\/hash\/4c5bcfec8584af0d967f1ab10179ca4b-Abstract.html"},{"key":"10_CR14","doi-asserted-by":"crossref","unstructured":"Holden, D., Saito, J., Komura, T., Joyce, T.: Learning motion manifolds with convolutional autoencoders. In: SIGGRAPH Asia 2015 Technical Briefs, pp.\u00a01\u20134 (2015)","DOI":"10.1145\/2820903.2820918"},{"key":"10_CR15","doi-asserted-by":"crossref","unstructured":"Jain, A., Zamir, A.R., Savarese, S., Saxena, A.: Structural-rnn: deep learning on spatio-temporal graphs. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 5308\u20135317 (2016)","DOI":"10.1109\/CVPR.2016.573"},{"key":"10_CR16","doi-asserted-by":"crossref","unstructured":"Jeong, J., Park, D., Yoon, K.J.: Multi-agent long-term 3d human pose forecasting via interaction-aware trajectory conditioning. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 1617\u20131628 (2024)","DOI":"10.1109\/CVPR52733.2024.00160"},{"issue":"2","key":"10_CR17","doi-asserted-by":"publisher","first-page":"64","DOI":"10.1108\/JAT-08-2013-0024","volume":"8","author":"J Kantorovitch","year":"2014","unstructured":"Kantorovitch, J., V\u00e4re, J., Pehkonen, V., Laikari, A., Sepp\u00e4l\u00e4, H.: An assistive household robot-doing more than just cleaning. J. Assist. Technol. 8(2), 64\u201376 (2014)","journal-title":"J. Assist. Technol."},{"issue":"1","key":"10_CR18","doi-asserted-by":"publisher","first-page":"4054","DOI":"10.1038\/s41467-020-17807-z","volume":"11","author":"\u0141 Kidzi\u0144ski","year":"2020","unstructured":"Kidzi\u0144ski, \u0141, Yang, B., Hicks, J.L., Rajagopal, A., Delp, S.L., Schwartz, M.H.: Deep neural networks enable quantitative movement analysis using single-camera videos. Nat. Commun. 11(1), 4054 (2020)","journal-title":"Nat. Commun."},{"key":"10_CR19","doi-asserted-by":"publisher","first-page":"731","DOI":"10.3389\/fpsyg.2015.00731","volume":"6","author":"SC Levinson","year":"2015","unstructured":"Levinson, S.C., Torreira, F.: Timing in turn-taking and its implications for processing models of language. Front. Psychol. 6, 731 (2015)","journal-title":"Front. Psychol."},{"key":"10_CR20","doi-asserted-by":"crossref","unstructured":"Li, B., Ho, E.S., Shum, H.P., Wang, H.: Two-person interaction augmentation with skeleton priors. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 1900\u20131910 (2024)","DOI":"10.1109\/CVPRW63382.2024.00196"},{"key":"10_CR21","doi-asserted-by":"crossref","unstructured":"Li, C., Zhang, Z., Lee, W.S., Lee, G.H.: Convolutional sequence to sequence model for human dynamics. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 5226\u20135234 (2018)","DOI":"10.1109\/CVPR.2018.00548"},{"key":"10_CR22","unstructured":"Li, Z., Zhou, Y., Xiao, S., He, C., Huang, Z., Li, H.: Auto-conditioned recurrent networks for extended complex human motion synthesis. arXiv preprint arXiv:1707.05363 (2017)"},{"key":"10_CR23","doi-asserted-by":"crossref","unstructured":"Liang, H., Zhang, W., Li, W., Yu, J., Xu, L.: Intergen: diffusion-based multi-human motion generation under complex interactions. Int. J. Comput. Vision., 1\u201321 (2024)","DOI":"10.1007\/s11263-024-02042-6"},{"key":"10_CR24","unstructured":"Loshchilov, I., Hutter, F.: Decoupled weight decay regularization. In: 7th International Conference on Learning Representations, ICLR 2019, New Orleans, LA, USA, 6\u20139 May 2019. OpenReview.net (2019). https:\/\/openreview.net\/forum?id=Bkg6RiCqY7"},{"key":"10_CR25","doi-asserted-by":"publisher","unstructured":"Lucas, T., Baradel, F., Weinzaepfel, P., Rogez, G.: Posegpt: quantization-based 3d human motion generation and forecasting. In: Avidan, S., Brostow, G.J., Ciss\u00e9, M., Farinella, G.M., Hassner, T. (eds.) Computer Vision - ECCV 2022 - 17th European Conference, Tel Aviv, Israel, 23\u201327 October 2022, Proceedings, Part VI. Lecture Notes in Computer Science, vol. 13666, pp. 417\u2013435. Springer, Heidelberg (2022). https:\/\/doi.org\/10.1007\/978-3-031-20068-7_24","DOI":"10.1007\/978-3-031-20068-7_24"},{"key":"10_CR26","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"474","DOI":"10.1007\/978-3-030-58568-6_28","volume-title":"Computer Vision \u2013 ECCV 2020","author":"W Mao","year":"2020","unstructured":"Mao, W., Liu, M., Salzmann, M.: History repeats itself: human motion prediction via motion attention. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020. LNCS, vol. 12359, pp. 474\u2013489. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58568-6_28"},{"key":"10_CR27","doi-asserted-by":"publisher","unstructured":"Mao, W., Liu, M., Salzmann, M., Li, H.: Learning trajectory dependencies for human motion prediction. In: 2019 IEEE\/CVF International Conference on Computer Vision, ICCV 2019, Seoul, Korea (South), 27 October\u20132 November 2019, pp. 9488\u20139496. IEEE (2019). https:\/\/doi.org\/10.1109\/ICCV.2019.00958","DOI":"10.1109\/ICCV.2019.00958"},{"key":"10_CR28","doi-asserted-by":"crossref","unstructured":"Martinez, J., Black, M.J., Romero, J.: On human motion prediction using recurrent neural networks. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 2891\u20132900 (2017)","DOI":"10.1109\/CVPR.2017.497"},{"key":"10_CR29","doi-asserted-by":"crossref","unstructured":"Mughal, M.H., Dabral, R., Habibie, I., Donatelli, L., Habermann, M., Theobalt, C.: Convofusion: multi-modal conversational diffusion for co-speech gesture synthesis. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 1388\u20131398 (2024)","DOI":"10.1109\/CVPR52733.2024.00138"},{"key":"10_CR30","doi-asserted-by":"crossref","unstructured":"Ng, E., et al.: From audio to photoreal embodiment: synthesizing humans in conversations. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 1001\u20131010 (2024)","DOI":"10.1109\/CVPR52733.2024.00101"},{"key":"10_CR31","unstructured":"Nichol, A.Q., Dhariwal, P.: Improved denoising diffusion probabilistic models. In: Meila, M., Zhang, T. (eds.) Proceedings of the 38th International Conference on Machine Learning, ICML 2021, 18\u201324 July 2021, Virtual Event. Proceedings of Machine Learning Research, vol.\u00a0139, pp. 8162\u20138171. PMLR (2021). http:\/\/proceedings.mlr.press\/v139\/nichol21a.html"},{"key":"10_CR32","doi-asserted-by":"publisher","unstructured":"Peng, X., Shen, Y., Wang, H., Nie, B., Wang, Y., Wu, Z.: Somoformer: social-aware motion transformer for multi-person motion prediction. CoRR arxiv:2208.09224 (2022). https:\/\/doi.org\/10.48550\/arXiv.2208.09224","DOI":"10.48550\/arXiv.2208.09224"},{"key":"10_CR33","doi-asserted-by":"publisher","unstructured":"Prokudin, S., Lassner, C., Romero, J.: Efficient learning on point clouds with basis point sets. In: 2019 IEEE\/CVF International Conference on Computer Vision, ICCV 2019, Seoul, Korea (South), 27 October\u20132 November 2019, pp. 4331\u20134340. IEEE (2019). https:\/\/doi.org\/10.1109\/ICCV.2019.00443","DOI":"10.1109\/ICCV.2019.00443"},{"key":"10_CR34","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"234","DOI":"10.1007\/978-3-319-24574-4_28","volume-title":"Medical Image Computing and Computer-Assisted Intervention \u2013 MICCAI 2015","author":"O Ronneberger","year":"2015","unstructured":"Ronneberger, O., Fischer, P., Brox, T.: U-net: convolutional networks for biomedical image segmentation. In: Navab, N., Hornegger, J., Wells, W.M., Frangi, A.F. (eds.) MICCAI 2015. LNCS, vol. 9351, pp. 234\u2013241. Springer, Cham (2015). https:\/\/doi.org\/10.1007\/978-3-319-24574-4_28"},{"key":"10_CR35","doi-asserted-by":"crossref","unstructured":"R\u00f6smann, C., Oeljeklaus, M., Hoffmann, F., Bertram, T.: Online trajectory prediction and planning for social robot navigation. In: 2017 IEEE International Conference on Advanced Intelligent Mechatronics (AIM), pp. 1255\u20131260. IEEE (2017)","DOI":"10.1109\/AIM.2017.8014190"},{"key":"10_CR36","doi-asserted-by":"crossref","unstructured":"Sacks, H., Schegloff, E.A., Jefferson, G.: A simplest systematics for the organization of turn-taking for conversation. Language 50(4), 696\u2013735 (1974)","DOI":"10.1353\/lan.1974.0010"},{"key":"10_CR37","doi-asserted-by":"crossref","unstructured":"Schmidt, R.A.: Anticipation and timing in human motor performance. Psychol. Bull. 70(6p1), 631 (1968)","DOI":"10.1037\/h0026740"},{"key":"10_CR38","unstructured":"Tanke, J., Kwon, O., Mueller, F.B., Doering, A., Gall, J.: Humans in kitchens: a dataset for multi-person human motion forecasting with scene context. In: Oh, A., Naumann, T., Globerson, A., Saenko, K., Hardt, M., Levine, S. (eds.) Advances in Neural Information Processing Systems 36: Annual Conference on Neural Information Processing Systems 2023, NeurIPS 2023, New Orleans, LA, USA, 10\u201316 December 2023 (2023). http:\/\/papers.nips.cc\/paper_files\/paper\/2023\/hash\/2052b3e0617ecb2ce9474a6feaf422b3-Abstract-Datasets_and_Benchmarks.html"},{"key":"10_CR39","doi-asserted-by":"crossref","unstructured":"Tanke, J., Zaveri, C., Gall, J.: Intention-based long-term human motion anticipation. In: 2021 International Conference on 3D Vision (3DV), pp. 596\u2013605. IEEE (2021)","DOI":"10.1109\/3DV53792.2021.00069"},{"key":"10_CR40","doi-asserted-by":"publisher","unstructured":"Tanke, J., et al.: Social diffusion: long-term multiple human motion anticipation. In: IEEE\/CVF International Conference on Computer Vision, ICCV 2023, Paris, France, 1\u20136 October 2023, pp. 9567\u20139577. IEEE (2023). https:\/\/doi.org\/10.1109\/ICCV51070.2023.00880","DOI":"10.1109\/ICCV51070.2023.00880"},{"key":"10_CR41","doi-asserted-by":"crossref","unstructured":"Tseng, J., Castellon, R., Liu, K.: Edge: editable dance generation from music. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 448\u2013458 (2023)","DOI":"10.1109\/CVPR52729.2023.00051"},{"key":"10_CR42","unstructured":"Vaswani, A., et al.: Attention is all you need. In: Guyon, I., et al. (eds.) Advances in Neural Information Processing Systems 30: Annual Conference on Neural Information Processing Systems 2017, Long Beach, CA, USA, 4\u20139 December 2017, pp. 5998\u20136008 (2017). https:\/\/proceedings.neurips.cc\/paper\/2017\/hash\/3f5ee243547dee91fbd053c1c4a845aa-Abstract.html"},{"key":"10_CR43","doi-asserted-by":"publisher","unstructured":"Vendrow, E., Kumar, S., Adeli, E., Rezatofighi, H.: Somoformer: multi-person pose forecasting with transformers. CoRR arxiv:2208.14023 (2022). https:\/\/doi.org\/10.48550\/arXiv.2208.14023","DOI":"10.48550\/arXiv.2208.14023"},{"key":"10_CR44","unstructured":"Wang, J., Xu, H., Narasimhan, M., Wang, X.: Multi-person 3d motion prediction with multi-range transformers. In: Ranzato, M., Beygelzimer, A., Dauphin, Y.N., Liang, P., Vaughan, J.W. (eds.) Advances in Neural Information Processing Systems 34: Annual Conference on Neural Information Processing Systems 2021, NeurIPS 2021, 6\u201314 December 2021, Virtual, pp. 6036\u20136049 (2021). https:\/\/proceedings.neurips.cc\/paper\/2021\/hash\/2fd5d41ec6cfab47e32164d5624269b1-Abstract.html"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2024 Workshops"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-91581-9_10","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,6]],"date-time":"2025-09-06T16:05:30Z","timestamp":1757174730000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-91581-9_10"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025]]},"ISBN":["9783031915802","9783031915819"],"references-count":44,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-91581-9_10","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025]]},"assertion":[{"value":"12 May 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Milan","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 September 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 October 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2024.ecva.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}