{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,17]],"date-time":"2025-11-17T17:24:30Z","timestamp":1763400270816,"version":"3.40.3"},"publisher-location":"Singapore","reference-count":105,"publisher":"Springer Nature Singapore","isbn-type":[{"type":"print","value":"9789819608843"},{"type":"electronic","value":"9789819608850"}],"license":[{"start":{"date-parts":[[2024,12,7]],"date-time":"2024-12-07T00:00:00Z","timestamp":1733529600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,12,7]],"date-time":"2024-12-07T00:00:00Z","timestamp":1733529600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-981-96-0885-0_12","type":"book-chapter","created":{"date-parts":[[2024,12,6]],"date-time":"2024-12-06T08:02:04Z","timestamp":1733472124000},"page":"203-224","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["RD-Diff: RLTransformer -Based Diffusion Model with\u00a0Diversity-Inducing Modulator for\u00a0Human Motion Prediction"],"prefix":"10.1007","author":[{"given":"Haosong","family":"Zhang","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Mei Chee","family":"Leong","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Liyuan","family":"Li","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Weisi","family":"Lin","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2024,12,7]]},"reference":[{"key":"12_CR1","doi-asserted-by":"crossref","unstructured":"Ahn, H., Mascaro, E.V., Lee, D.: Can we use diffusion probabilistic models for 3d motion prediction? In: arXiv preprint arXiv:2302.14503 (2023)","DOI":"10.1109\/ICRA48891.2023.10160722"},{"key":"12_CR2","doi-asserted-by":"crossref","unstructured":"Aksan, E., Kaufmann, M., Cao, P., Hilliges, O.: A spatio-temporal transformer for 3d human motion prediction. In: 2021 International Conference on 3D Vision (3DV). pp. 565\u2013574 (2021)","DOI":"10.1109\/3DV53792.2021.00066"},{"key":"12_CR3","doi-asserted-by":"crossref","unstructured":"Aksan, E., Cao, P., Kaufmann, M., Hilliges, O.: Attention, please: A spatio-temporal transformer for 3d human motion prediction. arXiv preprint arXiv:2004.086922(3), 5 (2020)","DOI":"10.1109\/3DV53792.2021.00066"},{"key":"12_CR4","doi-asserted-by":"crossref","unstructured":"Aksan, E., Kaufmann, M., Cao, P., Hilliges, O.: A spatio-temporal transformer for 3d human motion prediction. In: 2021 International Conference on 3D Vision (3DV). pp. 565\u2013574. IEEE (2021)","DOI":"10.1109\/3DV53792.2021.00066"},{"key":"12_CR5","doi-asserted-by":"crossref","unstructured":"Aksan, E., Kaufmann, M., Hilliges, O.: Structured prediction helps 3d human motion modelling. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision. pp. 7144\u20137153 (2019)","DOI":"10.1109\/ICCV.2019.00724"},{"key":"12_CR6","doi-asserted-by":"crossref","unstructured":"Aliakbarian, S., Saleh, F., Petersson, L., Gould, S., Salzmann, M.: Contextually plausible and diverse 3d human motion prediction. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision. pp. 11333\u201311342 (2021)","DOI":"10.1109\/ICCV48922.2021.01114"},{"key":"12_CR7","doi-asserted-by":"crossref","unstructured":"Aliakbarian, S., Saleh, F.S., Salzmann, M., Petersson, L., Gould, S.: A stochastic conditioning scheme for diverse human motion prediction. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. pp. 5223\u20135232 (2020)","DOI":"10.1109\/CVPR42600.2020.00527"},{"key":"12_CR8","doi-asserted-by":"crossref","unstructured":"Barquero, G., Escalera, S., Palmero, C.: Belfusion: Latent diffusion for behavior-driven human motion prediction. In: arXiv preprint arXiv:2211.14304 (2022)","DOI":"10.1109\/ICCV51070.2023.00220"},{"key":"12_CR9","doi-asserted-by":"crossref","unstructured":"Barsoum, E., Kender, J., Liu, Z.: Hp-gan: Probabilistic 3d human motion prediction via gan. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition Workshops. pp. 1418\u20131427 (2018)","DOI":"10.1109\/CVPRW.2018.00191"},{"key":"12_CR10","doi-asserted-by":"crossref","unstructured":"Barsoum, E., Kender, J., Liu, Z.: Hp-gan: Probabilistic 3d human motion prediction via gan. In: Proceedings of the IEEE conference on computer vision and pattern recognition workshops. pp. 1418\u20131427 (2018)","DOI":"10.1109\/CVPRW.2018.00191"},{"key":"12_CR11","doi-asserted-by":"crossref","unstructured":"Bhattacharyya, A., Schiele, B., Fritz, M.: Accurate and diverse sampling of sequences based on a \u201cbest of many\u201d sample objective. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition. pp. 8485\u20138493 (2018)","DOI":"10.1109\/CVPR.2018.00885"},{"key":"12_CR12","doi-asserted-by":"crossref","unstructured":"Bhattacharyya, A., Schiele, B., Fritz, M.: Accurate and diverse sampling of sequences based on a \u201cbest of many\u201d sample objective. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition. pp. 8485\u20138493 (2018)","DOI":"10.1109\/CVPR.2018.00885"},{"key":"12_CR13","doi-asserted-by":"crossref","unstructured":"Bouazizi, A., Holzbock, A., Kressel, U., Dietmayer, K., Belagiannis, V.: Motionmixer: Mlp-based 3d human body pose forecasting. In: arXiv preprint arXiv:2207.00499 (2022)","DOI":"10.24963\/ijcai.2022\/111"},{"key":"12_CR14","unstructured":"Cai, H., Gan, C., Han, S.: Efficientvit: Enhanced linear attention for high-resolution low-computation visual recognition. arXiv preprint arXiv:2205.14756 (2022)"},{"key":"12_CR15","doi-asserted-by":"crossref","unstructured":"Cai, H., Li, J., Hu, M., Gan, C., Han, S.: Efficientvit: Multi-scale linear attention for high-resolution dense prediction (2024)","DOI":"10.1109\/ICCV51070.2023.01587"},{"key":"12_CR16","doi-asserted-by":"publisher","unstructured":"Cai, Y., Huang, L., Wang, Y., Cham, T.-J., Cai, J., Yuan, J., Liu, J., Yang, X., Zhu, Y., Shen, X., Liu, D., Liu, J., Thalmann, N.M.: Learning Progressive Joint Propagation for Human Motion Prediction. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020. LNCS, vol. 12352, pp. 226\u2013242. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58571-6_14","DOI":"10.1007\/978-3-030-58571-6_14"},{"key":"12_CR17","doi-asserted-by":"crossref","unstructured":"Cai, Y., Huang, L., Wang, Y., Cham, T.J., Cai, J., Yuan, J., Liu, J., Yang, X., Zhu, Y., Shen, X., et\u00a0al.: Learning progressive joint propagation for human motion prediction. In: European Conference on Computer Vision. pp. 226\u2013242. Springer (2020)","DOI":"10.1007\/978-3-030-58571-6_14"},{"key":"12_CR18","doi-asserted-by":"crossref","unstructured":"Cai, Y., Wang, Y., Zhu, Y., Cham, T.J., Cai, J., Yuan, J., Liu, J., Zheng, C., Yan, S., Ding, H., et\u00a0al.: A unified 3d human motion synthesis model via conditional variational auto-encoder. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision. pp. 11645\u201311655 (2021)","DOI":"10.1109\/ICCV48922.2021.01144"},{"key":"12_CR19","doi-asserted-by":"crossref","unstructured":"Chen, L.H., Zhang, J., Li, Y., Pang, Y., Xia, X., Liu, T.: Humanmac: Masked motion completion for human motion prediction. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV). pp. 9544\u20139555 (October 2023)","DOI":"10.1109\/ICCV51070.2023.00875"},{"key":"12_CR20","doi-asserted-by":"crossref","unstructured":"Chiu, H.k., Adeli, E., Wang, B., Huang, D.A., Niebles, J.C.: Action-agnostic human pose forecasting. In: 2019 IEEE Winter Conference on Applications of Computer Vision (WACV). pp. 1423\u20131432. IEEE (2019)","DOI":"10.1109\/WACV.2019.00156"},{"key":"12_CR21","doi-asserted-by":"crossref","unstructured":"Corona, E., Pumarola, A., Alenya, G., Moreno-Noguer, F.: Context-aware human motion prediction. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. pp. 6992\u20137001 (2020)","DOI":"10.1109\/CVPR42600.2020.00702"},{"issue":"9","key":"12_CR22","doi-asserted-by":"publisher","first-page":"10850","DOI":"10.1109\/tpami.2023.3261988","volume":"45","author":"FA Croitoru","year":"2023","unstructured":"Croitoru, F.A., Hondru, V., Ionescu, R.T., Shah, M.: Diffusion models in vision: A survey. IEEE Trans. Pattern Anal. Mach. Intell. 45(9), 10850\u201310869 (2023). https:\/\/doi.org\/10.1109\/tpami.2023.3261988","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"12_CR23","doi-asserted-by":"crossref","unstructured":"Cui, Q., Sun, H.: Towards accurate 3d human motion prediction from incomplete observations. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. pp. 4801\u20134810 (2021)","DOI":"10.1109\/CVPR46437.2021.00477"},{"key":"12_CR24","doi-asserted-by":"crossref","unstructured":"Cui, Q., Sun, H., Yang, F.: Learning dynamic relationships for 3d human motion prediction. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition. pp. 6519\u20136527 (2020)","DOI":"10.1109\/CVPR42600.2020.00655"},{"key":"12_CR25","doi-asserted-by":"crossref","unstructured":"Dang, L., Nie, Y., Long, C., Zhang, Q., Li, G.: Msr-gcn: Multi-scale residual graph convolution networks for human motion prediction. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision. pp. 11467\u201311476 (2021)","DOI":"10.1109\/ICCV48922.2021.01127"},{"key":"12_CR26","doi-asserted-by":"crossref","unstructured":"Dang, L., Nie, Y., Long, C., Zhang, Q., Li, G.: Diverse human motion prediction via gumbel-softmax sampling from an auxiliary space. In: Proceedings of the 30th ACM International Conference on Multimedia. pp. 5162\u20135171 (2022)","DOI":"10.1145\/3503161.3547956"},{"key":"12_CR27","doi-asserted-by":"crossref","unstructured":"Dang, L., Nie, Y., Long, C., Zhang, Q., Li, G.: Msr-gcn: Multi-scale residual graph convolution networks for human motion prediction. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision. pp. 11467\u201311476 (2021)","DOI":"10.1109\/ICCV48922.2021.01127"},{"key":"12_CR28","unstructured":"Dhariwal, P., Nichol, A.: Diffusion models beat gans on image synthesis. In: Advances in Neural Information Processing Systems. vol.\u00a034, pp. 8780\u20138794 (2021)"},{"key":"12_CR29","unstructured":"Dilokthanakul, N., Mediano, P.A., Garnelo, M., Lee, M.C., Salimbeni, H., Arulkumaran, K., Shanahan, M.: Deep unsupervised clustering with gaussian mixture variational autoencoders. arXiv preprint arXiv:1611.02648 (2016)"},{"key":"12_CR30","doi-asserted-by":"crossref","unstructured":"Fragkiadaki, K., Levine, S., Felsen, P., Malik, J.: Recurrent network models for human dynamics. In: Proceedings of the IEEE International Conference on Computer Vision. pp. 4346\u20134354 (2015)","DOI":"10.1109\/ICCV.2015.494"},{"key":"12_CR31","doi-asserted-by":"crossref","unstructured":"Fragkiadaki, K., Levine, S., Felsen, P., Malik, J.: Recurrent network models for human dynamics. In: Proceedings of the IEEE international conference on computer vision. pp. 4346\u20134354 (2015)","DOI":"10.1109\/ICCV.2015.494"},{"key":"12_CR32","unstructured":"Goodfellow, I., Pouget-Abadie, J., Mirza, M., Xu, B., Warde-Farley, D., Ozair, S., Courville, A., Bengio, Y.: Generative adversarial nets. Advances in neural information processing systems 27 (2014)"},{"key":"12_CR33","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2023.109998","volume":"146","author":"C Gu","year":"2024","unstructured":"Gu, C., Yu, J., Zhang, C.: Learning disentangled representations for controllable human motion prediction. Pattern Recogn. 146, 109998 (2024)","journal-title":"Pattern Recogn."},{"key":"12_CR34","doi-asserted-by":"crossref","unstructured":"Guo, W., Du, Y., Shen, X., Lepetit, V., Alameda-Pineda, X., Moreno-Noguer, F.: Back to mlp: A simple baseline for human motion prediction. In: Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision. pp. 4809\u20134819 (2023)","DOI":"10.1109\/WACV56688.2023.00479"},{"key":"12_CR35","doi-asserted-by":"crossref","unstructured":"Gurumurthy, S., Kiran\u00a0Sarvadevabhatla, R., Venkatesh\u00a0Babu, R.: Deligan: Generative adversarial networks for diverse and limited data. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition. pp. 166\u2013174 (2017)","DOI":"10.1109\/CVPR.2017.525"},{"key":"12_CR36","doi-asserted-by":"crossref","unstructured":"Gurumurthy, S., Kiran\u00a0Sarvadevabhatla, R., Venkatesh\u00a0Babu, R.: Deligan: Generative adversarial networks for diverse and limited data. In: Proceedings of the IEEE conference on computer vision and pattern recognition. pp. 166\u2013174 (2017)","DOI":"10.1109\/CVPR.2017.525"},{"key":"12_CR37","doi-asserted-by":"crossref","unstructured":"Han, D., Pan, X., Han, Y., Song, S., Huang, G.: Flatten transformer: Vision transformer using focused linear attention (2023)","DOI":"10.1109\/ICCV51070.2023.00548"},{"key":"12_CR38","unstructured":"Ho, J., Jain, A., Abbeel, P.: Denoising diffusion probabilistic models. In: Advances in Neural Information Processing Systems. vol.\u00a033, pp. 6840\u20136851 (2020)"},{"key":"12_CR39","doi-asserted-by":"crossref","unstructured":"Ionescu, C., Papava, D., Olaru, V., Sminchisescu, C.: Human3. 6m: Large scale datasets and predictive methods for 3d human sensing in natural environments. IEEE transactions on pattern analysis and machine intelligence 36(7), 1325\u20131339 (2013)","DOI":"10.1109\/TPAMI.2013.248"},{"key":"12_CR40","doi-asserted-by":"crossref","unstructured":"Jain, A., Zamir, A.R., Savarese, S., Saxena, A.: Structural-rnn: Deep learning on spatio-temporal graphs. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition. pp. 5308\u20135317 (2016)","DOI":"10.1109\/CVPR.2016.573"},{"key":"12_CR41","unstructured":"Katharopoulos, A., Vyas, A., Pappas, N., Fleuret, F.: Transformers are rnns: Fast autoregressive transformers with linear attention. In: International Conference on Machine Learning. pp. 5156\u20135165. PMLR (2020)"},{"issue":"1","key":"12_CR42","first-page":"31","volume":"114","author":"SA Khayam","year":"2003","unstructured":"Khayam, S.A.: The discrete cosine transform (dct): theory and application. Michigan State University 114(1), 31 (2003)","journal-title":"Michigan State University"},{"key":"12_CR43","unstructured":"Kingma, D.P., Ba, J.: Adam: A method for stochastic optimization. In: arXiv preprint arXiv:1412.6980 (2014)"},{"key":"12_CR44","unstructured":"Kingma, D.P., Welling, M.: Auto-encoding variational bayes. arXiv preprint arXiv:1312.6114 (2013)"},{"key":"12_CR45","doi-asserted-by":"crossref","unstructured":"Kundu, J.N., Gor, M., Babu, R.V.: Bihmp-gan: Bidirectional 3d human motion prediction gan. In: Proceedings of the AAAI conference on artificial intelligence. vol.\u00a033, pp. 8553\u20138560 (2019)","DOI":"10.1609\/aaai.v33i01.33018553"},{"key":"12_CR46","doi-asserted-by":"crossref","unstructured":"Lee, M.L., Behdad, S., Liang, X., Zheng, M.: Task allocation and planning for product disassembly with human\u2013robot collaboration. In: Robotics and Computer-Integrated Manufacturing. vol.\u00a076, p. 102306 (2022)","DOI":"10.1016\/j.rcim.2021.102306"},{"key":"12_CR47","doi-asserted-by":"crossref","unstructured":"Lee, M.L., Liu, W., Behdad, S., Liang, X., Zheng, M.: Robot-assisted disassembly sequence planning with real-time human motion prediction. In: IEEE Transactions on Systems, Man, and Cybernetics: Systems. vol.\u00a053, pp. 438\u2013450 (2022)","DOI":"10.1109\/TSMC.2022.3185889"},{"key":"12_CR48","doi-asserted-by":"publisher","first-page":"2562","DOI":"10.1109\/TIP.2020.3038362","volume":"30","author":"B Li","year":"2020","unstructured":"Li, B., Tian, J., Zhang, Z., Feng, H., Li, X.: Multitask non-autoregressive model for human motion prediction. IEEE Trans. Image Process. 30, 2562\u20132574 (2020)","journal-title":"IEEE Trans. Image Process."},{"key":"12_CR49","doi-asserted-by":"crossref","unstructured":"Li, M., Chen, S., Zhao, Y., Zhang, Y., Wang, Y., Tian, Q.: Dynamic multiscale graph neural networks for 3d skeleton-based human motion prediction. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. pp. 214\u2013223 (2020)","DOI":"10.1109\/CVPR42600.2020.00029"},{"key":"12_CR50","doi-asserted-by":"crossref","unstructured":"Li, M., Chen, S., Chen, X., Zhang, Y., Wang, Y., Tian, Q.: Symbiotic graph neural networks for 3d skeleton-based human action recognition and motion prediction. IEEE Transactions on Pattern Analysis and Machine Intelligence (2021)","DOI":"10.1109\/TPAMI.2021.3053765"},{"key":"12_CR51","doi-asserted-by":"crossref","unstructured":"Li, M., Chen, S., Zhao, Y., Zhang, Y., Wang, Y., Tian, Q.: Dynamic multiscale graph neural networks for 3d skeleton based human motion prediction. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. pp. 214\u2013223 (2020)","DOI":"10.1109\/CVPR42600.2020.00029"},{"key":"12_CR52","unstructured":"Li, R., Su, J., Duan, C., Zheng, S.: Linear attention mechanism: An efficient attention for semantic segmentation (2020), https:\/\/arxiv.org\/abs\/2007.14902"},{"key":"12_CR53","unstructured":"Li, Z., Zhou, Y., Xiao, S., He, C., Huang, Z., Li, H.: Auto-conditioned recurrent networks for extended complex human motion synthesis. arXiv preprint arXiv:1707.05363 (2017)"},{"key":"12_CR54","doi-asserted-by":"crossref","unstructured":"Liu, W., Liang, X., Zheng, M.: Dynamic model informed human motion prediction based on unscented kalman filter. In: IEEE\/ASME Transactions on Mechatronics. vol.\u00a027, pp. 5287\u20135295 (2022)","DOI":"10.1109\/TMECH.2022.3173167"},{"key":"12_CR55","doi-asserted-by":"crossref","unstructured":"Liu, Z., Lyu, K., Wu, S., Chen, H., Hao, Y., Ji, S.: Aggregated multi-gans for controlled 3d human motion prediction. In: Proceedings of the AAAI Conference on Artificial Intelligence. vol.\u00a035, pp. 2225\u20132232 (2021)","DOI":"10.1609\/aaai.v35i3.16321"},{"key":"12_CR56","doi-asserted-by":"crossref","unstructured":"Liu, Z., Su, P., Wu, S., Shen, X., Chen, H., Hao, Y., Wang, M.: Motion prediction using trajectory cues. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision. pp. 13299\u201313308 (2021)","DOI":"10.1109\/ICCV48922.2021.01305"},{"key":"12_CR57","doi-asserted-by":"crossref","unstructured":"Liu, Z., Wu, S., Jin, S., Liu, Q., Ji, S., Lu, S., Cheng, L.: Investigating pose representations and motion contexts modeling for 3d motion prediction. IEEE Transactions on Pattern Analysis and Machine Intelligence (2022)","DOI":"10.1109\/TPAMI.2021.3139918"},{"key":"12_CR58","doi-asserted-by":"crossref","unstructured":"Lugmayr, A., Danelljan, M., Romero, A., Yu, F., Timofte, R., Van\u00a0Gool, L.: Repaint: Inpainting using denoising diffusion probabilistic models. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. pp. 11461\u201311471 (2022)","DOI":"10.1109\/CVPR52688.2022.01117"},{"key":"12_CR59","doi-asserted-by":"crossref","unstructured":"Lyu, K., Liu, Z., Wu, S., Chen, H., Zhang, X., Yin, Y.: Learning human motion prediction via stochastic differential equations. In: Proceedings of the 29th ACM International Conference on Multimedia. pp. 4976\u20134984 (2021)","DOI":"10.1145\/3474085.3475630"},{"key":"12_CR60","doi-asserted-by":"crossref","unstructured":"Ma, H., Li, J., Hosseini, R., Tomizuka, M., Choi, C.: Multi-objective diverse human motion prediction with knowledge distillation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. pp. 8161\u20138171 (2022)","DOI":"10.1109\/CVPR52688.2022.00799"},{"key":"12_CR61","doi-asserted-by":"crossref","unstructured":"Ma, T., Nie, Y., Long, C., Zhang, Q., Li, G.: Progressively generating better initial guesses towards next stages for high-quality human motion prediction. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR). pp. 6437\u20136446 (June 2022)","DOI":"10.1109\/CVPR52688.2022.00633"},{"key":"12_CR62","doi-asserted-by":"publisher","unstructured":"Mao, W., Liu, M., Salzmann, M.: History Repeats Itself: Human Motion Prediction via Motion Attention. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020. LNCS, vol. 12359, pp. 474\u2013489. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58568-6_28","DOI":"10.1007\/978-3-030-58568-6_28"},{"key":"12_CR63","doi-asserted-by":"crossref","unstructured":"Mao, W., Liu, M., Salzmann, M.: Generating smooth pose sequences for diverse human motion prediction. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision. pp. 13309\u201313318 (2021)","DOI":"10.1109\/ICCV48922.2021.01306"},{"key":"12_CR64","doi-asserted-by":"crossref","unstructured":"Mao, W., Liu, M., Salzmann, M., Li, H.: Learning trajectory dependencies for human motion prediction. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision. pp. 9489\u20139497 (2019)","DOI":"10.1109\/ICCV.2019.00958"},{"key":"12_CR65","doi-asserted-by":"crossref","unstructured":"Mao, W., Liu, M., Salzmann, M.: History repeats itself: Human motion prediction via motion attention. In: European Conference on Computer Vision. pp. 474\u2013489. Springer (2020)","DOI":"10.1007\/978-3-030-58568-6_28"},{"key":"12_CR66","doi-asserted-by":"crossref","unstructured":"Mao, W., Liu, M., Salzmann, M.: Generating smooth pose sequences for diverse human motion prediction. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision. pp. 13309\u201313318 (2021)","DOI":"10.1109\/ICCV48922.2021.01306"},{"key":"12_CR67","doi-asserted-by":"crossref","unstructured":"Mao, W., Liu, M., Salzmann, M., Li, H.: Learning trajectory dependencies for human motion prediction. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision. pp. 9489\u20139497 (2019)","DOI":"10.1109\/ICCV.2019.00958"},{"key":"12_CR68","doi-asserted-by":"crossref","unstructured":"Martinez, J., Black, M.J., Romero, J.: On human motion prediction using recurrent neural networks. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition. pp. 2891\u20132900 (2017)","DOI":"10.1109\/CVPR.2017.497"},{"key":"12_CR69","doi-asserted-by":"crossref","unstructured":"Martinez, J., Black, M.J., Romero, J.: On human motion prediction using recurrent neural networks. In: Proceedings of the IEEE conference on computer vision and pattern recognition. pp. 2891\u20132900 (2017)","DOI":"10.1109\/CVPR.2017.497"},{"key":"12_CR70","doi-asserted-by":"crossref","unstructured":"Mart\u00ednez-Gonz\u00e1lez, A., Villamizar, M., Odobez, J.M.: Pose transformers (potr): Human motion prediction with non-autoregressive transformers. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision. pp. 2276\u20132284 (2021)","DOI":"10.1109\/ICCVW54120.2021.00257"},{"key":"12_CR71","doi-asserted-by":"crossref","unstructured":"Mart\u00ednez-Gonz\u00e1lez, A., Villamizar, M., Odobez, J.M.: Pose transformers (potr): Human motion prediction with non-autoregressive transformers. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision. pp. 2276\u20132284 (2021)","DOI":"10.1109\/ICCVW54120.2021.00257"},{"key":"12_CR72","unstructured":"Nichol, A.Q., Dhariwal, P.: Improved denoising diffusion probabilistic models. In: International Conference on Machine Learning. pp. 8162\u20138171 (2021)"},{"key":"12_CR73","unstructured":"Pascanu, R., Mikolov, T., Bengio, Y.: On the difficulty of training recurrent neural networks. In: International Conference on Machine Learning. pp. 1310\u20131318 (2013)"},{"key":"12_CR74","unstructured":"Paszke, A., Gross, S., Massa, F., Lerer, A., Bradbury, J., Chanan, G., Chintala, S.: Pytorch: An imperative style, high-performance deep learning library. In: Advances in Neural Information Processing Systems. vol.\u00a032 (2019)"},{"key":"12_CR75","unstructured":"Pavllo, D., Grangier, D., Auli, M.: Quaternet: A quaternion-based recurrent model for human motion. In: arXiv preprint arXiv:1805.06485 (2018)"},{"key":"12_CR76","doi-asserted-by":"crossref","unstructured":"Perez, E., Strub, F., de\u00a0Vries, H., Dumoulin, V., Courville, A.: Film: Visual reasoning with a general conditioning layer (2017)","DOI":"10.1609\/aaai.v32i1.11671"},{"key":"12_CR77","unstructured":"Qin, Z., Sun, W., Deng, H., Li, D., Wei, Y., Lv, B., Yan, J., Kong, L., Zhong, Y.: cosformer: Rethinking softmax in attention. In: International Conference on Learning Representations (2022)"},{"key":"12_CR78","doi-asserted-by":"crossref","unstructured":"Rombach, R., Blattmann, A., Lorenz, D., Esser, P., Ommer, B.: High-resolution image synthesis with latent diffusion models. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. pp. 10684\u201310695 (2022)","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"12_CR79","doi-asserted-by":"crossref","unstructured":"Ronneberger, O., Fischer, P., Brox, T.: U-net: Convolutional networks for biomedical image segmentation (2015)","DOI":"10.1007\/978-3-319-24574-4_28"},{"key":"12_CR80","doi-asserted-by":"crossref","unstructured":"Saadatnejad, S., Rasekh, A., Mofayezi, M., Medghalchi, Y., Rajabzadeh, S., Mordan, T., Alahi, A.: A generic diffusion-based approach for 3d human pose prediction in the wild. In: 2023 IEEE International Conference on Robotics and Automation (ICRA). pp. 8246\u20138253 (2023)","DOI":"10.1109\/ICRA48891.2023.10160399"},{"key":"12_CR81","doi-asserted-by":"crossref","unstructured":"Sajedi, S., Liu, W., Eltouny, K., Behdad, S., Zheng, M., Liang, X.: Uncertainty-assisted image-processing for human-robot close collaboration. In: IEEE Robotics and Automation Letters. vol.\u00a07, pp. 4236\u20134243 (2022)","DOI":"10.1109\/LRA.2022.3150487"},{"issue":"9","key":"12_CR82","doi-asserted-by":"publisher","first-page":"5529","DOI":"10.1007\/s11042-019-08269-7","volume":"79","author":"HF Sang","year":"2020","unstructured":"Sang, H.F., Chen, Z.Z., He, D.K.: Human motion prediction based on attention mechanism. Multimedia Tools and Applications 79(9), 5529\u20135544 (2020)","journal-title":"Multimedia Tools and Applications"},{"key":"12_CR83","unstructured":"Si, C., Huang, Z., Jiang, Y., Liu, Z.: Freeu: Free lunch in diffusion u-net (2023), https:\/\/arxiv.org\/abs\/2309.11497"},{"issue":"1","key":"12_CR84","doi-asserted-by":"publisher","first-page":"4","DOI":"10.1007\/s11263-009-0273-6","volume":"87","author":"L Sigal","year":"2010","unstructured":"Sigal, L., Balan, A.O., Black, M.J.: Humaneva: Synchronized video and motion capture dataset and baseline algorithm for evaluation of articulated human motion. Int. J. Comput. Vision 87(1), 4\u201327 (2010)","journal-title":"Int. J. Comput. Vision"},{"key":"12_CR85","unstructured":"Song, J., Meng, C., Ermon, S.: Denoising diffusion implicit models. In: arXiv preprint arXiv:2010.02502 (2020)"},{"key":"12_CR86","doi-asserted-by":"crossref","unstructured":"Song, S., Lan, C., Xing, J., Zeng, W., Liu, J.: An end-to-end spatio-temporal attention model for human action recognition from skeleton data. In: Proceedings of the AAAI conference on artificial intelligence. vol.\u00a031 (2017)","DOI":"10.1609\/aaai.v31i1.11212"},{"key":"12_CR87","doi-asserted-by":"crossref","unstructured":"Su, P., Liu, Z., Wu, S., Zhu, L., Yin, Y., Shen, X.: Motion prediction via joint dependency modeling in phase space. In: Proceedings of the 29th ACM International Conference on Multimedia. pp. 713\u2013721 (2021)","DOI":"10.1145\/3474085.3475237"},{"key":"12_CR88","unstructured":"Tang, J., Sun, J., Lin, X., Zheng, W.S., Hu, J.F., et\u00a0al.: Temporal continual learning with prior compensation for human motion prediction. Advances in Neural Information Processing Systems 36 (2024)"},{"key":"12_CR89","doi-asserted-by":"crossref","unstructured":"Tanke, J., Zaveri, C., Gall, J.: Intention-based long-term human motion anticipation. In: 2021 International Conference on 3D Vision (3DV). pp. 596\u2013605. IEEE (2021)","DOI":"10.1109\/3DV53792.2021.00069"},{"key":"12_CR90","doi-asserted-by":"crossref","unstructured":"Tian, S., Liang, X., Zheng, M.: An optimization-based human behavior modeling and prediction for human-robot collaborative disassembly. In: 2023 American Control Conference (ACC). pp. 3356\u20133361 (2023)","DOI":"10.23919\/ACC55779.2023.10156342"},{"key":"12_CR91","doi-asserted-by":"crossref","unstructured":"Tian, S., Zheng, M., Liang, X.: Transfusion: A practical and effective transformer-based diffusion model for 3d human motion prediction (2023)","DOI":"10.1109\/LRA.2024.3401116"},{"key":"12_CR92","unstructured":"Vaswani, A., Shazeer, N., Parmar, N., Uszkoreit, J., Jones, L., Gomez, A.N., Kaiser, \u0141., Polosukhin, I.: Attention is all you need. Advances in neural information processing systems 30 (2017)"},{"key":"12_CR93","doi-asserted-by":"crossref","unstructured":"Walker, J., Marino, K., Gupta, A., Hebert, M.: The pose knows: Video forecasting by generating pose futures. In: Proceedings of the IEEE International Conference on Computer Vision. pp. 3332\u20133341 (2017)","DOI":"10.1109\/ICCV.2017.361"},{"key":"12_CR94","doi-asserted-by":"crossref","unstructured":"Walker, J., Marino, K., Gupta, A., Hebert, M.: The pose knows: Video forecasting by generating pose futures. In: Proceedings of the IEEE international conference on computer vision. pp. 3332\u20133341 (2017)","DOI":"10.1109\/ICCV.2017.361"},{"key":"12_CR95","unstructured":"Wang, J., Xu, H., Narasimhan, M., Wang, X.: Multi-person 3d motion prediction with multi-range transformers. In: Advances in Neural Information Processing Systems. vol.\u00a034, pp. 6036\u20136049 (2021)"},{"key":"12_CR96","doi-asserted-by":"crossref","unstructured":"Wei, D., Sun, H., Li, B., Lu, J., Li, W., Sun, X., Hu, S.: Human joint kinematics diffusion-refinement for stochastic motion prediction. In: Proceedings of the AAAI Conference on Artificial Intelligence. vol.\u00a037, pp. 6110\u20136118 (2023)","DOI":"10.1609\/aaai.v37i5.25754"},{"key":"12_CR97","doi-asserted-by":"publisher","unstructured":"Xu, S., Wang, Y.X., Gui, L.Y.: Diverse Human Motion Prediction Guided by Multi-level Spatial-Temporal Anchors, p. 251\u2013269. Springer Nature Switzerland (2022). https:\/\/doi.org\/10.1007\/978-3-031-20047-2_15","DOI":"10.1007\/978-3-031-20047-2_15"},{"key":"12_CR98","doi-asserted-by":"crossref","unstructured":"Yan, X., Rastogi, A., Villegas, R., Sunkavalli, K., Shechtman, E., Hadap, S., Lee, H.: Mt-vae: Learning motion transformations to generate multimodal human dynamics. In: Proceedings of the European conference on computer vision (ECCV). pp. 265\u2013281 (2018)","DOI":"10.1007\/978-3-030-01228-1_17"},{"key":"12_CR99","doi-asserted-by":"crossref","unstructured":"Yan, X., Rastogi, A., Villegas, R., Sunkavalli, K., Shechtman, E., Hadap, S., Yumer, E., Lee, H.: Mt-vae: Learning motion transformations to generate multimodal human dynamics. In: Proceedings of the European conference on computer vision (ECCV). pp. 265\u2013281 (2018)","DOI":"10.1007\/978-3-030-01228-1_17"},{"key":"12_CR100","unstructured":"Yuan, Y., Kitani, K.: Diverse trajectory forecasting with determinantal point processes. In: arXiv preprint arXiv:1907.04967 (2019)"},{"key":"12_CR101","unstructured":"Yuan, Y., Kitani, K.: Diverse trajectory forecasting with determinantal point processes. arXiv preprint arXiv:1907.04967 (2019)"},{"key":"12_CR102","doi-asserted-by":"crossref","unstructured":"Yuan, Y., Kitani, K.: Dlow: Diversifying latent flows for diverse human motion prediction. In: European Conference on Computer Vision. pp. 346\u2013364. Springer (2020)","DOI":"10.1007\/978-3-030-58545-7_20"},{"key":"12_CR103","doi-asserted-by":"crossref","unstructured":"Zhang, X., Yi, D., Behdad, S., Saxena, S.: Unsupervised human activity recognition learning for disassembly tasks. In: IEEE Transactions on Industrial Informatics (2023)","DOI":"10.1109\/TII.2023.3264284"},{"key":"12_CR104","doi-asserted-by":"crossref","unstructured":"Zhang, Y., Black, M.J., Tang, S.: We are more than our joints: Predicting how 3d bodies move. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. pp. 3372\u20133382 (2021)","DOI":"10.1109\/CVPR46437.2021.00338"},{"key":"12_CR105","unstructured":"Zhao, G., Lin, J., Zhang, Z., Ren, X., Su, Q., Sun, X.: Explicit sparse transformer: Concentrated attention through explicit selection. arXiv preprint arXiv:1912.11637 (2019)"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ACCV 2024"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-96-0885-0_12","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,12,6]],"date-time":"2024-12-06T08:09:25Z","timestamp":1733472565000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-96-0885-0_12"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,12,7]]},"ISBN":["9789819608843","9789819608850"],"references-count":105,"URL":"https:\/\/doi.org\/10.1007\/978-981-96-0885-0_12","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2024,12,7]]},"assertion":[{"value":"7 December 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ACCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Asian Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Hanoi","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Vietnam","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"8 December 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"12 December 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"17","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"accv2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}