{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,26]],"date-time":"2025-03-26T10:05:03Z","timestamp":1742983503760,"version":"3.40.3"},"publisher-location":"Cham","reference-count":43,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783031729911"},{"type":"electronic","value":"9783031729928"}],"license":[{"start":{"date-parts":[[2024,10,30]],"date-time":"2024-10-30T00:00:00Z","timestamp":1730246400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,10,30]],"date-time":"2024-10-30T00:00:00Z","timestamp":1730246400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-72992-8_18","type":"book-chapter","created":{"date-parts":[[2024,10,29]],"date-time":"2024-10-29T08:29:02Z","timestamp":1730190542000},"page":"311-328","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Enhanced Motion Forecasting with\u00a0Visual Relation Reasoning"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-8765-2332","authenticated-orcid":false,"given":"Sungjune","family":"Kim","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0009-0006-9028-6779","authenticated-orcid":false,"given":"Hadam","family":"Baek","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0009-0007-4171-7269","authenticated-orcid":false,"given":"Seunggwan","family":"Lee","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5454-3404","authenticated-orcid":false,"given":"Hyung-gun","family":"Chi","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0009-0003-3369-8169","authenticated-orcid":false,"given":"Hyerin","family":"Lim","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6520-2074","authenticated-orcid":false,"given":"Jinkyu","family":"Kim","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7349-0018","authenticated-orcid":false,"given":"Sangpil","family":"Kim","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,10,30]]},"reference":[{"key":"18_CR1","unstructured":"Abu-El-Haija, S., et al.: MixHop: higher-order graph convolutional architectures via sparsified neighborhood mixing. In: International Conference on Machine Learning, pp. 21\u201329. PMLR (2019)"},{"key":"18_CR2","doi-asserted-by":"crossref","unstructured":"Caesar, H., et al.: Nuscenes: a multimodal dataset for autonomous driving. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 11621\u201311631 (2020)","DOI":"10.1109\/CVPR42600.2020.01164"},{"key":"18_CR3","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"624","DOI":"10.1007\/978-3-030-58592-1_37","volume-title":"Computer Vision \u2013 ECCV 2020","author":"S Casas","year":"2020","unstructured":"Casas, S., Gulino, C., Suo, S., Luo, K., Liao, R., Urtasun, R.: Implicit latent variable model for scene-consistent motion forecasting. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020. LNCS, vol. 12368, pp. 624\u2013641. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58592-1_37"},{"key":"18_CR4","unstructured":"Casas, S., Luo, W., Urtasun, R.: Intentnet: learning to predict intention from raw sensor data. In: Conference on Robot Learning, pp. 947\u2013956. PMLR (2018)"},{"issue":"1","key":"18_CR5","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1109\/TPAMI.2021.3137605","volume":"45","author":"X Chang","year":"2021","unstructured":"Chang, X., Ren, P., Xu, P., Li, Z., Chen, X., Hauptmann, A.: A comprehensive survey of scene graphs: generation and application. IEEE Trans. Pattern Anal. Mach. Intell. 45(1), 1\u201326 (2021)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"18_CR6","first-page":"16975","volume":"35","author":"M Chatterjee","year":"2022","unstructured":"Chatterjee, M., Ahuja, N., Cherian, A.: Learning audio-visual dynamics using scene graphs for audio source separation. Adv. Neural. Inf. Process. Syst. 35, 16975\u201316988 (2022)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"18_CR7","unstructured":"Contributors, M.: MMCV: openmmlab computer vision foundation (2018). https:\/\/github.com\/open-mmlab\/mmcv"},{"key":"18_CR8","doi-asserted-by":"crossref","unstructured":"Deng, J., Dong, W., Socher, R., Li, L.J., Li, K., Fei-Fei, L.: Imagenet: a large-scale hierarchical image database. In: 2009 IEEE Conference on Computer Vision and Pattern Recognition, pp. 248\u2013255. IEEE (2009)","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"18_CR9","unstructured":"Dosovitskiy, A., et al.: An image is worth 16x16 words: transformers for image recognition at scale. In: International Conference on Learning Representations (2020)"},{"key":"18_CR10","doi-asserted-by":"crossref","unstructured":"Gu, J., et al.: Vip3d: end-to-end visual trajectory prediction via 3d agent queries. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 5496\u20135506 (2023)","DOI":"10.1109\/CVPR52729.2023.00532"},{"key":"18_CR11","doi-asserted-by":"crossref","unstructured":"He, K., Gkioxari, G., Doll\u00e1r, P., Girshick, R.: Mask R-CNN. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 2961\u20132969 (2017)","DOI":"10.1109\/ICCV.2017.322"},{"key":"18_CR12","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 770\u2013778 (2016)","DOI":"10.1109\/CVPR.2016.90"},{"issue":"5","key":"18_CR13","doi-asserted-by":"publisher","first-page":"4282","DOI":"10.1103\/PhysRevE.51.4282","volume":"51","author":"D Helbing","year":"1995","unstructured":"Helbing, D., Molnar, P.: Social force model for pedestrian dynamics. Phys. Rev. E 51(5), 4282 (1995)","journal-title":"Phys. Rev. E"},{"key":"18_CR14","doi-asserted-by":"crossref","unstructured":"Hu, A., Murez, Z., et al.: Fiery: future instance prediction in bird\u2019s-eye view from surround monocular cameras. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 15273\u201315282 (2021)","DOI":"10.1109\/ICCV48922.2021.01499"},{"key":"18_CR15","doi-asserted-by":"crossref","unstructured":"Hu, Y., et al.: Planning-oriented autonomous driving. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 17853\u201317862 (2023)","DOI":"10.1109\/CVPR52729.2023.01712"},{"key":"18_CR16","doi-asserted-by":"crossref","unstructured":"Johnson, J., Gupta, A., Fei-Fei, L.: Image generation from scene graphs. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 1219\u20131228 (2018)","DOI":"10.1109\/CVPR.2018.00133"},{"key":"18_CR17","doi-asserted-by":"crossref","unstructured":"Johnson, J., et al.: Image retrieval using scene graphs. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 3668\u20133678 (2015)","DOI":"10.1109\/CVPR.2015.7298990"},{"key":"18_CR18","doi-asserted-by":"crossref","unstructured":"Jung, D., Kim, S., Kim, W.H., Cho, M.: Devil\u2019s on the edges: selective quad attention for scene graph generation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 18664\u201318674 (2023)","DOI":"10.1109\/CVPR52729.2023.01790"},{"key":"18_CR19","doi-asserted-by":"crossref","unstructured":"Kim, S., Chi, H.G., Lim, H., Ramani, K., Kim, J., Kim, S.: Higher-order relational reasoning for pedestrian trajectory prediction. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 15251\u201315260 (2024)","DOI":"10.1109\/CVPR52733.2024.01444"},{"key":"18_CR20","unstructured":"Kipf, T.N., Welling, M.: Semi-supervised classification with graph convolutional networks. In: International Conference on Learning Representations (2016)"},{"issue":"8","key":"18_CR21","doi-asserted-by":"publisher","first-page":"2117","DOI":"10.1109\/TMM.2019.2896516","volume":"21","author":"X Li","year":"2019","unstructured":"Li, X., Jiang, S.: Know more say less: image captioning based on scene graphs. IEEE Trans. Multimedia 21(8), 2117\u20132130 (2019)","journal-title":"IEEE Trans. Multimedia"},{"key":"18_CR22","doi-asserted-by":"crossref","unstructured":"Liang, M., et al.: End-to-end perception and prediction with tracking in the loop. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 11553\u201311562 (2020)","DOI":"10.1109\/CVPR42600.2020.01157"},{"key":"18_CR23","doi-asserted-by":"crossref","unstructured":"Lin, T.Y., Doll\u00e1r, P., Girshick, R., He, K., Hariharan, B., Belongie, S.: Feature pyramid networks for object detection. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 2117\u20132125 (2017)","DOI":"10.1109\/CVPR.2017.106"},{"key":"18_CR24","doi-asserted-by":"crossref","unstructured":"Luo, W., Yang, B., Urtasun, R.: Fast and furious: real time end-to-end 3d detection, tracking and motion forecasting with a single convolutional net. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 3569\u20133577 (2018)","DOI":"10.1109\/CVPR.2018.00376"},{"key":"18_CR25","unstructured":"Van\u00a0der Maaten, L., Hinton, G.: Visualizing data using t-SNE. J. Mach. Learn. Res. 9(11) (2008)"},{"key":"18_CR26","doi-asserted-by":"crossref","unstructured":"Nayakanti, N., Al-Rfou, R., Zhou, A., Goel, K., Refaat, K.S., Sapp, B.: Wayformer: motion forecasting via simple & efficient attention networks. In: 2023 IEEE International Conference on Robotics and Automation (ICRA), pp. 2980\u20132987. IEEE (2023)","DOI":"10.1109\/ICRA48891.2023.10160609"},{"key":"18_CR27","unstructured":"Paszke, A., et al.: Pytorch: an imperative style, high-performance deep learning library. Adv. Neural Inf. Process. Syst. 32 (2019)"},{"key":"18_CR28","doi-asserted-by":"crossref","unstructured":"Phillips, J., Martinez, J., B\u00e2rsan, I.A., Casas, S., Sadat, A., Urtasun, R.: Deep multi-task learning for joint localization, perception, and prediction. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 4679\u20134689 (2021)","DOI":"10.1109\/CVPR46437.2021.00465"},{"key":"18_CR29","unstructured":"Roh, W., et al.: Ora3d: overlap region aware multi-view 3d object detection. arXiv preprint arXiv:2207.00865 (2022)"},{"key":"18_CR30","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"414","DOI":"10.1007\/978-3-030-58592-1_25","volume-title":"Computer Vision \u2013 ECCV 2020","author":"A Sadat","year":"2020","unstructured":"Sadat, A., Casas, S., Ren, M., Wu, X., Dhawan, P., Urtasun, R.: Perceive, predict, and plan: safe motion planning through interpretable semantic representations. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020 Part XXIII. LNCS, vol. 12368, pp. 414\u2013430. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58592-1_25"},{"key":"18_CR31","doi-asserted-by":"crossref","unstructured":"Simonelli, A., Bulo, S.R., Porzi, L., L\u00f3pez-Antequera, M., Kontschieder, P.: Disentangling monocular 3d object detection. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 1991\u20131999 (2019)","DOI":"10.1109\/ICCV.2019.00208"},{"key":"18_CR32","unstructured":"Sohn, K., Lee, H., Yan, X.: Learning structured output representation using deep conditional generative models. Adv. Neural Inf. Process. Syst. 28 (2015)"},{"key":"18_CR33","doi-asserted-by":"crossref","unstructured":"Tang, K., Niu, Y., Huang, J., Shi, J., Zhang, H.: Unbiased scene graph generation from biased training. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 3716\u20133725 (2020)","DOI":"10.1109\/CVPR42600.2020.00377"},{"key":"18_CR34","unstructured":"Vaswani, A., et al.: Attention is all you need. Adv. Neural Inf. Process. Syst. 30 (2017)"},{"key":"18_CR35","doi-asserted-by":"crossref","unstructured":"Wu, P., Chen, S., Metaxas, D.N.: Motionnet: joint perception and motion prediction for autonomous driving based on bird\u2019s eye view maps. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 11385\u201311395 (2020)","DOI":"10.1109\/CVPR42600.2020.01140"},{"issue":"1","key":"18_CR36","doi-asserted-by":"publisher","first-page":"4","DOI":"10.1109\/TNNLS.2020.2978386","volume":"32","author":"Z Wu","year":"2020","unstructured":"Wu, Z., Pan, S., Chen, F., Long, G., Zhang, C., Philip, S.Y.: A comprehensive survey on graph neural networks. IEEE Trans. Neural Netw. Learn. Syst. 32(1), 4\u201324 (2020)","journal-title":"IEEE Trans. Neural Netw. Learn. Syst."},{"key":"18_CR37","doi-asserted-by":"crossref","unstructured":"Xu, D., Zhu, Y., Choy, C.B., Fei-Fei, L.: Scene graph generation by iterative message passing. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 5410\u20135419 (2017)","DOI":"10.1109\/CVPR.2017.330"},{"key":"18_CR38","doi-asserted-by":"crossref","unstructured":"Yang, J., Lu, J., Lee, S., Batra, D., Parikh, D.: Graph r-CNN for scene graph generation. In: Proceedings of the European Conference on Computer Vision (ECCV), pp. 670\u2013685 (2018)","DOI":"10.1007\/978-3-030-01246-5_41"},{"key":"18_CR39","doi-asserted-by":"crossref","unstructured":"Yang, X., Tang, K., Zhang, H., Cai, J.: Auto-encoding scene graphs for image captioning. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 10685\u201310694 (2019)","DOI":"10.1109\/CVPR.2019.01094"},{"key":"18_CR40","doi-asserted-by":"crossref","unstructured":"Zellers, R., Yatskar, M., Thomson, S., Choi, Y.: Neural motifs: scene graph parsing with global context. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 5831\u20135840 (2018)","DOI":"10.1109\/CVPR.2018.00611"},{"key":"18_CR41","series-title":"LNCS","doi-asserted-by":"publisher","first-page":"659","DOI":"10.1007\/978-3-031-19812-0_38","volume-title":"ECCV 2022","author":"F Zeng","year":"2022","unstructured":"Zeng, F., Dong, B., Zhang, Y., Wang, T., Zhang, X., Wei, Y.: MOTR: end-to-end multiple-object tracking with transformer. In: Avidan, S., Brostow, G., Ciss\u00e9, M., Farinella, G.M., Hassner, T. (eds.) ECCV 2022. LNCS, vol. 13687, pp. 659\u2013675. Springer, Cham (2022). https:\/\/doi.org\/10.1007\/978-3-031-19812-0_38"},{"key":"18_CR42","doi-asserted-by":"crossref","unstructured":"Zhang, T., Chen, X., Wang, Y., Wang, Y., Zhao, H.: Mutr3d: a multi-camera tracking framework via 3d-to-2d queries. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 4537\u20134546 (2022)","DOI":"10.1109\/CVPRW56347.2022.00500"},{"key":"18_CR43","doi-asserted-by":"publisher","first-page":"57","DOI":"10.1016\/j.aiopen.2021.01.001","volume":"1","author":"J Zhou","year":"2020","unstructured":"Zhou, J., et al.: Graph neural networks: a review of methods and applications. AI open 1, 57\u201381 (2020)","journal-title":"AI open"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2024"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-72992-8_18","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,10,29]],"date-time":"2024-10-29T08:48:16Z","timestamp":1730191696000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-72992-8_18"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,30]]},"ISBN":["9783031729911","9783031729928"],"references-count":43,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-72992-8_18","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2024,10,30]]},"assertion":[{"value":"30 October 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Milan","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 September 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 October 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2024.ecva.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}