{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,8]],"date-time":"2026-04-08T16:58:09Z","timestamp":1775667489100,"version":"3.50.1"},"publisher-location":"Cham","reference-count":43,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031733468","type":"print"},{"value":"9783031733475","type":"electronic"}],"license":[{"start":{"date-parts":[[2024,10,29]],"date-time":"2024-10-29T00:00:00Z","timestamp":1730160000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,10,29]],"date-time":"2024-10-29T00:00:00Z","timestamp":1730160000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-73347-5_16","type":"book-chapter","created":{"date-parts":[[2024,10,28]],"date-time":"2024-10-28T09:15:43Z","timestamp":1730106943000},"page":"274-291","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":4,"title":["Heterogeneous Graph Learning for\u00a0Scene Graph Prediction in\u00a03D Point 
Clouds"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-4608-8664","authenticated-orcid":false,"given":"Yanni","family":"Ma","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5955-1577","authenticated-orcid":false,"given":"Hao","family":"Liu","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9624-7440","authenticated-orcid":false,"given":"Yun","family":"Pei","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7051-841X","authenticated-orcid":false,"given":"Yulan","family":"Guo","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,10,29]]},"reference":[{"key":"16_CR1","doi-asserted-by":"crossref","unstructured":"Chen, T., Yu, W., Chen, R., Lin, L.: Knowledge-embedded routing network for scene graph generation. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR), pp. 6163\u20136171 (2019)","DOI":"10.1109\/CVPR.2019.00632"},{"key":"16_CR2","unstructured":"Eigen, D., Puhrsch, C., Fergus, R.: Depth map prediction from a single image using a multi-scale deep network. In: Proceedings of the Advances in Neural Information Processing Systems (NeurIPS). vol.\u00a027 (2014)"},{"key":"16_CR3","doi-asserted-by":"crossref","unstructured":"Feng, M., Hou, H., Zhang, L., Wu, Z., Guo, Y., Mian, A.: 3D spatial multimodal knowledge accumulation for scene graph prediction in point cloud. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 9182\u20139191 (2023)","DOI":"10.1109\/CVPR52729.2023.00886"},{"key":"16_CR4","doi-asserted-by":"crossref","unstructured":"Hu, Q., et al.: RandLA-Net: efficient semantic segmentation of large-scale point clouds. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 
11108\u201311117 (2020)","DOI":"10.1109\/CVPR42600.2020.01112"},{"key":"16_CR5","doi-asserted-by":"crossref","unstructured":"Hu, Z., Dong, Y., Wang, K., Sun, Y.: Heterogeneous graph transformer. In: Proceedings of the Web Conference 2020, pp. 2704\u20132710 (2020)","DOI":"10.1145\/3366423.3380027"},{"key":"16_CR6","doi-asserted-by":"crossref","unstructured":"Johnson, J., et al.: Image retrieval using scene graphs. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR), pp. 3668\u20133678 (2015)","DOI":"10.1109\/CVPR.2015.7298990"},{"key":"16_CR7","doi-asserted-by":"publisher","first-page":"32","DOI":"10.1007\/s11263-016-0981-7","volume":"123","author":"R Krishna","year":"2017","unstructured":"Krishna, R., et al.: Visual genome: connecting language and vision using crowdsourced dense image annotations. Int. J. Comput. Vis. (IJCV) 123, 32\u201373 (2017)","journal-title":"Int. J. Comput. Vis. (IJCV)"},{"key":"16_CR8","doi-asserted-by":"crossref","unstructured":"Li, R., Zhang, S., He, X.: SGTR: end-to-end scene graph generation with transformer. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 19486\u201319496 (2022)","DOI":"10.1109\/CVPR52688.2022.01888"},{"key":"16_CR9","doi-asserted-by":"publisher","first-page":"2045","DOI":"10.1109\/TMM.2020.3007331","volume":"23","author":"H Liu","year":"2021","unstructured":"Liu, H., Guo, Y., Ma, Y., Lei, Y., Wen, G.: Semantic context encoding for accurate 3D point cloud segmentation. IEEE Trans. Multimedia (TMM) 23, 2045\u20132055 (2021)","journal-title":"IEEE Trans. Multimedia (TMM)"},{"key":"16_CR10","doi-asserted-by":"publisher","first-page":"8793","DOI":"10.1109\/TMM.2023.3241548","volume":"25","author":"H Liu","year":"2023","unstructured":"Liu, H., Ma, Y., Hu, Q., Guo, Y.: CenterTube: tracking multiple 3D objects with 4D tubelets in dynamic point clouds. IEEE Trans. Multimedia (TMM) 25, 8793\u20138804 (2023)","journal-title":"IEEE Trans. 
Multimedia (TMM)"},{"issue":"10","key":"16_CR11","doi-asserted-by":"publisher","first-page":"10988","DOI":"10.1109\/TITS.2023.3282204","volume":"24","author":"H Liu","year":"2023","unstructured":"Liu, H., Ma, Y., Wang, H., Guo, Y.: AnchorPoint: query design for transformer-based 3D object detection and tracking. IEEE Trans. Intell. Transp. Syst. (TITS) 24(10), 10988\u201311000 (2023)","journal-title":"IEEE Trans. Intell. Transp. Syst. (TITS)"},{"key":"16_CR12","doi-asserted-by":"crossref","unstructured":"Lv, C., Qi, M., Li, X., Yang, Z., Ma, H.: SGFormer: semantic graph transformer for point cloud-based 3d scene graph generation. In: Proceedings of the AAAI Conference on Artificial Intelligence. vol.\u00a038, pp. 4035\u20134043 (2024)","DOI":"10.1609\/aaai.v38i5.28197"},{"key":"16_CR13","doi-asserted-by":"crossref","unstructured":"Ma, Y., Guo, Y., Liu, H., Lei, Y., Wen, G.: Global context reasoning for semantic segmentation of 3D point clouds. In: Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision (WACV), pp. 2931\u20132940 (2020)","DOI":"10.1109\/WACV45572.2020.9093411"},{"issue":"3","key":"16_CR14","doi-asserted-by":"publisher","first-page":"1531","DOI":"10.1109\/LRA.2023.3234771","volume":"8","author":"T Monninger","year":"2023","unstructured":"Monninger, T., et al.: SCENE: reasoning about traffic scenes using heterogeneous graph neural networks. IEEE Robot. Autom. Lett. (RAL) 8(3), 1531\u20131538 (2023)","journal-title":"IEEE Robot. Autom. Lett. (RAL)"},{"key":"16_CR15","unstructured":"Qi, C.R., Su, H., Mo, K., Guibas, L.J.: PointNet: deep learning on point sets for 3D classification and segmentation. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR), pp. 652\u2013660 (2017)"},{"key":"16_CR16","unstructured":"Qi, C.R., Yi, L., Su, H., Guibas, L.J.: PointNet++: deep hierarchical feature learning on point sets in a metric space. 
In: Proceedings of the Advances in Neural Information Processing Systems (NeurIPS), pp. 5099\u20135108 (2017)"},{"key":"16_CR17","unstructured":"Radford, A., et\u00a0al.: Learning transferable visual models from natural language supervision. In: Proceedings of the International Conference on Machine Learning (ICML), pp. 8748\u20138763. PMLR (2021)"},{"key":"16_CR18","unstructured":"Ren, S., He, K., Girshick, R., Sun, J.: Faster R-CNN: towards real-time object detection with region proposal networks. In: Proceedings of the Advances in Neural Information Processing Systems (NeurIPS), vol. 28 (2015)"},{"key":"16_CR19","doi-asserted-by":"crossref","unstructured":"Rosinol, A., Gupta, A., Abate, M., Shi, J., Carlone, L.: 3D dynamic scene graphs: Actionable spatial perception with places, objects, and humans. arXiv preprint arXiv:2002.06289 (2020)","DOI":"10.15607\/RSS.2020.XVI.079"},{"key":"16_CR20","doi-asserted-by":"crossref","unstructured":"Sahand, S., Sina, M.B., Volker, T.: Classification by attention: scene graph classification with prior knowledge. In: Proceedings of the AAAI Conference on Artificial Intelligence (AAAI). vol.\u00a035, pp. 5025\u20135033 (2021)","DOI":"10.1609\/aaai.v35i6.16636"},{"issue":"2","key":"16_CR21","doi-asserted-by":"publisher","first-page":"357","DOI":"10.1109\/TKDE.2018.2833443","volume":"31","author":"C Shi","year":"2018","unstructured":"Shi, C., Hu, B., Zhao, W.X., Philip, S.Y.: Heterogeneous information network embedding for recommendation. IEEE Trans. Knowl. Data Eng. (TKDE) 31(2), 357\u2013370 (2018)","journal-title":"IEEE Trans. Knowl. Data Eng. (TKDE)"},{"key":"16_CR22","doi-asserted-by":"publisher","unstructured":"Shit, S., et al.: Relationformer: a unified framework for image-to-graph generation. In: Avidan, S., Brostow, G., Ciss\u00e9, M., Farinella, G.M., Hassner, T. (eds.) Computer Vision - ECCV 2022. ECCV 2022. LNCS, vol. 13697, pp. 422\u2013439. Springer, Cham (2022). 
https:\/\/doi.org\/10.1007\/978-3-031-19836-6_24","DOI":"10.1007\/978-3-031-19836-6_24"},{"issue":"2","key":"16_CR23","doi-asserted-by":"publisher","first-page":"20","DOI":"10.1145\/2481244.2481248","volume":"14","author":"Y Sun","year":"2013","unstructured":"Sun, Y., Han, J.: Mining heterogeneous information networks: a structural analysis approach. ACM SIGKDD Explor. Newsl. 14(2), 20\u201328 (2013)","journal-title":"ACM SIGKDD Explor. Newsl."},{"key":"16_CR24","doi-asserted-by":"crossref","unstructured":"Tahara, T., Seno, T., Narita, G., Ishikawa, T.: Retargetable AR: context-aware augmented reality in indoor scenes based on 3D scene graph. In: 2020 IEEE International Symposium on Mixed and Augmented Reality Adjunct (ISMAR-Adjunct), pp. 249\u2013255 (2020)","DOI":"10.1109\/ISMAR-Adjunct51615.2020.00072"},{"key":"16_CR25","doi-asserted-by":"crossref","unstructured":"Tang, K., Niu, Y., Huang, J., Shi, J., Zhang, H.: Unbiased scene graph generation from biased training. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 3716\u20133725 (2020)","DOI":"10.1109\/CVPR42600.2020.00377"},{"key":"16_CR26","doi-asserted-by":"crossref","unstructured":"Tang, K., Zhang, H., Wu, B., Luo, W., Liu, W.: Learning to compose dynamic tree structures for visual contexts. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 6619\u20136628 (2019)","DOI":"10.1109\/CVPR.2019.00678"},{"key":"16_CR27","doi-asserted-by":"crossref","unstructured":"Wald, J., Avetisyan, A., Navab, N., Tombari, F., Nie\u00dfner, M.: RIO: 3D object instance relocalization in changing indoor environments. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV), pp. 7658\u20137667 (2019)","DOI":"10.1109\/ICCV.2019.00775"},{"key":"16_CR28","doi-asserted-by":"crossref","unstructured":"Wald, J., Dhamo, H., Navab, N., Tombari, F.: Learning 3D semantic scene graphs from 3D indoor reconstructions. 
In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 3961\u20133970 (2020)","DOI":"10.1109\/CVPR42600.2020.00402"},{"key":"16_CR29","doi-asserted-by":"crossref","unstructured":"Wang, X., et al.: Heterogeneous graph attention network. In: Proceedings of the World Wide Web conference, pp. 2022\u20132032 (2019)","DOI":"10.1145\/3308558.3313562"},{"key":"16_CR30","doi-asserted-by":"crossref","unstructured":"Wang, Z., Cheng, B., Zhao, L., Xu, D., Tang, Y., Sheng, L.: VL-SAT: visual-linguistic semantics assisted training for 3D semantic scene graph prediction in point cloud. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 21560\u201321569 (2023)","DOI":"10.1109\/CVPR52729.2023.02065"},{"key":"16_CR31","doi-asserted-by":"crossref","unstructured":"Wu, S.C., Wald, J., Tateno, K., Navab, N., Tombari, F.: SceneGraphFusion: incremental 3D scene graph prediction from RGB-D sequences. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 7515\u20137525 (2021)","DOI":"10.1109\/CVPR46437.2021.00743"},{"key":"16_CR32","doi-asserted-by":"crossref","unstructured":"Xu, D., Zhu, Y., Choy, C.B., Fei-Fei, L.: Scene graph generation by iterative message passing. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR), pp. 5410\u20135419 (2017)","DOI":"10.1109\/CVPR.2017.330"},{"key":"16_CR33","doi-asserted-by":"crossref","unstructured":"Xu, D., Zhu, Y., Choy, C.B., Fei-Fei, L.: Scene graph generation by iterative message passing. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR), pp. 5410\u20135419 (2017)","DOI":"10.1109\/CVPR.2017.330"},{"key":"16_CR34","doi-asserted-by":"crossref","unstructured":"Yang, J., Lu, J., Lee, S., Batra, D., Parikh, D.: Graph R-CNN for scene graph generation. In: Proceedings of the European Conference on Computer Vision (ECCV), pp. 
670\u2013685 (2018)","DOI":"10.1007\/978-3-030-01246-5_41"},{"key":"16_CR35","doi-asserted-by":"crossref","unstructured":"Yin, T., Zhou, X., Krahenbuhl, P.: Center-based 3D object detection and tracking. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 11784\u201311793 (2021)","DOI":"10.1109\/CVPR46437.2021.01161"},{"key":"16_CR36","doi-asserted-by":"crossref","unstructured":"Yoon, K., Kim, K., Moon, J., Park, C.: Unbiased heterogeneous scene graph generation with relation-aware message passing neural network. In: Proceedings of the AAAI Conference on Artificial Intelligence (AAAI). vol.\u00a037, pp. 3285\u20133294 (2023)","DOI":"10.1609\/aaai.v37i3.25435"},{"key":"16_CR37","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"606","DOI":"10.1007\/978-3-030-58592-1_36","volume-title":"Computer Vision \u2013 ECCV 2020","author":"A Zareian","year":"2020","unstructured":"Zareian, A., Karaman, S., Chang, S.-F.: Bridging knowledge graphs to generate scene graphs. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020. LNCS, vol. 12368, pp. 606\u2013623. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58592-1_36"},{"key":"16_CR38","doi-asserted-by":"crossref","unstructured":"Zellers, R., Yatskar, M., Thomson, S., Choi, Y.: Neural motifs: scene graph parsing with global context. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR), pp. 5831\u20135840 (2018)","DOI":"10.1109\/CVPR.2018.00611"},{"key":"16_CR39","doi-asserted-by":"crossref","unstructured":"Zhang, C., Yu, J., Song, Y., Cai, W.: Exploiting edge-oriented reasoning for 3D point-based scene graph analysis. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 
9705\u20139715 (2021)","DOI":"10.1109\/CVPR46437.2021.00958"},{"key":"16_CR40","doi-asserted-by":"crossref","unstructured":"Zhang, C., Song, D., Huang, C., Swami, A., Chawla, N.V.: Heterogeneous graph neural network. In: Proceedings of the ACM SIGKDD International Conference on Knowledge Discovery & Data Mining, pp. 793\u2013803 (2019)","DOI":"10.1145\/3292500.3330961"},{"key":"16_CR41","first-page":"18620","volume":"34","author":"S Zhang","year":"2021","unstructured":"Zhang, S., Hao, A., Qin, H., et al.: Knowledge-inspired 3D scene graph prediction in point cloud. Proc. Adv. Neural Inf. Process. Syst. (NeurIPS) 34, 18620\u201318632 (2021)","journal-title":"Proc. Adv. Neural Inf. Process. Syst. (NeurIPS)"},{"key":"16_CR42","doi-asserted-by":"crossref","unstructured":"Zhang, Y., Hu, Q., Xu, G., Ma, Y., Wan, J., Guo, Y.: Not all points are equal: learning highly efficient point-based detectors for 3D lidar point clouds. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR) (2022)","DOI":"10.1109\/CVPR52688.2022.01838"},{"key":"16_CR43","doi-asserted-by":"crossref","unstructured":"Zhao, J., Wang, X., Shi, C., Hu, B., Song, G., Ye, Y.: Heterogeneous graph structure learning for graph neural networks. In: Proceedings of the AAAI Conference on Artificial Intelligence (AAAI). vol.\u00a035, pp. 
4697\u20134705 (2021)","DOI":"10.1609\/aaai.v35i5.16600"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2024"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-73347-5_16","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,10,28]],"date-time":"2024-10-28T09:47:25Z","timestamp":1730108845000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-73347-5_16"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,29]]},"ISBN":["9783031733468","9783031733475"],"references-count":43,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-73347-5_16","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,10,29]]},"assertion":[{"value":"29 October 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Milan","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 September 2024","order":7,"name":"conference_start_date","label":"Conference Start 
Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 October 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2024.ecva.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}