{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,26]],"date-time":"2025-03-26T20:08:51Z","timestamp":1743019731283,"version":"3.40.3"},"publisher-location":"Singapore","reference-count":40,"publisher":"Springer Nature Singapore","isbn-type":[{"type":"print","value":"9789819980758"},{"type":"electronic","value":"9789819980765"}],"license":[{"start":{"date-parts":[[2023,11,14]],"date-time":"2023-11-14T00:00:00Z","timestamp":1699920000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,11,14]],"date-time":"2023-11-14T00:00:00Z","timestamp":1699920000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024]]},"DOI":"10.1007\/978-981-99-8076-5_10","type":"book-chapter","created":{"date-parts":[[2023,11,13]],"date-time":"2023-11-13T14:02:10Z","timestamp":1699884130000},"page":"133-147","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Progressive Temporal Transformer for\u00a0Bird\u2019s-Eye-View Camera Pose Estimation"],"prefix":"10.1007","author":[{"given":"Zhuoyuan","family":"Wu","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jiancheng","family":"Cai","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ranran","family":"Huang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xinmin","family":"Liu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zhenhua","family":"Chai","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2023,11,14]]},"reference":[{"key":"10_CR1","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"782","DOI":"10.1007\/978-3-030-01264-9_46","volume-title":"Computer Vision \u2013 ECCV 2018","author":"V Balntas","year":"2018","unstructured":"Balntas, V., Li, S., Prisacariu, V.: RelocNet: continuous metric learning relocalisation using neural nets. In: Ferrari, V., Hebert, M., Sminchisescu, C., Weiss, Y. (eds.) Computer Vision \u2013 ECCV 2018. LNCS, vol. 11218, pp. 782\u2013799. Springer, Cham (2018). https:\/\/doi.org\/10.1007\/978-3-030-01264-9_46"},{"doi-asserted-by":"crossref","unstructured":"Brachmann, E., et al.: DSAC-differentiable RANSAC for camera localization. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 6684\u20136692 (2017)","key":"10_CR2","DOI":"10.1109\/CVPR.2017.267"},{"doi-asserted-by":"crossref","unstructured":"Brachmann, E., Michel, F., Krull, A., Yang, M.Y., Gumhold, S., et al.: Uncertainty-driven 6d pose estimation of objects and scenes from a single RGB image. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 3364\u20133372 (2016)","key":"10_CR3","DOI":"10.1109\/CVPR.2016.366"},{"doi-asserted-by":"crossref","unstructured":"Brachmann, E., Rother, C.: Learning less is more-6d camera localization via 3d surface regression. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 4654\u20134662 (2018)","key":"10_CR4","DOI":"10.1109\/CVPR.2018.00489"},{"doi-asserted-by":"crossref","unstructured":"Brahmbhatt, S., Gu, J., Kim, K., Hays, J., Kautz, J.: Geometry-aware learning of maps for camera localization. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 2616\u20132625 (2018)","key":"10_CR5","DOI":"10.1109\/CVPR.2018.00277"},{"doi-asserted-by":"crossref","unstructured":"Cao, S., Snavely, N.: Minimal scene descriptions from structure from motion models. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 461\u2013468 (2014)","key":"10_CR6","DOI":"10.1109\/CVPR.2014.66"},{"key":"10_CR7","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"213","DOI":"10.1007\/978-3-030-58452-8_13","volume-title":"Computer Vision \u2013 ECCV 2020","author":"N Carion","year":"2020","unstructured":"Carion, N., Massa, F., Synnaeve, G., Usunier, N., Kirillov, A., Zagoruyko, S.: End-to-end object detection with transformers. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020. LNCS, vol. 12346, pp. 213\u2013229. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58452-8_13"},{"doi-asserted-by":"crossref","unstructured":"Clark, R., Wang, S., Markham, A., Trigoni, N., Wen, H.: VidLoc: a deep spatio-temporal model for 6-DoF video-clip relocalization. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 6856\u20136864 (2017)","key":"10_CR8","DOI":"10.1109\/CVPR.2017.284"},{"doi-asserted-by":"crossref","unstructured":"DeTone, D., Malisiewicz, T., Rabinovich, A.: SuperPoint: self-supervised interest point detection and description. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition Workshops, pp. 224\u2013236 (2018)","key":"10_CR9","DOI":"10.1109\/CVPRW.2018.00060"},{"doi-asserted-by":"crossref","unstructured":"Dusmanu, M., et al.: D2- Net: a trainable CNN for joint detection and description of local features. In: CVPR 2019-IEEE Conference on Computer Vision and Pattern Recognition (2019)","key":"10_CR10","DOI":"10.1109\/CVPR.2019.00828"},{"key":"10_CR11","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"738","DOI":"10.1007\/978-3-030-11009-3_46","volume-title":"Computer Vision \u2013 ECCV 2018 Workshops","author":"S En","year":"2019","unstructured":"En, S., Lechervy, A., Jurie, F.: RPNet: an end-to-end network for relative camera pose estimation. In: Leal-Taix\u00e9, L., Roth, S. (eds.) ECCV 2018. LNCS, vol. 11129, pp. 738\u2013745. Springer, Cham (2019). https:\/\/doi.org\/10.1007\/978-3-030-11009-3_46"},{"issue":"6","key":"10_CR12","doi-asserted-by":"publisher","first-page":"381","DOI":"10.1145\/358669.358692","volume":"24","author":"MA Fischler","year":"1981","unstructured":"Fischler, M.A., Bolles, R.C.: Random sample consensus: a paradigm for model fitting with applications to image analysis and automated cartography. Commun. ACM 24(6), 381\u2013395 (1981)","journal-title":"Commun. ACM"},{"key":"10_CR13","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"146","DOI":"10.1007\/978-3-031-20047-2_9","volume-title":"Computer Vision - ECCV 2022","author":"S Gao","year":"2022","unstructured":"Gao, S., Zhou, C., Ma, C., Wang, X., Yuan, J.: AiATrack: attention in attention for transformer visual tracking. In: Avidan, S., Brostow, G., Ciss\u00e9, M., Farinella, G.M., Hassner, T. (eds.) ECCV 2022. LNCS, vol. 13682, pp. 146\u2013164. Springer, Cham (2022). https:\/\/doi.org\/10.1007\/978-3-031-20047-2_9"},{"issue":"4","key":"10_CR14","doi-asserted-by":"publisher","first-page":"629","DOI":"10.1364\/JOSAA.4.000629","volume":"4","author":"BK Horn","year":"1987","unstructured":"Horn, B.K.: Closed-form solution of absolute orientation using unit quaternions. Josa a 4(4), 629\u2013642 (1987)","journal-title":"Josa a"},{"doi-asserted-by":"crossref","unstructured":"Kendall, A., Cipolla, R.: Modelling uncertainty in deep learning for camera relocalization. In: 2016 IEEE International Conference on Robotics and Automation, pp. 4762\u20134769. IEEE (2016)","key":"10_CR15","DOI":"10.1109\/ICRA.2016.7487679"},{"doi-asserted-by":"crossref","unstructured":"Kendall, A., Cipolla, R.: Geometric loss functions for camera pose regression with deep learning. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 5974\u20135983 (2017)","key":"10_CR16","DOI":"10.1109\/CVPR.2017.694"},{"doi-asserted-by":"crossref","unstructured":"Kendall, A., Grimes, M., Cipolla, R.: PoseNet: a convolutional network for real-time 6-DoF camera relocalization. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 2938\u20132946 (2015)","key":"10_CR17","DOI":"10.1109\/ICCV.2015.336"},{"doi-asserted-by":"crossref","unstructured":"Laskar, Z., Melekhov, I., Kalia, S., Kannala, J.: Camera relocalization by computing pairwise relative poses using convolutional neural network. In: Proceedings of the IEEE International Conference on Computer Vision Workshops, pp. 929\u2013938 (2017)","key":"10_CR18","DOI":"10.1109\/ICCVW.2017.113"},{"key":"10_CR19","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"229","DOI":"10.1007\/978-3-031-20080-9_14","volume-title":"Computer Vision - ECCV 2022","author":"X Li","year":"2022","unstructured":"Li, X., Ling, H.: GTCaR: graph transformer for camera re-localization. In: Avidan, S., Brostow, G., Cis\u00e9, M., Farinella, G.M., Hassner, T. (eds.) ECCV 2022. LNCS, vol. 13670, pp. 229\u2013246. Springer, Cham (2022). https:\/\/doi.org\/10.1007\/978-3-031-20080-9_14"},{"key":"10_CR20","series-title":"Advances in Computer Vision and Pattern Recognition","doi-asserted-by":"publisher","first-page":"147","DOI":"10.1007\/978-3-319-25781-5_8","volume-title":"Large-Scale Visual Geo-Localization","author":"Y Li","year":"2016","unstructured":"Li, Y., Snavely, N., Huttenlocher, D.P., Fua, P.: Worldwide pose estimation using 3D point clouds. In: Zamir, A.R.R., Hakeem, A., Van Van Gool, L., Shah, M., Szeliski, R. (eds.) Large-Scale Visual Geo-Localization. ACVPR, pp. 147\u2013163. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-25781-5_8"},{"issue":"1","key":"10_CR21","doi-asserted-by":"publisher","first-page":"3","DOI":"10.1177\/0278364916679498","volume":"36","author":"W Maddern","year":"2017","unstructured":"Maddern, W., Pascoe, G., Linegar, C., Newman, P.: 1 year, 1000 km: the oxford robotcar dataset. Int. J. Robot. Res. 36(1), 3\u201315 (2017)","journal-title":"Int. J. Robot. Res."},{"doi-asserted-by":"crossref","unstructured":"Melekhov, I., Ylioinas, J., Kannala, J., Rahtu, E.: Image-based localization using hourglass networks. In: Proceedings of the IEEE International Conference on Computer Vision Workshops, pp. 879\u2013886 (2017)","key":"10_CR22","DOI":"10.1109\/ICCVW.2017.107"},{"doi-asserted-by":"crossref","unstructured":"Sarlin, P.E., Cadena, C., Siegwart, R., Dymczyk, M.: From coarse to fine: Robust hierarchical localization at large scale. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 12716\u201312725 (2019)","key":"10_CR23","DOI":"10.1109\/CVPR.2019.01300"},{"issue":"9","key":"10_CR24","doi-asserted-by":"publisher","first-page":"1744","DOI":"10.1109\/TPAMI.2016.2611662","volume":"39","author":"T Sattler","year":"2016","unstructured":"Sattler, T., Leibe, B., Kobbelt, L.: Efficient & effective prioritized matching for large-scale image-based localization. IEEE Trans. Pattern Anal. Mach. Intell. 39(9), 1744\u20131756 (2016)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"unstructured":"Schleiss, M., Rouatbi, F., Cremers, D.: Vpair-aerial visual place recognition and localization in large-scale outdoor environments. arXiv preprint arXiv:2205.11567 (2022)","key":"10_CR25"},{"doi-asserted-by":"crossref","unstructured":"Shavit, Y., Ferens, R., Keller, Y.: Learning multi-scene absolute pose regression with transformers. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 2733\u20132742 (2021)","key":"10_CR26","DOI":"10.1109\/ICCV48922.2021.00273"},{"doi-asserted-by":"crossref","unstructured":"Shotton, J., Glocker, B., Zach, C., Izadi, S., Criminisi, A., Fitzgibbon, A.: Scene coordinate regression forests for camera relocalization in RGB-D images. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 2930\u20132937 (2013)","key":"10_CR27","DOI":"10.1109\/CVPR.2013.377"},{"doi-asserted-by":"crossref","unstructured":"Stenborg, E., Sattler, T., Hammarstrand, L.: Using image sequences for long-term visual localization. In: 2020 International Conference on 3d Vision, pp. 938\u2013948. IEEE (2020)","key":"10_CR28","DOI":"10.1109\/3DV50981.2020.00104"},{"doi-asserted-by":"crossref","unstructured":"Sun, J., Shen, Z., Wang, Y., Bao, H., Zhou, X.: LoFTR: detector-free local feature matching with transformers. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 8922\u20138931 (2021)","key":"10_CR29","DOI":"10.1109\/CVPR46437.2021.00881"},{"issue":"7","key":"10_CR30","doi-asserted-by":"publisher","first-page":"1455","DOI":"10.1109\/TPAMI.2016.2598331","volume":"39","author":"L Sv\u00e4rm","year":"2016","unstructured":"Sv\u00e4rm, L., Enqvist, O., Kahl, F., Oskarsson, M.: City-scale localization for cameras with known vertical direction. IEEE Trans. Pattern Anal. Mach. Intell. 39(7), 1455\u20131461 (2016)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"doi-asserted-by":"crossref","unstructured":"Taira, H., et al.: InLoc: indoor visual localization with dense matching and view synthesis. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 7199\u20137209 (2018)","key":"10_CR31","DOI":"10.1109\/CVPR.2018.00752"},{"unstructured":"Tan, M., Le, Q.: EfficientNet: rethinking model scaling for convolutional neural networks. In: Proceedings of International Conference on Machine Learning, pp. 6105\u20136114. PMLR (2019)","key":"10_CR32"},{"issue":"4","key":"10_CR33","doi-asserted-by":"publisher","first-page":"9207","DOI":"10.1109\/LRA.2022.3187491","volume":"7","author":"A Vallone","year":"2022","unstructured":"Vallone, A., Warburg, F., Hansen, H., Hauberg, S., Civera, J.: Danish airs and grounds: a dataset for aerial-to-street-level place recognition and localization. IEEE Robot. Autom. Lett. 7(4), 9207\u20139214 (2022)","journal-title":"IEEE Robot. Autom. Lett."},{"unstructured":"Vaswani, A., et al.: Attention is all you need. In: Advances in Neural Information Processing Systems, vol. 30 (2017)","key":"10_CR34"},{"doi-asserted-by":"crossref","unstructured":"Walch, F., Hazirbas, C., Leal-Taixe, L., Sattler, T., Hilsenbeck, S., Cremers, D.: Image-based localization using LSTMs for structured feature correlation. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 627\u2013637 (2017)","key":"10_CR35","DOI":"10.1109\/ICCV.2017.75"},{"doi-asserted-by":"crossref","unstructured":"Wang, B., Chen, C., Lu, C.X., Zhao, P., Trigoni, N., Markham, A.: AtLoc: attention guided camera localization. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol. 34, pp. 10393\u201310401 (2020)","key":"10_CR36","DOI":"10.1609\/aaai.v34i06.6608"},{"doi-asserted-by":"crossref","unstructured":"Wu, J., Ma, L., Hu, X.: Delving deeper into convolutional neural networks for camera relocalization. In: 2017 IEEE International Conference on Robotics and Automation, pp. 5644\u20135651. IEEE (2017)","key":"10_CR37","DOI":"10.1109\/ICRA.2017.7989663"},{"doi-asserted-by":"crossref","unstructured":"Xue, F., Wang, X., Yan, Z., Wang, Q., Wang, J., Zha, H.: Local supports global: deep camera relocalization with sequence enhancement. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 2841\u20132850 (2019)","key":"10_CR38","DOI":"10.1109\/ICCV.2019.00293"},{"doi-asserted-by":"crossref","unstructured":"Xue, F., Wu, X., Cai, S., Wang, J.: Learning multi-view camera relocalization with graph neural networks. In: 2020 IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 11372\u201311381. IEEE (2020)","key":"10_CR39","DOI":"10.1109\/CVPR42600.2020.01139"},{"doi-asserted-by":"crossref","unstructured":"Zhou, K., Chen, C., Wang, B., Saputra, M.R.U., Trigoni, N., Markham, A.: VMLoc: variational fusion for learning-based multimodal camera localization. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol. 35, pp. 6165\u20136173 (2021)","key":"10_CR40","DOI":"10.1609\/aaai.v35i7.16767"}],"container-title":["Lecture Notes in Computer Science","Neural Information Processing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-99-8076-5_10","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,3,12]],"date-time":"2024-03-12T15:14:31Z","timestamp":1710256471000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-99-8076-5_10"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,11,14]]},"ISBN":["9789819980758","9789819980765"],"references-count":40,"URL":"https:\/\/doi.org\/10.1007\/978-981-99-8076-5_10","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2023,11,14]]},"assertion":[{"value":"14 November 2023","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ICONIP","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Neural Information Processing","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Changsha","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2023","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"20 November 2023","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"23 November 2023","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"30","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"iconip2023","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/iconip2023.org\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Single-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"EasyChair","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"1274","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"650","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"51% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"4.14","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"2.46","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}