{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,1]],"date-time":"2026-05-01T17:00:01Z","timestamp":1777654801728,"version":"3.51.4"},"publisher-location":"New York, NY, USA","reference-count":44,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,10,28]],"date-time":"2024-10-28T00:00:00Z","timestamp":1730073600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"Fundamental Research Funds for the Central Universities under Grant","award":["XTR042021005"],"award-info":[{"award-number":["XTR042021005"]}]},{"name":"National Science and Technology Major Project under Grant","award":["2023ZD0121300"],"award-info":[{"award-number":["2023ZD0121300"]}]},{"name":"Natural Science Foundation of Shaanxi Province under Grant","award":["2022JC-41"],"award-info":[{"award-number":["2022JC-41"]}]},{"name":"National Natural Science Foundation of China under Grants","award":["62088102, 12326608 and 62106192"],"award-info":[{"award-number":["62088102, 12326608 and 62106192"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,10,28]]},"DOI":"10.1145\/3664647.3681021","type":"proceedings-article","created":{"date-parts":[[2024,10,26]],"date-time":"2024-10-26T06:59:33Z","timestamp":1729925973000},"page":"2506-2514","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":5,"title":["Semantic-aware Representation Learning for Homography Estimation"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0003-7975-0006","authenticated-orcid":false,"given":"Yuhan","family":"Liu","sequence":"first","affiliation":[{"name":"Xi'an Jiaotong University, Xi'an, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7145-1245","authenticated-orcid":false,"given":"Qianxin","family":"Huang","sequence":"additional","affiliation":[{"name":"Xi'an Jiaotong University, Xi'an, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1724-7862","authenticated-orcid":false,"given":"Siqi","family":"Hui","sequence":"additional","affiliation":[{"name":"Xi'an Jiaotong University, Xi'an, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3256-1497","authenticated-orcid":false,"given":"Jingwen","family":"Fu","sequence":"additional","affiliation":[{"name":"Xi'an Jiaotong University, Xi'an, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4100-2304","authenticated-orcid":false,"given":"Sanping","family":"Zhou","sequence":"additional","affiliation":[{"name":"Xi'an Jiaotong University, Xi'an, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7382-4949","authenticated-orcid":false,"given":"Kangyi","family":"Wu","sequence":"additional","affiliation":[{"name":"Xi'an Jiaotong University, Xi'an, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8477-8340","authenticated-orcid":false,"given":"Pengna","family":"Li","sequence":"additional","affiliation":[{"name":"Xi'an Jiaotong University, Xi'an, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9434-0617","authenticated-orcid":false,"given":"Jinjun","family":"Wang","sequence":"additional","affiliation":[{"name":"Xi'an Jiaotong University, Xi'an, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2024,10,28]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2012.6248018"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.410"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01114"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19824-3_2"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW.2018.00060"},{"key":"e_1_3_2_1_6_1","volume-title":"Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805","author":"Devlin Jacob","year":"2018","unstructured":"Jacob Devlin, Ming-Wei Chang, Kenton Lee, and Kristina Toutanova. 2018. Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805 (2018)."},{"key":"e_1_3_2_1_7_1","volume-title":"Zo\u00eb Papakipos, Lowik Chanussot, Filip Radenovic, Tomas Jenicek, Maxim Maximov, Laura Leal-Taix\u00e9, Ismail Elezi, et al.","author":"Douze Matthijs","year":"2021","unstructured":"Matthijs Douze, Giorgos Tolias, Ed Pizzi, Zo\u00eb Papakipos, Lowik Chanussot, Filip Radenovic, Tomas Jenicek, Maxim Maximov, Laura Leal-Taix\u00e9, Ismail Elezi, et al. 2021. The 2021 image similarity dataset and challenge. arXiv preprint arXiv:2106.09672 (2021)."},{"key":"e_1_3_2_1_8_1","volume-title":"RoMa: Revisiting Robust Losses for Dense Feature Matching. arXiv preprint arXiv:2305.15404","author":"Edstedt Johan","year":"2023","unstructured":"Johan Edstedt, Qiyu Sun, Georg B\u00f6kman, M\u00e5rten Wadenb\u00e4ck, and Michael Felsberg. 2023. RoMa: Revisiting Robust Losses for Dense Feature Matching. arXiv preprint arXiv:2305.15404 (2023)."},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1145\/1873951.1874088"},{"key":"e_1_3_2_1_10_1","volume-title":"Proceedings of the AAAI conference on artificial intelligence","volume":"37","author":"Giang Khang Truong","year":"2023","unstructured":"Khang Truong Giang, Soohwan Song, and Sungho Jo. 2023. TopicFM: Robust and interpretable topic-assisted feature matching. In Proceedings of the AAAI conference on artificial intelligence, Vol. 37. 2447--2455."},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2016.2607419"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01553"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01714"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00525"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00615"},{"key":"e_1_3_2_1_16_1","volume-title":"SeD: Semantic-Aware Discriminator for Image Super-Resolution. arXiv preprint arXiv:2402.19387","author":"Li Bingchen","year":"2024","unstructured":"Bingchen Li, Xin Li, Hanxin Zhu, Yeying Jin, Ruoyu Feng, Zhizheng Zhang, and Zhibo Chen. 2024. SeD: Semantic-Aware Discriminator for Image Super-Resolution. arXiv preprint arXiv:2402.19387 (2024)."},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00218"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.106"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00876"},{"key":"e_1_3_2_1_20_1","volume-title":"Matcher: Segment anything with one shot using all-purpose feature matching. arXiv preprint arXiv:2305.13310","author":"Liu Yang","year":"2023","unstructured":"Yang Liu, Muzhi Zhu, Hengtao Li, Hao Chen, Xinlong Wang, and Chunhua Shen. 2023. Matcher: Segment anything with one shot using all-purpose feature matching. arXiv preprint arXiv:2305.13310 (2023)."},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1023\/B:VISI.0000029664.99615.94"},{"key":"e_1_3_2_1_22_1","volume-title":"Jose Maria Martinez Montiel, and Juan D Tardos","author":"Mur-Artal Raul","year":"2015","unstructured":"Raul Mur-Artal, Jose Maria Martinez Montiel, and Juan D Tardos. 2015. ORB-SLAM: a versatile and accurate monocular SLAM system. IEEE transactions on robotics, Vol. 31, 5 (2015), 1147--1163."},{"key":"e_1_3_2_1_23_1","unstructured":"Maxime Oquab Timoth\u00e9e Darcet Th\u00e9o Moutakanni Huy Vo Marc Szafraniec Vasil Khalidov Pierre Fernandez Daniel Haziza Francisco Massa Alaaeldin El-Nouby et al. 2023. Dinov2: Learning robust visual features without supervision. arXiv preprint arXiv:2304.07193 (2023)."},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2007.383172"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2008.4587635"},{"key":"e_1_3_2_1_26_1","volume-title":"International conference on machine learning. PMLR, 8748--8763","author":"Radford Alec","year":"2021","unstructured":"Alec Radford, Jong Wook Kim, Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, et al. 2021. Learning transferable visual models from natural language supervision. In International conference on machine learning. PMLR, 8748--8763."},{"key":"e_1_3_2_1_27_1","volume-title":"Martin Humenberger, and Philippe Weinzaepfel.","author":"Revaud Jerome","year":"2019","unstructured":"Jerome Revaud, Cesar De Souza, Martin Humenberger, and Philippe Weinzaepfel. 2019. R2d2: Reliable and repeatable detector and descriptor. Advances in neural information processing systems, Vol. 32 (2019)."},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2011.6126544"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00499"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00881"},{"key":"e_1_3_2_1_31_1","volume-title":"Quadtree attention for vision transformers. arXiv preprint arXiv:2201.02767","author":"Tang Shitao","year":"2022","unstructured":"Shitao Tang, Jiahui Zhang, Siyu Zhu, and Ping Tan. 2022. Quadtree attention for vision transformers. arXiv preprint arXiv:2201.02767 (2022)."},{"key":"e_1_3_2_1_32_1","unstructured":"Khang Truong Giang Soohwan Song and Sungho Jo. 2023. TopicFM: Boosting Accuracy and Efficiency of Topic-Assisted Feature Matching. arXiv e-prints (2023) arXiv-2307."},{"key":"e_1_3_2_1_33_1","volume-title":"Attention is all you need. Advances in neural information processing systems","author":"Vaswani Ashish","year":"2017","unstructured":"Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N Gomez, \u0141ukasz Kaiser, and Illia Polosukhin. 2017. Attention is all you need. Advances in neural information processing systems, Vol. 30 (2017)."},{"key":"e_1_3_2_1_34_1","volume-title":"Karla K Evans, and Michelle R Greene.","author":"Wolfe Jeremy M","year":"2011","unstructured":"Jeremy M Wolfe, Melissa L-H V\u00f5, Karla K Evans, and Michelle R Greene. 2011. Visual search in scenes involves selective and nonselective pathways. Trends in cognitive sciences, Vol. 15, 2 (2011), 77--84."},{"key":"e_1_3_2_1_35_1","volume-title":"Farahnaz Ahmed Wick, and Marc Pomplun","author":"Wu Chia-Chien","year":"2014","unstructured":"Chia-Chien Wu, Farahnaz Ahmed Wick, and Marc Pomplun. 2014. Guidance of visual attention by semantic information in real-world scenes. Frontiers in psychology, Vol. 5 (2014), 54."},{"key":"e_1_3_2_1_36_1","volume-title":"SeeSR: Towards Semantics-Aware Real-World Image Super-Resolution. arXiv preprint arXiv:2311.16518","author":"Wu Rongyuan","year":"2023","unstructured":"Rongyuan Wu, Tao Yang, Lingchen Sun, Zhengqiang Zhang, Shuai Li, and Lei Zhang. 2023. SeeSR: Towards Semantics-Aware Real-World Image Super-Resolution. arXiv preprint arXiv:2311.16518 (2023)."},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00166"},{"key":"e_1_3_2_1_38_1","volume-title":"Depth anything: Unleashing the power of large-scale unlabeled data. arXiv preprint arXiv:2401.10891","author":"Yang Lihe","year":"2024","unstructured":"Lihe Yang, Bingyi Kang, Zilong Huang, Xiaogang Xu, Jiashi Feng, and Hengshuang Zhao. 2024. Depth anything: Unleashing the power of large-scale unlabeled data. arXiv preprint arXiv:2401.10891 (2024)."},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2013.303"},{"key":"e_1_3_2_1_40_1","volume-title":"Proceedings, Part I 16","author":"Zhang Jirong","year":"2020","unstructured":"Jirong Zhang, Chuan Wang, Shuaicheng Liu, Lanpeng Jia, Nianjin Ye, Jue Wang, Ji Zhou, and Jian Sun. 2020. Content-aware unsupervised deep homography estimation. In Computer Vision-ECCV 2020: 16th European Conference, Glasgow, UK, August 23--28, 2020, Proceedings, Part I 16. Springer, 653--669."},{"key":"e_1_3_2_1_41_1","volume-title":"MESA: Matching Everything by Segmenting Anything. arXiv preprint arXiv:2401.16741","author":"Zhang Yesheng","year":"2024","unstructured":"Yesheng Zhang and Xu Zhao. 2024. MESA: Matching Everything by Segmenting Anything. arXiv preprint arXiv:2401.16741 (2024)."},{"key":"e_1_3_2_1_42_1","volume-title":"Searching from Area to Point: A Hierarchical Framework for Semantic-Geometric Combined Feature Matching. arXiv preprint arXiv:2305.00194","author":"Zhang Yesheng","year":"2023","unstructured":"Yesheng Zhang, Xu Zhao, and Dahong Qian. 2023. Searching from Area to Point: A Hierarchical Framework for Semantic-Geometric Combined Feature Matching. arXiv preprint arXiv:2305.00194 (2023)."},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1109\/34.888718"},{"key":"e_1_3_2_1_44_1","volume-title":"Coslam: Collaborative visual slam in dynamic environments","author":"Zou Danping","year":"2012","unstructured":"Danping Zou and Ping Tan. 2012. Coslam: Collaborative visual slam in dynamic environments. IEEE transactions on pattern analysis and machine intelligence, Vol. 35, 2 (2012), 354--366."}],"event":{"name":"MM '24: The 32nd ACM International Conference on Multimedia","location":"Melbourne VIC Australia","acronym":"MM '24","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 32nd ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3664647.3681021","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3664647.3681021","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T01:17:37Z","timestamp":1750295857000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3664647.3681021"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,28]]},"references-count":44,"alternative-id":["10.1145\/3664647.3681021","10.1145\/3664647"],"URL":"https:\/\/doi.org\/10.1145\/3664647.3681021","relation":{},"subject":[],"published":{"date-parts":[[2024,10,28]]},"assertion":[{"value":"2024-10-28","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}