{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,19]],"date-time":"2026-02-19T15:26:49Z","timestamp":1771514809720,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":35,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,1,19]],"date-time":"2024-01-19T00:00:00Z","timestamp":1705622400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/501100006374","name":"Beijing Information Science and Technology University","doi-asserted-by":"publisher","award":["202311232025"],"award-info":[{"award-number":["202311232025"]}],"id":[{"id":"10.13039\/501100006374","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,1,19]]},"DOI":"10.1145\/3647649.3647682","type":"proceedings-article","created":{"date-parts":[[2024,5,3]],"date-time":"2024-05-03T19:40:20Z","timestamp":1714765220000},"page":"199-204","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":5,"title":["PVT-Unet: Road Extraction in Remote Sensing Imagery Based on U-shaped Pyramid Vision Transformer Neural Network"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0003-7783-1259","authenticated-orcid":false,"given":"Youqiang","family":"Xiong","sequence":"first","affiliation":[{"name":"Beijing Information Science and Technology University, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9823-0565","authenticated-orcid":false,"given":"Lu","family":"Li","sequence":"additional","affiliation":[{"name":"Beijing Information Science and Technology University, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-5792-2710","authenticated-orcid":false,"given":"Haoqi","family":"Wang","sequence":"additional","affiliation":[{"name":"Beijing Information Science and Technology University, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-5350-0221","authenticated-orcid":false,"given":"Tianliang","family":"Ma","sequence":"additional","affiliation":[{"name":"Beijing Information Science and Technology University, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-2253-4005","authenticated-orcid":false,"given":"Zhongqi","family":"Wang","sequence":"additional","affiliation":[{"name":"Beijing Information Science and Technology University, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-4116-9585","authenticated-orcid":false,"given":"Yuping","family":"Yang","sequence":"additional","affiliation":[{"name":"Beijing Information Science and Technology University, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-0266-3689","authenticated-orcid":false,"given":"Shuo","family":"Wang","sequence":"additional","affiliation":[{"name":"Beijing Information Science and Technology University, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-3374-725X","authenticated-orcid":false,"given":"Shubo","family":"Zhang","sequence":"additional","affiliation":[{"name":"Beijing Information Science and Technology University, China"}]}],"member":"320","published-online":{"date-parts":[[2024,5,3]]},"reference":[{"key":"e_1_3_2_1_1_1","article-title":"A semantics-geometry framework for road extraction from remote sensing images","author":"Qiu D.","year":"2023","unstructured":"L. Qiu, D. Yu, C. Zhang, and X. Zhang, \u201cA semantics-geometry framework for road extraction from remote sensing images,\u201d IEEE Geoscience and Remote Sensing Letters, 2023.","journal-title":"IEEE Geoscience and Remote Sensing Letters"},{"key":"e_1_3_2_1_2_1","first-page":"1","article-title":"Ddu-net: Dual-decoder-u-net for road extraction using highresolution remote sensing images","volume":"60","author":"Wang Y.","year":"2022","unstructured":"Y. Wang, Y. Peng, W. Li, G. C. Alexandropoulos, J. Yu, D. Ge, and W. Xiang, \u201cDdu-net: Dual-decoder-u-net for road extraction using highresolution remote sensing images,\u201d IEEE Transactions on Geoscience and Remote Sensing, vol. 60, pp. 1\u201312, 2022.","journal-title":"IEEE Transactions on Geoscience and Remote Sensing"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1109\/TGRS.2023.3303947"},{"key":"e_1_3_2_1_4_1","volume-title":"Road centerline extraction from high-resolution imagery based on shape features and multivariate adaptive regression splines","author":"Miao W.","unstructured":"Z. Miao, W. Shi, H. Zhang, and X. Wang, \u201cRoad centerline extraction from high-resolution imagery based on shape features and multivariate adaptive regression splines,\u201d IEEE geoscience and remote sensing letters, vol. 10, no. 3, pp. 583\u2013587, 2012."},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/LGRS.2013.2282469"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/LGRS.2015.2426112"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/LGRS.2016.2524025"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICIP.2015.7351549"},{"key":"e_1_3_2_1_9_1","first-page":"48","volume-title":"Semantic road segmentation using deep learning","author":"Pham","year":"2020","unstructured":"T. Pham, \u201cSemantic road segmentation using deep learning,\u201d in 2020 Applying New Technology in Green Buildings (ATiGB). IEEE, 2021, pp. 45\u201348."},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/CCNS50731.2020.00016"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/ATiGB56486.2022.9984093"},{"key":"e_1_3_2_1_12_1","first-page":"1","article-title":"Nl-linknet: Toward lighter but more accurate road extraction with nonlocal operations","volume":"19","author":"Wang J.","year":"2021","unstructured":"Y. Wang, J. Seo, and T. Jeon, \u201cNl-linknet: Toward lighter but more accurate road extraction with nonlocal operations,\u201d IEEE Geoscience and Remote Sensing Letters, vol. 19, pp. 1\u20135, 2021.","journal-title":"IEEE Geoscience and Remote Sensing Letters"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/LGRS.2017.2672734"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/LGRS.2018.2802944"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/IGARSS47720.2021.9553728"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1109\/IGARSS.2019.8898392"},{"key":"e_1_3_2_1_17_1","volume-title":"Sequence to sequence learning with neural networks[J]. Advances in neural information processing systems","author":"Sutskever I","year":"2014","unstructured":"Sutskever I, Vinyals O, Le Q V. Sequence to sequence learning with neural networks[J]. Advances in neural information processing systems, 2014, 27."},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11431-020-1647-3"},{"key":"e_1_3_2_1_19_1","volume-title":"On the relationship between self-attention and convolutional layers[J]. arXiv preprint arXiv:1911.03584","author":"Cordonnier J B","year":"2019","unstructured":"Cordonnier J B, Loukas A, Jaggi M. On the relationship between self-attention and convolutional layers[J]. arXiv preprint arXiv:1911.03584, 2019."},{"key":"e_1_3_2_1_20_1","volume-title":"An image is worth 16x16 words: Transformers for image recognition at scale[J]. arXiv preprint arXiv:2010.11929","author":"Dosovitskiy A","year":"2020","unstructured":"Dosovitskiy A, Beyer L, Kolesnikov A, An image is worth 16x16 words: Transformers for image recognition at scale[J]. arXiv preprint arXiv:2010.11929, 2020."},{"key":"e_1_3_2_1_21_1","volume-title":"End-to-end object detection with transformers[C]\/\/European conference on computer vision","author":"Carion N","year":"2020","unstructured":"Carion N, Massa F, Synnaeve G, End-to-end object detection with transformers[C]\/\/European conference on computer vision. Cham: Springer International Publishing, 2020: 213-229."},{"key":"e_1_3_2_1_22_1","volume-title":"Adam H","author":"Wang H","year":"2021","unstructured":"Wang H, Zhu Y, Adam H, Max-deeplab: End-to-end panoptic segmentation with mask transformers[C]\/\/Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition. 2021: 5463-5474."},{"key":"e_1_3_2_1_23_1","volume-title":"IEEE\/CVF conference on computer vision and pattern recognition. 2021: 8126-8135","author":"Chen X","unstructured":"Chen X, Yan B, Zhu J, Transformer tracking[C]\/\/Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition. 2021: 8126-8135."},{"key":"e_1_3_2_1_24_1","first-page":"14745","article-title":"Two pure transformers can make one strong gan, and that can scale up[J]","volume":"34","author":"Jiang Y","year":"2021","unstructured":"Jiang Y, Chang S, Wang Z. Transgan: Two pure transformers can make one strong gan, and that can scale up[J]. Advances in Neural Information Processing Systems, 2021, 34: 14745-14758.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"crossref","unstructured":"Chen H Wang Y Guo T Pre-trained image processing transformer[C]\/\/Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition. 2021: 12299-12310.","DOI":"10.1109\/CVPR46437.2021.01212"},{"key":"e_1_3_2_1_26_1","first-page":"241","volume-title":"18th International Conference, Munich, Germany, October 5-9, 2015, Proceedings, Part III 18","author":"Ronneberger P.","year":"2015","unstructured":"O. Ronneberger, P. Fischer, and T. Brox, \u201cU-net: Convolutional networks for biomedical image segmentation,\u201d in Medical Image Computing and Computer-Assisted Intervention\u2013MICCAI 2015: 18th International Conference, Munich, Germany, October 5-9, 2015, Proceedings, Part III 18. Springer, 2015, pp. 234\u2013241."},{"key":"e_1_3_2_1_27_1","volume-title":"An image is worth 16x16 words: Transformers for image recognition at scale[J]. arXiv preprint arXiv:2010.11929","author":"Dosovitskiy A","year":"2020","unstructured":"Dosovitskiy A, Beyer L, Kolesnikov A, An image is worth 16x16 words: Transformers for image recognition at scale[J]. arXiv preprint arXiv:2010.11929, 2020."},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"crossref","unstructured":"Wang W Xie E Li X Pyramid vision transformer: A versatile backbone for dense prediction without convolutions[C]\/\/Proceedings of the IEEE\/CVF international conference on computer vision. 2021: 568-578.","DOI":"10.1109\/ICCV48922.2021.00061"},{"key":"e_1_3_2_1_29_1","volume-title":"Attention is all you need[J]. Advances in neural information processing systems","author":"Vaswani A","year":"2017","unstructured":"Vaswani A, Shazeer N, Parmar N, Attention is all you need[J]. Advances in neural information processing systems, 2017, 30."},{"key":"e_1_3_2_1_30_1","volume-title":"Recurrent neural network regularization[J]. arXiv preprint arXiv:1409.2329","author":"Zaremba W","year":"2014","unstructured":"Zaremba W, Sutskever I, Vinyals O. Recurrent neural network regularization[J]. arXiv preprint arXiv:1409.2329, 2014."},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1080\/01431161.2022.2068989"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW.2018.00034"},{"key":"e_1_3_2_1_33_1","first-page":"1","article-title":"Dbranet: Road extraction by dual-branch encoder and regional attention decoder","volume":"19","author":"Chen Y.-X.","year":"2021","unstructured":"S.-B. Chen, Y.-X. Ji, J. Tang, B. Luo, W.-Q. Wang, and K. Lv, \u201cDbranet: Road extraction by dual-branch encoder and regional attention decoder,\u201d IEEE Geoscience and Remote Sensing Letters, vol. 19, pp. 1\u20135, 2021.","journal-title":"IEEE Geoscience and Remote Sensing Letters"},{"key":"e_1_3_2_1_34_1","first-page":"1","article-title":"Multistage attention resu-net for semantic segmentation of fine-resolution remote sensing images","volume":"19","author":"Li S.","year":"2021","unstructured":"R. Li, S. Zheng, C. Duan, J. Su, and C. Zhang, \u201cMultistage attention resu-net for semantic segmentation of fine-resolution remote sensing images,\u201d IEEE Geoscience and Remote Sensing Letters, vol. 19, pp. 1\u20135, 2021.","journal-title":"IEEE Geoscience and Remote Sensing Letters"},{"key":"e_1_3_2_1_35_1","first-page":"077","article-title":"Segformer: Simple and efficient design for semantic segmentation with transformers","volume":"34","author":"Xie W.","year":"2021","unstructured":"E. Xie, W. Wang, Z. Yu, A. Anandkumar, J. M. Alvarez, and P. Luo, \u201cSegformer: Simple and efficient design for semantic segmentation with transformers,\u201d Advances in Neural Information Processing Systems, vol. 34, pp. 12 077\u201312 090, 2021.","journal-title":"Advances in Neural Information Processing Systems"}],"event":{"name":"ICIGP 2024: 2024 the 7th International Conference on Image and Graphics Processing","location":"Beijing China","acronym":"ICIGP 2024"},"container-title":["Proceedings of the 2024 7th International Conference on Image and Graphics Processing"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3647649.3647682","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3647649.3647682","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,1,27]],"date-time":"2026-01-27T14:55:04Z","timestamp":1769525704000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3647649.3647682"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,1,19]]},"references-count":35,"alternative-id":["10.1145\/3647649.3647682","10.1145\/3647649"],"URL":"https:\/\/doi.org\/10.1145\/3647649.3647682","relation":{},"subject":[],"published":{"date-parts":[[2024,1,19]]},"assertion":[{"value":"2024-05-03","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}