{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,23]],"date-time":"2025-12-23T05:32:28Z","timestamp":1766467948381,"version":"3.40.3"},"publisher-location":"Singapore","reference-count":46,"publisher":"Springer Nature Singapore","isbn-type":[{"type":"print","value":"9789819981809"},{"type":"electronic","value":"9789819981816"}],"license":[{"start":{"date-parts":[[2023,11,27]],"date-time":"2023-11-27T00:00:00Z","timestamp":1701043200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,11,27]],"date-time":"2023-11-27T00:00:00Z","timestamp":1701043200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024]]},"DOI":"10.1007\/978-981-99-8181-6_42","type":"book-chapter","created":{"date-parts":[[2023,11,26]],"date-time":"2023-11-26T23:02:30Z","timestamp":1701039750000},"page":"552-564","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["PyraBiNet: A Hybrid Semantic Segmentation Network Combining PVT and\u00a0BiSeNet for\u00a0Deformable Objects in\u00a0Indoor Environments"],"prefix":"10.1007","author":[{"given":"Zehan","family":"Tan","sequence":"first","affiliation":[]},{"given":"Weidong","family":"Yang","sequence":"additional","affiliation":[]},{"given":"Zhiwei","family":"Zhang","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2023,11,27]]},"reference":[{"key":"42_CR1","doi-asserted-by":"publisher","first-page":"137","DOI":"10.1007\/s10462-020-09854-1","volume":"54","author":"S Asgari Taghanaki","year":"2021","unstructured":"Asgari Taghanaki, S., Abhishek, K., Cohen, J.P., Cohen-Adad, J., Hamarneh, G.: Deep semantic segmentation of natural and medical images: a review. Artif. Intell. Rev. 54, 137\u2013178 (2021)","journal-title":"Artif. Intell. Rev."},{"key":"42_CR2","first-page":"9355","volume":"34","author":"X Chu","year":"2021","unstructured":"Chu, X., et al.: Twins: Revisiting the design of spatial attention in vision transformers. Adv. Neural. Inf. Process. Syst. 34, 9355\u20139366 (2021)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"issue":"2","key":"42_CR3","doi-asserted-by":"publisher","first-page":"497","DOI":"10.3390\/app10020497","volume":"10","author":"J Crespo","year":"2020","unstructured":"Crespo, J., Castillo, J.C., Mozos, O.M., Barber, R.: Semantic information for robot navigation: A survey. Appl. Sci. 10(2), 497 (2020)","journal-title":"Appl. Sci."},{"key":"42_CR4","doi-asserted-by":"crossref","unstructured":"Deng, J., Dong, W., Socher, R., Li, L.J., Li, K., Fei-Fei, L.: Imagenet: a large-scale hierarchical image database. In: 2009 IEEE Conference on Computer Vision and Pattern Recognition, pp. 248\u2013255. Ieee (2009)","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"42_CR5","unstructured":"Dosovitskiy, A., et al.: An image is worth 16x16 words: Transformers for image recognition at scale. arXiv preprint arXiv:2010.11929 (2020)"},{"issue":"3","key":"42_CR6","doi-asserted-by":"publisher","first-page":"1341","DOI":"10.1109\/TITS.2020.2972974","volume":"22","author":"D Feng","year":"2020","unstructured":"Feng, D., et al.: Deep multi-modal object detection and semantic segmentation for autonomous driving: datasets, methods, and challenges. IEEE Trans. Intell. Transp. Syst. 22(3), 1341\u20131360 (2020)","journal-title":"IEEE Trans. Intell. Transp. Syst."},{"key":"42_CR7","doi-asserted-by":"publisher","unstructured":"Gao, L., Nie, D., Li, B., Ren, X.: Doubly-fused vit: Fuse information from vision transformer doubly with local representation. In: Computer Vision-ECCV 2022: 17th European Conference, Tel Aviv, Israel, October 23\u201327, 2022, Proceedings, Part XXIII, pp. 744\u2013761. Springer (2022). https:\/\/doi.org\/10.1007\/978-3-031-20050-2_43","DOI":"10.1007\/978-3-031-20050-2_43"},{"key":"42_CR8","doi-asserted-by":"crossref","unstructured":"Graham, B., El-Nouby, A., Touvron, H., Stock, P., Joulin, A., J\u00e9gou, H., Douze, M.: Levit: a vision transformer in convnet\u2019s clothing for faster inference. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 12259\u201312269 (2021)","DOI":"10.1109\/ICCV48922.2021.01204"},{"key":"42_CR9","unstructured":"Guo, M.H., Lu, C.Z., Hou, Q., Liu, Z., Cheng, M.M., Hu, S.M.: Segnext: rethinking convolutional attention design for semantic segmentation. arXiv preprint arXiv:2209.08575 (2022)"},{"key":"42_CR10","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 770\u2013778 (2016)","DOI":"10.1109\/CVPR.2016.90"},{"key":"42_CR11","unstructured":"Ho, J., Kalchbrenner, N., Weissenborn, D., Salimans, T.: Axial attention in multidimensional transformers. arXiv preprint arXiv:1912.12180 (2019)"},{"key":"42_CR12","doi-asserted-by":"crossref","unstructured":"Howard, A., et al.: Searching for mobilenetv3. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 1314\u20131324 (2019)","DOI":"10.1109\/ICCV.2019.00140"},{"key":"42_CR13","unstructured":"Howard, A.G., et al.: Mobilenets: efficient convolutional neural networks for mobile vision applications. arXiv preprint arXiv:1704.04861 (2017)"},{"issue":"10s","key":"42_CR14","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3505244","volume":"54","author":"S Khan","year":"2022","unstructured":"Khan, S., Naseer, M., Hayat, M., Zamir, S.W., Khan, F.S., Shah, M.: Transformers in vision: a survey. ACM Comput. Surv. (CSUR) 54(10s), 1\u201341 (2022)","journal-title":"ACM Comput. Surv. (CSUR)"},{"key":"42_CR15","doi-asserted-by":"crossref","unstructured":"Kim, W., Seok, J.: Indoor semantic segmentation for robot navigating on mobile. In: 2018 Tenth International Conference on Ubiquitous and Future Networks (ICUFN), pp. 22\u201325. IEEE (2018)","DOI":"10.1109\/ICUFN.2018.8436956"},{"key":"42_CR16","doi-asserted-by":"crossref","unstructured":"Kirillov, A., Girshick, R., He, K., Doll\u00e1r, P.: Panoptic feature pyramid networks. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 6399\u20136408 (2019)","DOI":"10.1109\/CVPR.2019.00656"},{"key":"42_CR17","doi-asserted-by":"publisher","first-page":"302","DOI":"10.1007\/s11263-008-0202-0","volume":"82","author":"P Kohli","year":"2009","unstructured":"Kohli, P., Ladick\u1ef3, L., Torr, P.H.: Robust higher order potentials for enforcing label consistency. Int. J. Comput. Vision 82, 302\u2013324 (2009)","journal-title":"Int. J. Comput. Vision"},{"key":"42_CR18","doi-asserted-by":"crossref","unstructured":"Ladick\u1ef3, L., Russell, C., Kohli, P., Torr, P.H.: Associative hierarchical crfs for object class image segmentation. In: 2009 IEEE 12th International Conference on Computer Vision, pp. 739\u2013746. IEEE (2009)","DOI":"10.1109\/ICCV.2009.5459248"},{"key":"42_CR19","doi-asserted-by":"crossref","unstructured":"Liu, Y., et al.: A survey of visual transformers. IEEE Trans. Neural Networks Learn. Syst. (2023)","DOI":"10.1109\/TNNLS.2022.3227717"},{"key":"42_CR20","doi-asserted-by":"crossref","unstructured":"Liu, Z., et al.: Swin transformer: hierarchical vision transformer using shifted windows. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 10012\u201310022 (2021)","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"42_CR21","doi-asserted-by":"crossref","unstructured":"Long, J., Shelhamer, E., Darrell, T.: Fully convolutional networks for semantic segmentation. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 3431\u20133440 (2015)","DOI":"10.1109\/CVPR.2015.7298965"},{"key":"42_CR22","unstructured":"Loshchilov, I., Hutter, F.: Decoupled weight decay regularization. arXiv preprint arXiv:1711.05101 (2017)"},{"key":"42_CR23","unstructured":"Mehta, S., Rastegari, M.: Mobilevit: light-weight, general-purpose, and mobile-friendly vision transformer. arXiv preprint arXiv:2110.02178 (2021)"},{"key":"42_CR24","unstructured":"Mehta, S., Rastegari, M.: Separable self-attention for mobile vision transformers. arXiv preprint arXiv:2206.02680 (2022)"},{"key":"42_CR25","doi-asserted-by":"publisher","first-page":"626","DOI":"10.1016\/j.neucom.2022.01.005","volume":"493","author":"Y Mo","year":"2022","unstructured":"Mo, Y., Wu, Y., Yang, X., Liu, F., Liao, Y.: Review the state-of-the-art technologies of semantic segmentation based on deep learning. Neurocomputing 493, 626\u2013646 (2022)","journal-title":"Neurocomputing"},{"key":"42_CR26","doi-asserted-by":"crossref","unstructured":"Pan, H., Hong, Y., Sun, W., Jia, Y.: Deep dual-resolution networks for real-time and accurate semantic segmentation of traffic scenes. IEEE Trans. Intell. Transp. Syst. (2022)","DOI":"10.1109\/TITS.2022.3228042"},{"key":"42_CR27","doi-asserted-by":"publisher","unstructured":"Russell, B.C., Torralba, A., Murphy, K.P., Freeman, W.T.: Labelme: a database and web-based tool for image. Int. J. of Comput. Vis. 77(1) (2008). https:\/\/doi.org\/10.1007\/s11263-007-0090-8","DOI":"10.1007\/s11263-007-0090-8"},{"key":"42_CR28","doi-asserted-by":"crossref","unstructured":"Sandler, M., Howard, A., Zhu, M., Zhmoginov, A., Chen, L.C.: Mobilenetv 2: Inverted residuals and linear bottlenecks. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 4510\u20134520 (2018)","DOI":"10.1109\/CVPR.2018.00474"},{"key":"42_CR29","doi-asserted-by":"publisher","first-page":"2","DOI":"10.1007\/s11263-007-0109-1","volume":"81","author":"J Shotton","year":"2009","unstructured":"Shotton, J., Winn, J., Rother, C., Criminisi, A.: Textonboost for image understanding: multi-class object recognition and segmentation by jointly modeling texture, layout, and context. Int. J. Comput. Vision 81, 2\u201323 (2009)","journal-title":"Int. J. Comput. Vision"},{"key":"42_CR30","unstructured":"Touvron, H., Cord, M., Douze, M., Massa, F., Sablayrolles, A., J\u00e9gou, H.: Training data-efficient image transformers & distillation through attention. In: International Conference on Machine Learning, pp. 10347\u201310357. PMLR (2021)"},{"key":"42_CR31","doi-asserted-by":"publisher","first-page":"33","DOI":"10.1016\/j.neucom.2023.02.025","volume":"532","author":"TH Tsai","year":"2023","unstructured":"Tsai, T.H., Tseng, Y.W.: Bisenet v3: bilateral segmentation network with coordinate attention for real-time semantic segmentation. Neurocomputing 532, 33\u201342 (2023)","journal-title":"Neurocomputing"},{"issue":"1","key":"42_CR32","doi-asserted-by":"publisher","first-page":"2032924","DOI":"10.1080\/08839514.2022.2032924","volume":"36","author":"I Ulku","year":"2022","unstructured":"Ulku, I., Akag\u00fcnd\u00fcz, E.: A survey on deep learning-based architectures for semantic segmentation on 2d images. Appl. Artif. Intell. 36(1), 2032924 (2022)","journal-title":"Appl. Artif. Intell."},{"key":"42_CR33","unstructured":"Wadekar, S.N., Chaurasia, A.: Mobilevitv3: mobile-friendly vision transformer with simple and effective fusion of local, global and input features. arXiv preprint arXiv:2209.15159 (2022)"},{"key":"42_CR34","doi-asserted-by":"crossref","unstructured":"Wang, W., et al.: Pyramid vision transformer: a versatile backbone for dense prediction without convolutions. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 568\u2013578 (2021)","DOI":"10.1109\/ICCV48922.2021.00061"},{"key":"42_CR35","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"3","DOI":"10.1007\/978-3-030-01234-2_1","volume-title":"Computer Vision \u2013 ECCV 2018","author":"S Woo","year":"2018","unstructured":"Woo, S., Park, J., Lee, J.-Y., Kweon, I.S.: CBAM: convolutional block attention module. In: Ferrari, V., Hebert, M., Sminchisescu, C., Weiss, Y. (eds.) ECCV 2018. LNCS, vol. 11211, pp. 3\u201319. Springer, Cham (2018). https:\/\/doi.org\/10.1007\/978-3-030-01234-2_1"},{"key":"42_CR36","doi-asserted-by":"crossref","unstructured":"Wu, H., Xiao, B., Codella, N., Liu, M., Dai, X., Yuan, L., Zhang, L.: Cvt: introducing convolutions to vision transformers. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 22\u201331 (2021)","DOI":"10.1109\/ICCV48922.2021.00009"},{"key":"42_CR37","doi-asserted-by":"crossref","unstructured":"Xu, J., Xiong, Z., Bhattacharyya, S.P.: Pidnet: a real-time semantic segmentation network inspired by pid controllers. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 19529\u201319539 (2023)","DOI":"10.1109\/CVPR52729.2023.01871"},{"key":"42_CR38","doi-asserted-by":"crossref","unstructured":"Yang, C., et al.: Lite vision transformer with enhanced self-attention. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 11998\u201312008 (2022)","DOI":"10.1109\/CVPR52688.2022.01169"},{"key":"42_CR39","doi-asserted-by":"crossref","unstructured":"Yao, J., Fidler, S., Urtasun, R.: Describing the scene as a whole: Joint object detection, scene classification and semantic segmentation. In: 2012 IEEE Conference on Computer Vision and Pattern Recognition, pp. 702\u2013709. IEEE (2012)","DOI":"10.1109\/CVPR.2012.6247739"},{"key":"42_CR40","doi-asserted-by":"publisher","first-page":"3051","DOI":"10.1007\/s11263-021-01515-2","volume":"129","author":"C Yu","year":"2021","unstructured":"Yu, C., Gao, C., Wang, J., Yu, G., Shen, C., Sang, N.: Bisenet v2: bilateral network with guided aggregation for real-time semantic segmentation. Int. J. Comput. Vision 129, 3051\u20133068 (2021)","journal-title":"Int. J. Comput. Vision"},{"key":"42_CR41","doi-asserted-by":"crossref","unstructured":"Yu, C., Wang, J., Peng, C., Gao, C., Yu, G., Sang, N.: Bisenet: bilateral segmentation network for real-time semantic segmentation. In: Proceedings of the European Conference on Computer Vision (ECCV), pp. 325\u2013341 (2018)","DOI":"10.1007\/978-3-030-01261-8_20"},{"key":"42_CR42","doi-asserted-by":"crossref","unstructured":"Yu, W., et al.: Metaformer is actually what you need for vision. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 10819\u201310829 (2022)","DOI":"10.1109\/CVPR52688.2022.01055"},{"key":"42_CR43","doi-asserted-by":"crossref","unstructured":"Yuan, K., Guo, S., Liu, Z., Zhou, A., Yu, F., Wu, W.: Incorporating convolution designs into visual transformers. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 579\u2013588 (2021)","DOI":"10.1109\/ICCV48922.2021.00062"},{"key":"42_CR44","doi-asserted-by":"crossref","unstructured":"Yuan, L., et al.: Tokens-to-token vit: training vision transformers from scratch on imagenet. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 558\u2013567 (2021)","DOI":"10.1109\/ICCV48922.2021.00060"},{"key":"42_CR45","doi-asserted-by":"crossref","unstructured":"Zhang, W., et al.: Topformer: token pyramid transformer for mobile semantic segmentation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 12083\u201312093 (2022)","DOI":"10.1109\/CVPR52688.2022.01177"},{"key":"42_CR46","doi-asserted-by":"crossref","unstructured":"Zhou, B., Zhao, H., Puig, X., Fidler, S., Barriuso, A., Torralba, A.: Scene parsing through ade20k dataset. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 633\u2013641 (2017)","DOI":"10.1109\/CVPR.2017.544"}],"container-title":["Communications in Computer and Information Science","Neural Information Processing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-99-8181-6_42","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,11,3]],"date-time":"2024-11-03T10:58:21Z","timestamp":1730631501000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-99-8181-6_42"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,11,27]]},"ISBN":["9789819981809","9789819981816"],"references-count":46,"URL":"https:\/\/doi.org\/10.1007\/978-981-99-8181-6_42","relation":{},"ISSN":["1865-0929","1865-0937"],"issn-type":[{"type":"print","value":"1865-0929"},{"type":"electronic","value":"1865-0937"}],"subject":[],"published":{"date-parts":[[2023,11,27]]},"assertion":[{"value":"27 November 2023","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ICONIP","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Neural Information Processing","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Changsha","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2023","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"20 November 2023","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"23 November 2023","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"30","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"iconip2023","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/iconip2023.org\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Single-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"EasyChair","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"1274","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"650","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"51% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"4.14","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"2.46","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}