{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,15]],"date-time":"2025-11-15T10:33:44Z","timestamp":1763202824060,"version":"3.40.3"},"publisher-location":"Cham","reference-count":24,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783031510229"},{"type":"electronic","value":"9783031510236"}],"license":[{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024]]},"DOI":"10.1007\/978-3-031-51023-6_32","type":"book-chapter","created":{"date-parts":[[2024,1,23]],"date-time":"2024-01-23T07:02:36Z","timestamp":1705993356000},"page":"383-394","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":4,"title":["Optimize Vision Transformer Architecture via\u00a0Efficient Attention Modules: A Study on\u00a0the\u00a0Monocular Depth Estimation Task"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0009-0008-8897-3910","authenticated-orcid":false,"given":"Claudio","family":"Schiavella","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0009-0008-5266-8957","authenticated-orcid":false,"given":"Lorenzo","family":"Cirillo","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9393-5248","authenticated-orcid":false,"given":"Lorenzo","family":"Papa","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1886-3491","authenticated-orcid":false,"given":"Paolo","family":"Russo","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6461-1391","authenticated-orcid":false,"given":"Irene","family":"Amerini","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,1,24]]},"reference":[{"key":"32_CR1","unstructured":"Ba, J.L., Kiros, J.R., Hinton, G.E.: Layer normalization. arXiv preprint arXiv:1607.06450 (2016)"},{"issue":"10","key":"32_CR2","doi-asserted-by":"publisher","first-page":"16940","DOI":"10.1109\/TITS.2022.3160741","volume":"23","author":"X Dong","year":"2022","unstructured":"Dong, X., et al.: Towards real-time monocular depth estimation for robotics: a survey. IEEE Trans. Intell. Transport. Syst. 23(10), 16940\u201316961 (2022)","journal-title":"IEEE Trans. Intell. Transport. Syst."},{"key":"32_CR3","unstructured":"Eigen, D., Puhrsch, C., Fergus, R.: Depth map prediction from a single image using a multi-scale deep network. Adv. Neural Inf. Process. Syst. 27 (2014)"},{"key":"32_CR4","doi-asserted-by":"crossref","unstructured":"Han, K., et al.: A survey on vision transformer. IEEE Trans. Pattern Anal. Mach. Intell. 45(1), 87\u2013110 (2022)","DOI":"10.1109\/TPAMI.2022.3152247"},{"key":"32_CR5","unstructured":"Kingma, D.P., Ba, J.: Adam: a method for stochastic optimization. arXiv preprint arXiv:1412.6980 (2014)"},{"key":"32_CR6","unstructured":"Koohpayegani, S.A., Pirsiavash, H.: Sima: simple softmax-free attention for vision transformers. arXiv preprint arXiv:2206.08898 (2022)"},{"key":"32_CR7","unstructured":"Li, Z., et al.: Binsformer: revisiting adaptive bins for monocular depth estimation. arXiv preprint arXiv:2204.00987 (2022)"},{"key":"32_CR8","first-page":"21297","volume":"34","author":"L Jiachen","year":"2021","unstructured":"Jiachen, L., et al.: Soft: Softmax-free transformer with linear complexity. Adv. Neural. Inf. Process. Syst. 34, 21297\u201321309 (2021)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"32_CR9","doi-asserted-by":"crossref","unstructured":"Makarov, I., Borisenko, G.: Depth inpainting via vision transformer. In: 2021 IEEE International Symposium on Mixed and Augmented Reality Adjunct (ISMAR-Adjunct), pp. 286\u2013291. IEEE (2021)","DOI":"10.1109\/ISMAR-Adjunct54149.2021.00065"},{"key":"32_CR10","unstructured":"Mehta, S., Rastegari, M.: Mobilevit: light-weight, generalpurpose, and mobile-friendly vision transformer. arXiv preprint arXiv:2110.02178 (2021)"},{"key":"32_CR11","doi-asserted-by":"crossref","unstructured":"Papa, L., Russo, P., Amerini, I.: METER: a mobile vision transformer architecture for monocular depth estimation. IEEE Trans. Circuits Syst. Video Technol. (2023)","DOI":"10.1109\/TCSVT.2023.3260310"},{"key":"32_CR12","doi-asserted-by":"crossref","unstructured":"Papa, L., et al.: Lightweight and energy-aware monocular depth estimation models for IoT embedded devices: challenges and performances in terrestrial and underwater scenarios. Sensors 23(4), 2223 (2023)","DOI":"10.3390\/s23042223"},{"key":"32_CR13","doi-asserted-by":"publisher","first-page":"44881","DOI":"10.1109\/ACCESS.2022.3170425","volume":"10","author":"L Papa","year":"2022","unstructured":"Papa, L., et al.: Speed: separable pyramidal pooling encoder-decoder for real-time monocular depth estimation on low-resource settings. IEEE Access 10, 44881\u201344890 (2022)","journal-title":"IEEE Access"},{"key":"32_CR14","doi-asserted-by":"crossref","unstructured":"Poggi, M., et al.: Towards real-time unsupervised monocular depth estimation on CPU. In: 2018 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS), pp. 5848\u20135854. IEEE (2018)","DOI":"10.1109\/IROS.2018.8593814"},{"key":"32_CR15","doi-asserted-by":"crossref","unstructured":"Ranftl, R., Bochkovskiy, A., Koltun, V.: Vision transformers for dense prediction. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 12179\u201312188 (2021)","DOI":"10.1109\/ICCV48922.2021.01196"},{"key":"32_CR16","doi-asserted-by":"crossref","unstructured":"Sandler, M., et al.: Mobilenetv2: inverted residuals and linear bottlenecks. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 4510\u20134520 (2018)","DOI":"10.1109\/CVPR.2018.00474"},{"key":"32_CR17","doi-asserted-by":"publisher","unstructured":"Silberman, N., Hoiem, D., Kohli, P., Fergus, R.: Indoor segmentation and support inference from RGBD images. In: Fitzgibbon, A., Lazebnik, S., Perona, P., Sato, Y., Schmid, C. (eds.) Computer Vision \u2013 ECCV 2012. LNCS, vol. 7576, pp. 746\u2013760. Springer, Heidelberg (2012). https:\/\/doi.org\/10.1007\/978-3-642-33715-4_54","DOI":"10.1007\/978-3-642-33715-4_54"},{"key":"32_CR18","unstructured":"Vaswani, A., et al.: Attention is all you need. In: Advances in Neural Information Processing Systems, pp. 5998\u20136008 (2017)"},{"key":"32_CR19","doi-asserted-by":"crossref","unstructured":"Wang, W., et al.: Pyramid vision transformer: a versatile backbone for dense prediction without convolutions. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 568\u2013578 (2021)","DOI":"10.1109\/ICCV48922.2021.00061"},{"key":"32_CR20","doi-asserted-by":"crossref","unstructured":"Wofk, D., et al.: Fastdepth: fast monocular depth estimation on embedded systems. In: 2019 International Conference on Robotics and Automation (ICRA), pp. 6101\u20136108. IEEE (2019)","DOI":"10.1109\/ICRA.2019.8794182"},{"key":"32_CR21","unstructured":"Wu, H., et al.: Flowformer: linearizing transformers with conservation flows. arXiv preprint arXiv:2202.06258 (2022)"},{"key":"32_CR22","doi-asserted-by":"crossref","unstructured":"Yu, W., et al.: Metaformer is actually what you need for vision. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 10819\u201310829 (2022)","DOI":"10.1109\/CVPR52688.2022.01055"},{"key":"32_CR23","doi-asserted-by":"crossref","unstructured":"Yucel, M.K., et al.: Real-time monocular depth estimation with sparse supervision on mobile. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 2428\u20132437 (2021)","DOI":"10.1109\/CVPRW53098.2021.00275"},{"key":"32_CR24","doi-asserted-by":"crossref","unstructured":"Zhao, C.Q., Sun, Q.Y., Zhang, C.Z., Tang, Y., Qian, F.: Monocular depth estimation based on deep learning: an overview. Sci. China Technol. Sci. 63(9), 1612\u20131627 (2020)","DOI":"10.1007\/s11431-020-1582-8"}],"container-title":["Lecture Notes in Computer Science","Image Analysis and Processing - ICIAP 2023 Workshops"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-51023-6_32","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,1,23]],"date-time":"2024-01-23T07:07:50Z","timestamp":1705993670000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-51023-6_32"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024]]},"ISBN":["9783031510229","9783031510236"],"references-count":24,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-51023-6_32","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2024]]},"assertion":[{"value":"24 January 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ICIAP","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Image Analysis and Processing","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Udine","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2023","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"11 September 2023","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"15 September 2023","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"22","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"iciap2023","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/www.iciap2023.org\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"CMT","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"144","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"82","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"13","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"57% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"https:\/\/iciap2023.org\/satellite-event\/workshops\/","order":10,"name":"additional_info_on_review_process","label":"Additional Info on Review Process","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}