{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,9]],"date-time":"2026-06-09T15:28:38Z","timestamp":1781018918118,"version":"3.54.1"},"publisher-location":"New York, NY, USA","reference-count":65,"publisher":"ACM","license":[{"start":{"date-parts":[[2026,3,23]],"date-time":"2026-03-23T00:00:00Z","timestamp":1774224000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/legalcode"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2026,3,23]]},"DOI":"10.1145\/3748522.3779827","type":"proceedings-article","created":{"date-parts":[[2026,6,9]],"date-time":"2026-06-09T14:17:49Z","timestamp":1781014669000},"page":"1057-1064","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Co-SemDepth: Fast Joint Semantic Segmentation and Depth Estimation on Aerial Images"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0007-3414-1628","authenticated-orcid":false,"given":"Yara","family":"Alaaeldin","sequence":"first","affiliation":[{"name":"Dibris, University of Genova, Genova, Italy"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3463-2263","authenticated-orcid":false,"given":"Francesca","family":"Odone","sequence":"additional","affiliation":[{"name":"Dibris, University of Genova, Genova, Italy"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2026,6,9]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.52202\/079017-1977"},{"key":"e_1_3_2_1_2_1","volume-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 4009\u20134018","author":"Bhat Shariq Farooq","year":"2021","unstructured":"Shariq Farooq Bhat, Ibraheem Alhashim, and Peter Wonka. 2021. Adabins: depth estimation using adaptive bins. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 4009\u20134018."},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2016.2621673"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.33018001"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.350"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW.2018.00066"},{"key":"e_1_3_2_1_7_1","unstructured":"David Eigen Christian Puhrsch and Rob Fergus. 2014. Depth map prediction from a single image using a multi-scale deep network. Advances in neural information processing systems 27."},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCP53602.2021.9733671"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.3390\/s22239374"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW.2019.00081"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2012.6248074"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.699"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00393"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2021.01.126"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01078"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00884"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.3390\/electronics8101179"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00907"},{"key":"e_1_3_2_1_19_1","unstructured":"Jetson. 2025. Jetson modules. https:\/\/developer.nvidia.com\/embedded\/jetson-modules. Accessed: 2025-04-17. (2025)."},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCVW60793.2023.00280"},{"key":"e_1_3_2_1_21_1","volume-title":"Proceedings of the IEEE\/CVF international conference on computer vision, 4015\u20134026","author":"Alexander","unstructured":"Alexander Kirillov et al. 2023. Segment anything. In Proceedings of the IEEE\/CVF international conference on computer vision, 4015\u20134026."},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00166"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00166"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1109\/3DV.2016.32"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1145\/3474085.3475409"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.365"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2017.2787781"},{"key":"e_1_3_2_1_28_1","volume-title":"Proceedings, Part X 16","author":"Liu Yifan","year":"2020","unstructured":"Yifan Liu, Chunhua Shen, Changqian Yu, and Jingdong Wang. 2020. Efficient semantic video segmentation with per-frame inference. In Computer Vision-ECCV 2020: 16th European Conference, Glasgow, UK, August 23\u201328, 2020, Proceedings, Part X 16. Springer, 352\u2013368."},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298965"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.224"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2022.10.073"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1109\/TGRS.2021.3060513"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1109\/3DV.2016.69"},{"key":"e_1_3_2_1_34_1","volume-title":"2021 29th European Signal Processing Conference (EUSIPCO). IEEE, 731\u2013735","author":"Sergiu","unstructured":"Sergiu Nedevschi et al. 2021. Weakly supervised semantic segmentation learning on uav video sequences. In 2021 29th European Signal Processing Conference (EUSIPCO). IEEE, 731\u2013735."},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2019.8794220"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1109\/WACV.2018.00168"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1007\/s00138-009-0188-9"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1017\/S096249291700006X"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2020.3017478"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1109\/WACV45572.2020.9093520"},{"key":"e_1_3_2_1_41_1","unstructured":"Raspberry Pi. 2025. Raspberry pi. https:\/\/www.raspberrypi.com\/. Accessed: 2025-04-17. (2025)."},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-00934-2_53"},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01196"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1109\/TITS.2017.2750080"},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-67361-5_40"},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"crossref","unstructured":"Nathan Silberman Derek Hoiem Pushmeet Kohli and Rob Fergus. 2012. Indoor segmentation and support inference from rgbd images. ECCV (5) 7576 746\u2013760.","DOI":"10.1007\/978-3-642-33715-4_54"},{"key":"e_1_3_2_1_47_1","unstructured":"Tech Specs. [n. d.] Arduino portenta h7. https:\/\/docs.arduino.cc\/hardware\/portenta-h7\/#tech-specs. Accessed: 2025-04-17. ()."},{"key":"e_1_3_2_1_48_1","unstructured":"Tech Specs. [n. d.] Arduino uno. https:\/\/docs.arduino.cc\/hardware\/uno-rev3\/#tech-specs. Accessed: 2025-04-17. ()."},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00313"},{"key":"e_1_3_2_1_50_1","volume-title":"Proceedings, Part IV 16","author":"Vandenhende Simon","year":"2020","unstructured":"Simon Vandenhende, Stamatios Georgoulis, and Luc Van Gool. 2020. Mti-net: multi-scale task interaction networks for multi-task learning. In Computer Vision-ECCV 2020: 16th European Conference, Glasgow, UK, August 23\u201328, 2020, Proceedings, Part IV 16. Springer, 527\u2013543."},{"key":"e_1_3_2_1_51_1","unstructured":"Martin Vel'as Michal \u0160pan\u011bl Zden\u011bk Materna and Adam Herout. 2014. Calibration of rgb camera with velodyne lidar."},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"crossref","unstructured":"Panqu Wang Pengfei Chen Ye Yuan Ding Liu Zehua Huang Xiaodi Hou and Garrison Cottrell. 2018. Understanding convolution for semantic segmentation. In 2018 IEEE winter conference on applications of computer vision (WACV). Ieee 1451\u20131460.","DOI":"10.1109\/WACV.2018.00163"},{"key":"e_1_3_2_1_53_1","doi-asserted-by":"publisher","DOI":"10.1109\/IROS45743.2020.9341801"},{"key":"e_1_3_2_1_54_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00122"},{"key":"e_1_3_2_1_55_1","unstructured":"Enze Xie Wenhai Wang Zhiding Yu Anima Anandkumar Jose M Alvarez and Ping Luo. 2021. Segformer: simple and efficient design for semantic segmentation with transformers. Advances in neural information processing systems 34 12077\u201312090."},{"key":"e_1_3_2_1_56_1","doi-asserted-by":"publisher","DOI":"10.1109\/JBHI.2020.2986376"},{"key":"e_1_3_2_1_57_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00686"},{"key":"e_1_3_2_1_58_1","unstructured":"Lihe Yang Bingyi Kang Zilong Huang Zhen Zhao Xiaogang Xu Jiashi Feng and Hengshuang Zhao. 2024. Depth anything v2. arXiv:2406.09414."},{"key":"e_1_3_2_1_59_1","unstructured":"Hanrong Ye and Dan Xu. 2023. Taskprompter: spatial-channel multi-task prompting for dense scene understanding. In ICLR."},{"key":"e_1_3_2_1_60_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01261-8_20"},{"key":"e_1_3_2_1_61_1","unstructured":"Ziyao Zeng Yangchao Wu Hyoungseob Park Daniel Wang Fengyu Yang Stefano Soatto Dong Lao Byung-Woo Hong and Alex Wong. 2024. Rsa: resolving scale ambiguities in monocular depth estimators through language descriptions. arXiv preprint arXiv:2410.02924."},{"key":"e_1_3_2_1_62_1","volume-title":"Proceedings, Part II 16","author":"Zhang Feihu","year":"2020","unstructured":"Feihu Zhang, Xiaojuan Qi, Ruigang Yang, Victor Prisacariu, Benjamin Wah, and Philip Torr. 2020. Domain-invariant stereo matching networks. In Computer Vision-ECCV 2020: 16th European Conference, Glasgow, UK, August 23\u201328, 2020, Proceedings, Part II 16. Springer, 420\u2013439."},{"key":"e_1_3_2_1_63_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01249-6_15"},{"key":"e_1_3_2_1_64_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01219-9_25"},{"key":"e_1_3_2_1_65_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.660"}],"event":{"name":"SAC '26: 41st ACM\/SIGAPP Symposium on Applied Computing","location":"Grand Hotel Palace Thessaloniki Greece","acronym":"SAC '26","sponsor":["SIGAPP ACM Special Interest Group on Applied Computing"]},"container-title":["Proceedings of the 41st ACM\/SIGAPP Symposium on Applied Computing"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3748522.3779827","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,6,9]],"date-time":"2026-06-09T14:47:15Z","timestamp":1781016435000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3748522.3779827"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,3,23]]},"references-count":65,"alternative-id":["10.1145\/3748522.3779827","10.1145\/3748522"],"URL":"https:\/\/doi.org\/10.1145\/3748522.3779827","relation":{},"subject":[],"published":{"date-parts":[[2026,3,23]]},"assertion":[{"value":"2026-06-09","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}