{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,7,3]],"date-time":"2025-07-03T05:25:06Z","timestamp":1751520306788,"version":"3.40.3"},"publisher-location":"Singapore","reference-count":53,"publisher":"Springer Nature Singapore","isbn-type":[{"type":"print","value":"9789819609598"},{"type":"electronic","value":"9789819609604"}],"license":[{"start":{"date-parts":[[2024,12,8]],"date-time":"2024-12-08T00:00:00Z","timestamp":1733616000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,12,8]],"date-time":"2024-12-08T00:00:00Z","timestamp":1733616000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-981-96-0960-4_24","type":"book-chapter","created":{"date-parts":[[2024,12,7]],"date-time":"2024-12-07T07:36:03Z","timestamp":1733556963000},"page":"395-412","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["QR-DETR: Query Routing for\u00a0Detection Transformer"],"prefix":"10.1007","author":[{"given":"Tharsan","family":"Senthivel","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ngoc-Son","family":"Vu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2024,12,8]]},"reference":[{"key":"24_CR1","unstructured":"Cai, Z., Liu, S., Wang, G., Ge, Z., Zhang, X., Huang, D.: Align-detr: Improving detr with simple iou-aware bce loss. arXiv preprint arXiv:2304.07527 (2023)"},{"key":"24_CR2","doi-asserted-by":"crossref","unstructured":"Carion, N., Massa, F., Synnaeve, G., Usunier, N., Kirillov, A., Zagoruyko, S.: End-to-end object detection with transformers. In: European conference on computer vision. pp. 213\u2013229. Springer (2020)","DOI":"10.1007\/978-3-030-58452-8_13"},{"key":"24_CR3","doi-asserted-by":"crossref","unstructured":"Chen, F., Zhang, H., Hu, K., Huang, Y.k., Zhu, C., Savvides, M.: Enhanced training of query-based object detection via selective query recollection. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. pp. 23756\u201323765 (2023)","DOI":"10.1109\/CVPR52729.2023.02275"},{"key":"24_CR4","doi-asserted-by":"crossref","unstructured":"Chen, L., Yang, T., Zhang, X., Zhang, W., Sun, J.: Points as queries: Weakly semi-supervised object detection by points. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition. pp. 8823\u20138832 (2021)","DOI":"10.1109\/CVPR46437.2021.00871"},{"key":"24_CR5","doi-asserted-by":"crossref","unstructured":"Chen, Q., Chen, X., Wang, J., Zhang, S., Yao, K., Feng, H., Han, J., Ding, E., Zeng, G., Wang, J.: Group detr: Fast detr training with group-wise one-to-many assignment. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition. pp. 6633\u20136642 (2023)","DOI":"10.1109\/ICCV51070.2023.00610"},{"key":"24_CR6","unstructured":"Chen, Q., Wang, J., Han, C., Zhang, S., Li, Z., Chen, X., Chen, J., Wang, X., Han, S., Zhang, G., et\u00a0al.: Group detr v2: Strong object detector with encoder-decoder pretraining. arXiv preprint arXiv:2211.03594 (2022)"},{"key":"24_CR7","unstructured":"Csord\u00e1s, R., Irie, K., Schmidhuber, J., Potts, C., Manning, C.D.: Moeut: Mixture-of-experts universal transformers. arXiv preprint arXiv:2405.16039 (2024)"},{"key":"24_CR8","unstructured":"Csord\u00e1s, R., Pi\u0119kos, P., Irie, K.: Switchhead: Accelerating transformers with mixture-of-experts attention. arXiv preprint arXiv:2312.07987 (2023)"},{"key":"24_CR9","doi-asserted-by":"publisher","unstructured":"Dai, X., Chen, Y., Yang, J., Zhang, P., Yuan, L., Zhang, L.: Dynamic DETR: End-to-End Object Detection with Dynamic Attention. In: 2021 IEEE\/CVF International Conference on Computer Vision (ICCV). pp. 2968\u20132977. IEEE, Montreal, QC, Canada (Oct 2021). https:\/\/doi.org\/10.1109\/ICCV48922.2021.00298, https:\/\/ieeexplore.ieee.org\/document\/9709981\/","DOI":"10.1109\/ICCV48922.2021.00298"},{"key":"24_CR10","doi-asserted-by":"crossref","unstructured":"Fang, R., Gao, P., Zhou, A., Cai, Y., Liu, S., Dai, J., Li, H.: Feataug-detr: Enriching one-to-many matching for detrs with feature augmentation. IEEE Transactions on Pattern Analysis and Machine Intelligence (2024)","DOI":"10.1109\/TPAMI.2024.3381961"},{"issue":"120","key":"24_CR11","first-page":"1","volume":"23","author":"W Fedus","year":"2022","unstructured":"Fedus, W., Zoph, B., Shazeer, N.: Switch transformers: Scaling to trillion parameter models with simple and efficient sparsity. J. Mach. Learn. Res. 23(120), 1\u201339 (2022)","journal-title":"J. Mach. Learn. Res."},{"key":"24_CR12","doi-asserted-by":"crossref","unstructured":"Gao, P., Zheng, M., Wang, X., Dai, J., Li, H.: Fast convergence of detr with spatially modulated co-attention. In: Proceedings of the IEEE\/CVF international conference on computer vision. pp. 3621\u20133630 (2021)","DOI":"10.1109\/ICCV48922.2021.00360"},{"key":"24_CR13","doi-asserted-by":"crossref","unstructured":"Gao, Z., Wang, L., Han, B., Guo, S.: Adamixer: A fast-converging query-based object detector. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. pp. 5364\u20135373 (2022)","DOI":"10.1109\/CVPR52688.2022.00529"},{"key":"24_CR14","doi-asserted-by":"crossref","unstructured":"Hou, X., Liu, M., Zhang, S., Wei, P., Chen, B.: Salience detr: Enhancing detection transformer with hierarchical salience filtering refinement. In: CVPR (2024)","DOI":"10.1109\/CVPR52733.2024.01664"},{"key":"24_CR15","unstructured":"Hu, Z., Sun, Y., Wang, J., Yang, Y.: Dac-detr: Divide the attention layers and conquer. Advances in Neural Information Processing Systems 36 (2024)"},{"key":"24_CR16","unstructured":"Hwang, C., Cui, W., Xiong, Y., Yang, Z., Liu, Z., Hu, H., Wang, Z., Salas, R., Jose, J., Ram, P., et\u00a0al.: Tutel: Adaptive mixture-of-experts at scale. Proceedings of Machine Learning and Systems 5 (2023)"},{"issue":"1","key":"24_CR17","doi-asserted-by":"publisher","first-page":"79","DOI":"10.1162\/neco.1991.3.1.79","volume":"3","author":"RA Jacobs","year":"1991","unstructured":"Jacobs, R.A., Jordan, M.I., Nowlan, S.J., Hinton, G.E.: Adaptive mixtures of local experts. Neural Comput. 3(1), 79\u201387 (1991)","journal-title":"Neural Comput."},{"key":"24_CR18","unstructured":"Jain, Y., Behl, H., Kira, Z., Vineet, V.: Damex: Dataset-aware mixture-of-experts for visual understanding of mixture-of-datasets. Advances in Neural Information Processing Systems 36 (2024)"},{"key":"24_CR19","doi-asserted-by":"crossref","unstructured":"Jia, D., Yuan, Y., He, H., Wu, X., Yu, H., Lin, W., Sun, L., Zhang, C., Hu, H.: Detrs with hybrid matching. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. pp. 19702\u201319712 (2023)","DOI":"10.1109\/CVPR52729.2023.01887"},{"key":"24_CR20","unstructured":"Kong, C., Luo, A., Xia, S., Yu, Y., Li, H., Kot, A.C.: Moe-ffd: Mixture of experts for generalized and parameter-efficient face forgery detection. arXiv preprint arXiv:2404.08452 (2024)"},{"key":"24_CR21","doi-asserted-by":"crossref","unstructured":"Kouris, A., Venieris, S.I., Laskaridis, S., Lane, N.: Multi-exit semantic segmentation networks. In: European Conference on Computer Vision. pp. 330\u2013349. Springer (2022)","DOI":"10.1007\/978-3-031-19803-8_20"},{"key":"24_CR22","doi-asserted-by":"crossref","unstructured":"Li, F., Zhang, H., Liu, S., Guo, J., Ni, L.M., Zhang, L.: Dn-detr: Accelerate detr training by introducing query denoising. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. pp. 13619\u201313627 (2022)","DOI":"10.1109\/CVPR52688.2022.01325"},{"key":"24_CR23","doi-asserted-by":"crossref","unstructured":"Lin, T.Y., Maire, M., Belongie, S., Hays, J., Perona, P., Ramanan, D., Doll\u00e1r, P., Zitnick, C.L.: Microsoft coco: Common objects in context. In: Computer Vision\u2013ECCV 2014: 13th European Conference, Zurich, Switzerland, September 6-12, 2014, Proceedings, Part V 13. pp. 740\u2013755. Springer (2014)","DOI":"10.1007\/978-3-319-10602-1_48"},{"key":"24_CR24","doi-asserted-by":"crossref","unstructured":"Lin, Y., Yuan, Y., Zhang, Z., Li, C., Zheng, N., Hu, H.: Detr does not need multi-scale or locality design. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision. pp. 6545\u20136554 (2023)","DOI":"10.1109\/ICCV51070.2023.00602"},{"key":"24_CR25","unstructured":"Liu, S., Li, F., Zhang, H., Yang, X., Qi, X., Su, H., Zhu, J., Zhang, L.: DAB-DETR: Dynamic Anchor Boxes are Better Queries for DETR. In: ICLR (2022), https:\/\/openreview.net\/forum?id=oMI9PjOb9Jl"},{"key":"24_CR26","doi-asserted-by":"crossref","unstructured":"Liu, Y., Zhang, Y., Wang, Y., Zhang, Y., Tian, J., Shi, Z., Fan, J., He, Z.: Sap-detr: bridging the gap between salient points and queries-based transformer detector for fast model convergency. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. pp. 15539\u201315547 (2023)","DOI":"10.1109\/CVPR52729.2023.01491"},{"key":"24_CR27","unstructured":"Loshchilov, I., Hutter, F.: Decoupled weight decay regularization. In: International Conference on Learning Representations (2018)"},{"key":"24_CR28","unstructured":"Lou, Y., Xue, F., Zheng, Z., You, Y.: Cross-token modeling with conditional computation. arXiv preprint arXiv:2109.02008 (2021)"},{"key":"24_CR29","doi-asserted-by":"crossref","unstructured":"Ma, J., Huang, P.Y., Xie, S., Li, S.W., Zettlemoyer, L., Chang, S.F., Yih, W.T., Xu, H.: Mode: Clip data experts via clustering. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. pp. 26354\u201326363 (2024)","DOI":"10.1109\/CVPR52733.2024.02489"},{"key":"24_CR30","doi-asserted-by":"crossref","unstructured":"Meng, D., Chen, X., Fan, Z., Zeng, G., Li, H., Yuan, Y., Sun, L., Wang, J.: Conditional DETR for Fast Training Convergence. In: ICCV (2021)","DOI":"10.1109\/ICCV48922.2021.00363"},{"key":"24_CR31","unstructured":"Nie, X., Miao, X., Cao, S., Ma, L., Liu, Q., Xue, J., Miao, Y., Liu, Y., Yang, Z., Cui, B.: Evomoe: An evolutional mixture-of-experts training framework via dense-to-sparse gate. arXiv preprint arXiv:2112.14397 (2021)"},{"key":"24_CR32","unstructured":"Oksuz, K., Kuzucu, S., Joy, T., Dokania, P.K.: Mocae: Mixture of calibrated experts significantly improves object detection. arXiv preprint arXiv:2309.14976 (2023)"},{"key":"24_CR33","unstructured":"Pu, Y., Liang, W., Hao, Y., Yuan, Y., Yang, Y., Zhang, C., Hu, H., Huang, G.: Rank-detr for high quality object detection. Advances in Neural Information Processing Systems 36 (2024)"},{"key":"24_CR34","unstructured":"florence regol, Chataoui, J., Coates, M.: Jointly-learned exit and inference for a dynamic neural network. In: The Twelfth International Conference on Learning Representations (2024), https:\/\/openreview.net\/forum?id=jX2DT7qDam"},{"key":"24_CR35","unstructured":"Ren, S., He, K., Girshick, R., Sun, J.: Faster r-cnn: Towards real-time object detection with region proposal networks. Advances in neural information processing systems 28 (2015)"},{"key":"24_CR36","doi-asserted-by":"publisher","unstructured":"Roh, B., Shin, J., Shin, W., Kim, S.: Sparse DETR: Efficient End-to-End Object Detection with Learnable Sparsity. Tech. Rep. arXiv:2111.14330, arXiv (Mar 2022https:\/\/doi.org\/10.48550\/arXiv.2111.14330, http:\/\/arxiv.org\/abs\/2111.14330","DOI":"10.48550\/arXiv.2111.14330"},{"key":"24_CR37","unstructured":"Ruiz, C.R., Puigcerver, J., Mustafa, B., Neumann, M., Jenatton, R., Pinto, A.S., Keysers, D., Houlsby, N.: Scaling vision with sparse mixture of experts. In: Beygelzimer, A., Dauphin, Y., Liang, P., Vaughan, J.W. (eds.) Advances in Neural Information Processing Systems (2021), https:\/\/openreview.net\/forum?id=FrIDgjDOH1u"},{"key":"24_CR38","doi-asserted-by":"publisher","unstructured":"Senthivel, T., Vu, N.S.: Subgroups for detection transformer. In: 2024 IEEE International Conference on Image Processing (ICIP). pp. 2194\u20132200 (2024).https:\/\/doi.org\/10.1109\/ICIP51287.2024.10648285","DOI":"10.1109\/ICIP51287.2024.10648285"},{"key":"24_CR39","doi-asserted-by":"crossref","unstructured":"Senthivel, T., Vu, N.S., Borzic, B.: Detection Transformer with Diversified Object Queries. In: 2023 IEEE International Conference on Image Processing (ICIP). pp. 2515\u20132519. IEEE (2023)","DOI":"10.1109\/ICIP49359.2023.10221970"},{"key":"24_CR40","unstructured":"Shen, T., Ott, M., Auli, M., Ranzato, M.: Mixture models for diverse machine translation: Tricks of the trade. In: International conference on machine learning. pp. 5719\u20135728. PMLR (2019)"},{"key":"24_CR41","doi-asserted-by":"crossref","unstructured":"Teng, Y., Liu, H., Guo, S., Wang, L.: Stageinteractor: Query-based object detector with cross-stage interaction. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision. pp. 6577\u20136588 (2023)","DOI":"10.1109\/ICCV51070.2023.00605"},{"key":"24_CR42","unstructured":"Vaswani, A., Shazeer, N., Parmar, N., Uszkoreit, J., Jones, L., Gomez, A.N., Kaiser, \u0141., Polosukhin, I.: Attention is all you need. In: Advances in Neural Information Processing Systems. pp. 5998\u20136008 (2017)"},{"key":"24_CR43","doi-asserted-by":"publisher","unstructured":"Wang, T., Yuan, L., Chen, Y., Feng, J., Yan, S.: PnP-DETR: Towards Efficient Visual Analysis with Transformers. In: 2021 IEEE\/CVF International Conference on Computer Vision (ICCV). pp. 4641\u20134650. IEEE, Montreal, QC, Canada (Oct 2021https:\/\/doi.org\/10.1109\/ICCV48922.2021.00462, https:\/\/ieeexplore.ieee.org\/document\/9710805\/","DOI":"10.1109\/ICCV48922.2021.00462"},{"key":"24_CR44","doi-asserted-by":"crossref","unstructured":"Yang, Y., Jiang, P.T., Hou, Q., Zhang, H., Chen, J., Li, B.: Multi-task dense prediction via mixture of low-rank experts. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. pp. 27927\u201327937 (2024)","DOI":"10.1109\/CVPR52733.2024.02638"},{"key":"24_CR45","unstructured":"Yao, Z., Ai, J., Li, B., Zhang, C.: Efficient detr: improving end-to-end object detector with dense prior. arXiv preprint arXiv:2104.01318 (2021)"},{"key":"24_CR46","doi-asserted-by":"crossref","unstructured":"Zeng, W., Jin, S., Liu, W., Qian, C., Luo, P., Ouyang, W., Wang, X.: Not All Tokens Are Equal: Human-Centric Visual Analysis via Token Clustering Transformer. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (2022)","DOI":"10.1109\/CVPR52688.2022.01082"},{"key":"24_CR47","doi-asserted-by":"publisher","unstructured":"Zhang, G., Luo, Z., Yu, Y., Cui, K., Lu, S.: Accelerating DETR Convergence via Semantic-Aligned Matching. Tech. Rep. arXiv:2203.06883, arXiv (Mar 2022https:\/\/doi.org\/10.48550\/arXiv.2203.06883, http:\/\/arxiv.org\/abs\/2203.06883","DOI":"10.48550\/arXiv.2203.06883"},{"key":"24_CR48","unstructured":"Zhang, H., Li, F., Liu, S., Zhang, L., Su, H., Zhu, J., Ni, L.M., Shum, H.Y.: DINO: DETR with Improved DeNoising Anchor Boxes for End-to-End Object Detection. In: ICLR (2022), _eprint: 2203.03605"},{"key":"24_CR49","doi-asserted-by":"publisher","unstructured":"Zhang, X., Shen, Y., Huang, Z., Zhou, J., Rong, W., Xiong, Z.: Mixture of attention heads: Selecting attention heads per token. In: Goldberg, Y., Kozareva, Z., Zhang, Y. (eds.) Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing. pp. 4150\u20134162. Association for Computational Linguistics, Abu Dhabi, United Arab Emirates (Dec 2022).https:\/\/doi.org\/10.18653\/v1\/2022.emnlp-main.278, https:\/\/aclanthology.org\/2022.emnlp-main.278","DOI":"10.18653\/v1\/2022.emnlp-main.278"},{"key":"24_CR50","doi-asserted-by":"crossref","unstructured":"Zhao, C., Sun, Y., Wang, W., Chen, Q., Ding, E., Yang, Y., Wang, J.: Ms-detr: Efficient detr training with mixed supervision. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. pp. 17027\u201317036 (2024)","DOI":"10.1109\/CVPR52733.2024.01611"},{"key":"24_CR51","doi-asserted-by":"crossref","unstructured":"Zheng, D., Dong, W., Hu, H., Chen, X., Wang, Y.: Less is more: Focus attention for efficient detr. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision. pp. 6674\u20136683 (2023)","DOI":"10.1109\/ICCV51070.2023.00614"},{"key":"24_CR52","unstructured":"Zhu, X., Su, W., Lu, L., Li, B., Wang, X., Dai, J.: Deformable DETR: Deformable Transformers for End-to-End Object Detection. In: International Conference on Learning Representations (2020)"},{"key":"24_CR53","doi-asserted-by":"crossref","unstructured":"Zong, Z., Song, G., Liu, Y.: Detrs with collaborative hybrid assignments training. In: Proceedings of the IEEE\/CVF international conference on computer vision. pp. 6748\u20136758 (2023)","DOI":"10.1109\/ICCV51070.2023.00621"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ACCV 2024"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-96-0960-4_24","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,12,7]],"date-time":"2024-12-07T08:35:26Z","timestamp":1733560526000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-96-0960-4_24"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,12,8]]},"ISBN":["9789819609598","9789819609604"],"references-count":53,"URL":"https:\/\/doi.org\/10.1007\/978-981-96-0960-4_24","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2024,12,8]]},"assertion":[{"value":"8 December 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ACCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Asian Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Hanoi","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Vietnam","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"8 December 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"12 December 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"17","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"accv2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}