{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,28]],"date-time":"2025-06-28T04:09:32Z","timestamp":1751083772706,"version":"3.33.0"},"reference-count":38,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2022,1,13]],"date-time":"2022-01-13T00:00:00Z","timestamp":1642032000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2022,1,13]],"date-time":"2022-01-13T00:00:00Z","timestamp":1642032000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Neural Comput &amp; Applic"],"published-print":{"date-parts":[[2025,1]]},"DOI":"10.1007\/s00521-021-06752-7","type":"journal-article","created":{"date-parts":[[2022,1,13]],"date-time":"2022-01-13T00:03:47Z","timestamp":1642032227000},"page":"695-705","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":8,"title":["Greedy search algorithm for partial quantization of convolutional neural networks inspired by submodular optimization"],"prefix":"10.1007","volume":"37","author":[{"given":"Satoki","family":"Tsuji","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Fuyuka","family":"Yamada","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Hiroshi","family":"Kawaguchi","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Atsuki","family":"Inoue","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yasufumi","family":"Sakai","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2022,1,13]]},"reference":[{"key":"6752_CR1","unstructured":"Molchanov P, Tyree S, Karras T, Aila T, Kautz J (2016) Pruning convolutional neural networks for resource efficient inference arXiv preprint arXiv:1611.06440"},{"key":"6752_CR2","unstructured":"Hinton G, Vinyals O, Dean J (2015) Distilling the knowledge in a neural network arXiv preprint arXiv:1503.02531"},{"key":"6752_CR3","doi-asserted-by":"crossref","unstructured":"Wu J, Leng C, Wang Y, Hu Q, Cheng J (2016) Quantized convolutional neural networks for mobile devices In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp 4820\u20134828","DOI":"10.1109\/CVPR.2016.521"},{"key":"6752_CR4","unstructured":"Zhou S, Wu Y, Ni Z, Zhou X, Wen H, Zou Y (2016) Dorefa-net: training low bitwidth convolutional neural networks with low bitwidth gradients arXiv preprint arXiv:1606.06160"},{"key":"6752_CR5","doi-asserted-by":"crossref","unstructured":"Jacob B, Kligys S, Chen B, Zhu M, Tang M, Howard A, Adam H, Kalenichenko D (2018) Quantization and training of neural networks for efficient integer-arithmetic-only inference. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp 2704\u20132713","DOI":"10.1109\/CVPR.2018.00286"},{"key":"6752_CR6","doi-asserted-by":"publisher","unstructured":"Markidis S, Chien SWD, Laure E, Peng IB, Vetter JS (2018) Nvidia tensor core programmability, performance precision. In: 2018 IEEE International Parallel and Distributed Processing Symposium Workshops (IPDPSW), pp 522\u2013531 https:\/\/doi.org\/10.1109\/IPDPSW.2018.00091","DOI":"10.1109\/IPDPSW.2018.00091"},{"key":"6752_CR7","doi-asserted-by":"crossref","unstructured":"Jouppi NP, Young C, Patil N, Patterson D, Agrawal G, Bajwa R, Bates S, Bhatia S, Boden N, Borchers A, et al. (2017) In-datacenter performance analysis of a tensor processing unit. In: Proceedings of the 44th Annual International Symposium on Computer Architecture, pp 1\u201312","DOI":"10.1145\/3079856.3080246"},{"key":"6752_CR8","doi-asserted-by":"crossref","unstructured":"Wu B, Dai X, Zhang P, Wang Y, Sun F, Wu Y, Tian Y, Vajda P, Jia Y, Keutzer K (2019) Fbnet: hardware-aware efficient convnet design via differentiable neural architecture search In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp 10734\u201310742","DOI":"10.1109\/CVPR.2019.01099"},{"key":"6752_CR9","doi-asserted-by":"crossref","unstructured":"Tan M, Chen B, Pang R, Vasudevan V, Sandler M, Howard A, Le QV (2019) Mnasnet: platform-aware neural architecture search for mobile In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp 2820\u20132828","DOI":"10.1109\/CVPR.2019.00293"},{"key":"6752_CR10","unstructured":"Zoph B, Le QV (2016) Neural architecture search with reinforcement learning arXiv preprint arXiv:1611.01578"},{"key":"6752_CR11","unstructured":"Wu B, Wang Y, Zhang P, Tian Y, Vajda P, Keutzer K (2018) Mixed precision quantization of convnets via differentiable neural architecture search arXiv preprint arXiv:1812.00090"},{"key":"6752_CR12","doi-asserted-by":"crossref","unstructured":"Guo Z, Zhang X, Mu H, Heng W, Liu Z, Wei Y, Sun J (2020) Single path one-shot neural architecture search with uniform sampling In: European Conference on Computer Vision, pp 544\u2013560. Springer","DOI":"10.1007\/978-3-030-58517-4_32"},{"key":"6752_CR13","unstructured":"Han S, Mao H, Dally WJ (2015) Deep compression: Compressing deep neural networks with pruning, trained quantization and huffman coding arXiv preprint arXiv:1510.00149"},{"key":"6752_CR14","doi-asserted-by":"crossref","unstructured":"Zhou Y, Moosavi-Dezfooli S-M, Cheung N-M, Frossard P (2018) Adaptive quantization for deep neural network In: Proceedings of the AAAI Conference on Artificial Intelligence, vol 32","DOI":"10.1609\/aaai.v32i1.11623"},{"key":"6752_CR15","doi-asserted-by":"crossref","unstructured":"Wang K, Liu Z, Lin Y, Lin J, Han S (2019) Haq: Hardware-aware automated quantization with mixed precision In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp 8612\u20138620","DOI":"10.1109\/CVPR.2019.00881"},{"key":"6752_CR16","unstructured":"Uhlich S, Mauch L, Cardinaux F, Yoshiyama K, Garcia JA, Tiedemann S, Kemp T, Nakamura A (2019) Mixed precision dnns: all you need is a good parametrization arXiv preprint arXiv:1905.11452"},{"key":"6752_CR17","doi-asserted-by":"crossref","unstructured":"Dong Z, Yao Z, Gholami A, Mahoney MW, Keutzer K (2019) Hawq: Hessian aware quantization of neural networks with mixed-precision In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp 293\u2013302","DOI":"10.1109\/ICCV.2019.00038"},{"key":"6752_CR18","unstructured":"Wu H, Judd P, Zhang X, Isaev M, Micikevicius P (2020) Integer quantization for deep learning inference: principles and empirical evaluation arXiv preprint arXiv:2004.09602"},{"key":"6752_CR19","doi-asserted-by":"crossref","unstructured":"Nagel M, Baalen Mv, Blankevoort T, Welling M (2019) Data-free quantization through weight equalization and bias correction In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp 1325\u20131334","DOI":"10.1109\/ICCV.2019.00141"},{"key":"6752_CR20","unstructured":"Nahshan Y, Chmiel B, Baskin C, Zheltonozhskii E, Banner R, Bronstein AM, Mendelson A (2019) Loss aware post-training quantization arXiv preprint arXiv:1911.07190"},{"key":"6752_CR21","doi-asserted-by":"crossref","unstructured":"Choukroun Y, Kravchik E, Yang F, Kisilev P (2019) Low-bit quantization of neural networks for efficient inference In: ICCV Workshops, pp 3009\u20133018","DOI":"10.1109\/ICCVW.2019.00363"},{"key":"6752_CR22","doi-asserted-by":"crossref","unstructured":"Nemhauser G. L., Wolsey L. A. (1981) Maximizing submodular set functions: formulations and analysis of algorithms In: North-Holland Mathematics Studies, vol. 59, pp. 279\u2013301. Elsevier","DOI":"10.1016\/S0304-0208(08)73471-6"},{"key":"6752_CR23","unstructured":"Lin H, Bilmes J (2010) Multi-document summarization via budgeted maximization of submodular functions In: Human language technologies: the 2010 Annual Conference of the North American Chapter of the Association for Computational Linguistics, pp 912\u2013920"},{"issue":"1","key":"6752_CR24","doi-asserted-by":"publisher","first-page":"267","DOI":"10.1111\/j.2517-6161.1996.tb02080.x","volume":"58","author":"R Tibshirani","year":"1996","unstructured":"Tibshirani R (1996) Regression shrinkage and selection via the lasso. J R Stat Soc Series B (Methodol) 58(1):267\u2013288","journal-title":"J R Stat Soc Series B (Methodol)"},{"key":"6752_CR25","doi-asserted-by":"crossref","unstructured":"Sandler M, Howard A, Zhu M, Zhmoginov A, Chen L-C (2018) Mobilenetv2: inverted residuals and linear bottlenecks In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp 4510\u20134520","DOI":"10.1109\/CVPR.2018.00474"},{"key":"6752_CR26","doi-asserted-by":"crossref","unstructured":"Deng J, Dong W, Socher R, Li L-J, Li K, Fei-Fei L (2009) Imagenet: a large-scale hierarchical image database In: 2009 IEEE Conference on Computer Vision and Pattern Recognition, pp 248\u2013255 Ieee","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"6752_CR27","unstructured":"Vanhoucke V, Senior A, Mao MZ (2011) Improving the speed of neural networks on cpus. In: Proceedings of the Deep Learning and Unsupervised Feature Learning NIPS Workshop, vol. 1"},{"key":"6752_CR28","unstructured":"Banner R, Nahshan Y, Hoffer E, Soudry D (2018) Post-training 4-bit quantization of convolution networks for rapid-deployment arXiv preprint arXiv:1810.05723"},{"key":"6752_CR29","unstructured":"Paszke A, Gross S, Massa F, Lerer A, Bradbury J, Chanan G, Killeen T, Lin Z, Gimelshein N, Antiga L, et al. (2019) Pytorch: an imperative style, high-performance deep learning library arXiv preprint arXiv:1912.01703"},{"key":"6752_CR30","doi-asserted-by":"crossref","unstructured":"He K, Zhang X, Ren S, Sun J (2016) Deep residual learning for image recognition In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp 770\u2013778","DOI":"10.1109\/CVPR.2016.90"},{"key":"6752_CR31","unstructured":"Tan M, Le Q (2019) Efficientnet: rethinking model scaling for convolutional neural networks In: International Conference on Machine Learning, pp 6105\u20136114 PMLR"},{"key":"6752_CR32","doi-asserted-by":"crossref","unstructured":"Chollet F (2017) Xception: deep learning with depthwise separable convolutions In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp 1251\u20131258","DOI":"10.1109\/CVPR.2017.195"},{"key":"6752_CR33","doi-asserted-by":"crossref","unstructured":"Huang G, Liu Z, Van Der Maaten L, Weinberger KQ (2017) Densely connected convolutional networks In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp 4700\u20134708","DOI":"10.1109\/CVPR.2017.243"},{"key":"6752_CR34","doi-asserted-by":"crossref","unstructured":"Xie Q, Luong M-T, Hovy E, Le QV (2020) Self-training with noisy student improves imagenet classification In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp 10687\u201310698","DOI":"10.1109\/CVPR42600.2020.01070"},{"key":"6752_CR35","unstructured":"Nagel M, Amjad RA, Van Baalen M, Louizos C, Blankevoort T (2020) Up or down? adaptive rounding for post-training quantization In: International Conference on Machine Learning, pp 7197\u20137206 PMLR"},{"key":"6752_CR36","doi-asserted-by":"crossref","unstructured":"Wang T, Wang K, Cai H, Lin J, Liu Z, Wang H, Lin Y, Han S (2020) Apq: joint search for network architecture, pruning and quantization policy In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp 2078\u20132087","DOI":"10.1109\/CVPR42600.2020.00215"},{"key":"6752_CR37","unstructured":"Gupta S, Agrawal A, Gopalakrishnan K, Narayanan P (2015) Deep learning with limited numerical precision In: International Conference on Machine Learning, pp 1737\u20131746 PMLR"},{"key":"6752_CR38","unstructured":"Krishnamoorthi R (2018) Quantizing deep convolutional networks for efficient inference: a whitepaper arXiv preprint arXiv:1806.08342"}],"container-title":["Neural Computing and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00521-021-06752-7.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00521-021-06752-7\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00521-021-06752-7.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,1,23]],"date-time":"2025-01-23T18:27:51Z","timestamp":1737656871000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00521-021-06752-7"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,1,13]]},"references-count":38,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2025,1]]}},"alternative-id":["6752"],"URL":"https:\/\/doi.org\/10.1007\/s00521-021-06752-7","relation":{},"ISSN":["0941-0643","1433-3058"],"issn-type":[{"type":"print","value":"0941-0643"},{"type":"electronic","value":"1433-3058"}],"subject":[],"published":{"date-parts":[[2022,1,13]]},"assertion":[{"value":"2 April 2021","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"10 November 2021","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"13 January 2022","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"All the authors declare that they have no conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}]}}