{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,26]],"date-time":"2025-03-26T02:57:07Z","timestamp":1742957827321,"version":"3.40.3"},"publisher-location":"Cham","reference-count":35,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783030921842"},{"type":"electronic","value":"9783030921859"}],"license":[{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021]]},"DOI":"10.1007\/978-3-030-92185-9_30","type":"book-chapter","created":{"date-parts":[[2021,12,5]],"date-time":"2021-12-05T17:02:46Z","timestamp":1638723766000},"page":"367-378","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["DAP-BERT: Differentiable Architecture Pruning of\u00a0BERT"],"prefix":"10.1007","author":[{"given":"Chung-Yiu","family":"Yau","sequence":"first","affiliation":[]},{"given":"Haoli","family":"Bai","sequence":"additional","affiliation":[]},{"given":"Irwin","family":"King","sequence":"additional","affiliation":[]},{"given":"Michael R.","family":"Lyu","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2021,12,6]]},"reference":[{"key":"30_CR1","unstructured":"Bai, H., Hou, L., Shang, L., Jiang, X., King, I., Lyu, M.R.: Towards efficient post-training quantization of pre-trained language models. Preprint arXiv:2109.15082 (2021)"},{"key":"30_CR2","doi-asserted-by":"crossref","unstructured":"Bai, H., Wu, J., King, I., Lyu, M.: Few shot network compression via cross distillation. In: AAAI, vol. 34, pp. 3203\u20133210 (2020)","DOI":"10.1609\/aaai.v34i04.5718"},{"key":"30_CR3","doi-asserted-by":"crossref","unstructured":"Bai, H., et al.: BinaryBERT: pushing the limit of BERT quantization. In: ACL (2020)","DOI":"10.18653\/v1\/2021.acl-long.334"},{"key":"30_CR4","unstructured":"Bernstein, J., Wang, Y.X., Azizzadenesheli, K., Anandkumar, A.: signSGD: Compressed optimisation for non-convex problems. In: ICML (2018)"},{"key":"30_CR5","doi-asserted-by":"crossref","unstructured":"Chen, D., et al.: AdaBERT: task-adaptive BERT compression with differentiable neural architecture search. In: IJCAI (2021)","DOI":"10.24963\/ijcai.2020\/341"},{"key":"30_CR6","unstructured":"Chen, T., et al.: The lottery ticket hypothesis for pre-trained BERT networks. In: NeurIPS (2020)"},{"key":"30_CR7","unstructured":"Devlin, J., Chang, M.W., Lee, K., Toutanova, K.: BERT: pre-training of deep bidirectional transformers for language understanding. In: NAACL-HLT (2019)"},{"key":"30_CR8","unstructured":"Dong, X., Yang, Y.: Network pruning via transformable architecture search. In: NeurIPS (2019)"},{"key":"30_CR9","unstructured":"Frankle, J., Carbin, M.: The lottery ticket hypothesis: finding sparse, trainable neural networks. In: ICML (2018)"},{"key":"30_CR10","doi-asserted-by":"crossref","unstructured":"Gordon, M.A., Duh, K., Andrews, N.: Compressing BERT: studying the effects of weight pruning on transfer learning. In: ACL (2020)","DOI":"10.18653\/v1\/2020.repl4nlp-1.18"},{"key":"30_CR11","unstructured":"Hou, L., Huang, Z., Shang, L., Jiang, X., Chen, X., Liu, Q.: DynaBERT: dynamic BERT with adaptive width and depth. In: NeurIPS (2020)"},{"key":"30_CR12","doi-asserted-by":"crossref","unstructured":"Jiao, X., et al.: TinyBERT: distilling BERT for natural language understanding. In: EMNLP (2020)","DOI":"10.18653\/v1\/2020.findings-emnlp.372"},{"key":"30_CR13","unstructured":"Li, Y., Wang, W., Bai, H., Gong, R., Dong, X., Yu, F.: Efficient bitwidth search for practical mixed precision neural network. Preprint arXiv:2003.07577 (2020)"},{"key":"30_CR14","unstructured":"Liu, H., Simonyan, K., Yang, Y.: DARTS: differentiable architecture search. In: ICLR (2019)"},{"key":"30_CR15","unstructured":"Liu, Y., et al.: RoBERTa: a robustly optimized BERT pretraining approach. Preprint arXiv:1907.11692 (2019)"},{"key":"30_CR16","unstructured":"McCarley, J.S., Chakravarti, R., Sil, A.: Structured pruning of a BERT-based question answering model. Preprint arXiv:1910.06360 (2021)"},{"key":"30_CR17","unstructured":"Michel, P., Levy, O., Neubig, G.: Are sixteen heads really better than one? In: NeurIPS (2019)"},{"key":"30_CR18","unstructured":"Pascanu, R., Mikolov, T., Bengio, Y.: On the difficulty of training recurrent neural networks. In: ICML (2013)"},{"key":"30_CR19","unstructured":"Pham, H., Guan, M.Y., Zoph, B., Le, Q.V., Dean, J.: Efficient neural architecture search via parameter sharing. In: ICML, pp. 4092\u20134101 (2018)"},{"key":"30_CR20","doi-asserted-by":"crossref","unstructured":"Prasanna, S., Rogers, A., Rumshisky, A.: When BERT plays the lottery, all tickets are winning. In: EMNLP (2020)","DOI":"10.18653\/v1\/2020.emnlp-main.259"},{"key":"30_CR21","doi-asserted-by":"crossref","unstructured":"Real, E., Aggarwal, A., Huang, Y., Le, Q.V.: Regularized evolution for image classifier architecture search. In: AAAI (2019)","DOI":"10.1609\/aaai.v33i01.33014780"},{"key":"30_CR22","unstructured":"Sanh, V., Debut, L., Chaumond, J., Wolf, T.: DistilBERT, a distilled version of BERT: smaller, faster, cheaper and lighter. In: NeurIPS (2020)"},{"key":"30_CR23","doi-asserted-by":"crossref","unstructured":"Shen, S., et al.: Q-BERT: hessian based ultra low precision quantization of BERT. In: AAAI (2019)","DOI":"10.1609\/aaai.v34i05.6409"},{"key":"30_CR24","doi-asserted-by":"crossref","unstructured":"Sun, Z., Yu, H., Song, X., Liu, R., Yang, Y., Zhou, D.: MobileBERT: a compact task-agnostic BERT for resource-limited devices. In: ACL (2020)","DOI":"10.18653\/v1\/2020.acl-main.195"},{"key":"30_CR25","unstructured":"Turc, I., Chang, M.W., Lee, K., Toutanova, K.: Well-read students learn better: on the importance of pre-training compact models. Preprint arXiv:1908.08962v2 (2019)"},{"key":"30_CR26","doi-asserted-by":"crossref","unstructured":"Wang, A., Singh, A., Michael, J., Hill, F., Levy, O., Bowman, S.R.: GLUE: a multi-task benchmark and analysis platform for natural language understanding. In: ICLR (2019)","DOI":"10.18653\/v1\/W18-5446"},{"key":"30_CR27","unstructured":"Wang, J., et al.: Revisiting parameter sharing for automatic neural channel number search. In: NeurIPS, vol. 33 (2020)"},{"issue":"4","key":"30_CR28","first-page":"727","volume":"14","author":"J Wang","year":"2020","unstructured":"Wang, J., Bai, H., Wu, J., Cheng, J.: Bayesian automatic model compression. IEEE JSTSP 14(4), 727\u2013736 (2020)","journal-title":"IEEE JSTSP"},{"key":"30_CR29","doi-asserted-by":"crossref","unstructured":"Wang, K., Liu, Z., Lin, Y., Lin, J., Han, S.: HAQ: hardware-aware automated quantization with mixed precision. In: CVPR, pp. 8612\u20138620 (2019)","DOI":"10.1109\/CVPR.2019.00881"},{"key":"30_CR30","first-page":"134","volume":"123","author":"L Wen","year":"2020","unstructured":"Wen, L., Zhang, X., Bai, H., Xu, Z.: Structured pruning of recurrent neural networks through neuron selection. NN 123, 134\u2013141 (2020)","journal-title":"NN"},{"key":"30_CR31","unstructured":"Wu, J., et al.: PocketFlow: an automated framework for compressing and accelerating deep neural networks. In: NeurIPS, CDNNRIA workshop (2018)"},{"key":"30_CR32","doi-asserted-by":"crossref","unstructured":"Xu, J., et al.: Nas-BERT: task-agnostic and adaptive-size BERT compression with neural architecture search. In: KDD (2021)","DOI":"10.1145\/3447548.3467262"},{"key":"30_CR33","doi-asserted-by":"crossref","unstructured":"Zhang, W., et al.: TernaryBERT: distillation-aware ultra-low bit BERT. In: EMNLP (2020)","DOI":"10.18653\/v1\/2020.emnlp-main.37"},{"key":"30_CR34","doi-asserted-by":"crossref","unstructured":"Zhao, S., Gupta, R., Song, Y., Zhou, D.: Extremely small BERT models from mixed-vocabulary training. In: EACL (2021)","DOI":"10.18653\/v1\/2021.eacl-main.238"},{"key":"30_CR35","doi-asserted-by":"crossref","unstructured":"Zoph, B., Vasudevan, V., Shlens, J., Le, Q.V.: Learning transferable architectures for scalable image recognition. In: CVPR (2018)","DOI":"10.1109\/CVPR.2018.00907"}],"container-title":["Lecture Notes in Computer Science","Neural Information Processing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-92185-9_30","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,3,13]],"date-time":"2024-03-13T18:44:32Z","timestamp":1710355472000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-030-92185-9_30"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021]]},"ISBN":["9783030921842","9783030921859"],"references-count":35,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-92185-9_30","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2021]]},"assertion":[{"value":"6 December 2021","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ICONIP","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Neural Information Processing","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Sanur, Bali","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Indonesia","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2021","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"8 December 2021","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"12 December 2021","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"28","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"iconip2021","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/iconip2021.apnns.org\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Single-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"EasyChair","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"1093","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"226","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"177","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"21% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"2.57","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"6","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Due to the COVID-19 pandemic the conference was held online.","order":10,"name":"additional_info_on_review_process","label":"Additional Info on Review Process","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}