{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,26]],"date-time":"2025-03-26T19:37:43Z","timestamp":1743017863821,"version":"3.40.3"},"publisher-location":"Cham","reference-count":50,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783031434112"},{"type":"electronic","value":"9783031434129"}],"license":[{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023]]},"DOI":"10.1007\/978-3-031-43412-9_2","type":"book-chapter","created":{"date-parts":[[2023,9,16]],"date-time":"2023-09-16T20:28:38Z","timestamp":1694896118000},"page":"20-37","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Real: A Representative Error-Driven Approach for\u00a0Active Learning"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0009-0006-6805-5894","authenticated-orcid":false,"given":"Cheng","family":"Chen","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0092-0793","authenticated-orcid":false,"given":"Yong","family":"Wang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9973-3305","authenticated-orcid":false,"given":"Lizi","family":"Liao","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2239-4472","authenticated-orcid":false,"given":"Yueguo","family":"Chen","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5757-9135","authenticated-orcid":false,"given":"Xiaoyong","family":"Du","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2023,9,17]]},"reference":[{"key":"2_CR1","doi-asserted-by":"crossref","unstructured":"Aharoni, R., Goldberg, Y.: Unsupervised domain clusters in pretrained language models. In: Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics, pp. 7747\u20137763 (2020)","DOI":"10.18653\/v1\/2020.acl-main.692"},{"key":"2_CR2","unstructured":"Arthur, D., Vassilvitskii, S.: K-means++: the advantages of careful seeding. In: Proceedings of the Eighteenth Annual ACM-SIAM Symposium on Discrete Algorithms, pp. 1027\u20131035 (2007)"},{"key":"2_CR3","unstructured":"Ash, J.T., Zhang, C., Krishnamurthy, A., Langford, J., Agarwal, A.: Deep batch active learning by diverse, uncertain gradient lower bounds. In: Proceedings of the International Conference on Learning Representations (2020)"},{"key":"2_CR4","doi-asserted-by":"crossref","unstructured":"Balcan, M.F., Broder, A., Zhang, T.: Margin based active learning. In: 20th Annual Conference on Learning Theory, pp. 35\u201350 (2007)","DOI":"10.1007\/978-3-540-72927-3_5"},{"key":"2_CR5","first-page":"255","volume":"5","author":"Y Baram","year":"2004","unstructured":"Baram, Y., Yaniv, R.E., Luz, K.: Online choice of active learning algorithms. J. Mach. Learn. Res. 5, 255\u2013291 (2004)","journal-title":"J. Mach. Learn. Res."},{"key":"2_CR6","unstructured":"Chen, T., Kornblith, S., Norouzi, M., Hinton, G.: A simple framework for contrastive learning of visual representations. In: International Conference on Machine Learning, pp. 1597\u20131607 (2020)"},{"key":"2_CR7","doi-asserted-by":"crossref","unstructured":"Choi, J., et al.: VaB-AL: incorporating class imbalance and difficulty with variational Bayes for active learning. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 6749\u20136758 (2021)","DOI":"10.1109\/CVPR46437.2021.00668"},{"key":"2_CR8","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3476037","volume":"5","author":"C Chung","year":"2021","unstructured":"Chung, C., et al.: Understanding human-side impact of sampling image batches in subjective attribute labeling. Proc. ACM Hum. Comput. Interact. 5, 1\u201326 (2021)","journal-title":"Proc. ACM Hum. Comput. Interact."},{"key":"2_CR9","first-page":"11933","volume":"34","author":"G Citovsky","year":"2021","unstructured":"Citovsky, G., et al.: Batch active learning at scale. Adv. Neural. Inf. Process. Syst. 34, 11933\u201311944 (2021)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"2_CR10","unstructured":"Coucke, A., et al.: SNIPS voice platform: an embedded spoken language understanding system for private-by-design voice interfaces. arXiv preprint arXiv:1805.10190 (2018)"},{"key":"2_CR11","doi-asserted-by":"crossref","unstructured":"Dernoncourt, F., Lee, J.Y.: PubMed 200k RCT: a dataset for sequential sentence classification in medical abstracts. In: Proceedings of the Eighth International Joint Conference on Natural Language Processing (Volume 2: Short Papers), pp. 308\u2013313 (2017)","DOI":"10.18653\/v1\/E17-2110"},{"key":"2_CR12","doi-asserted-by":"crossref","unstructured":"Desai, S., Durrett, G.: Calibration of pre-trained transformers. In: Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing, pp. 295\u2013302 (2020)","DOI":"10.18653\/v1\/2020.emnlp-main.21"},{"key":"2_CR13","unstructured":"Ducoffe, M., Precioso, F.: Adversarial active learning for deep networks: a margin based approach. arXiv preprint arXiv:1802.09841 (2018)"},{"key":"2_CR14","doi-asserted-by":"crossref","unstructured":"Fang, M., Li, Y., Cohn, T.: Learning how to active learn: a deep reinforcement learning approach. In: Proceedings of the 2017 Conference on Empirical Methods in Natural Language Processing, pp. 595\u2013605 (2017)","DOI":"10.18653\/v1\/D17-1063"},{"key":"2_CR15","unstructured":"Gal, Y., Ghahramani, Z.: Bayesian convolutional neural networks with Bernoulli approximate variational inference. arXiv preprint arXiv:1506.02158 (2015)"},{"key":"2_CR16","unstructured":"Gal, Y., Islam, R., Ghahramani, Z.: Deep Bayesian active learning with image data. In: Proceedings of the 34th International Conference on Machine Learning, vol. 70, pp. 1183\u20131192 (2017)"},{"key":"2_CR17","unstructured":"Gissin, D., Shalev-Shwartz, S.: Discriminative active learning. arXiv preprint arXiv:1907.06347 (2019)"},{"key":"2_CR18","doi-asserted-by":"crossref","unstructured":"He, K., Fan, H., Wu, Y., Xie, S., Girshick, R.: Momentum contrast for unsupervised visual representation learning. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 9729\u20139738 (2020)","DOI":"10.1109\/CVPR42600.2020.00975"},{"key":"2_CR19","doi-asserted-by":"crossref","unstructured":"Hsu, W.N., Lin, H.T.: Active learning by learning. In: Proceedings of the AAAI Conference on Artificial Intelligence (2015)","DOI":"10.1609\/aaai.v29i1.9597"},{"key":"2_CR20","doi-asserted-by":"crossref","unstructured":"Huang, S., Wang, T., Xiong, H., Huan, J., Dou, D.: Semi-supervised active learning with temporal output discrepancy. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 3447\u20133456 (2021)","DOI":"10.1109\/ICCV48922.2021.00343"},{"key":"2_CR21","doi-asserted-by":"crossref","unstructured":"Huijser, M., van Gemert, J.C.: Active decision boundary annotation with deep generative models. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 5286\u20135295 (2017)","DOI":"10.1109\/ICCV.2017.565"},{"issue":"3","key":"2_CR22","doi-asserted-by":"publisher","first-page":"535","DOI":"10.1109\/TBDATA.2019.2921572","volume":"7","author":"J Johnson","year":"2019","unstructured":"Johnson, J., Douze, M., J\u00e9gou, H.: Billion-scale similarity search with GPUs. IEEE Trans. Big Data 7(3), 535\u2013547 (2019)","journal-title":"IEEE Trans. Big Data"},{"key":"2_CR23","doi-asserted-by":"crossref","unstructured":"Kim, Y., Shin, B.: In defense of core-set: a density-aware core-set selection for active learning. In: Proceedings of the 28th ACM SIGKDD Conference on Knowledge Discovery and Data Mining, pp. 804\u2013812 (2022)","DOI":"10.1145\/3534678.3539476"},{"key":"2_CR24","unstructured":"Konyushkova, K., Sznitman, R., Fua, P.: Learning active learning from data. In: Advances in Neural Information Processing Systems, vol. 30 (2017)"},{"key":"2_CR25","doi-asserted-by":"publisher","first-page":"449","DOI":"10.1007\/s10994-015-5504-1","volume":"100","author":"G Krempl","year":"2015","unstructured":"Krempl, G., Kottke, D., Lemaire, V.: Optimised probabilistic active learning (OPAL) for fast, non-myopic, cost-sensitive active classification. Mach. Learn. 100, 449\u2013476 (2015)","journal-title":"Mach. Learn."},{"key":"2_CR26","doi-asserted-by":"crossref","unstructured":"Lai, S., Xu, L., Liu, K., Zhao, J.: Recurrent convolutional neural networks for text classification. In: Proceedings of the AAAI Conference on Artificial Intelligence (2015)","DOI":"10.1609\/aaai.v29i1.9513"},{"key":"2_CR27","doi-asserted-by":"crossref","unstructured":"Lewis, D.D.: A sequential algorithm for training text classifiers. In: ACM SIGIR Forum, vol. 29, pp. 13\u201319 (1995)","DOI":"10.1145\/219587.219592"},{"key":"2_CR28","doi-asserted-by":"crossref","unstructured":"Lewis, D.D., Catlett, J.: Heterogeneous uncertainty sampling for supervised learning. In: Machine Learning Proceedings, pp. 148\u2013156 (1994)","DOI":"10.1016\/B978-1-55860-335-6.50026-X"},{"issue":"8","key":"2_CR29","doi-asserted-by":"publisher","first-page":"1251","DOI":"10.1109\/TPAMI.2006.156","volume":"28","author":"M Li","year":"2006","unstructured":"Li, M., Sethi, I.K.: Confidence-based active learning. IEEE Trans. Pattern Anal. Mach. Intell. 28(8), 1251\u20131261 (2006)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"2_CR30","doi-asserted-by":"crossref","unstructured":"Liu, M., Buntine, W., Haffari, G.: Learning how to actively learn: a deep imitation learning approach. In: Proceedings of the 56th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), pp. 1874\u20131883 (2018)","DOI":"10.18653\/v1\/P18-1174"},{"key":"2_CR31","unstructured":"Liu, Y., et al.: RoBERTa: a robustly optimized BERT pretraining approach. arXiv preprint arXiv:1907.11692 (2019)"},{"key":"2_CR32","doi-asserted-by":"crossref","unstructured":"Luo, J., Wang, J., Cheng, N., Xiao, J.: Loss prediction: end-to-end active learning approach for speech recognition. In: 2021 International Joint Conference on Neural Networks, pp. 1\u20137 (2021)","DOI":"10.1109\/IJCNN52387.2021.9533839"},{"key":"2_CR33","doi-asserted-by":"crossref","unstructured":"Margatina, K., Vernikos, G., Barrault, L., Aletras, N.: Active learning by acquiring contrastive examples. In: Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing, pp. 650\u2013663 (2021)","DOI":"10.18653\/v1\/2021.emnlp-main.51"},{"key":"2_CR34","doi-asserted-by":"crossref","unstructured":"Muller, M., et al.: Designing ground truth and the social life of labels. In: Proceedings of the 2021 CHI Conference on Human Factors in Computing Systems, pp. 1\u201316 (2021)","DOI":"10.1145\/3411764.3445402"},{"key":"2_CR35","unstructured":"Perrigo, B.: Inside Facebook\u2019s African sweatshop. Time https:\/\/time.com\/6147458\/facebook-africa-content-moderation-employee-treatment\/. Accessed 28 Mar 2023"},{"key":"2_CR36","doi-asserted-by":"crossref","unstructured":"Roth, D., Small, K.: Margin-based active learning for structured output spaces. In: Proceedings of the 17th European Conference on Machine Learning, pp. 413\u2013424 (2006)","DOI":"10.1007\/11871842_40"},{"key":"2_CR37","doi-asserted-by":"crossref","unstructured":"Ru, D., et al.: Active sentence learning by adversarial uncertainty sampling in discrete space. In: Findings of the Association for Computational Linguistics, EMNLP 2020, pp. 4908\u20134917 (2020)","DOI":"10.18653\/v1\/2020.findings-emnlp.441"},{"key":"2_CR38","unstructured":"Sener, O., Savarese, S.: Active learning for convolutional neural networks: a core-set approach. In: International Conference on Learning Representations (2018)"},{"key":"2_CR39","doi-asserted-by":"crossref","unstructured":"Sia, S., Dalmia, A., Mielke, S.J.: Tired of topic models? Clusters of pretrained word embeddings make for fast and good topics too! In: Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing, pp. 1728\u20131736 (2020)","DOI":"10.18653\/v1\/2020.emnlp-main.135"},{"key":"2_CR40","unstructured":"Socher, R., et al.: Recursive deep models for semantic compositionality over a sentiment treebank. In: Proceedings of the 2013 Conference on Empirical Methods in Natural Language Processing, pp. 1631\u20131642 (2013)"},{"key":"2_CR41","unstructured":"Wan, C., Jin, F., Qiao, Z., Zhang, W., Yuan, Y.: Unsupervised active learning with loss prediction. Neural Computing and Applications, pp. 1\u20139 (2021)"},{"key":"2_CR42","unstructured":"Wolf, T., et al.: Transformers: state-of-the-art natural language processing. In: Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing: System Demonstrations, pp. 38\u201345 (2020)"},{"key":"2_CR43","doi-asserted-by":"crossref","unstructured":"Xu, J., Wang, P., Tian, G., Xu, B., Zhao, J., Wang, F., Hao, H.: Short text clustering via convolutional neural networks. In: Proceedings of the 1st Workshop on Vector Space Modeling for Natural Language Processing, pp. 62\u201369 (2015)","DOI":"10.3115\/v1\/W15-1509"},{"key":"2_CR44","doi-asserted-by":"crossref","unstructured":"Yoo, D., Kweon, I.S.: Learning loss for active learning. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 93\u2013102 (2019)","DOI":"10.1109\/CVPR.2019.00018"},{"key":"2_CR45","doi-asserted-by":"crossref","unstructured":"Yu, Y., Kong, L., Zhang, J., Zhang, R., Zhang, C.: AcTune: uncertainty-based active self-training for active fine-tuning of pretrained language models. In: Proceedings of the 2022 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, pp. 1422\u20131436 (2022)","DOI":"10.18653\/v1\/2022.naacl-main.102"},{"key":"2_CR46","doi-asserted-by":"crossref","unstructured":"Yuan, M., Lin, H.T., Boyd-Graber, J.: Cold-start active learning through self-supervised language modeling. In: Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing, pp. 7935\u20137948 (2020)","DOI":"10.18653\/v1\/2020.emnlp-main.637"},{"key":"2_CR47","doi-asserted-by":"crossref","unstructured":"Yuan, T., et al.: Multiple instance active learning for object detection. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 5330\u20135339 (2021)","DOI":"10.1109\/CVPR46437.2021.00529"},{"key":"2_CR48","unstructured":"Zhang, X., Zhao, J., LeCun, Y.: Character-level convolutional networks for text classification. In: Advances in Neural Information Processing Systems, vol. 28 (2015)"},{"key":"2_CR49","doi-asserted-by":"crossref","unstructured":"Zhang, Z., Fang, M., Chen, L., Namazi Rad, M.R.: Is neural topic modelling better than clustering? An empirical study on clustering with contextual embeddings for topics. In: Proceedings of the 2022 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, pp. 3886\u20133893 (2022)","DOI":"10.18653\/v1\/2022.naacl-main.285"},{"key":"2_CR50","unstructured":"Zhu, J.J., Bento, J.: Generative adversarial active learning. arXiv preprint arXiv:1702.07956 (2017)"}],"container-title":["Lecture Notes in Computer Science","Machine Learning and Knowledge Discovery in Databases: Research Track"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-43412-9_2","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,9,16]],"date-time":"2023-09-16T20:29:44Z","timestamp":1694896184000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-43412-9_2"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023]]},"ISBN":["9783031434112","9783031434129"],"references-count":50,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-43412-9_2","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2023]]},"assertion":[{"value":"17 September 2023","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"All the datasets are widely-used benchmark text classification datasets and are publicly-available online, which do not have any privacy issues. Also, our approach can benefit data labeling workers and bring welfare to them. Data labeling is very costly and labour-intensive. For example, labeling toxic content is reported to be a \u201cmental torture\u201d\u00a0[]. Our approach aims to make active learning more label-efficient and can reduce the workload of data labeling workers, which is beneficial to the mental health of data labeling workers.","order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethical Statement"}},{"value":"ECML PKDD","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Joint European Conference on Machine Learning and Knowledge Discovery in Databases","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Turin","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2023","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18 September 2023","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"22 September 2023","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"23","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ecml2023","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/2023.ecmlpkdd.org\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"CMT","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"829","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"196","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"24% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3.63","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"4.5","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Applied Data Science Track: 239 submissions, 58 accepted papers; Demo Track: 31 submissions, 16 accepted papers.","order":10,"name":"additional_info_on_review_process","label":"Additional Info on Review Process","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}