{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,27]],"date-time":"2025-03-27T00:01:55Z","timestamp":1743033715612,"version":"3.40.3"},"publisher-location":"Cham","reference-count":49,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783031703409"},{"type":"electronic","value":"9783031703416"}],"license":[{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024]]},"DOI":"10.1007\/978-3-031-70341-6_20","type":"book-chapter","created":{"date-parts":[[2024,8,30]],"date-time":"2024-08-30T20:26:39Z","timestamp":1725049599000},"page":"332-349","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Error Types in\u00a0Transformer-Based Paraphrasing Models: A Taxonomy, Paraphrase Annotation Model and\u00a0Dataset"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-2411-5761","authenticated-orcid":false,"given":"Auday","family":"Berro","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8805-1130","authenticated-orcid":false,"given":"Boualem","family":"Benatallah","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0009-0001-8206-9559","authenticated-orcid":false,"given":"Yacine","family":"Gaci","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4324-924X","authenticated-orcid":false,"given":"Khalid","family":"Benabdeslem","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,8,22]]},"reference":[{"key":"20_CR1","doi-asserted-by":"crossref","unstructured":"Alikaniotis, D., Raheja, V.: The unreasonable effectiveness of transformer language models in grammatical error correction. In: BEA@ACL (2019)","DOI":"10.18653\/v1\/W19-4412"},{"key":"20_CR2","doi-asserted-by":"crossref","unstructured":"Bannard, C., Callison, C.: Paraphrasing with bilingual parallel corpora. In: ACL\u201905, pp. 597\u2013604 (2005). https:\/\/aclanthology.org\/P05-1074","DOI":"10.3115\/1219840.1219914"},{"key":"20_CR3","doi-asserted-by":"crossref","unstructured":"Berro, A., Fard, M.A.Y.Z., et\u00a0al.: An extensible and reusable pipeline for automated utterance paraphrases. In: PVLDB (2021)","DOI":"10.14778\/3476311.3476358"},{"key":"20_CR4","unstructured":"Brown, T.B., et\u00a0al.: Language models are few-shot learners. In: NeurIPS (2020)"},{"key":"20_CR5","doi-asserted-by":"crossref","unstructured":"Bui, T.C., Le, V.D., To, H.T., Cha, S.K.: Generative pre-training for paraphrase generation by representing and predicting spans in exemplars. In: 2021 IEEE International Conference on Big Data and Smart Computing (BigComp), pp. 83\u201390. IEEE (2021)","DOI":"10.1109\/BigComp51126.2021.00025"},{"key":"20_CR6","doi-asserted-by":"crossref","unstructured":"Cegin, J., Simko, J., Brusilovsky, P.: ChatGPT to replace crowdsourcing of paraphrases for intent classification: Higher diversity and comparable model robustness (2023). arXiv preprint arXiv:2305.12947","DOI":"10.18653\/v1\/2023.emnlp-main.117"},{"key":"20_CR7","unstructured":"Celikyilmaz, A., Clark, E., Gao, J.: Evaluation of text generation: A survey (2020)"},{"key":"20_CR8","unstructured":"Chen, D., Dolan, W.B.: Collecting highly parallel data for paraphrase evaluation. ACL-HLT, pp. 190\u2013200 (2011). https:\/\/aclanthology.org\/P11-1020"},{"key":"20_CR9","doi-asserted-by":"crossref","unstructured":"Chklovski, T.: Collecting paraphrase corpora from volunteer contributors. In: Proceedings of the 3rd International Conference on Knowledge Capture, pp. 115\u2013120 (2005)","DOI":"10.1145\/1088622.1088644"},{"key":"20_CR10","unstructured":"Dopierre, T., Gravier, C., Logerais, W.: ProtAugment: unsupervised diverse short-texts paraphrasing for intent detection meta-learning. In: ACL-IJCNLP (2021). https:\/\/aclanthology.org\/2021.acl-long.191"},{"key":"20_CR11","doi-asserted-by":"crossref","unstructured":"Dou, Y., Forbes, M., et\u00a0al.: Is GPT-3 text indistinguishable from human text? Scarecrow: a framework for scrutinizing machine text. In: ACL, pp. 7250\u20137274 (2022)","DOI":"10.18653\/v1\/2022.acl-long.501"},{"key":"20_CR12","doi-asserted-by":"crossref","unstructured":"Ethayarajh, K.: How contextual are contextualized word representations? Comparing the geometry of BERT, ELMo, and GPT-2 embeddings. EMNLP-IJCNLP (2019)","DOI":"10.18653\/v1\/D19-1006"},{"key":"20_CR13","doi-asserted-by":"crossref","unstructured":"Freitag, M., Foster, G., et\u00a0al.: Experts, errors, and context: a large-scale study of human evaluation for machine translation. Trans. Assoc. Comput. Linguist. 9, 1460\u20131474 (2021). https:\/\/aclanthology.org\/2021.tacl-1.87","DOI":"10.1162\/tacl_a_00437"},{"key":"20_CR14","unstructured":"Fujita, A.: Automatic generation of syntactically well-formed and semantically appropriate paraphrases. Ph.D. thesis, Ph. D. thesis, Nara Institute of Science and Technology (2005). https:\/\/api.semanticscholar.org\/CorpusID:16348044"},{"key":"20_CR15","doi-asserted-by":"crossref","unstructured":"Fujita, A., Furihata, K., Inui, K., Matsumoto, Y., Takeuchi, K.: Paraphrasing of japanese light-verb constructions based on lexical conceptual structure (2004)","DOI":"10.3115\/1613186.1613188"},{"key":"20_CR16","doi-asserted-by":"crossref","unstructured":"Goyal, T., Durrett, G.: Neural syntactic preordering for controlled paraphrase generation, pp. 238\u2013252 (2020)","DOI":"10.18653\/v1\/2020.acl-main.22"},{"key":"20_CR17","unstructured":"Hegde, C., Patil, S.: Unsupervised paraphrase generation using pre-trained language models (2020)"},{"key":"20_CR18","doi-asserted-by":"crossref","unstructured":"Huang, S., Wu, Y., Wei, F., Luan, Z.: Dictionary-guided editing networks for paraphrase generation 33, 6546\u20136553 (2019)","DOI":"10.1609\/aaai.v33i01.33016546"},{"key":"20_CR19","unstructured":"Huang, T.H., Chen, Y.N., Bigham, J.P.: Real-time on-demand crowd-powered entity extraction (2017). https:\/\/arxiv.org\/abs\/1704.03627"},{"key":"20_CR20","doi-asserted-by":"crossref","unstructured":"Iyyer, M., Wieting, J., Gimpel, K., Zettlemoyer, L.: Adversarial example generation with syntactically controlled paraphrase networks, pp. 1875\u20131885 (2018)","DOI":"10.18653\/v1\/N18-1170"},{"key":"20_CR21","doi-asserted-by":"crossref","unstructured":"Jiang, Y., Kummerfeld, J.K., Lasecki, W.S.: Understanding task design trade-offs in crowdsourced paraphrase collection. In: ACL 55th Annual Meeting, pp. 103\u2013109. Vancouver, Canada (Jul 2017)","DOI":"10.18653\/v1\/P17-2017"},{"key":"20_CR22","doi-asserted-by":"crossref","unstructured":"Koponen, M.: Assessing machine translation quality with error analysis (2010)","DOI":"10.61200\/mikael.129675"},{"key":"20_CR23","doi-asserted-by":"crossref","unstructured":"Larson, S., Cheung, A., Mahendran, A., et\u00a0al.: Inconsistencies in crowdsourced slot-filling annotations: a typology and identification methods. In: COLING (2020). https:\/\/aclanthology.org\/2020.coling-main.442","DOI":"10.18653\/v1\/2020.coling-main.442"},{"key":"20_CR24","doi-asserted-by":"crossref","unstructured":"Li, Z., Jiang, X., Shang, L., Li, H.: Paraphrase generation with deep reinforcement learning. EMNLP (2018). https:\/\/aclanthology.org\/D18-1421","DOI":"10.18653\/v1\/D18-1421"},{"key":"20_CR25","doi-asserted-by":"crossref","unstructured":"Madnani, N., Dorr, B.J.: Generating phrasal and sentential paraphrases: A survey of data-driven methods. CL (2010). https:\/\/aclanthology.org\/J10-3003","DOI":"10.1162\/coli_a_00002"},{"key":"20_CR26","doi-asserted-by":"crossref","unstructured":"Mallinson, J., Sennrich, R., Lapata, M.: Paraphrasing revisited with neural machine translation. ACL European Chapter (2017). https:\/\/aclanthology.org\/E17-1083","DOI":"10.18653\/v1\/E17-1083"},{"key":"20_CR27","unstructured":"Metzler, D., Hovy, E., Zhang, C.: An empirical evaluation of data-driven paraphrase generation techniques. In: ACL 49th Annual Meeting, pp. 546\u2013551. Portland, Oregon, USA (2011)"},{"key":"20_CR28","unstructured":"Negri, M., Mehdad, Y., Marchetti, A., Giampiccolo, D., Bentivogli, L.: Chinese whispers: Cooperative paraphrase acquisition. In: LREC\u201912, pp. 2659\u20132665. Istanbul, Turkey (2012)"},{"key":"20_CR29","unstructured":"Nilforoshan, H., Wang, J., Wu, E.: PreCog: Improving crowdsourced data quality before acquisition (2017). arXiv preprint arXiv:1704.02384"},{"key":"20_CR30","unstructured":"Popovi\u0107, M.: On nature and causes of observed MT errors. MTSummitXVIII (2021)"},{"key":"20_CR31","unstructured":"Prakash, A., et al.: Neural paraphrase generation with stacked residual LSTM networks. In: COLING (2016)"},{"key":"20_CR32","unstructured":"Raffel, C., Shazeer, N., Roberts, A., Lee, K., et\u00a0al.: Exploring the limits of transfer learning with a unified text-to-text transformer. In: JMLR (2020)"},{"key":"20_CR33","unstructured":"Ram\u00edrez, J., Berro, A., Baez, M., Benatallah, B., Casati, F.: Crowdsourcing diverse paraphrases for training task-oriented bots (2021)"},{"key":"20_CR34","doi-asserted-by":"crossref","unstructured":"Reimers, N., Gurevych, I.: Sentence-BERT: Sentence embeddings using siamese BERT-networks. EMNLP (2019). https:\/\/aclanthology.org\/D19-1410","DOI":"10.18653\/v1\/D19-1410"},{"key":"20_CR35","doi-asserted-by":"crossref","unstructured":"Ribeiro, M.T., Wu, T., Guestrin, C., Singh, S.: Beyond accuracy: behavioral testing of NLP models with checklist. In: ACL, pp. 4902\u20134912 (2020). https:\/\/aclanthology.org\/2020.acl-main.442","DOI":"10.18653\/v1\/2020.acl-main.442"},{"key":"20_CR36","doi-asserted-by":"crossref","unstructured":"Su, Y., Awadallah, A.H., Khabsa, M., Pantel, P., Gamon, M., Encarnacion, M.: Building natural language interfaces to web APIs (2017)","DOI":"10.1145\/3132847.3133009"},{"key":"20_CR37","doi-asserted-by":"crossref","unstructured":"Sun, X., Liu, J., Lyu, Y., et\u00a0al.: Answer-focused and position-aware neural question generation. EMNLP (2018). https:\/\/aclanthology.org\/D18-1427","DOI":"10.18653\/v1\/D18-1427"},{"key":"20_CR38","doi-asserted-by":"crossref","unstructured":"Thompson, B., Post, M.: Automatic machine translation evaluation in many languages via zero-shot paraphrasing. EMNLP (2020)","DOI":"10.18653\/v1\/2020.emnlp-main.8"},{"key":"20_CR39","doi-asserted-by":"crossref","unstructured":"Thomson, C., Reiter, E.: A gold standard methodology for evaluating accuracy in data-to-text systems. In: INLG (2020). https:\/\/aclanthology.org\/2020.inlg-1.22","DOI":"10.18653\/v1\/2020.inlg-1.22"},{"key":"20_CR40","unstructured":"Van, E., Clinciu, M., et\u00a0al.: Underreporting of errors in NLG output, and what to do about it. INLG (2021). https:\/\/aclanthology.org\/2021.inlg-1.14"},{"key":"20_CR41","unstructured":"Vaswani, A., Shazeer, N., Parmar, N., Uszkoreit, J., et\u00a0al.: Attention is all you need. In: Advances in Neural Information Processing Systems (2017)"},{"key":"20_CR42","doi-asserted-by":"crossref","unstructured":"Witteveen, S., Andrews, M.: Paraphrasing with large language models (2019)","DOI":"10.18653\/v1\/D19-5623"},{"key":"20_CR43","doi-asserted-by":"crossref","unstructured":"Yaghoub-Zadeh-Fard, M., Benatallah, B., et\u00a0al.: Dynamic word recommendation to obtain diverse crowdsourced paraphrases of user utterances. In: IUI (2020)","DOI":"10.1145\/3377325.3377486"},{"key":"20_CR44","doi-asserted-by":"crossref","unstructured":"Yaghoub-Zadeh-Fard, M.A., Benatallah, B., et\u00a0al.: User utterance acquisition for training task-oriented bots: A review of challenges, techniques and opportunities (2020)","DOI":"10.1109\/MIC.2020.2978157"},{"key":"20_CR45","unstructured":"Yaghoubzadeh, M., Benatallah, B., et\u00a0al.: A study of incorrect paraphrases in crowdsourced user utterances. NAACL\u201919 (2019). https:\/\/aclanthology.org\/N19-1026"},{"key":"20_CR46","unstructured":"Yaghoubzadehfard, M.: Scalable and Quality-Aware Training Data Acquisition for Conversational Cognitive Services. Ph.D. thesis, UNSW Sydney (2021)"},{"key":"20_CR47","unstructured":"Zamanirad, S.: Superimposition of natural language conversations over software enabled services. Ph.D. thesis, University of New South Wales, Sydney, Australia (2019)"},{"key":"20_CR48","doi-asserted-by":"publisher","first-page":"80542","DOI":"10.1109\/ACCESS.2019.2923057","volume":"7","author":"D Zeng","year":"2019","unstructured":"Zeng, D., Zhang, H., Xiang, L., Wang, J., Ji, G.: User-oriented paraphrase generation with keywords controlled network. IEEE Access 7, 80542\u201380551 (2019)","journal-title":"IEEE Access"},{"key":"20_CR49","doi-asserted-by":"crossref","unstructured":"Zhou, J., Bhat, S.: Paraphrase generation: a survey of the state of the art. In: EMNLP (2021). https:\/\/aclanthology.org\/2021.emnlp-main.414","DOI":"10.18653\/v1\/2021.emnlp-main.414"}],"container-title":["Lecture Notes in Computer Science","Machine Learning and Knowledge Discovery in Databases. Research Track"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-70341-6_20","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,8,30]],"date-time":"2024-08-30T20:31:21Z","timestamp":1725049881000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-70341-6_20"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024]]},"ISBN":["9783031703409","9783031703416"],"references-count":49,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-70341-6_20","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2024]]},"assertion":[{"value":"22 August 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"The authors have no competing interests to declare that are relevant to the content of this article.","order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Disclosure of Interests"}},{"value":"ECML PKDD","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Joint European Conference on Machine Learning and Knowledge Discovery in Databases","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Vilnius","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Lithuania","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"8 September 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"12 September 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"24","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ecml2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/2024.ecmlpkdd.org\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}