{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,9,10]],"date-time":"2024-09-10T03:17:08Z","timestamp":1725938228836},"publisher-location":"Cham","reference-count":20,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783319737058"},{"type":"electronic","value":"9783319737065"}],"license":[{"start":{"date-parts":[[2018,1,1]],"date-time":"2018-01-01T00:00:00Z","timestamp":1514764800000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/creativecommons.org\/licenses\/by\/4.0"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2018]]},"DOI":"10.1007\/978-3-319-73706-5_6","type":"book-chapter","created":{"date-parts":[[2018,1,5]],"date-time":"2018-01-05T15:55:40Z","timestamp":1515167740000},"page":"62-71","update-policy":"http:\/\/dx.doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["Word and Sentence Segmentation in German: Overcoming Idiosyncrasies in the Use of Punctuation in Private Communication"],"prefix":"10.1007","author":[{"given":"Kyoko","family":"Sugisaki","sequence":"first","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2018,1,6]]},"reference":[{"key":"6_CR1","doi-asserted-by":"crossref","first-page":"157","DOI":"10.21248\/jlcl.28.2013.172","volume":"28","author":"T Bartz","year":"2013","unstructured":"Bartz, T., Bei\u00dfwenger, M., Storrer, A.: Optimierung des Stuttgart-T\u00fcbingen-Tagset f\u00fcr die linguistische Annotation von Korpora zur internetbasierten Kommunikation: Ph\u00e4nomene, Herausforderungen, Erweiterungsvorschl\u00e4ge. JLCL 28, 157\u2013198 (2013)","journal-title":"JLCL"},{"doi-asserted-by":"crossref","unstructured":"Bojanowski, P., Grave, E., Joulin, A., Mikolov, T.: Enriching word vectors with subword information. arXiv preprint arXiv:1607.04606 (2016)","key":"6_CR2","DOI":"10.1162\/tacl_a_00051"},{"issue":"4","key":"6_CR3","first-page":"467","volume":"18","author":"PF Brown","year":"1992","unstructured":"Brown, P.F., Desouza, P.V., Mercer, R.L., Pietra, V.J.D., Lai, J.C.: Class-based n-gram models of natural language. Comput. Linguist. 18(4), 467\u2013479 (1992)","journal-title":"Comput. Linguist."},{"unstructured":"Candito, M., Anguiano, E.H., Seddah, D.: A word clustering approach to domain adaptation: effective parsing of biomedical texts. In: Proceedings of the 12th International Conference on Parsing Technologies, pp. 37\u201342 (2011)","key":"6_CR4"},{"unstructured":"El-Kahlout, I.D., Yvon, F.: The pay-offs of preprocessing for German-English statistical machine translation. In: Proceedings of the 7th International Workshop on Spoken Language Translation (IWSLT), pp. 251\u2013258 (2010)","key":"6_CR5"},{"unstructured":"Giesbrecht, E., Evert, S.: Is part-of-speech tagging a solved task? An evaluation of POS taggers for the German web as corpus. In: Proceedings of the Fifth Web as Corpus Workshop (WAC5), pp. 27\u201335 (2009)","key":"6_CR6"},{"unstructured":"Gildea, D.: Corpus variation and parser performance. In: Proceedings of the EMNLP, pp. 167\u2013202 (2001)","key":"6_CR7"},{"key":"6_CR8","doi-asserted-by":"crossref","first-page":"61","DOI":"10.21248\/jlcl.28.2013.176","volume":"28","author":"B Jurish","year":"2013","unstructured":"Jurish, B., W\u00fcrzner, K.M.: Word and sentence tokenization with Hidden Markov Models. JLCL 28, 61\u201383 (2013)","journal-title":"JLCL"},{"issue":"4","key":"6_CR9","doi-asserted-by":"crossref","first-page":"485","DOI":"10.1162\/coli.2006.32.4.485","volume":"32","author":"T Kiss","year":"2006","unstructured":"Kiss, T., Strunk, J.: Unsupervised multilingual sentence boundary detection. Comput. Linguis. 32(4), 485\u2013525 (2006)","journal-title":"Comput. Linguis."},{"unstructured":"Koo, T., Carreras, X., Collins, M.: Simple semi-supervised dependency parsing. In: Proceedings of the ACL\/HLT, pp. 595\u2013603 (2008)","key":"6_CR10"},{"unstructured":"Lafferty, J., McCallum, A., Pereira, F.C.N.: Conditional random fields: probabilistic models for segmenting and labeling sequence data. In: Proceedings of the 18th International Conference on Machine Learning 2001, pp. 282\u2013289 (2001)","key":"6_CR11"},{"unstructured":"Mikolov, T., Chen, K., Corrado, G., Dean, J.: Efficient estimation of word representations in vector space. arXiv preprint arXiv:1301.3781 (2013)","key":"6_CR12"},{"unstructured":"Miller, S., Guinness, J., Zamanian, A.: Name tagging with word clusters and discriminative training. In: Proceedings of HLT-NAACL vol. 4, 337\u2013342 (2004)","key":"6_CR13"},{"key":"6_CR14","volume-title":"Tokenisation and Sentence Segmentation","author":"DD Palmer","year":"2000","unstructured":"Palmer, D.D.: Tokenisation and Sentence Segmentation. CRC Press, Boca Raton (2000)"},{"doi-asserted-by":"crossref","unstructured":"Proisl, T., Uhrig, P.: Somajo: state-of-the-art tokenization for German web and social media texts. In: Proceedings of the 10th Web as Corpus Workshop (WAC-X) and the EmpiriST Shared Task, pp. 57\u201362 (2016)","key":"6_CR15","DOI":"10.18653\/v1\/W16-2607"},{"doi-asserted-by":"crossref","unstructured":"Remus, S., Hintz, G., Benikova, D., Arnold, T., Eckle-Kohler, J., Meyer, C.M., Mieskes, M., Biemann, C.: EmpiriST: AIPHES robust tokenization and POS-tagging for different genres. In: Proceedings of the 10th Web as Corpus Workshop, pp. 106\u2013114 (2016)","key":"6_CR16","DOI":"10.18653\/v1\/W16-2613"},{"unstructured":"Schiller, A., Teufel, S., St\u00f6ckert, C., Thielen, C.: Guidelines f\u00fcr das tagging deutscher textcorpora mit STTS (Kleines und gro\u00dfes Tagset). Technical report. Universit\u00e4t Stuttgart, Universit\u00e4t T\u00fcbingen, Stuttgart, Germany (1999)","key":"6_CR17"},{"key":"6_CR18","doi-asserted-by":"publisher","first-page":"13","DOI":"10.1007\/978-94-017-2390-9_2","volume-title":"Natural Language Processing Using Very Large Corpora. Text, Speech and Language Technology","author":"H Schmid","year":"1999","unstructured":"Schmid, H.: Improvements in part-of-speech tagging with an application to German. In: Armstrong, S., Church, K., Isabelle, P., Manzi, S., Tzoukermann, E., Yarowsky, D. (eds.) Natural Language Processing Using Very Large Corpora. Text, Speech and Language Technology, pp. 13\u201326. Springer, Dordrecht (1999). https:\/\/doi.org\/10.1007\/978-94-017-2390-9_2"},{"issue":"4","key":"6_CR19","doi-asserted-by":"crossref","first-page":"267","DOI":"10.1561\/2200000013","volume":"4","author":"C Sutton","year":"2011","unstructured":"Sutton, C., McCallum, A.: An introduction to conditional random fields. Found. Trends Mach. Learn. 4(4), 267\u2013373 (2011)","journal-title":"Found. Trends Mach. Learn."},{"unstructured":"Telljohann, H., Hinrichs, E.W., Sandra, K., Heike, Z., Kathrin, B.: Stylebook for the T\u00fcbingen treebank of written German (T\u00fcBa-D\/Z). Technical report. Universit\u00e4t T\u00fcbingen (2012)","key":"6_CR20"}],"container-title":["Lecture Notes in Computer Science","Language Technologies for the Challenges of the Digital Age"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-319-73706-5_6","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,8,30]],"date-time":"2023-08-30T12:03:19Z","timestamp":1693396999000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-319-73706-5_6"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018]]},"ISBN":["9783319737058","9783319737065"],"references-count":20,"URL":"https:\/\/doi.org\/10.1007\/978-3-319-73706-5_6","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2018]]}}}